logo

drewdevault.com

[mirror] blog and personal website of Drew DeVault git clone https://hacktivis.me/git/mirror/drewdevault.com.git

dynlib.html (9726B)


  1. <!doctype html>
  2. <html lang="en">
  3. <meta charset="utf-8" />
  4. <title>Dynamic linking</title>
  5. <style>
  6. body { max-width: 720px; margin: 0 auto }
  7. img { display: block; margin: 0 auto }
  8. small { display: block; text-align: center }
  9. th, td { padding-right: 4rem; text-align: left }
  10. </style>
  11. <h1>Dynamic linking</h1>
  12. <h2>Do your installed programs share dynamic libraries?</h2>
  13. <p>
  14. Findings: <strong>not really</strong>
  15. <p>
  16. Over half of your libraries are used by fewer than 0.1% of your executables.
  17. <img src="https://l.sr.ht/PSEG.svg" alt="A plot showing that the number of times a dynamic library is used shows exponential decay" />
  18. <small>Number of times each dynamic library is required by a program</small>
  19. <p>
  20. <strong>libs.awk</strong>
  21. <pre>
  22. /\t.*\.so.*/ {
  23. n=split($1, p, "/")
  24. split(p[n], l, ".")
  25. lib=l[1]
  26. if (libs[lib] == "") {
  27. libs[lib] = 0
  28. }
  29. libs[lib] += 1
  30. }
  31. END {
  32. for (lib in libs) {
  33. print libs[lib] "\t" lib
  34. }
  35. }
  36. </pre>
  37. <p>
  38. <strong>Usage</strong>
  39. <pre>
  40. $ find /usr/bin -type f -executable -print \
  41. | xargs ldd 2&gt;/dev/null \
  42. | awk -f libs.awk \
  43. | sort -rn &gt; results.txt
  44. $ awk '{ print NR "\t" $1 }' &lt; results.txt &gt; nresults.txt
  45. $ gnuplot
  46. gnuplot&gt; plot 'nresults.txt'
  47. </pre>
  48. <p>
  49. <a href="/dynlib.txt">my results</a>
  50. <p>
  51. <pre>
  52. $ find /usr/bin -type f -executable -print | wc -l
  53. 5688
  54. $ head -n20 &lt; results.txt
  55. 4496 libc
  56. 4484 linux-vdso
  57. 4483 ld-linux-x86-64
  58. 2654 libm
  59. 2301 libdl
  60. 2216 libpthread
  61. 1419 libgcc_s
  62. 1301 libz
  63. 1144 libstdc++
  64. 805 liblzma
  65. 785 librt
  66. 771 libXdmcp
  67. 771 libxcb
  68. 771 libXau
  69. 755 libX11
  70. 703 libpcre
  71. 667 libglib-2
  72. 658 libffi
  73. 578 libresolv
  74. 559 libXext
  75. </pre>
  76. <h2>Is loading dynamically linked programs faster?</h2>
  77. <p>
  78. Findings: <strong>definitely not</strong>
  79. <table>
  80. <thead>
  81. <tr>
  82. <th>Linkage</th>
  83. <th>Avg. startup time</th>
  84. </tr>
  85. </thead>
  86. <tbody>
  87. <tr>
  88. <td>Dynamic</td>
  89. <td style="text-align: right">137263 ns</td>
  90. </tr>
  91. <tr>
  92. <td>Static</td>
  93. <td style="text-align: right">64048 ns</td>
  94. </tr>
  95. </tbody>
  96. </table>
  97. <p>
  98. <strong>ex.c</strong>
  99. <pre>
  100. #include &lt;stdio.h&gt;
  101. #include &lt;stdlib.h&gt;
  102. #include &lt;time.h&gt;
  103. #include &lt;unistd.h&gt;
  104. int main(int argc, char *argv[]) {
  105. struct timespec ts;
  106. clock_gettime(CLOCK_MONOTONIC, &amp;ts);
  107. fprintf(stdout, "%ld\t", ts.tv_nsec);
  108. fflush(stdout);
  109. if (argc == 1) {
  110. char *args[] = { "", "", NULL };
  111. execvp(argv[0], args);
  112. } else {
  113. fprintf(stdout, "\n");
  114. }
  115. return 0;
  116. }
  117. </pre>
  118. <p>
  119. <strong>test.sh</strong>
  120. <pre>
  121. #!/bin/sh
  122. i=0
  123. while [ $i -lt 1000 ]
  124. do
  125. ./ex
  126. i=$((i+1))
  127. done
  128. </pre>
  129. <p>
  130. <strong>My results</strong>
  131. <pre>
  132. $ musl-gcc -o ex ex.c
  133. $ ./test.sh | awk 'BEGIN { sum = 0 } { sum += $2-$1 } END { print sum / NR }'
  134. 137263
  135. $ musl-gcc -static -o ex ex.c
  136. $ ./test.sh | awk 'BEGIN { sum = 0 } { sum += $2-$1 } END { print sum / NR }'
  137. 64048
  138. </pre>
  139. <h2>Wouldn't statically linked executables be huge?</h2>
  140. <p>
  141. Findings: <strong>not really</strong>
  142. <p>
  143. On average, dynamically linked executables use only 4.6% of the symbols on
  144. offer from their dependencies. A good linker will remove unused symbols.
  145. <img src="https://l.sr.ht/WzUp.svg" alt="A box plot showing most results are &lt;5%, with outliers evenly distributed up to 100%" />
  146. <small>% of symbols requested by dynamically linked programs from the libraries that they depend on</small>
  147. <p>
  148. <strong>nsyms.go</strong>
  149. <pre>
  150. package main
  151. import (
  152. "bufio"
  153. "fmt"
  154. "os"
  155. "os/exec"
  156. "path/filepath"
  157. "strings"
  158. )
  159. func main() {
  160. ldd := exec.Command("ldd", os.Args[1])
  161. rc, err := ldd.StdoutPipe()
  162. if err != nil {
  163. panic(err)
  164. }
  165. ldd.Start()
  166. var libpaths []string
  167. scan := bufio.NewScanner(rc)
  168. for scan.Scan() {
  169. line := scan.Text()[1:] /* \t */
  170. sp := strings.Split(line, " ")
  171. var lib string
  172. if strings.Contains(line, "=&gt;") {
  173. lib = sp[2]
  174. } else {
  175. lib = sp[0]
  176. }
  177. if !filepath.IsAbs(lib) {
  178. lib = "/usr/lib/" + lib
  179. }
  180. libpaths = append(libpaths, lib)
  181. }
  182. ldd.Wait()
  183. rc.Close()
  184. syms := make(map[string]interface{})
  185. for _, path := range libpaths {
  186. objdump := exec.Command("objdump", "-T", path)
  187. rc, err := objdump.StdoutPipe()
  188. if err != nil {
  189. panic(err)
  190. }
  191. objdump.Start()
  192. scan := bufio.NewScanner(rc)
  193. for i := 0; scan.Scan(); i++ {
  194. if i &lt; 4 {
  195. continue
  196. }
  197. line := scan.Text()
  198. sp := strings.Split(line, " ")
  199. if len(sp) &lt; 5 {
  200. continue
  201. }
  202. sym := sp[len(sp)-1]
  203. syms[sym] = nil
  204. }
  205. objdump.Wait()
  206. rc.Close()
  207. }
  208. objdump := exec.Command("objdump", "-R", os.Args[1])
  209. rc, err = objdump.StdoutPipe()
  210. if err != nil {
  211. panic(err)
  212. }
  213. objdump.Start()
  214. used := make(map[string]interface{})
  215. scan = bufio.NewScanner(rc)
  216. for i := 0; scan.Scan(); i++ {
  217. if i &lt; 5 {
  218. continue
  219. }
  220. sp := strings.Split(scan.Text(), " ")
  221. if len(sp) &lt; 3 {
  222. continue
  223. }
  224. sym := sp[len(sp)-1]
  225. used[sym] = nil
  226. }
  227. objdump.Wait()
  228. rc.Close()
  229. if len(syms) != 0 &amp;&amp; len(used) != 0 &amp;&amp; len(used) &lt;= len(syms) {
  230. fmt.Printf("%50s\t%d\t%d\t%f\n", os.Args[1], len(syms), len(used),
  231. float64(len(used)) / float64(len(syms)))
  232. }
  233. }
  234. </pre>
  235. <p>
  236. <strong>Usage</strong>
  237. <pre>
  238. $ find /usr/bin -type f -executable -print | xargs -n1 ./nsyms &gt; results.txt
  239. $ awk '{ n += $4 } END { print n / NR }' &lt; results.txt
  240. </pre>
  241. <p>
  242. <a href="/nsyms.txt">my results</a>
  243. <h2>Will security vulnerabilities in libraries that have been statically linked
  244. cause large or unmanageable updates?</h2>
  245. <p>
  246. Findings: <strong>not really</strong>
  247. <p>
  248. Not including libc, the only libraries which had "critical" or "high" severity
  249. vulnerabilities in 2019 which affected over 100 binaries on my system were dbus,
  250. gnutls, cairo, libssh2, and curl. 265 binaries were affected by the rest.
  251. <p>
  252. The total download cost to upgrade all binaries on my system which were affected
  253. by CVEs in 2019 is 3.8 GiB. This is reduced to 1.0 GiB if you eliminate glibc.
  254. <p>
  255. It is also unknown if any of these vulnerabilities would have been introduced
  256. <em>after</em> the last build date for a given statically linked binary; if so,
  257. that binary would not need to be updated. Many vulnerabilities are also limited
  258. to a specific code path or use-case, and binaries which do not invoke that code
  259. path in their dependencies will not be affected. A process to ascertain this
  260. information in the wake of a vulnerability could be automated.
  261. <p>
  262. <a href="https://lists.archlinux.org/pipermail/arch-security/">arch-security</a>
  263. <p>
  264. <strong>extractcves.py</strong>
  265. <pre>
  266. import email.utils
  267. import mailbox
  268. import re
  269. import shlex
  270. import time
  271. pacman_re = re.compile(r'pacman -Syu .*')
  272. severity_re = re.compile(r'Severity: (.*)')
  273. mbox = mailbox.mbox("arch-security.mbox")
  274. for m in mbox.items():
  275. m = m[1]
  276. date = m["Date"]
  277. for part in m.walk():
  278. if part.is_multipart():
  279. continue
  280. content_type = part.get_content_type()
  281. [charset] = part.get_charsets("utf-8")
  282. if content_type == 'text/plain':
  283. body = part.get_payload(decode=True).decode(charset)
  284. break
  285. pkgs = pacman_re.findall(body)
  286. severity = severity_re.findall(body)
  287. date = email.utils.parsedate(date)
  288. if len(pkgs) == 0 or date is None:
  289. continue
  290. if date[0] &lt;= 2018 or date[0] &gt; 2019:
  291. continue
  292. severity = severity[0]
  293. args = shlex.split(pkgs[0])
  294. pkg = args[2].split("&gt;=")[0]
  295. print(pkg, severity)
  296. </pre>
  297. <pre>
  298. $ python3 extractcves.py | grep Critical &gt; cves.txt
  299. $ xargs pacman -Ql &lt; cves.txt | grep \\.so | awk '{print $1}' | sort -u&gt;affected.txt
  300. # Manually remove packages like Firefox, Thunderbird, etc; write remainder.txt
  301. $ xargs pacman -Ql &lt; remainder.txt | grep '/usr/lib/.*.so$' | awk '{ print $2 }' &gt; libs.txt
  302. $ ldd /usr/bin/* &gt;ldd.txt
  303. $ ./scope.sh &lt;libs.txt | sort -nr &gt;sobjects.txt
  304. </pre>
  305. <p>
  306. <a href="/sobjects.txt">sobjects.txt</a> is a sorted list of shared objects and
  307. the number of executables that link to them. To find the total size of affected
  308. binaries, I ran the following commands:
  309. <pre style="overflow-x: scroll">
  310. # With libc
  311. $ egrep -la 'libc.so|libm.so|libdl.so|libpthread.so|librt.so|libresolv.so|libdbus-1.so|libgnutls.so|libcairo.so|libutil.so|libssh2.so|libcurl.so|libcairo-gobject.so|libcrypt.so|libspice-server.so|libarchive.so|libSDL2-2.0.so|libmvec.so|libmagic.so|libtextstyle.so|libgettextlib-0.20.2.so|libgettextsrc-0.20.2.so|libMagickWand-7.Q16HDRI.so|libMagickCore-7.Q16HDRI.so|libbfd-2.34.0.so|libpolkit-gobject-1.so|libwebkit2gtk-4.0.so|libjavascriptcoregtk-4.0.so|libpolkit-agent-1.so|libgs.so|libctf.so|libSDL.so|libopcodes-2.34.0.so|libQt5WebEngine.so|libQt5WebEngineCore.so|libctf-nobfd.so|libcairo-script-interpreter.so' /usr/bin/* | xargs wc -c
  312. # Without libc
  313. $ egrep -la 'libdbus-1.so|libgnutls.so|libcairo.so|libssh2.so|libcurl.so|libcairo-gobject.so|libcrypt.so|libspice-server.so|libarchive.so|libSDL2-2.0.so|libmvec.so|libmagic.so|libtextstyle.so|libgettextlib-0.20.2.so|libgettextsrc-0.20.2.so|libMagickWand-7.Q16HDRI.so|libMagickCore-7.Q16HDRI.so|libbfd-2.34.0.so|libpolkit-gobject-1.so|libwebkit2gtk-4.0.so|libjavascriptcoregtk-4.0.so|libpolkit-agent-1.so|libgs.so|libctf.so|libSDL.so|libopcodes-2.34.0.so|libQt5WebEngine.so|libQt5WebEngineCore.so|libctf-nobfd.so|libcairo-script-interpreter.so' /usr/bin/* | xargs wc -c
  314. </pre>
  315. <h2>Doesn't static linking prevent <abbr title="address space layout randomization, a security technique">ASLR</abbr> from working?</h2>
  316. <p>
  317. <strong>No</strong>.
  318. <p>
  319. We've had ASLR for statically linked binaries for some time now. It's called <a href="https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81498">static PIE</a>.
  320. <h2>Test environment</h2>
  321. <ul>
  322. <li>Arch Linux, up-to-date as of 2020-06-25</li>
  323. <li>2188 packages installed</li>
  324. <li>gcc 10.1.0</li>
  325. </ul>