dynlib.html (9726B)
- <!doctype html>
- <html lang="en">
- <meta charset="utf-8" />
- <title>Dynamic linking</title>
- <style>
- body { max-width: 720px; margin: 0 auto }
- img { display: block; margin: 0 auto }
- small { display: block; text-align: center }
- th, td { padding-right: 4rem; text-align: left }
- </style>
- <h1>Dynamic linking</h1>
- <h2>Do your installed programs share dynamic libraries?</h2>
- <p>
- Findings: <strong>not really</strong>
- <p>
- Over half of your libraries are used by fewer than 0.1% of your executables.
- <img src="https://l.sr.ht/PSEG.svg" alt="A plot showing that the number of times a dynamic library is used shows exponential decay" />
- <small>Number of times each dynamic library is required by a program</small>
- <p>
- <strong>libs.awk</strong>
- <pre>
# libs.awk: tally how many times each shared library appears in ldd output.
#
# Only lines that contain a tab followed somewhere by ".so" are considered.
# The library name is the last "/"-separated component of the first field,
# cut at its first ".".
/\t.*\.so.*/ {
	depth = split($1, component, "/")
	split(component[depth], piece, ".")
	# An unset array element counts as 0, so no explicit initialisation is needed.
	seen[piece[1]]++
}

# Emit "count<TAB>name" for every library; the order is whatever awk's
# array traversal yields, so callers are expected to sort.
END {
	for (name in seen)
		print seen[name] "\t" name
}
- </pre>
- <p>
- <strong>Usage</strong>
- <pre>
- $ find /usr/bin -type f -executable -print \
- | xargs ldd 2>/dev/null \
- | awk -f libs.awk \
- | sort -rn > results.txt
- $ awk '{ print NR "\t" $1 }' < results.txt > nresults.txt
- $ gnuplot
- gnuplot> plot 'nresults.txt'
- </pre>
- <p>
- <a href="/dynlib.txt">my results</a>
- <p>
- <pre>
- $ find /usr/bin -type f -executable -print | wc -l
- 5688
- $ head -n20 < results.txt
- 4496 libc
- 4484 linux-vdso
- 4483 ld-linux-x86-64
- 2654 libm
- 2301 libdl
- 2216 libpthread
- 1419 libgcc_s
- 1301 libz
- 1144 libstdc++
- 805 liblzma
- 785 librt
- 771 libXdmcp
- 771 libxcb
- 771 libXau
- 755 libX11
- 703 libpcre
- 667 libglib-2
- 658 libffi
- 578 libresolv
- 559 libXext
- </pre>
- <h2>Is loading dynamically linked programs faster?</h2>
- <p>
- Findings: <strong>definitely not</strong>
- <table>
- <thead>
- <tr>
- <th>Linkage</th>
- <th>Avg. startup time</th>
- </tr>
- </thead>
- <tbody>
- <tr>
- <td>Dynamic</td>
- <td style="text-align: right">137263 ns</td>
- </tr>
- <tr>
- <td>Static</td>
- <td style="text-align: right">64048 ns</td>
- </tr>
- </tbody>
- </table>
- <p>
- <strong>ex.c</strong>
- <pre>
- #include <stdio.h>
- #include <stdlib.h>
- #include <time.h>
- #include <unistd.h>
- int main(int argc, char *argv[]) {
- struct timespec ts;
- clock_gettime(CLOCK_MONOTONIC, &ts);
- fprintf(stdout, "%ld\t", ts.tv_nsec);
- fflush(stdout);
- if (argc == 1) {
- char *args[] = { "", "", NULL };
- execvp(argv[0], args);
- } else {
- fprintf(stdout, "\n");
- }
- return 0;
- }
- </pre>
- <p>
- <strong>test.sh</strong>
- <pre>
#!/bin/sh
# Run the ./ex startup probe 1000 times back to back; each run writes one
# line of timestamps to stdout for later averaging.
runs=0
until [ "$runs" -ge 1000 ]
do
	./ex
	runs=$((runs + 1))
done
- </pre>
- <p>
- <strong>My results</strong>
- <pre>
- $ musl-gcc -o ex ex.c
- $ ./test.sh | awk 'BEGIN { sum = 0 } { sum += $2-$1 } END { print sum / NR }'
- 137263
- $ musl-gcc -static -o ex ex.c
- $ ./test.sh | awk 'BEGIN { sum = 0 } { sum += $2-$1 } END { print sum / NR }'
- 64048
- </pre>
- <h2>Wouldn't statically linked executables be huge?</h2>
- <p>
- Findings: <strong>not really</strong>
- <p>
- On average, dynamically linked executables use only 4.6% of the symbols on
- offer from their dependencies. A good linker will remove unused symbols.
- <img src="https://l.sr.ht/WzUp.svg" alt="A box plot showing most results are <5%, with outliers evenly distributed up to 100%" />
- <small>% of symbols requested by dynamically linked programs from the libraries that they depend on</small>
- <p>
- <strong>nsyms.go</strong>
- <pre>
- package main
- import (
- "bufio"
- "fmt"
- "os"
- "os/exec"
- "path/filepath"
- "strings"
- )
// fatal reports err for the named step on stderr and exits with status 1.
func fatal(what string, err error) {
	fmt.Fprintf(os.Stderr, "nsyms: %s: %v\n", what, err)
	os.Exit(1)
}

// forEachLine starts cmd, passes every line of its standard output to fn
// together with the zero-based line index, and waits for cmd to finish.
//
// Only failures to launch the command or to read its output are returned.
// A non-zero exit status is deliberately ignored: this tool is run over every
// file in a directory, and ldd/objdump are expected to reject many of them.
func forEachLine(cmd *exec.Cmd, fn func(n int, line string)) error {
	out, err := cmd.StdoutPipe()
	if err != nil {
		return err
	}
	if err := cmd.Start(); err != nil {
		return err
	}
	scan := bufio.NewScanner(out)
	for n := 0; scan.Scan(); n++ {
		fn(n, scan.Text())
	}
	err = scan.Err()
	if err != nil {
		// Reading stopped before EOF; close our end so the child cannot
		// block forever writing into a pipe nobody drains.
		out.Close()
	}
	// Wait closes the pipe once the command has exited, so no further Close
	// is needed on the success path.
	_ = cmd.Wait()
	return err
}

// lddLibPath extracts the library path from one line of ldd output.
//
// Lines of the form "name => path (addr)" yield path; lines without "=>"
// yield their first field. Names that are not absolute are assumed to live in
// /usr/lib. Blank lines and unresolved entries ("=> not found", or "=>"
// followed directly by the load address) report ok == false.
func lddLibPath(line string) (lib string, ok bool) {
	fields := strings.Fields(line)
	if len(fields) == 0 {
		return "", false
	}
	lib = fields[0]
	if strings.Contains(line, "=>") {
		if len(fields) < 3 {
			return "", false
		}
		lib = fields[2]
		if lib == "not" || strings.HasPrefix(lib, "(") {
			return "", false
		}
	}
	if !filepath.IsAbs(lib) {
		lib = filepath.Join("/usr/lib", lib)
	}
	return lib, true
}

// collectLastFields runs cmd, ignores its first skip output lines, and adds
// the text after the last space of every remaining line to into. Lines that
// split into fewer than minFields space-separated pieces are ignored.
func collectLastFields(cmd *exec.Cmd, skip, minFields int, into map[string]struct{}) error {
	return forEachLine(cmd, func(n int, line string) {
		if n < skip {
			return
		}
		sp := strings.Split(line, " ")
		if len(sp) < minFields {
			return
		}
		into[sp[len(sp)-1]] = struct{}{}
	})
}

// main compares the number of distinct symbols offered by the libraries an
// executable links against (objdump -T on each library listed by ldd) with
// the number of distinct symbols named in the executable's dynamic
// relocations (objdump -R).
//
// It prints "path<TAB>offered<TAB>used<TAB>ratio" when both counts are
// non-zero and used does not exceed offered, and prints nothing otherwise.
func main() {
	if len(os.Args) < 2 {
		fmt.Fprintln(os.Stderr, "usage: nsyms <executable>")
		os.Exit(2)
	}
	target := os.Args[1]

	var libpaths []string
	err := forEachLine(exec.Command("ldd", target), func(_ int, line string) {
		if lib, ok := lddLibPath(line); ok {
			libpaths = append(libpaths, lib)
		}
	})
	if err != nil {
		fatal("ldd "+target, err)
	}

	// Symbols offered by all dependencies; the first 4 lines of each dump are
	// skipped as header.
	syms := make(map[string]struct{})
	for _, path := range libpaths {
		if err := collectLastFields(exec.Command("objdump", "-T", path), 4, 5, syms); err != nil {
			fatal("objdump -T "+path, err)
		}
	}

	// Symbols the executable itself asks for; the first 5 lines are skipped
	// as header.
	used := make(map[string]struct{})
	if err := collectLastFields(exec.Command("objdump", "-R", target), 5, 3, used); err != nil {
		fatal("objdump -R "+target, err)
	}

	if len(syms) != 0 && len(used) != 0 && len(used) <= len(syms) {
		fmt.Printf("%50s\t%d\t%d\t%f\n", target, len(syms), len(used),
			float64(len(used))/float64(len(syms)))
	}
}
- </pre>
- <p>
- <strong>Usage</strong>
- <pre>
- $ find /usr/bin -type f -executable -print | xargs -n1 ./nsyms > results.txt
- $ awk '{ n += $4 } END { print n / NR }' < results.txt
- </pre>
- <p>
- <a href="/nsyms.txt">my results</a>
- <h2>Will security vulnerabilities in libraries that have been statically linked
- cause large or unmanageable updates?</h2>
- <p>
- Findings: <strong>not really</strong>
- <p>
- Not including libc, the only libraries which had "critical" or "high" severity
- vulnerabilities in 2019 which affected over 100 binaries on my system were dbus,
- gnutls, cairo, libssh2, and curl. 265 binaries were affected by the rest.
- <p>
- The total download cost to upgrade all binaries on my system which were affected
- by CVEs in 2019 is 3.8 GiB. This is reduced to 1.0 GiB if you eliminate glibc.
- <p>
- It is also unknown if any of these vulnerabilities would have been introduced
- <em>after</em> the last build date for a given statically linked binary; if so
- that binary would not need to be updated. Many vulnerabilities are also limited
- to a specific code path or use-case, and binaries which do not invoke that code
- path in their dependencies will not be affected. A process to ascertain this
- information in the wake of a vulnerability could be automated.
- <p>
- <a href="https://lists.archlinux.org/pipermail/arch-security/">arch-security</a>
- <p>
- <strong>extractcves.py</strong>
- <pre>
- import email.utils
- import mailbox
- import re
- import shlex
- import time
# Matches the upgrade command quoted in an advisory, e.g.
#   pacman -Syu "curl>=7.64.0-9"
pacman_re = re.compile(r'pacman -Syu .*')
# Captures the text following "Severity: " on the same line.
severity_re = re.compile(r'Severity: (.*)')


def plain_text_body(message):
    """Return the decoded text of the first text/plain part of ``message``.

    Returns None when the message has no text/plain part, so callers never
    end up working on the body of a previously processed message.
    """
    for part in message.walk():
        if part.is_multipart():
            continue
        if part.get_content_type() != 'text/plain':
            continue
        payload = part.get_payload(decode=True)
        if payload is None:
            continue
        [charset] = part.get_charsets("utf-8")
        try:
            return payload.decode(charset)
        except (LookupError, UnicodeDecodeError):
            # Unknown or mislabelled charset: keep going rather than abort the run.
            return payload.decode("utf-8", errors="replace")
    return None


def extract_advisory(message, year=2019):
    """Return ``(package, severity)`` for an advisory dated in ``year``.

    Returns None when the message is from another year, has no parseable
    Date header, has no plain-text body, or lacks either the
    ``pacman -Syu`` line or the ``Severity:`` line.
    """
    body = plain_text_body(message)
    if body is None:
        return None
    date = email.utils.parsedate(message["Date"])
    if date is None or date[0] != year:
        return None
    pkgs = pacman_re.findall(body)
    severity = severity_re.findall(body)
    if not pkgs or not severity:
        return None
    try:
        args = shlex.split(pkgs[0])
    except ValueError:
        # Unbalanced quotes in the quoted command.
        return None
    if len(args) < 3:
        return None
    # args[2] looks like "name>=version"; keep only the package name.
    return args[2].split(">=")[0], severity[0].strip()


mbox = mailbox.mbox("arch-security.mbox")
try:
    for m in mbox:
        advisory = extract_advisory(m)
        if advisory is not None:
            print(*advisory)
finally:
    mbox.close()
- </pre>
- <pre>
- $ python3 extractcves.py | grep Critical > cves.txt
- $ xargs pacman -Ql < cves.txt | grep \\.so | awk '{print $1}' | sort -u>affected.txt
- # Manually remove packages like Firefox, Thunderbird, etc; write remainder.txt
- $ xargs pacman -Ql < remainder.txt | grep '/usr/lib/.*.so$' | awk '{ print $2 }' > libs.txt
- $ ldd /usr/bin/* >ldd.txt
- $ ./scope.sh <libs.txt | sort -nr >sobjects.txt
- </pre>
- <p>
- <a href="/sobjects.txt">sobjects.txt</a> is a sorted list of shared objects and
- the number of executables that link to them. To find the total size of affected
- binaries, I ran the following command:
- <pre style="overflow-x: scroll">
- # With libc
- $ egrep -la 'libc.so|libm.so|libdl.so|libpthread.so|librt.so|libresolv.so|libdbus-1.so|libgnutls.so|libcairo.so|libutil.so|libssh2.so|libcurl.so|libcairo-gobject.so|libcrypt.so|libspice-server.so|libarchive.so|libSDL2-2.0.so|libmvec.so|libmagic.so|libtextstyle.so|libgettextlib-0.20.2.so|libgettextsrc-0.20.2.so|libMagickWand-7.Q16HDRI.so|libMagickCore-7.Q16HDRI.so|libbfd-2.34.0.so|libpolkit-gobject-1.so|libwebkit2gtk-4.0.so|libjavascriptcoregtk-4.0.so|libpolkit-agent-1.so|libgs.so|libctf.so|libSDL.so|libopcodes-2.34.0.so|libQt5WebEngine.so|libQt5WebEngineCore.so|libctf-nobfd.so|libcairo-script-interpreter.so' /usr/bin/* | xargs wc -c
- # Without libc
- $ egrep -la 'libdbus-1.so|libgnutls.so|libcairo.so|libssh2.so|libcurl.so|libcairo-gobject.so|libcrypt.so|libspice-server.so|libarchive.so|libSDL2-2.0.so|libmvec.so|libmagic.so|libtextstyle.so|libgettextlib-0.20.2.so|libgettextsrc-0.20.2.so|libMagickWand-7.Q16HDRI.so|libMagickCore-7.Q16HDRI.so|libbfd-2.34.0.so|libpolkit-gobject-1.so|libwebkit2gtk-4.0.so|libjavascriptcoregtk-4.0.so|libpolkit-agent-1.so|libgs.so|libctf.so|libSDL.so|libopcodes-2.34.0.so|libQt5WebEngine.so|libQt5WebEngineCore.so|libctf-nobfd.so|libcairo-script-interpreter.so' /usr/bin/* | xargs wc -c
- </pre>
- <h2>Doesn't static linking prevent <abbr title="address space layout randomization, a security technique">ASLR</abbr> from working?</h2>
- <p>
- <strong>No</strong>.
- <p>
- We've had ASLR for statically linked binaries for some time now. It's called <a href="https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81498">static PIE</a>.
- <h2>Test environment</h2>
- <ul>
- <li>Arch Linux, up-to-date as of 2020-06-25</li>
- <li>2188 packages installed</li>
- <li>gcc 10.1.0</li>
- </ul>