
oasis

Own branch of Oasis Linux (upstream: <https://git.sr.ht/~mcf/oasis/>)
git clone https://anongit.hacktivis.me/git/oasis.git
commit: 2519c56dbfea9c271daa3c7f53dc9d8c80b4a310
parent 806ccd3dee22f25655da3efcd2718048654b7a8e
Author: Michael Forney <mforney@mforney.org>
Date:   Sun,  5 Jul 2020 14:54:22 -0700

musl: Update to latest git for integrated mallocng

Diffstat:

M .gitmodules          |    1 -
M pkg/musl/aarch64.lua |   34 +++++++++++++++++++++++++++++++++-
M pkg/musl/base.lua    |   17 +++++++++--------
D pkg/musl/patch/0001-Use-mallocng-22b183076de777d39e40129fe529221d2688a6e.patch | 1991 -------------------------------------------------------------------------------
M pkg/musl/ver         |    2 +-
M pkg/musl/x86_64.lua  |   35 +++++++++++++++++++----------------
6 files changed, 62 insertions(+), 2018 deletions(-)

diff --git a/.gitmodules b/.gitmodules @@ -191,7 +191,6 @@ [submodule "pkg/musl/src"] path = pkg/musl/src url = git://git.musl-libc.org/musl - ignore = all [submodule "pkg/ncompress/src"] path = pkg/ncompress/src url = https://github.com/vapier/ncompress.git diff --git a/pkg/musl/aarch64.lua b/pkg/musl/aarch64.lua @@ -1,13 +1,45 @@ return { srcs={ - -- <cd src && printf "\t\t'%s',\n" src/*/aarch64/*.[sS] + -- <cd src && printf "\t\t'%s',\n" src/*/aarch64/*.[csS] 'src/fenv/aarch64/fenv.s', 'src/ldso/aarch64/dlsym.s', 'src/ldso/aarch64/tlsdesc.s', + 'src/math/aarch64/ceil.c', + 'src/math/aarch64/ceilf.c', + 'src/math/aarch64/fabs.c', + 'src/math/aarch64/fabsf.c', + 'src/math/aarch64/floor.c', + 'src/math/aarch64/floorf.c', + 'src/math/aarch64/fma.c', + 'src/math/aarch64/fmaf.c', + 'src/math/aarch64/fmax.c', + 'src/math/aarch64/fmaxf.c', + 'src/math/aarch64/fmin.c', + 'src/math/aarch64/fminf.c', + 'src/math/aarch64/llrint.c', + 'src/math/aarch64/llrintf.c', + 'src/math/aarch64/llround.c', + 'src/math/aarch64/llroundf.c', + 'src/math/aarch64/lrint.c', + 'src/math/aarch64/lrintf.c', + 'src/math/aarch64/lround.c', + 'src/math/aarch64/lroundf.c', + 'src/math/aarch64/nearbyint.c', + 'src/math/aarch64/nearbyintf.c', + 'src/math/aarch64/rint.c', + 'src/math/aarch64/rintf.c', + 'src/math/aarch64/round.c', + 'src/math/aarch64/roundf.c', + 'src/math/aarch64/sqrt.c', + 'src/math/aarch64/sqrtf.c', + 'src/math/aarch64/trunc.c', + 'src/math/aarch64/truncf.c', 'src/setjmp/aarch64/longjmp.s', 'src/setjmp/aarch64/setjmp.s', 'src/signal/aarch64/restore.s', 'src/signal/aarch64/sigsetjmp.s', + 'src/string/aarch64/memcpy.S', + 'src/string/aarch64/memset.S', 'src/thread/aarch64/__set_thread_area.s', 'src/thread/aarch64/__unmapself.s', 'src/thread/aarch64/clone.s', diff --git a/pkg/musl/base.lua b/pkg/musl/base.lua @@ -1,6 +1,6 @@ return { srcs={ - -- <cd src && printf "\t\t'%s',\n" src/*/*.c + -- <cd src && printf "\t\t'%s',\n" src/*/*.c src/malloc/mallocng/*.c 'src/aio/aio.c', 'src/aio/aio_suspend.c', 'src/aio/lio_listio.c', @@ -302,14 +302,10 @@ return { 'src/locale/uselocale.c', 'src/locale/wcscoll.c', 'src/locale/wcsxfrm.c', - 'src/malloc/aligned_alloc.c', - 'src/malloc/donate.c', - 'src/malloc/free.c', - 'src/malloc/malloc.c', - 'src/malloc/malloc_usable_size.c', + 'src/malloc/calloc.c', + 'src/malloc/lite_malloc.c', 'src/malloc/memalign.c', 'src/malloc/posix_memalign.c', - 'src/malloc/realloc.c', 'src/malloc/replaced.c', 'src/math/__cos.c', 'src/math/__cosdf.c', @@ -863,7 +859,6 @@ return { 'src/stdio/__stdio_seek.c', 'src/stdio/__stdio_write.c', 'src/stdio/__stdout_write.c', - 'src/stdio/__string_read.c', 'src/stdio/__toread.c', 'src/stdio/__towrite.c', 'src/stdio/__uflow.c', @@ -1327,6 +1322,12 @@ return { 'src/unistd/usleep.c', 'src/unistd/write.c', 'src/unistd/writev.c', + 'src/malloc/mallocng/aligned_alloc.c', + 'src/malloc/mallocng/donate.c', + 'src/malloc/mallocng/free.c', + 'src/malloc/mallocng/malloc.c', + 'src/malloc/mallocng/malloc_usable_size.c', + 'src/malloc/mallocng/realloc.c', }, hdrs={ -- <cd src/include && printf "\t\t'%s',\n" *.h */*.h diff --git a/pkg/musl/patch/0001-Use-mallocng-22b183076de777d39e40129fe529221d2688a6e.patch b/pkg/musl/patch/0001-Use-mallocng-22b183076de777d39e40129fe529221d2688a6e.patch @@ -1,1991 +0,0 @@ -From cd9e6fead630cabfe6a232eebd8e81c90bc385d1 Mon Sep 17 00:00:00 2001 -From: Michael Forney <mforney@mforney.org> -Date: Sun, 10 May 2020 15:48:10 -0700 -Subject: [PATCH] Use mallocng 22b183076de777d39e40129fe529221d2688a6ed - -Based on instructions in 
https://www.openwall.com/lists/musl/2020/06/09/3 -adapted to 1.2.0. ---- - src/internal/malloc_impl.h | 36 -- - src/malloc/DESIGN | 22 - - src/malloc/README.md | 31 ++ - src/malloc/aligned_alloc.c | 49 +- - src/malloc/donate.c | 39 ++ - src/malloc/expand_heap.c | 71 --- - src/malloc/free.c | 143 ++++++ - src/malloc/glue.h | 97 ++++ - src/malloc/lite_malloc.c | 59 --- - src/malloc/malloc.c | 826 +++++++++++++------------------- - src/malloc/malloc_usable_size.c | 13 +- - src/malloc/memalign.c | 52 +- - src/malloc/meta.h | 284 +++++++++++ - src/malloc/posix_memalign.c | 3 +- - src/malloc/realloc.c | 51 ++ - src/malloc/replaced.c | 3 + - 16 files changed, 1041 insertions(+), 738 deletions(-) - delete mode 100644 src/malloc/DESIGN - create mode 100644 src/malloc/README.md - create mode 100644 src/malloc/donate.c - delete mode 100644 src/malloc/expand_heap.c - create mode 100644 src/malloc/free.c - create mode 100644 src/malloc/glue.h - delete mode 100644 src/malloc/lite_malloc.c - create mode 100644 src/malloc/meta.h - create mode 100644 src/malloc/realloc.c - create mode 100644 src/malloc/replaced.c - -diff --git a/src/internal/malloc_impl.h b/src/internal/malloc_impl.h -index 59785a7f..a9ffa478 100644 ---- a/src/internal/malloc_impl.h -+++ b/src/internal/malloc_impl.h -@@ -3,44 +3,8 @@ - - #include <sys/mman.h> - --hidden void *__expand_heap(size_t *); -- - hidden void __malloc_donate(char *, char *); - --hidden void *__memalign(size_t, size_t); -- --struct chunk { -- size_t psize, csize; -- struct chunk *next, *prev; --}; -- --struct bin { -- volatile int lock[2]; -- struct chunk *head; -- struct chunk *tail; --}; -- --#define SIZE_ALIGN (4*sizeof(size_t)) --#define SIZE_MASK (-SIZE_ALIGN) --#define OVERHEAD (2*sizeof(size_t)) --#define MMAP_THRESHOLD (0x1c00*SIZE_ALIGN) --#define DONTCARE 16 --#define RECLAIM 163840 -- --#define CHUNK_SIZE(c) ((c)->csize & -2) --#define CHUNK_PSIZE(c) ((c)->psize & -2) --#define PREV_CHUNK(c) ((struct chunk *)((char *)(c) - CHUNK_PSIZE(c))) --#define NEXT_CHUNK(c) ((struct chunk *)((char *)(c) + CHUNK_SIZE(c))) --#define MEM_TO_CHUNK(p) (struct chunk *)((char *)(p) - OVERHEAD) --#define CHUNK_TO_MEM(c) (void *)((char *)(c) + OVERHEAD) --#define BIN_TO_CHUNK(i) (MEM_TO_CHUNK(&mal.bins[i].head)) -- --#define C_INUSE ((size_t)1) -- --#define IS_MMAPPED(c) !((c)->csize & (C_INUSE)) -- --hidden void __bin_chunk(struct chunk *); -- - hidden extern int __malloc_replaced; - - #endif -diff --git a/src/malloc/DESIGN b/src/malloc/DESIGN -deleted file mode 100644 -index 58b0523f..00000000 ---- a/src/malloc/DESIGN -+++ /dev/null -@@ -1,22 +0,0 @@ -- -- --In principle, this memory allocator is roughly equivalent to Doug --Lea's dlmalloc with fine-grained locking. -- -- -- --malloc: -- --Uses a freelist binned by chunk size, with a bitmap to optimize --searching for the smallest non-empty bin which can satisfy an --allocation. If no free chunks are available, it creates a new chunk of --the requested size and attempts to merge it with any existing free --chunk immediately below the newly created chunk. -- --Whether the chunk was obtained from a bin or newly created, it's --likely to be larger than the requested allocation. malloc always --finishes its work by passing the new chunk to realloc, which will --split it into two chunks and free the tail portion. 
-- -- -- -diff --git a/src/malloc/README.md b/src/malloc/README.md -new file mode 100644 -index 00000000..2d8be226 ---- /dev/null -+++ b/src/malloc/README.md -@@ -0,0 +1,31 @@ -+# Next-gen malloc for musl libc - Working draft -+ -+This is a draft of the upcoming next-gen `malloc` implementation for -+musl libc. It is essentially feature-complete, but is not yet -+integrated with musl and lacks some additional hardening and -+optimizations that the final version is expected to include. -+ -+The included `Makefile` builds static and shared library files that -+programs can link with, or use with `LD_PRELOAD` (for the shared -+version). -+ -+## High-level design -+ -+This allocator organizes memory dynamically into small slab-style -+groups of up to 32 identical-size allocation units with status -+controlled by bitmasks, and utilizes a mix of in-band and out-of-band -+metadata to isolate sensitive state from regions easily accessible -+through out-of-bounds writes, while avoiding the need for expensive -+global data structures. -+ -+The closest analogue among other well-known allocators is probably -+OpenBSD's omalloc. -+ -+Base allocation granularity and alignment is 16 bytes. Large -+allocations are made individually by mmap, but they also have -+out-of-band metadata records treating them as special one-member -+groups, allowing `realloc` and `free` to validate them. Smaller -+allocations come from groups of one of 48 size classes, spaced -+linearly up to 128 (the first 8 classes), then roughly geometrically -+with four steps per doubling, but adjusted to divide powers of two -+with minimal remainder (waste). -diff --git a/src/malloc/aligned_alloc.c b/src/malloc/aligned_alloc.c -index b6143f30..437a7fe7 100644 ---- a/src/malloc/aligned_alloc.c -+++ b/src/malloc/aligned_alloc.c -@@ -1,7 +1,52 @@ - #include <stdlib.h> --#include "malloc_impl.h" -+#include <errno.h> -+#include "meta.h" - - void *aligned_alloc(size_t align, size_t len) - { -- return __memalign(align, len); -+ if ((align & -align) != align) { -+ errno = EINVAL; -+ return 0; -+ } -+ -+ if (len > SIZE_MAX - align || align >= (1ULL<<31)*UNIT) { -+ errno = ENOMEM; -+ return 0; -+ } -+ -+ if (align <= UNIT) align = UNIT; -+ -+ unsigned char *p = malloc(len + align - UNIT); -+ struct meta *g = get_meta(p); -+ int idx = get_slot_index(p); -+ size_t stride = get_stride(g); -+ unsigned char *start = g->mem->storage + stride*idx; -+ unsigned char *end = g->mem->storage + stride*(idx+1) - 4; -+ size_t adj = -(uintptr_t)p & (align-1); -+ -+ if (!adj) { -+ set_size(p, end, len); -+ return p; -+ } -+ p += adj; -+ uint32_t offset = (size_t)(p-g->mem->storage)/UNIT; -+ if (offset <= 0xffff) { -+ *(uint16_t *)(p-2) = offset; -+ p[-4] = 0; -+ } else { -+ // use a 32-bit offset if 16-bit doesn't fit. for this, -+ // 16-bit field must be zero, [-4] byte nonzero. -+ *(uint16_t *)(p-2) = 0; -+ *(uint32_t *)(p-8) = offset; -+ p[-4] = 1; -+ } -+ p[-3] = idx; -+ set_size(p, end, len); -+ // store offset to aligned enframing. this facilitates cycling -+ // offset and also iteration of heap for debugging/measurement. -+ // for extreme overalignment it won't fit but these are classless -+ // allocations anyway. 
-+ *(uint16_t *)(start - 2) = (size_t)(p-start)/UNIT; -+ start[-3] = 7<<5; -+ return p; - } -diff --git a/src/malloc/donate.c b/src/malloc/donate.c -new file mode 100644 -index 00000000..41d850f3 ---- /dev/null -+++ b/src/malloc/donate.c -@@ -0,0 +1,39 @@ -+#include <stdlib.h> -+#include <stdint.h> -+#include <limits.h> -+#include <string.h> -+#include <sys/mman.h> -+#include <errno.h> -+ -+#include "meta.h" -+ -+static void donate(unsigned char *base, size_t len) -+{ -+ uintptr_t a = (uintptr_t)base; -+ uintptr_t b = a + len; -+ a += -a & (UNIT-1); -+ b -= b & (UNIT-1); -+ memset(base, 0, len); -+ for (int sc=47; sc>0 && b>a; sc-=4) { -+ if (b-a < (size_classes[sc]+1)*UNIT) continue; -+ struct meta *m = alloc_meta(); -+ m->avail_mask = 0; -+ m->freed_mask = 1; -+ m->mem = (void *)a; -+ m->mem->meta = m; -+ m->last_idx = 0; -+ m->freeable = 0; -+ m->sizeclass = sc; -+ m->maplen = 0; -+ *((unsigned char *)m->mem+UNIT-4) = 0; -+ *((unsigned char *)m->mem+UNIT-3) = 255; -+ m->mem->storage[size_classes[sc]*UNIT-4] = 0; -+ queue(&ctx.active[sc], m); -+ a += (size_classes[sc]+1)*UNIT; -+ } -+} -+ -+void __malloc_donate(char *start, char *end) -+{ -+ donate((void *)start, end-start); -+} -diff --git a/src/malloc/expand_heap.c b/src/malloc/expand_heap.c -deleted file mode 100644 -index e6a3d7a0..00000000 ---- a/src/malloc/expand_heap.c -+++ /dev/null -@@ -1,71 +0,0 @@ --#include <limits.h> --#include <stdint.h> --#include <errno.h> --#include <sys/mman.h> --#include "libc.h" --#include "syscall.h" --#include "malloc_impl.h" -- --/* This function returns true if the interval [old,new] -- * intersects the 'len'-sized interval below &libc.auxv -- * (interpreted as the main-thread stack) or below &b -- * (the current stack). It is used to defend against -- * buggy brk implementations that can cross the stack. */ -- --static int traverses_stack_p(uintptr_t old, uintptr_t new) --{ -- const uintptr_t len = 8<<20; -- uintptr_t a, b; -- -- b = (uintptr_t)libc.auxv; -- a = b > len ? b-len : 0; -- if (new>a && old<b) return 1; -- -- b = (uintptr_t)&b; -- a = b > len ? b-len : 0; -- if (new>a && old<b) return 1; -- -- return 0; --} -- --/* Expand the heap in-place if brk can be used, or otherwise via mmap, -- * using an exponential lower bound on growth by mmap to make -- * fragmentation asymptotically irrelevant. The size argument is both -- * an input and an output, since the caller needs to know the size -- * allocated, which will be larger than requested due to page alignment -- * and mmap minimum size rules. The caller is responsible for locking -- * to prevent concurrent calls. 
*/ -- --void *__expand_heap(size_t *pn) --{ -- static uintptr_t brk; -- static unsigned mmap_step; -- size_t n = *pn; -- -- if (n > SIZE_MAX/2 - PAGE_SIZE) { -- errno = ENOMEM; -- return 0; -- } -- n += -n & PAGE_SIZE-1; -- -- if (!brk) { -- brk = __syscall(SYS_brk, 0); -- brk += -brk & PAGE_SIZE-1; -- } -- -- if (n < SIZE_MAX-brk && !traverses_stack_p(brk, brk+n) -- && __syscall(SYS_brk, brk+n)==brk+n) { -- *pn = n; -- brk += n; -- return (void *)(brk-n); -- } -- -- size_t min = (size_t)PAGE_SIZE << mmap_step/2; -- if (n < min) n = min; -- void *area = __mmap(0, n, PROT_READ|PROT_WRITE, -- MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); -- if (area == MAP_FAILED) return 0; -- *pn = n; -- mmap_step++; -- return area; --} -diff --git a/src/malloc/free.c b/src/malloc/free.c -new file mode 100644 -index 00000000..d4b1e5ac ---- /dev/null -+++ b/src/malloc/free.c -@@ -0,0 +1,143 @@ -+#define _BSD_SOURCE -+#include <stdlib.h> -+#include <sys/mman.h> -+ -+#include "meta.h" -+ -+struct mapinfo { -+ void *base; -+ size_t len; -+}; -+ -+static struct mapinfo nontrivial_free(struct meta *, int); -+ -+static struct mapinfo free_group(struct meta *g) -+{ -+ struct mapinfo mi = { 0 }; -+ int sc = g->sizeclass; -+ if (sc < 48) { -+ ctx.usage_by_class[sc] -= g->last_idx+1; -+ } -+ if (g->maplen) { -+ step_seq(); -+ record_seq(sc); -+ mi.base = g->mem; -+ mi.len = g->maplen*4096UL; -+ } else { -+ void *p = g->mem; -+ struct meta *m = get_meta(p); -+ int idx = get_slot_index(p); -+ g->mem->meta = 0; -+ // not checking size/reserved here; it's intentionally invalid -+ mi = nontrivial_free(m, idx); -+ } -+ free_meta(g); -+ return mi; -+} -+ -+static int okay_to_free(struct meta *g) -+{ -+ int sc = g->sizeclass; -+ -+ if (!g->freeable) return 0; -+ -+ // always free individual mmaps not suitable for reuse -+ if (sc >= 48 || get_stride(g) < UNIT*size_classes[sc]) -+ return 1; -+ -+ // always free groups allocated inside another group's slot -+ // since recreating them should not be expensive and they -+ // might be blocking freeing of a much larger group. -+ if (!g->maplen) return 1; -+ -+ // if there is another non-full group, free this one to -+ // consolidate future allocations, reduce fragmentation. -+ if (g->next != g) return 1; -+ -+ // free any group in a size class that's not bouncing -+ if (!is_bouncing(sc)) return 1; -+ -+ size_t cnt = g->last_idx+1; -+ size_t usage = ctx.usage_by_class[sc]; -+ -+ // if usage is high enough that a larger count should be -+ // used, free the low-count group so a new one will be made. -+ if (9*cnt <= usage && cnt < 20) -+ return 1; -+ -+ // otherwise, keep the last group in a bouncing class. -+ return 0; -+} -+ -+static struct mapinfo nontrivial_free(struct meta *g, int i) -+{ -+ uint32_t self = 1u<<i; -+ int sc = g->sizeclass; -+ uint32_t mask = g->freed_mask | g->avail_mask; -+ -+ if (mask+self == (2u<<g->last_idx)-1 && okay_to_free(g)) { -+ // any multi-slot group is necessarily on an active list -+ // here, but single-slot groups might or might not be. -+ if (g->next) { -+ assert(sc < 48); -+ int activate_new = (ctx.active[sc]==g); -+ dequeue(&ctx.active[sc], g); -+ if (activate_new && ctx.active[sc]) -+ activate_group(ctx.active[sc]); -+ } -+ return free_group(g); -+ } else if (!mask) { -+ assert(sc < 48); -+ // might still be active if there were no allocations -+ // after last available slot was taken. 
-+ if (ctx.active[sc] != g) { -+ queue(&ctx.active[sc], g); -+ } -+ } -+ a_or(&g->freed_mask, self); -+ return (struct mapinfo){ 0 }; -+} -+ -+void free(void *p) -+{ -+ if (!p) return; -+ -+ struct meta *g = get_meta(p); -+ int idx = get_slot_index(p); -+ size_t stride = get_stride(g); -+ unsigned char *start = g->mem->storage + stride*idx; -+ unsigned char *end = start + stride - 4; -+ get_nominal_size(p, end); -+ uint32_t self = 1u<<idx, all = (2u<<g->last_idx)-1; -+ ((unsigned char *)p)[-3] = 255; -+ // invalidate offset to group header, and cycle offset of -+ // used region within slot if current offset is zero. -+ *(uint16_t *)((char *)p-2) = 0; -+ -+ // release any whole pages contained in the slot to be freed -+ // unless it's a single-slot group that will be unmapped. -+ if (((uintptr_t)(start-1) ^ (uintptr_t)end) >= 2*PGSZ && g->last_idx) { -+ unsigned char *base = start + (-(uintptr_t)start & (PGSZ-1)); -+ size_t len = (end-base) & -PGSZ; -+ if (len) madvise(base, len, MADV_FREE); -+ } -+ -+ // atomic free without locking if this is neither first or last slot -+ for (;;) { -+ uint32_t freed = g->freed_mask; -+ uint32_t avail = g->avail_mask; -+ uint32_t mask = freed | avail; -+ assert(!(mask&self)); -+ if (!freed || mask+self==all) break; -+ if (!MT) -+ g->freed_mask = freed+self; -+ else if (a_cas(&g->freed_mask, freed, freed+self)!=freed) -+ continue; -+ return; -+ } -+ -+ wrlock(); -+ struct mapinfo mi = nontrivial_free(g, idx); -+ unlock(); -+ if (mi.len) munmap(mi.base, mi.len); -+} -diff --git a/src/malloc/glue.h b/src/malloc/glue.h -new file mode 100644 -index 00000000..4e0a1231 ---- /dev/null -+++ b/src/malloc/glue.h -@@ -0,0 +1,97 @@ -+#ifndef MALLOC_GLUE_H -+#define MALLOC_GLUE_H -+ -+#include <stdint.h> -+#include <sys/mman.h> -+#include <pthread.h> -+#include <unistd.h> -+#include <elf.h> -+#include <string.h> -+#include "atomic.h" -+#include "syscall.h" -+#include "libc.h" -+#include "lock.h" -+ -+// use macros to appropriately namespace these. 
-+#define size_classes __malloc_size_classes -+#define ctx __malloc_context -+#define alloc_meta __malloc_alloc_meta -+ -+#if USE_REAL_ASSERT -+#include <assert.h> -+#else -+#undef assert -+#define assert(x) do { if (!(x)) a_crash(); } while(0) -+#endif -+ -+#define brk(p) ((uintptr_t)__syscall(SYS_brk, p)) -+ -+#define mmap __mmap -+#define madvise __madvise -+#define mremap __mremap -+ -+#ifndef a_clz_32 -+#define a_clz_32 a_clz_32 -+static inline int a_clz_32(uint32_t x) -+{ -+#ifdef __GNUC__ -+ return __builtin_clz(x); -+#else -+ x--; -+ x |= x >> 1; -+ x |= x >> 2; -+ x |= x >> 4; -+ x |= x >> 8; -+ x |= x >> 16; -+ x++; -+ return 31-a_ctz_32(x); -+#endif -+} -+#endif -+ -+static inline uint64_t get_random_secret() -+{ -+ uint64_t secret = (uintptr_t)&secret * 1103515245; -+ for (size_t i=0; libc.auxv[i]; i+=2) -+ if (libc.auxv[i]==AT_RANDOM) -+ memcpy(&secret, (char *)libc.auxv[i+1], sizeof secret); -+ return secret; -+} -+ -+#ifndef PAGESIZE -+#define PAGESIZE PAGE_SIZE -+#endif -+ -+static inline size_t get_page_size() -+{ -+ return PAGESIZE; -+} -+ -+// no portable "is multithreaded" predicate so assume true -+#define MT (libc.threaded) -+ -+#define RDLOCK_IS_EXCLUSIVE 1 -+ -+__attribute__((__visibility__("hidden"))) -+extern int __malloc_lock[1]; -+ -+#define LOCK_OBJ_DEF \ -+int __malloc_lock[1]; -+ -+static inline void rdlock() -+{ -+ if (MT) LOCK(__malloc_lock); -+} -+static inline void wrlock() -+{ -+ if (MT) LOCK(__malloc_lock); -+} -+static inline void unlock() -+{ -+ UNLOCK(__malloc_lock); -+} -+static inline void upgradelock() -+{ -+} -+ -+#endif -diff --git a/src/malloc/lite_malloc.c b/src/malloc/lite_malloc.c -deleted file mode 100644 -index 050d84f6..00000000 ---- a/src/malloc/lite_malloc.c -+++ /dev/null -@@ -1,59 +0,0 @@ --#include <stdlib.h> --#include <stdint.h> --#include <limits.h> --#include <errno.h> --#include "lock.h" --#include "malloc_impl.h" -- --#define ALIGN 16 -- --static void *__simple_malloc(size_t n) --{ -- static char *cur, *end; -- static volatile int lock[1]; -- size_t align=1, pad; -- void *p; -- -- if (!n) n++; -- while (align<n && align<ALIGN) -- align += align; -- -- LOCK(lock); -- -- pad = -(uintptr_t)cur & align-1; -- -- if (n <= SIZE_MAX/2 + ALIGN) n += pad; -- -- if (n > end-cur) { -- size_t m = n; -- char *new = __expand_heap(&m); -- if (!new) { -- UNLOCK(lock); -- return 0; -- } -- if (new != end) { -- cur = new; -- n -= pad; -- pad = 0; -- } -- end = new + m; -- } -- -- p = cur + pad; -- cur += n; -- UNLOCK(lock); -- return p; --} -- --weak_alias(__simple_malloc, malloc); -- --static void *__simple_calloc(size_t m, size_t n) --{ -- if (n && m > (size_t)-1/n) { -- errno = ENOMEM; -- return 0; -- } -- return __simple_malloc(n * m); --} -- --weak_alias(__simple_calloc, calloc); -diff --git a/src/malloc/malloc.c b/src/malloc/malloc.c -index 96982596..c55a1855 100644 ---- a/src/malloc/malloc.c -+++ b/src/malloc/malloc.c -@@ -1,351 +1,382 @@ --#define _GNU_SOURCE - #include <stdlib.h> --#include <string.h> --#include <limits.h> - #include <stdint.h> --#include <errno.h> -+#include <limits.h> -+#include <string.h> - #include <sys/mman.h> --#include "libc.h" --#include "atomic.h" --#include "pthread_impl.h" --#include "malloc_impl.h" -- --#if defined(__GNUC__) && defined(__PIC__) --#define inline inline __attribute__((always_inline)) --#endif -- --static struct { -- volatile uint64_t binmap; -- struct bin bins[64]; -- volatile int free_lock[2]; --} mal; -- --int __malloc_replaced; -- --/* Synchronization tools */ -- --static inline void 
lock(volatile int *lk) --{ -- if (libc.threads_minus_1) -- while(a_swap(lk, 1)) __wait(lk, lk+1, 1, 1); --} -- --static inline void unlock(volatile int *lk) --{ -- if (lk[0]) { -- a_store(lk, 0); -- if (lk[1]) __wake(lk, 1, 1); -- } --} -- --static inline void lock_bin(int i) --{ -- lock(mal.bins[i].lock); -- if (!mal.bins[i].head) -- mal.bins[i].head = mal.bins[i].tail = BIN_TO_CHUNK(i); --} -- --static inline void unlock_bin(int i) --{ -- unlock(mal.bins[i].lock); --} -+#include <errno.h> - --static int first_set(uint64_t x) --{ --#if 1 -- return a_ctz_64(x); --#else -- static const char debruijn64[64] = { -- 0, 1, 2, 53, 3, 7, 54, 27, 4, 38, 41, 8, 34, 55, 48, 28, -- 62, 5, 39, 46, 44, 42, 22, 9, 24, 35, 59, 56, 49, 18, 29, 11, -- 63, 52, 6, 26, 37, 40, 33, 47, 61, 45, 43, 21, 23, 58, 17, 10, -- 51, 25, 36, 32, 60, 20, 57, 16, 50, 31, 19, 15, 30, 14, 13, 12 -- }; -- static const char debruijn32[32] = { -- 0, 1, 23, 2, 29, 24, 19, 3, 30, 27, 25, 11, 20, 8, 4, 13, -- 31, 22, 28, 18, 26, 10, 7, 12, 21, 17, 9, 6, 16, 5, 15, 14 -- }; -- if (sizeof(long) < 8) { -- uint32_t y = x; -- if (!y) { -- y = x>>32; -- return 32 + debruijn32[(y&-y)*0x076be629 >> 27]; -- } -- return debruijn32[(y&-y)*0x076be629 >> 27]; -- } -- return debruijn64[(x&-x)*0x022fdd63cc95386dull >> 58]; --#endif --} -+#include "meta.h" -+ -+LOCK_OBJ_DEF; -+ -+const uint16_t size_classes[] = { -+ 1, 2, 3, 4, 5, 6, 7, 8, -+ 9, 10, 12, 15, -+ 18, 20, 25, 31, -+ 36, 42, 50, 63, -+ 72, 84, 102, 127, -+ 146, 170, 204, 255, -+ 292, 340, 409, 511, -+ 584, 682, 818, 1023, -+ 1169, 1364, 1637, 2047, -+ 2340, 2730, 3276, 4095, -+ 4680, 5460, 6552, 8191, -+}; - --static const unsigned char bin_tab[60] = { -- 32,33,34,35,36,36,37,37,38,38,39,39, -- 40,40,40,40,41,41,41,41,42,42,42,42,43,43,43,43, -- 44,44,44,44,44,44,44,44,45,45,45,45,45,45,45,45, -- 46,46,46,46,46,46,46,46,47,47,47,47,47,47,47,47, -+static const uint8_t small_cnt_tab[][3] = { -+ { 30, 30, 30 }, -+ { 31, 15, 15 }, -+ { 20, 10, 10 }, -+ { 31, 15, 7 }, -+ { 25, 12, 6 }, -+ { 21, 10, 5 }, -+ { 18, 8, 4 }, -+ { 31, 15, 7 }, -+ { 28, 14, 6 }, - }; - --static int bin_index(size_t x) --{ -- x = x / SIZE_ALIGN - 1; -- if (x <= 32) return x; -- if (x < 512) return bin_tab[x/8-4]; -- if (x > 0x1c00) return 63; -- return bin_tab[x/128-4] + 16; --} -+static const uint8_t med_cnt_tab[4] = { 28, 24, 20, 32 }; - --static int bin_index_up(size_t x) --{ -- x = x / SIZE_ALIGN - 1; -- if (x <= 32) return x; -- x--; -- if (x < 512) return bin_tab[x/8-4] + 1; -- return bin_tab[x/128-4] + 17; --} -+struct malloc_context ctx = { 0 }; - --#if 0 --void __dump_heap(int x) -+struct meta *alloc_meta(void) - { -- struct chunk *c; -- int i; -- for (c = (void *)mal.heap; CHUNK_SIZE(c); c = NEXT_CHUNK(c)) -- fprintf(stderr, "base %p size %zu (%d) flags %d/%d\n", -- c, CHUNK_SIZE(c), bin_index(CHUNK_SIZE(c)), -- c->csize & 15, -- NEXT_CHUNK(c)->psize & 15); -- for (i=0; i<64; i++) { -- if (mal.bins[i].head != BIN_TO_CHUNK(i) && mal.bins[i].head) { -- fprintf(stderr, "bin %d: %p\n", i, mal.bins[i].head); -- if (!(mal.binmap & 1ULL<<i)) -- fprintf(stderr, "missing from binmap!\n"); -- } else if (mal.binmap & 1ULL<<i) -- fprintf(stderr, "binmap wrongly contains %d!\n", i); -- } --} -+ struct meta *m; -+ unsigned char *p; -+ if (!ctx.init_done) { -+#ifndef PAGESIZE -+ ctx.pagesize = get_page_size(); - #endif -- --static struct chunk *expand_heap(size_t n) --{ -- static int heap_lock[2]; -- static void *end; -- void *p; -- struct chunk *w; -- -- /* The argument n already accounts for the caller's chunk -- * 
overhead needs, but if the heap can't be extended in-place, -- * we need room for an extra zero-sized sentinel chunk. */ -- n += SIZE_ALIGN; -- -- lock(heap_lock); -- -- p = __expand_heap(&n); -- if (!p) { -- unlock(heap_lock); -- return 0; -+ ctx.secret = get_random_secret(); -+ ctx.init_done = 1; - } -- -- /* If not just expanding existing space, we need to make a -- * new sentinel chunk below the allocated space. */ -- if (p != end) { -- /* Valid/safe because of the prologue increment. */ -- n -= SIZE_ALIGN; -- p = (char *)p + SIZE_ALIGN; -- w = MEM_TO_CHUNK(p); -- w->psize = 0 | C_INUSE; -+ size_t pagesize = PGSZ; -+ if (pagesize < 4096) pagesize = 4096; -+ if ((m = dequeue_head(&ctx.free_meta_head))) return m; -+ if (!ctx.avail_meta_count) { -+ int need_unprotect = 1; -+ if (!ctx.avail_meta_area_count && ctx.brk!=-1) { -+ uintptr_t new = ctx.brk + pagesize; -+ int need_guard = 0; -+ if (!ctx.brk) { -+ need_guard = 1; -+ ctx.brk = brk(0); -+ // some ancient kernels returned _ebss -+ // instead of next page as initial brk. -+ ctx.brk += -ctx.brk & (pagesize-1); -+ new = ctx.brk + 2*pagesize; -+ } -+ if (brk(new) != new) { -+ ctx.brk = -1; -+ } else { -+ if (need_guard) mmap((void *)ctx.brk, pagesize, -+ PROT_NONE, MAP_ANON|MAP_PRIVATE|MAP_FIXED, -1, 0); -+ ctx.brk = new; -+ ctx.avail_meta_areas = (void *)(new - pagesize); -+ ctx.avail_meta_area_count = pagesize>>12; -+ need_unprotect = 0; -+ } -+ } -+ if (!ctx.avail_meta_area_count) { -+ size_t n = 2UL << ctx.meta_alloc_shift; -+ p = mmap(0, n*pagesize, PROT_NONE, -+ MAP_PRIVATE|MAP_ANON, -1, 0); -+ if (p==MAP_FAILED) return 0; -+ ctx.avail_meta_areas = p + pagesize; -+ ctx.avail_meta_area_count = (n-1)*(pagesize>>12); -+ ctx.meta_alloc_shift++; -+ } -+ p = ctx.avail_meta_areas; -+ if ((uintptr_t)p & (pagesize-1)) need_unprotect = 0; -+ if (need_unprotect) -+ if (mprotect(p, pagesize, PROT_READ|PROT_WRITE) -+ && errno != ENOSYS) -+ return 0; -+ ctx.avail_meta_area_count--; -+ ctx.avail_meta_areas = p + 4096; -+ if (ctx.meta_area_tail) { -+ ctx.meta_area_tail->next = (void *)p; -+ } else { -+ ctx.meta_area_head = (void *)p; -+ } -+ ctx.meta_area_tail = (void *)p; -+ ctx.meta_area_tail->check = ctx.secret; -+ ctx.avail_meta_count = ctx.meta_area_tail->nslots -+ = (4096-sizeof(struct meta_area))/sizeof *m; -+ ctx.avail_meta = ctx.meta_area_tail->slots; - } -- -- /* Record new heap end and fill in footer. */ -- end = (char *)p + n; -- w = MEM_TO_CHUNK(end); -- w->psize = n | C_INUSE; -- w->csize = 0 | C_INUSE; -- -- /* Fill in header, which may be new or may be replacing a -- * zero-size sentinel header at the old end-of-heap. */ -- w = MEM_TO_CHUNK(p); -- w->csize = n | C_INUSE; -- -- unlock(heap_lock); -- -- return w; -+ ctx.avail_meta_count--; -+ m = ctx.avail_meta++; -+ m->prev = m->next = 0; -+ return m; - } - --static int adjust_size(size_t *n) -+static uint32_t try_avail(struct meta **pm) - { -- /* Result of pointer difference must fit in ptrdiff_t. 
*/ -- if (*n-1 > PTRDIFF_MAX - SIZE_ALIGN - PAGE_SIZE) { -- if (*n) { -- errno = ENOMEM; -- return -1; -+ struct meta *m = *pm; -+ uint32_t first; -+ if (!m) return 0; -+ uint32_t mask = m->avail_mask; -+ if (!mask) { -+ if (!m) return 0; -+ if (!m->freed_mask) { -+ dequeue(pm, m); -+ m = *pm; -+ if (!m) return 0; - } else { -- *n = SIZE_ALIGN; -- return 0; -+ m = m->next; -+ *pm = m; - } -- } -- *n = (*n + OVERHEAD + SIZE_ALIGN - 1) & SIZE_MASK; -- return 0; --} - --static void unbin(struct chunk *c, int i) --{ -- if (c->prev == c->next) -- a_and_64(&mal.binmap, ~(1ULL<<i)); -- c->prev->next = c->next; -- c->next->prev = c->prev; -- c->csize |= C_INUSE; -- NEXT_CHUNK(c)->psize |= C_INUSE; --} -+ mask = m->freed_mask; - --static int alloc_fwd(struct chunk *c) --{ -- int i; -- size_t k; -- while (!((k=c->csize) & C_INUSE)) { -- i = bin_index(k); -- lock_bin(i); -- if (c->csize == k) { -- unbin(c, i); -- unlock_bin(i); -- return 1; -+ // skip fully-free group unless it's the only one -+ // or it's a permanently non-freeable group -+ if (mask == (2u<<m->last_idx)-1 && m->freeable) { -+ m = m->next; -+ *pm = m; -+ mask = m->freed_mask; - } -- unlock_bin(i); -- } -- return 0; --} - --static int alloc_rev(struct chunk *c) --{ -- int i; -- size_t k; -- while (!((k=c->psize) & C_INUSE)) { -- i = bin_index(k); -- lock_bin(i); -- if (c->psize == k) { -- unbin(PREV_CHUNK(c), i); -- unlock_bin(i); -- return 1; -+ // activate more slots in a not-fully-active group -+ // if needed, but only as a last resort. prefer using -+ // any other group with free slots. this avoids -+ // touching & dirtying as-yet-unused pages. -+ if (!(mask & ((2u<<m->mem->active_idx)-1))) { -+ if (m->next != m) { -+ m = m->next; -+ *pm = m; -+ } else { -+ int cnt = m->mem->active_idx + 2; -+ int size = size_classes[m->sizeclass]*UNIT; -+ int span = UNIT + size*cnt; -+ // activate up to next 4k boundary -+ while ((span^(span+size-1)) < 4096) { -+ cnt++; -+ span += size; -+ } -+ if (cnt > m->last_idx+1) -+ cnt = m->last_idx+1; -+ m->mem->active_idx = cnt-1; -+ } - } -- unlock_bin(i); -+ mask = activate_group(m); -+ assert(mask); -+ decay_bounces(m->sizeclass); - } -- return 0; -+ first = mask&-mask; -+ m->avail_mask = mask-first; -+ return first; - } - -+static int alloc_slot(int, size_t); - --/* pretrim - trims a chunk _prior_ to removing it from its bin. -- * Must be called with i as the ideal bin for size n, j the bin -- * for the _free_ chunk self, and bin j locked. */ --static int pretrim(struct chunk *self, size_t n, int i, int j) -+static struct meta *alloc_group(int sc, size_t req) - { -- size_t n1; -- struct chunk *next, *split; -- -- /* We cannot pretrim if it would require re-binning. */ -- if (j < 40) return 0; -- if (j < i+3) { -- if (j != 63) return 0; -- n1 = CHUNK_SIZE(self); -- if (n1-n <= MMAP_THRESHOLD) return 0; -+ size_t size = UNIT*size_classes[sc]; -+ int i = 0, cnt; -+ unsigned char *p; -+ struct meta *m = alloc_meta(); -+ if (!m) return 0; -+ size_t usage = ctx.usage_by_class[sc]; -+ size_t pagesize = PGSZ; -+ int active_idx; -+ if (sc < 9) { -+ while (i<2 && 4*small_cnt_tab[sc][i] > usage) -+ i++; -+ cnt = small_cnt_tab[sc][i]; - } else { -- n1 = CHUNK_SIZE(self); -+ // lookup max number of slots fitting in power-of-two size -+ // from a table, along with number of factors of two we -+ // can divide out without a remainder or reaching 1. -+ cnt = med_cnt_tab[sc&3]; -+ -+ // reduce cnt to avoid excessive eagar allocation. 
-+ while (!(cnt&1) && 4*cnt > usage) -+ cnt >>= 1; -+ -+ // data structures don't support groups whose slot offsets -+ // in units don't fit in 16 bits. -+ while (size*cnt >= 65536*UNIT) -+ cnt >>= 1; - } -- if (bin_index(n1-n) != j) return 0; -- -- next = NEXT_CHUNK(self); -- split = (void *)((char *)self + n); -- -- split->prev = self->prev; -- split->next = self->next; -- split->prev->next = split; -- split->next->prev = split; -- split->psize = n | C_INUSE; -- split->csize = n1-n; -- next->psize = n1-n; -- self->csize = n | C_INUSE; -- return 1; --} - --static void trim(struct chunk *self, size_t n) --{ -- size_t n1 = CHUNK_SIZE(self); -- struct chunk *next, *split; -+ // If we selected a count of 1 above but it's not sufficient to use -+ // mmap, increase to 2. Then it might be; if not it will nest. -+ if (cnt==1 && size*cnt+UNIT <= pagesize/2) cnt = 2; -+ -+ // All choices of size*cnt are "just below" a power of two, so anything -+ // larger than half the page size should be allocated as whole pages. -+ if (size*cnt+UNIT > pagesize/2) { -+ // check/update bounce counter to start/increase retention -+ // of freed maps, and inhibit use of low-count, odd-size -+ // small mappings and single-slot groups if activated. -+ int nosmall = is_bouncing(sc); -+ account_bounce(sc); -+ step_seq(); -+ -+ // since the following count reduction opportunities have -+ // an absolute memory usage cost, don't overdo them. count -+ // coarse usage as part of usage. -+ if (!(sc&1) && sc<32) usage += ctx.usage_by_class[sc+1]; -+ -+ // try to drop to a lower count if the one found above -+ // increases usage by more than 25%. these reduced counts -+ // roughly fill an integral number of pages, just not a -+ // power of two, limiting amount of unusable space. -+ if (4*cnt > usage && !nosmall) { -+ if (0); -+ else if ((sc&3)==1 && size*cnt>8*pagesize) cnt = 2; -+ else if ((sc&3)==2 && size*cnt>4*pagesize) cnt = 3; -+ else if ((sc&3)==0 && size*cnt>8*pagesize) cnt = 3; -+ else if ((sc&3)==0 && size*cnt>2*pagesize) cnt = 5; -+ } -+ size_t needed = size*cnt + UNIT; -+ needed += -needed & (pagesize-1); -+ -+ // produce an individually-mmapped allocation if usage is low, -+ // bounce counter hasn't triggered, and either it saves memory -+ // or it avoids eagar slot allocation without wasting too much. 
-+ if (!nosmall && cnt<=7) { -+ req += 4 + UNIT; -+ req += -req & (pagesize-1); -+ if (req<size+UNIT || (req>=4*pagesize && 2*cnt>usage)) { -+ cnt = 1; -+ needed = req; -+ } -+ } - -- if (n >= n1 - DONTCARE) return; -+ p = mmap(0, needed, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANON, -1, 0); -+ if (p==MAP_FAILED) { -+ free_meta(m); -+ return 0; -+ } -+ m->maplen = needed>>12; -+ ctx.mmap_counter++; -+ active_idx = (4096-UNIT)/size-1; -+ if (active_idx > cnt-1) active_idx = cnt-1; -+ if (active_idx < 0) active_idx = 0; -+ } else { -+ int j = size_to_class(UNIT+cnt*size-4); -+ int idx = alloc_slot(j, UNIT+cnt*size-4); -+ if (idx < 0) { -+ free_meta(m); -+ return 0; -+ } -+ struct meta *g = ctx.active[j]; -+ p = enframe(g, idx, UNIT*size_classes[j]-4, ctx.mmap_counter); -+ m->maplen = 0; -+ p[-3] = (p[-3]&31) | (6<<5); -+ for (int i=0; i<=cnt; i++) -+ p[UNIT+i*size-4] = 0; -+ active_idx = cnt-1; -+ } -+ ctx.usage_by_class[sc] += cnt; -+ m->avail_mask = (2u<<active_idx)-1; -+ m->freed_mask = (2u<<(cnt-1))-1 - m->avail_mask; -+ m->mem = (void *)p; -+ m->mem->meta = m; -+ m->mem->active_idx = active_idx; -+ m->last_idx = cnt-1; -+ m->freeable = 1; -+ m->sizeclass = sc; -+ return m; -+} - -- next = NEXT_CHUNK(self); -- split = (void *)((char *)self + n); -+static int alloc_slot(int sc, size_t req) -+{ -+ uint32_t first = try_avail(&ctx.active[sc]); -+ if (first) return a_ctz_32(first); - -- split->psize = n | C_INUSE; -- split->csize = n1-n | C_INUSE; -- next->psize = n1-n | C_INUSE; -- self->csize = n | C_INUSE; -+ struct meta *g = alloc_group(sc, req); -+ if (!g) return -1; - -- __bin_chunk(split); -+ g->avail_mask--; -+ queue(&ctx.active[sc], g); -+ return 0; - } - - void *malloc(size_t n) - { -- struct chunk *c; -- int i, j; -- -- if (adjust_size(&n) < 0) return 0; -- -- if (n > MMAP_THRESHOLD) { -- size_t len = n + OVERHEAD + PAGE_SIZE - 1 & -PAGE_SIZE; -- char *base = __mmap(0, len, PROT_READ|PROT_WRITE, -- MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); -- if (base == (void *)-1) return 0; -- c = (void *)(base + SIZE_ALIGN - OVERHEAD); -- c->csize = len - (SIZE_ALIGN - OVERHEAD); -- c->psize = SIZE_ALIGN - OVERHEAD; -- return CHUNK_TO_MEM(c); -- } -- -- i = bin_index_up(n); -- for (;;) { -- uint64_t mask = mal.binmap & -(1ULL<<i); -- if (!mask) { -- c = expand_heap(n); -- if (!c) return 0; -- if (alloc_rev(c)) { -- struct chunk *x = c; -- c = PREV_CHUNK(c); -- NEXT_CHUNK(x)->psize = c->csize = -- x->csize + CHUNK_SIZE(c); -- } -- break; -- } -- j = first_set(mask); -- lock_bin(j); -- c = mal.bins[j].head; -- if (c != BIN_TO_CHUNK(j)) { -- if (!pretrim(c, n, i, j)) unbin(c, j); -- unlock_bin(j); -- break; -+ if (size_overflows(n)) return 0; -+ struct meta *g; -+ uint32_t mask, first; -+ int sc; -+ int idx; -+ int ctr; -+ -+ if (n >= MMAP_THRESHOLD) { -+ size_t needed = n + 4 + UNIT; -+ void *p = mmap(0, needed, PROT_READ|PROT_WRITE, -+ MAP_PRIVATE|MAP_ANON, -1, 0); -+ if (p==MAP_FAILED) return 0; -+ wrlock(); -+ step_seq(); -+ g = alloc_meta(); -+ if (!g) { -+ unlock(); -+ munmap(p, needed); -+ return 0; - } -- unlock_bin(j); -+ g->mem = p; -+ g->mem->meta = g; -+ g->last_idx = 0; -+ g->freeable = 1; -+ g->sizeclass = 63; -+ g->maplen = (needed+4095)/4096; -+ g->avail_mask = g->freed_mask = 0; -+ // use a global counter to cycle offset in -+ // individually-mmapped allocations. 
-+ ctx.mmap_counter++; -+ idx = 0; -+ goto success; - } - -- /* Now patch up in case we over-allocated */ -- trim(c, n); -- -- return CHUNK_TO_MEM(c); --} -+ sc = size_to_class(n); -+ -+ rdlock(); -+ g = ctx.active[sc]; -+ -+ // use coarse size classes initially when there are not yet -+ // any groups of desired size. this allows counts of 2 or 3 -+ // to be allocated at first rather than having to start with -+ // 7 or 5, the min counts for even size classes. -+ if (!g && sc>=4 && sc<32 && sc!=6 && !(sc&1) && !ctx.usage_by_class[sc]) { -+ size_t usage = ctx.usage_by_class[sc|1]; -+ // if a new group may be allocated, count it toward -+ // usage in deciding if we can use coarse class. -+ if (!ctx.active[sc|1] || (!ctx.active[sc|1]->avail_mask -+ && !ctx.active[sc|1]->freed_mask)) -+ usage += 3; -+ if (usage <= 12) -+ sc |= 1; -+ g = ctx.active[sc]; -+ } - --static size_t mal0_clear(char *p, size_t pagesz, size_t n) --{ --#ifdef __GNUC__ -- typedef uint64_t __attribute__((__may_alias__)) T; --#else -- typedef unsigned char T; --#endif -- char *pp = p + n; -- size_t i = (uintptr_t)pp & (pagesz - 1); - for (;;) { -- pp = memset(pp - i, 0, i); -- if (pp - p < pagesz) return pp - p; -- for (i = pagesz; i; i -= 2*sizeof(T), pp -= 2*sizeof(T)) -- if (((T *)pp)[-1] | ((T *)pp)[-2]) -- break; -+ mask = g ? g->avail_mask : 0; -+ first = mask&-mask; -+ if (!first) break; -+ if (RDLOCK_IS_EXCLUSIVE || !MT) -+ g->avail_mask = mask-first; -+ else if (a_cas(&g->avail_mask, mask, mask-first)!=mask) -+ continue; -+ idx = a_ctz_32(first); -+ goto success; -+ } -+ upgradelock(); -+ -+ idx = alloc_slot(sc, n); -+ if (idx < 0) { -+ unlock(); -+ return 0; - } -+ g = ctx.active[sc]; -+ -+success: -+ ctr = ctx.mmap_counter; -+ unlock(); -+ return enframe(g, idx, n, ctr); - } - - void *calloc(size_t m, size_t n) -@@ -357,192 +388,5 @@ void *calloc(size_t m, size_t n) - n *= m; - void *p = malloc(n); - if (!p) return p; -- if (!__malloc_replaced) { -- if (IS_MMAPPED(MEM_TO_CHUNK(p))) -- return p; -- if (n >= PAGE_SIZE) -- n = mal0_clear(p, PAGE_SIZE, n); -- } -- return memset(p, 0, n); --} -- --void *realloc(void *p, size_t n) --{ -- struct chunk *self, *next; -- size_t n0, n1; -- void *new; -- -- if (!p) return malloc(n); -- -- if (adjust_size(&n) < 0) return 0; -- -- self = MEM_TO_CHUNK(p); -- n1 = n0 = CHUNK_SIZE(self); -- -- if (IS_MMAPPED(self)) { -- size_t extra = self->psize; -- char *base = (char *)self - extra; -- size_t oldlen = n0 + extra; -- size_t newlen = n + extra; -- /* Crash on realloc of freed chunk */ -- if (extra & 1) a_crash(); -- if (newlen < PAGE_SIZE && (new = malloc(n-OVERHEAD))) { -- n0 = n; -- goto copy_free_ret; -- } -- newlen = (newlen + PAGE_SIZE-1) & -PAGE_SIZE; -- if (oldlen == newlen) return p; -- base = __mremap(base, oldlen, newlen, MREMAP_MAYMOVE); -- if (base == (void *)-1) -- goto copy_realloc; -- self = (void *)(base + extra); -- self->csize = newlen - extra; -- return CHUNK_TO_MEM(self); -- } -- -- next = NEXT_CHUNK(self); -- -- /* Crash on corrupted footer (likely from buffer overflow) */ -- if (next->psize != self->csize) a_crash(); -- -- /* Merge adjacent chunks if we need more space. This is not -- * a waste of time even if we fail to get enough space, because our -- * subsequent call to free would otherwise have to do the merge. */ -- if (n > n1 && alloc_fwd(next)) { -- n1 += CHUNK_SIZE(next); -- next = NEXT_CHUNK(next); -- } -- /* FIXME: find what's wrong here and reenable it..? 
*/ -- if (0 && n > n1 && alloc_rev(self)) { -- self = PREV_CHUNK(self); -- n1 += CHUNK_SIZE(self); -- } -- self->csize = n1 | C_INUSE; -- next->psize = n1 | C_INUSE; -- -- /* If we got enough space, split off the excess and return */ -- if (n <= n1) { -- //memmove(CHUNK_TO_MEM(self), p, n0-OVERHEAD); -- trim(self, n); -- return CHUNK_TO_MEM(self); -- } -- --copy_realloc: -- /* As a last resort, allocate a new chunk and copy to it. */ -- new = malloc(n-OVERHEAD); -- if (!new) return 0; --copy_free_ret: -- memcpy(new, p, n0-OVERHEAD); -- free(CHUNK_TO_MEM(self)); -- return new; --} -- --void __bin_chunk(struct chunk *self) --{ -- struct chunk *next = NEXT_CHUNK(self); -- size_t final_size, new_size, size; -- int reclaim=0; -- int i; -- -- final_size = new_size = CHUNK_SIZE(self); -- -- /* Crash on corrupted footer (likely from buffer overflow) */ -- if (next->psize != self->csize) a_crash(); -- -- for (;;) { -- if (self->psize & next->csize & C_INUSE) { -- self->csize = final_size | C_INUSE; -- next->psize = final_size | C_INUSE; -- i = bin_index(final_size); -- lock_bin(i); -- lock(mal.free_lock); -- if (self->psize & next->csize & C_INUSE) -- break; -- unlock(mal.free_lock); -- unlock_bin(i); -- } -- -- if (alloc_rev(self)) { -- self = PREV_CHUNK(self); -- size = CHUNK_SIZE(self); -- final_size += size; -- if (new_size+size > RECLAIM && (new_size+size^size) > size) -- reclaim = 1; -- } -- -- if (alloc_fwd(next)) { -- size = CHUNK_SIZE(next); -- final_size += size; -- if (new_size+size > RECLAIM && (new_size+size^size) > size) -- reclaim = 1; -- next = NEXT_CHUNK(next); -- } -- } -- -- if (!(mal.binmap & 1ULL<<i)) -- a_or_64(&mal.binmap, 1ULL<<i); -- -- self->csize = final_size; -- next->psize = final_size; -- unlock(mal.free_lock); -- -- self->next = BIN_TO_CHUNK(i); -- self->prev = mal.bins[i].tail; -- self->next->prev = self; -- self->prev->next = self; -- -- /* Replace middle of large chunks with fresh zero pages */ -- if (reclaim) { -- uintptr_t a = (uintptr_t)self + SIZE_ALIGN+PAGE_SIZE-1 & -PAGE_SIZE; -- uintptr_t b = (uintptr_t)next - SIZE_ALIGN & -PAGE_SIZE; --#if 1 -- __madvise((void *)a, b-a, MADV_DONTNEED); --#else -- __mmap((void *)a, b-a, PROT_READ|PROT_WRITE, -- MAP_PRIVATE|MAP_ANONYMOUS|MAP_FIXED, -1, 0); --#endif -- } -- -- unlock_bin(i); --} -- --static void unmap_chunk(struct chunk *self) --{ -- size_t extra = self->psize; -- char *base = (char *)self - extra; -- size_t len = CHUNK_SIZE(self) + extra; -- /* Crash on double free */ -- if (extra & 1) a_crash(); -- __munmap(base, len); --} -- --void free(void *p) --{ -- if (!p) return; -- -- struct chunk *self = MEM_TO_CHUNK(p); -- -- if (IS_MMAPPED(self)) -- unmap_chunk(self); -- else -- __bin_chunk(self); --} -- --void __malloc_donate(char *start, char *end) --{ -- size_t align_start_up = (SIZE_ALIGN-1) & (-(uintptr_t)start - OVERHEAD); -- size_t align_end_down = (SIZE_ALIGN-1) & (uintptr_t)end; -- -- /* Getting past this condition ensures that the padding for alignment -- * and header overhead will not overflow and will leave a nonzero -- * multiple of SIZE_ALIGN bytes between start and end. */ -- if (end - start <= OVERHEAD + align_start_up + align_end_down) -- return; -- start += align_start_up + OVERHEAD; -- end -= align_end_down; -- -- struct chunk *c = MEM_TO_CHUNK(start), *n = MEM_TO_CHUNK(end); -- c->psize = n->csize = C_INUSE; -- c->csize = n->psize = C_INUSE | (end-start); -- __bin_chunk(c); -+ return n >= MMAP_THRESHOLD ? 
p : memset(p, 0, n); - } -diff --git a/src/malloc/malloc_usable_size.c b/src/malloc/malloc_usable_size.c -index 672b518a..bdfc9918 100644 ---- a/src/malloc/malloc_usable_size.c -+++ b/src/malloc/malloc_usable_size.c -@@ -1,9 +1,12 @@ --#include <malloc.h> --#include "malloc_impl.h" -- --hidden void *(*const __realloc_dep)(void *, size_t) = realloc; -+#include <stdlib.h> -+#include "meta.h" - - size_t malloc_usable_size(void *p) - { -- return p ? CHUNK_SIZE(MEM_TO_CHUNK(p)) - OVERHEAD : 0; -+ struct meta *g = get_meta(p); -+ int idx = get_slot_index(p); -+ size_t stride = get_stride(g); -+ unsigned char *start = g->mem->storage + stride*idx; -+ unsigned char *end = start + stride - 4; -+ return get_nominal_size(p, end); - } -diff --git a/src/malloc/memalign.c b/src/malloc/memalign.c -index cf9dfbda..f78ab00f 100644 ---- a/src/malloc/memalign.c -+++ b/src/malloc/memalign.c -@@ -1,54 +1,6 @@ - #include <stdlib.h> --#include <stdint.h> --#include <errno.h> --#include "malloc_impl.h" - --void *__memalign(size_t align, size_t len) -+void *memalign(size_t align, size_t len) - { -- unsigned char *mem, *new; -- -- if ((align & -align) != align) { -- errno = EINVAL; -- return 0; -- } -- -- if (len > SIZE_MAX - align || __malloc_replaced) { -- errno = ENOMEM; -- return 0; -- } -- -- if (align <= SIZE_ALIGN) -- return malloc(len); -- -- if (!(mem = malloc(len + align-1))) -- return 0; -- -- new = (void *)((uintptr_t)mem + align-1 & -align); -- if (new == mem) return mem; -- -- struct chunk *c = MEM_TO_CHUNK(mem); -- struct chunk *n = MEM_TO_CHUNK(new); -- -- if (IS_MMAPPED(c)) { -- /* Apply difference between aligned and original -- * address to the "extra" field of mmapped chunk. */ -- n->psize = c->psize + (new-mem); -- n->csize = c->csize - (new-mem); -- return new; -- } -- -- struct chunk *t = NEXT_CHUNK(c); -- -- /* Split the allocated chunk into two chunks. The aligned part -- * that will be used has the size in its footer reduced by the -- * difference between the aligned and original addresses, and -- * the resulting size copied to its header. A new header and -- * footer are written for the split-off part to be freed. 
*/ -- n->psize = c->csize = C_INUSE | (new-mem); -- n->csize = t->psize -= new-mem; -- -- __bin_chunk(c); -- return new; -+ return aligned_alloc(align, len); - } -- --weak_alias(__memalign, memalign); -diff --git a/src/malloc/meta.h b/src/malloc/meta.h -new file mode 100644 -index 00000000..cefb2b32 ---- /dev/null -+++ b/src/malloc/meta.h -@@ -0,0 +1,284 @@ -+#ifndef MALLOC_META_H -+#define MALLOC_META_H -+ -+#include <stdint.h> -+#include <errno.h> -+#include <limits.h> -+#include "glue.h" -+ -+__attribute__((__visibility__("hidden"))) -+extern const uint16_t size_classes[]; -+ -+#define MMAP_THRESHOLD 131052 -+ -+#define UNIT 16 -+ -+struct group { -+ struct meta *meta; -+ unsigned char active_idx:5; -+ char pad[UNIT - sizeof(struct meta *) - 1]; -+ unsigned char storage[]; -+}; -+ -+struct meta { -+ struct meta *prev, *next; -+ struct group *mem; -+ volatile int avail_mask, freed_mask; -+ uintptr_t last_idx:5; -+ uintptr_t freeable:1; -+ uintptr_t sizeclass:6; -+ uintptr_t maplen:8*sizeof(uintptr_t)-12; -+}; -+ -+struct meta_area { -+ uint64_t check; -+ struct meta_area *next; -+ int nslots; -+ struct meta slots[]; -+}; -+ -+struct malloc_context { -+ uint64_t secret; -+#ifndef PAGESIZE -+ size_t pagesize; -+#endif -+ int init_done; -+ unsigned mmap_counter; -+ struct meta *free_meta_head; -+ struct meta *avail_meta; -+ size_t avail_meta_count, avail_meta_area_count, meta_alloc_shift; -+ struct meta_area *meta_area_head, *meta_area_tail; -+ unsigned char *avail_meta_areas; -+ struct meta *active[48]; -+ size_t usage_by_class[48]; -+ uint8_t unmap_seq[32], bounces[32]; -+ uint8_t seq; -+ uintptr_t brk; -+}; -+ -+__attribute__((__visibility__("hidden"))) -+extern struct malloc_context ctx; -+ -+#ifdef PAGESIZE -+#define PGSZ PAGESIZE -+#else -+#define PGSZ ctx.pagesize -+#endif -+ -+__attribute__((__visibility__("hidden"))) -+struct meta *alloc_meta(void); -+ -+static inline void queue(struct meta **phead, struct meta *m) -+{ -+ assert(!m->next); -+ assert(!m->prev); -+ if (*phead) { -+ struct meta *head = *phead; -+ m->next = head; -+ m->prev = head->prev; -+ m->next->prev = m->prev->next = m; -+ } else { -+ m->prev = m->next = m; -+ *phead = m; -+ } -+} -+ -+static inline void dequeue(struct meta **phead, struct meta *m) -+{ -+ if (m->next != m) { -+ m->prev->next = m->next; -+ m->next->prev = m->prev; -+ if (*phead == m) *phead = m->next; -+ } else { -+ *phead = 0; -+ } -+ m->prev = m->next = 0; -+} -+ -+static inline struct meta *dequeue_head(struct meta **phead) -+{ -+ struct meta *m = *phead; -+ if (m) dequeue(phead, m); -+ return m; -+} -+ -+static inline void free_meta(struct meta *m) -+{ -+ *m = (struct meta){0}; -+ queue(&ctx.free_meta_head, m); -+} -+ -+static inline uint32_t activate_group(struct meta *m) -+{ -+ assert(!m->avail_mask); -+ uint32_t mask, act = (2u<<m->mem->active_idx)-1; -+ do mask = m->freed_mask; -+ while (a_cas(&m->freed_mask, mask, mask&~act)!=mask); -+ return m->avail_mask = mask & act; -+} -+ -+static inline int get_slot_index(const unsigned char *p) -+{ -+ return p[-3] & 31; -+} -+ -+static inline struct meta *get_meta(const unsigned char *p) -+{ -+ assert(!((uintptr_t)p & 15)); -+ int offset = *(const uint16_t *)(p - 2); -+ int index = get_slot_index(p); -+ if (p[-4]) { -+ assert(!offset); -+ offset = *(uint32_t *)(p - 8); -+ assert(offset > 0xffff); -+ } -+ const struct group *base = (const void *)(p - UNIT*offset - UNIT); -+ const struct meta *meta = base->meta; -+ assert(meta->mem == base); -+ assert(index <= meta->last_idx); -+ 
assert(!(meta->avail_mask & (1u<<index))); -+ assert(!(meta->freed_mask & (1u<<index))); -+ const struct meta_area *area = (void *)((uintptr_t)meta & -4096); -+ assert(area->check == ctx.secret); -+ if (meta->sizeclass < 48) { -+ assert(offset >= size_classes[meta->sizeclass]*index); -+ assert(offset < size_classes[meta->sizeclass]*(index+1)); -+ } else { -+ assert(meta->sizeclass == 63); -+ } -+ if (meta->maplen) { -+ assert(offset <= meta->maplen*4096UL/UNIT - 1); -+ } -+ return (struct meta *)meta; -+} -+ -+static inline size_t get_nominal_size(const unsigned char *p, const unsigned char *end) -+{ -+ size_t reserved = p[-3] >> 5; -+ if (reserved >= 5) { -+ assert(reserved == 5); -+ reserved = *(const uint32_t *)(end-4); -+ assert(reserved >= 5); -+ assert(!end[-5]); -+ } -+ assert(reserved <= end-p); -+ assert(!*(end-reserved)); -+ // also check the slot's overflow byte -+ assert(!*end); -+ return end-reserved-p; -+} -+ -+static inline size_t get_stride(const struct meta *g) -+{ -+ if (!g->last_idx && g->maplen) { -+ return g->maplen*4096UL - UNIT; -+ } else { -+ return UNIT*size_classes[g->sizeclass]; -+ } -+} -+ -+static inline void set_size(unsigned char *p, unsigned char *end, size_t n) -+{ -+ int reserved = end-p-n; -+ if (reserved) end[-reserved] = 0; -+ if (reserved >= 5) { -+ *(uint32_t *)(end-4) = reserved; -+ end[-5] = 0; -+ reserved = 5; -+ } -+ p[-3] = (p[-3]&31) + (reserved<<5); -+} -+ -+static inline void *enframe(struct meta *g, int idx, size_t n, int ctr) -+{ -+ size_t stride = get_stride(g); -+ size_t slack = (stride-4-n)/UNIT; -+ unsigned char *p = g->mem->storage + stride*idx; -+ unsigned char *end = p+stride-4; -+ // cycle offset within slot to increase interval to address -+ // reuse, facilitate trapping double-free. -+ int off = (p[-3] ? *(uint16_t *)(p-2) + 1 : ctr) & 255; -+ assert(!p[-4]); -+ if (off > slack) { -+ size_t m = slack; -+ m |= m>>1; m |= m>>2; m |= m>>4; -+ off &= m; -+ if (off > slack) off -= slack+1; -+ assert(off <= slack); -+ } -+ if (off) { -+ // store offset in unused header at offset zero -+ // if enframing at non-zero offset. -+ *(uint16_t *)(p-2) = off; -+ p[-3] = 7<<5; -+ p += UNIT*off; -+ // for nonzero offset there is no permanent check -+ // byte, so make one. 
-+ p[-4] = 0; -+ } -+ *(uint16_t *)(p-2) = (size_t)(p-g->mem->storage)/UNIT; -+ p[-3] = idx; -+ set_size(p, end, n); -+ return p; -+} -+ -+static inline int size_to_class(size_t n) -+{ -+ n = (n+3)>>4; -+ if (n<10) return n; -+ n++; -+ int i = (28-a_clz_32(n))*4 + 8; -+ if (n>size_classes[i+1]) i+=2; -+ if (n>size_classes[i]) i++; -+ return i; -+} -+ -+static inline int size_overflows(size_t n) -+{ -+ if (n >= SIZE_MAX/2 - 4096) { -+ errno = ENOMEM; -+ return 1; -+ } -+ return 0; -+} -+ -+static inline void step_seq(void) -+{ -+ if (ctx.seq==255) { -+ for (int i=0; i<32; i++) ctx.unmap_seq[i] = 0; -+ ctx.seq = 1; -+ } else { -+ ctx.seq++; -+ } -+} -+ -+static inline void record_seq(int sc) -+{ -+ if (sc-7U < 32) ctx.unmap_seq[sc-7] = ctx.seq; -+} -+ -+static inline void account_bounce(int sc) -+{ -+ if (sc-7U < 32) { -+ int seq = ctx.unmap_seq[sc-7]; -+ if (seq && ctx.seq-seq < 10) { -+ if (ctx.bounces[sc-7]+1 < 100) -+ ctx.bounces[sc-7]++; -+ else -+ ctx.bounces[sc-7] = 150; -+ } -+ } -+} -+ -+static inline void decay_bounces(int sc) -+{ -+ if (sc-7U < 32 && ctx.bounces[sc-7]) -+ ctx.bounces[sc-7]--; -+} -+ -+static inline int is_bouncing(int sc) -+{ -+ return (sc-7U < 32 && ctx.bounces[sc-7] >= 100); -+} -+ -+#endif -diff --git a/src/malloc/posix_memalign.c b/src/malloc/posix_memalign.c -index 2ea8bd8a..ad4d8f47 100644 ---- a/src/malloc/posix_memalign.c -+++ b/src/malloc/posix_memalign.c -@@ -1,11 +1,10 @@ - #include <stdlib.h> - #include <errno.h> --#include "malloc_impl.h" - - int posix_memalign(void **res, size_t align, size_t len) - { - if (align < sizeof(void *)) return EINVAL; -- void *mem = __memalign(align, len); -+ void *mem = aligned_alloc(align, len); - if (!mem) return errno; - *res = mem; - return 0; -diff --git a/src/malloc/realloc.c b/src/malloc/realloc.c -new file mode 100644 -index 00000000..bc7f5e3b ---- /dev/null -+++ b/src/malloc/realloc.c -@@ -0,0 +1,51 @@ -+#define _GNU_SOURCE -+#include <stdlib.h> -+#include <sys/mman.h> -+#include <string.h> -+#include "meta.h" -+ -+void *realloc(void *p, size_t n) -+{ -+ if (!p) return malloc(n); -+ if (size_overflows(n)) return 0; -+ -+ struct meta *g = get_meta(p); -+ int idx = get_slot_index(p); -+ size_t stride = get_stride(g); -+ unsigned char *start = g->mem->storage + stride*idx; -+ unsigned char *end = start + stride - 4; -+ size_t old_size = get_nominal_size(p, end); -+ size_t avail_size = end-(unsigned char *)p; -+ void *new; -+ -+ // only resize in-place if size class matches -+ if (n <= avail_size && n<MMAP_THRESHOLD -+ && size_to_class(n)+1 >= g->sizeclass) { -+ set_size(p, end, n); -+ return p; -+ } -+ -+ // use mremap if old and new size are both mmap-worthy -+ if (g->sizeclass>=48 && n>=MMAP_THRESHOLD) { -+ assert(g->sizeclass==63); -+ size_t base = (unsigned char *)p-start; -+ size_t needed = (n + base + UNIT + 4 + 4095) & -4096; -+ new = g->maplen*4096UL == needed ? g->mem : -+ mremap(g->mem, g->maplen*4096UL, needed, MREMAP_MAYMOVE); -+ if (new!=MAP_FAILED) { -+ g->mem = new; -+ g->maplen = needed/4096; -+ p = g->mem->storage + base; -+ end = g->mem->storage + (needed - UNIT) - 4; -+ *end = 0; -+ set_size(p, end, n); -+ return p; -+ } -+ } -+ -+ new = malloc(n); -+ if (!new) return 0; -+ memcpy(new, p, n < old_size ? 
n : old_size); -+ free(p); -+ return new; -+} -diff --git a/src/malloc/replaced.c b/src/malloc/replaced.c -new file mode 100644 -index 00000000..1403a271 ---- /dev/null -+++ b/src/malloc/replaced.c -@@ -0,0 +1,3 @@ -+#include "meta.h" -+ -+int __malloc_replaced; --- -2.27.0 - diff --git a/pkg/musl/ver b/pkg/musl/ver @@ -1 +1 @@ -1.2.0 r1 +1.2.0-79-g0a005f49 r1 diff --git a/pkg/musl/x86_64.lua b/pkg/musl/x86_64.lua @@ -1,6 +1,6 @@ return { srcs={ - -- <cd src && printf "\t\t'%s',\n" src/*/x86_64/*.[sS] + -- <cd src && printf "\t\t'%s',\n" src/*/x86_64/*.[csS] 'src/fenv/x86_64/fenv.s', 'src/ldso/x86_64/dlsym.s', 'src/ldso/x86_64/tlsdesc.s', @@ -13,26 +13,29 @@ return { 'src/math/x86_64/exp2l.s', 'src/math/x86_64/expl.s', 'src/math/x86_64/expm1l.s', - 'src/math/x86_64/fabs.s', - 'src/math/x86_64/fabsf.s', - 'src/math/x86_64/fabsl.s', + 'src/math/x86_64/fabs.c', + 'src/math/x86_64/fabsf.c', + 'src/math/x86_64/fabsl.c', 'src/math/x86_64/floorl.s', - 'src/math/x86_64/fmodl.s', - 'src/math/x86_64/llrint.s', - 'src/math/x86_64/llrintf.s', - 'src/math/x86_64/llrintl.s', + 'src/math/x86_64/fma.c', + 'src/math/x86_64/fmaf.c', + 'src/math/x86_64/fmodl.c', + 'src/math/x86_64/llrint.c', + 'src/math/x86_64/llrintf.c', + 'src/math/x86_64/llrintl.c', 'src/math/x86_64/log10l.s', 'src/math/x86_64/log1pl.s', 'src/math/x86_64/log2l.s', 'src/math/x86_64/logl.s', - 'src/math/x86_64/lrint.s', - 'src/math/x86_64/lrintf.s', - 'src/math/x86_64/lrintl.s', - 'src/math/x86_64/remainderl.s', - 'src/math/x86_64/rintl.s', - 'src/math/x86_64/sqrt.s', - 'src/math/x86_64/sqrtf.s', - 'src/math/x86_64/sqrtl.s', + 'src/math/x86_64/lrint.c', + 'src/math/x86_64/lrintf.c', + 'src/math/x86_64/lrintl.c', + 'src/math/x86_64/remainderl.c', + 'src/math/x86_64/remquol.c', + 'src/math/x86_64/rintl.c', + 'src/math/x86_64/sqrt.c', + 'src/math/x86_64/sqrtf.c', + 'src/math/x86_64/sqrtl.c', 'src/math/x86_64/truncl.s', 'src/process/x86_64/vfork.s', 'src/setjmp/x86_64/longjmp.s',
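
For reference (not part of the commit above): the mallocng README quoted in the patch describes 48 size classes, spaced linearly up to 128 bytes and then roughly geometrically with four steps per doubling. The short standalone C sketch below reproduces that mapping from the size_classes[] table in src/malloc/malloc.c and the size_to_class() helper in src/malloc/meta.h, both quoted above; the sample main() and its inputs are illustrative only, and __builtin_clz stands in for a_clz_32, which the quoted glue.h defines the same way under GCC/Clang.

/* Standalone sketch, not part of the commit above: reproduces the
 * mallocng size-class mapping from the quoted size_classes[] table
 * (src/malloc/malloc.c) and size_to_class() (src/malloc/meta.h). */
#include <stdint.h>
#include <stdio.h>

#define UNIT 16   /* base granularity/alignment, per the quoted README */

static const uint16_t size_classes[] = {
	1, 2, 3, 4, 5, 6, 7, 8,
	9, 10, 12, 15,
	18, 20, 25, 31,
	36, 42, 50, 63,
	72, 84, 102, 127,
	146, 170, 204, 255,
	292, 340, 409, 511,
	584, 682, 818, 1023,
	1169, 1364, 1637, 2047,
	2340, 2730, 3276, 4095,
	4680, 5460, 6552, 8191,
};

/* (n+3)>>4 is the number of 16-byte units needed for n bytes plus the
 * 4-byte slot footer, minus one; for the first ten classes that count
 * is the class index itself.  Larger sizes locate the right geometric
 * step (four classes per doubling) via count-leading-zeros, then adjust
 * within the step. */
static int size_to_class(size_t n)
{
	n = (n+3)>>4;
	if (n<10) return n;
	n++;
	int i = (28-__builtin_clz((uint32_t)n))*4 + 8;
	if (n>size_classes[i+1]) i+=2;
	if (n>size_classes[i]) i++;
	return i;
}

int main(void)
{
	size_t samples[] = { 1, 16, 100, 128, 1000, 4096, 100000 };
	for (size_t i = 0; i < sizeof samples / sizeof *samples; i++) {
		int sc = size_to_class(samples[i]);
		printf("request %6zu -> class %2d (slot size %d bytes)\n",
		       samples[i], sc, size_classes[sc]*UNIT);
	}
	return 0;
}

Requests of MMAP_THRESHOLD (131052 bytes in the quoted meta.h) or more skip this table entirely and are mmapped individually as single-slot groups with size class 63.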