4 files changed, 0 insertions, 747 deletions
diff --git a/lib/lock_acquire.asm b/lib/lock_acquire.asm
deleted file mode 100644
index f32ba6a..0000000
--- a/lib/lock_acquire.asm
+++ /dev/null
@@ -1,204 +0,0 @@
-; Under MIT license, see /LICENSE.txt
-
-
-; Cheat sheet for Linux' x86_64 calling convention:
-;
-; - free to overwrite (caller should save them):
-;       rax, rcx, rdx, rsi, rdi, r8-r11, xmm0-xmm15
-; - caller expects be kept (callee should save them):
-;       rbx, rbp, r12-r15
-;
-; - for passing paramters to functions:
-;       rdi, rsi, rdx, rcx, r8, r9, xmm0-xmm7
-; - for getting return values from functions:
-;       rax, rdx, xmm0
-;
-; - for passing parameters to syscalls:
-;       rax, rdi, rsi, rdx, r10, r8, r9
-; - for getting return values from syscalls:
-;       rax, rdx
-; - overwritten by syscalls (all others preserved):
-;       rcx, r11
-
-
-section .text
-
-
-; Relevant system call IDs
-%define SYS_GETTID  186
-%define SYS_FUTEX   202
-
-; Relevant operations for futex
-%define FUTEX_LOCK_PI      6
-%define FUTEX_PRIVATE_FLAG 0x80
-
-; Relevant bits for futex dword
-%define FUTEX_TID_MASK   0x3fffffff
-%define FUTEX_OWNER_DIED 0x40000000
-%define FUTEX_WAITERS    0x80000000
-
-
-; Acquire a lock if possible, or wait until it gets released. Argument:
-;	rdi: struct{u32,u32,u32}* = handle of lock to acquire
-; Returns zero on success, or a standard error code.
-global linen_lock_acquire
-linen_lock_acquire:
-		;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-		;;;; Check validity of argument ;;;;
-		;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-		; Return EINVAL if rdi is NULL or otherwise invalid
-		mov eax, -22 ; (EINVAL = -22)
-
-		test rdi, rdi
-		jz acquire_return ; rdi is NULL
-
-		; rdi is nonzero, so let's just assume it's a valid pointer;
-		; if that assumption is wrong we'll get a segmentation fault.
-		; But we don't yet trust that [rdi] is a valid lock handle!
-		; To verify this we check the canary value stored at [rdi + 8].
-		mov ecx, [rdi + 8]
-		cmp ecx, 0xCAFEBABE
-		jnz acquire_return
-
-		;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-		;;;; Check ownership of lock ;;;;
-		;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-		; Lock owners are identified by their TID; let's find ours.
-		; The gettid system call simply returns our Linux thread ID.
-		; See: man 2 gettid
-
-		; gettid: rax = system call ID
-		mov eax, SYS_GETTID
-		; gettid: rax = gettid()
-		syscall
-
-		; Save a copy of our TID (no need for an error check)
-		mov edx, eax
-
-		; There are four possible ownership situations for the lock,
-		; which we can distinguish based on the dword value at [rdi]:
-		; - Case 1: if [rdi] contains zero, then the lock is available.
-		; - Case 2: if [rdi] has any of its highest 2 bits set, then the
-		;           lock isn't free, and kernel intervention is required.
-		; - Case 3: if the lower 30 bits of [rdi] contain our TID,
-		;           then we already own it (recursive acquisition).
-		; - Case 4: if the lower 30 bits of [rdi] contain another TID
-		;           and the high-bit flags aren't set, then we just wait
-		;           until we can acquire the lock using atomic operations
-		;           or, optionally, a futex call (usually more efficient).
-
-		; Atomically check whether the lock is owned by another thread,
-		; and if not, try to take ownership by writing our TID to [rdi].
-		; if ([rdi] == 0) { [rdi] = edx; goto acquire_success; } else { eax = [rdi]; }
-		xor eax, eax
-		lock cmpxchg [rdi], edx
-		jz acquire_success ; case 1
-
-		; The lock isn't free, so let's check how "clean" its state is.
-		; The following flags are set by the kernel (see futex below):
-		; - FUTEX_OWNER_DIED: the lock's owner died, so it's actually free
-		;                     (but first the kernel needs to clean up)
-		; - FUTEX_WAITERS:    we aren't the only one waiting for this lock
-		;                     (so let's sleep until the kernel wakes us up)
-		; Either way, we need the kernel's help, so jump to the futex call.
-		test eax, (FUTEX_OWNER_DIED | FUTEX_WAITERS)
-		jnz acquire_futex ; case 2
-
-		; It seems someone has the lock, check who: it may already be us.
-		; If so, this is a recursive acquisition, good, let's continue.
-		and eax, FUTEX_TID_MASK
-		cmp eax, edx
-		je acquire_success ; case 3
-
-		; Someone else has the lock, but we're the only one waiting for it.
-		; System calls are expensive, so let's try a short spin loop first,
-		; hoping it'll get released soon. This is arguably unnecessary, as
-		; it's only beneficial when two threads are more or less "in sync",
-		; so in most real-world cases you can delete this with no downside.
-
-		; Loop counter
-		mov ecx, 10
-	acquire_spinloop:
-		; The "pause" instruction is specially designed for loops like this
-		; and conserves power. It causes a small delay (makes sense here).
-		pause
-
-		; Atomically check whether the lock is owned by another thread,
-		; and if not, try to take ownership by writing our TID to [rdi].
-		; if ([rdi] == 0) { [rdi] = edx; goto acquire_success; } else { eax = [rdi]; }
-		xor eax, eax
-		lock cmpxchg [rdi], edx
-		jz acquire_success
-
-		; Decrement loop counter until zero
-		dec ecx
-		jnz acquire_spinloop
-
-		;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-		;;;; Let the kernel handle it ;;;;
-		;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-	acquire_futex:
-		; The futex system call waits for the dword at an address (rdi)
-		; changes in a certain way, as described above and in the futex
-		; manual's section on so-called "priority-inheritance futexes".
-		; See: man 2 futex
-
-		; futex: rdi = uaddr: address of the dword to watch
-		; futex: rsi = futex_op: which futex operation we want:
-		; - FUTEX_LOCK_PI:      block until lock's owner uses FUTEX_UNLOCK_PI
-		; - FUTEX_PRIVATE_FLAG: this lock isn't shared with another process
-		mov esi, (FUTEX_LOCK_PI | FUTEX_PRIVATE_FLAG)
-		; futex: r10 = timeout: in case we had a deadline (we don't)
-		xor r10, r10
-		; futex: rdx = val:   ignored when FUTEX_LOCK_PI is used
-		; futex: r8 = uaddr2: ignored when FUTEX_LOCK_PI is used
-		; futex: r9 = val3:   ignored when FUTEX_LOCK_PI is used
-		; futex: rax = system call ID
-		mov eax, SYS_FUTEX
-		; futex: rax = futex(rdi, rsi, (rdx), r10, (r8), (r9))
-		syscall
- 
-		; Sometimes the lock is released after the "lock cmpxchg" instruction
-		; but just before the futex call. In that case, futex returns EAGAIN.
-		cmp rax, -11 ; (-EAGAIN)
-		je acquire_futex
-
-		; Any other negative return value means failure
-		test rax, rax
-		jnz acquire_return
-
-		; Indicate that we made a futex call (see below for why)
-		xor edx, edx
-
-		;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-		;;;; Update the recursion counter ;;;;
-		;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-	acquire_success:
-		; Read the recursion counter (we have the lock: no need for atomics)
-		mov ecx, [rdi + 4]
-
-		; The value in edx depends on how we came to the acquire_success label:
-		; 1) We jumped here after a successful "lock cmpxchg": edx has our TID
-		; 2) We finished a successful futex call: edx was set to 0 (see above)
-		test edx, edx
-		; Why do we care? Well, in the latter case, the futex call may have been
-		; necessary because there was a problem (i.e. FUTEX_OWNER_DIED was set),
-		; in which case the recursion counter is stale and hence must be reset.
-		; In any other case, whoever released the lock should've reset it already.
-		cmovz ecx, edx ; ecx = 0
-
-		; Increment the recursion counter and write it back to memory
-		; (if the lock is being used non-recursively, it should be 1)
-		inc ecx
-		mov [rdi + 4], ecx
-
-		; Lock acquisition was successful, so we'll return 0. In most cases
-		; eax is already 0; we only need this if the recursion counter > 1.
-		xor eax, eax
-
-	acquire_return:
-		ret
diff --git a/lib/lock_release.asm b/lib/lock_release.asm
deleted file mode 100644
index f86caa2..0000000
--- a/lib/lock_release.asm
+++ /dev/null
@@ -1,132 +0,0 @@
-; Under MIT license, see /LICENSE.txt
-
-
-; Cheat sheet for Linux' x86_64 calling convention:
-;
-; - free to overwrite (caller should save them):
-;       rax, rcx, rdx, rsi, rdi, r8-r11, xmm0-xmm15
-; - caller expects be kept (callee should save them):
-;       rbx, rbp, r12-r15
-;
-; - for passing paramters to functions:
-;       rdi, rsi, rdx, rcx, r8, r9, xmm0-xmm7
-; - for getting return values from functions:
-;       rax, rdx, xmm0
-;
-; - for passing parameters to syscalls:
-;       rax, rdi, rsi, rdx, r10, r8, r9
-; - for getting return values from syscalls:
-;       rax, rdx
-; - overwritten by syscalls (all others preserved):
-;       rcx, r11
-
-
-section .text
-
-
-; Relevant system call IDs
-%define SYS_GETTID  186
-%define SYS_FUTEX   202
-
-; Relevant operations for futex
-%define FUTEX_UNLOCK_PI    7
-%define FUTEX_PRIVATE_FLAG 0x80
-
-; Relevant bits for futex dword
-%define FUTEX_TID_MASK   0x3fffffff
-%define FUTEX_OWNER_DIED 0x40000000
-%define FUTEX_WAITERS    0x80000000
-
-
-; Release an acquired lock if we're who acquired it. Argument:
-;	rdi: struct{u32,u32,u32}* = handle of lock to release
-; Returns zero on success, or a standard error code.
-global linen_lock_release
-linen_lock_release:
-		;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-		;;;; Check validity of argument ;;;;
-		;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-		; Return EINVAL if rdi is NULL or invalid
-		mov eax, -22 ; (EINVAL = -22)
-
-		test rdi, rdi
-		jz release_return ; rdi is NULL
-
-		; rdi is nonzero, so let's just assume it's a valid pointer;
-		; if that assumption is wrong we'll get a segmentation fault.
-		; But we don't yet trust that [rdi] is a valid lock handle!
-		; To verify this we check the canary value stored at [rdi + 8].
-		mov ecx, [rdi + 8]
-		cmp ecx, 0xCAFEBABE
-		jnz release_return
-
-		; Lock owners are identified by their TID; let's find ours.
-		; The gettid system call simply returns our Linux thread ID.
-		; See: man 2 gettid
-
-		; gettid: rax = system call ID
-		mov eax, SYS_GETTID
-		; gettid: rax = gettid()
-		syscall
-
-		; Save a copy of our TID (no need for an error check)
-		mov edx, eax
-
-		; Return EPERM if this lock currently doesn't belong to us
-		mov eax, -1 ; (EPERM = -1)
-
-		; Read the futex dword at [rdi] and keep its lowest 30 bits
-		mov ecx, [rdi]
-		and ecx, FUTEX_TID_MASK
-		; Those bits contain the owner's TID; it should be our TID
-		cmp ecx, edx
-		jne release_return
-
-		;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-		;;;; (Partially) release our lock ;;;;
-		;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-		; Decrement the recursion counter. If it's still > 1, we're done here.
-		dec dword [rdi + 4]
-		jnz release_success
-		; If it reaches 0, it's time for a full release by setting [rdi] to 0.
-
-		; Restore our saved TID to eax for "lock cmpxchg" below
-		mov eax, edx
-
-		; Atomically try to set the dword at [rdi] to 0 if it was equal to our TID.
-		; if ([rdi] == eax]) { [rdi] = 0; goto release_success; } else { eax = [rdi]; }
-		xor ecx, ecx
-		lock cmpxchg [rdi], ecx
-		je release_success
-
-		; We failed because [rdi] wasn't equal to our TID. In theory,
-		; that can mean only one thing: [rdi] = (edx | FUTEX_WAITERS).
-		; In that case we need to ask the kernel to wake up the threads
-		; who are waiting (via a futex system call) for [rdi] to change.
-		; See: man 2 futex
-
-		; futex: rdi = uaddr: address of the dword to announce for
-		; futex: rsi = futex_op: which futex operation we want:
-		; - FUTEX_UNLOCK_PI:    wake up one thread sleeping via FUTEX_LOCK_PI
-		; - FUTEX_PRIVATE_FLAG: this lock isn't shared with another process
-		mov esi, (FUTEX_UNLOCK_PI | FUTEX_PRIVATE_FLAG) ; futex: futex_op
-		; futex: rdx = val:     ignored when FUTEX_UNLOCK_PI is used
-		; futex: r10 = timeout: ignored when FUTEX_UNLOCK_PI is used
-		; futex: r8 = uaddr2:   ignored when FUTEX_UNLOCK_PI is used
-		; futex: r9 = val3:     ignored when FUTEX_UNLOCK_PI is used
-		; futex: rax = system call ID
-		mov eax, SYS_FUTEX
-		; futex: rax = futex(rdi, rsi, (rdx), (r10), (r8), (r9))
-		syscall
-
-		; Check result of futex: nonzero means failure
-		test rax, rax
-		jnz release_return
-
-	release_success:
-		xor eax, eax
-
-	release_return:
-		ret
diff --git a/lib/thread_create.asm b/lib/thread_create.asm
deleted file mode 100644
index 9a6fe78..0000000
--- a/lib/thread_create.asm
+++ /dev/null
@@ -1,264 +0,0 @@
-; Under MIT license, see /LICENSE.txt
-
-
-; Cheat sheet for Linux' x86_64 calling convention:
-;
-; - free to overwrite (caller should save them):
-;       rax, rcx, rdx, rsi, rdi, r8-r11, xmm0-xmm15
-; - caller expects be kept (callee should save them):
-;       rbx, rbp, r12-r15
-;
-; - for passing paramters to functions:
-;       rdi, rsi, rdx, rcx, r8, r9, xmm0-xmm7
-; - for getting return values from functions:
-;       rax, rdx, xmm0
-;
-; - for passing parameters to syscalls:
-;       rax, rdi, rsi, rdx, r10, r8, r9
-; - for getting return values from syscalls:
-;       rax, rdx
-; - overwritten by syscalls (all others preserved):
-;       rcx, r11
-
-
-section .text
-
-
-; Relevant system call IDs
-%define SYS_MMAP      9
-%define SYS_MPROTECT 10
-%define SYS_CLONE    56
-%define SYS_EXIT     60
-
-; Relevant flags for mmap
-%define MAP_SHARED    0x00001
-%define MAP_PRIVATE   0x00002
-%define MAP_ANONYMOUS 0x00020
-;%define MAP_GROWSDOWN 0x00100 ; Insecure, segfaults anyway
-%define MAP_LOCKED    0x02000
-%define MAP_POPULATE  0x08000
-%define MAP_STACK     0x20000
-
-; Relevant flags for mprotect
-%define PROT_READ  0x1
-%define PROT_WRITE 0x2
-
-; Relevant flags for clone
-%define CLONE_VM             0x00000100
-%define CLONE_FS             0x00000200
-%define CLONE_FILES          0x00000400
-%define CLONE_SIGHAND        0x00000800
-%define CLONE_PARENT         0x00008000
-%define CLONE_THREAD         0x00010000
-%define CLONE_SYSVSEM        0x00040000
-%define CLONE_SETTLS         0x00080000
-%define CLONE_PARENT_SETTID  0x00100000
-%define CLONE_CHILD_CLEARTID 0x00200000
-%define CLONE_CHILD_SETTID   0x01000000
-%define CLONE_IO             0x80000000
-
-
-%define STACK_SIZE 2097152 ; 2 MiB stack
-%define GUARD_PAGE    4096 ; 4 KiB guard page
-
-
-; Create a new thread executing a given function. Arguments:
-;	rdi: struct{u32,u32}** = where to put the thread handle
-;	rsi: void* (*)(void*)  = function to make the child run
-;	rdx: void*             = single argument for function
-; Returns zero on success, or a standard error code.
-global linen_thread_create
-linen_thread_create:
-		; Callee-save registers
-		push rbx
-
-		;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-		;;;; Check validity of arguments ;;;;
-		;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-		; Return EINVAL if any argument is NULL
-		mov eax, -22 ; (EINVAL = -22)
-		test rdi, rdi
-		jz create_return ; Nowhere to store the thread handle
-		test rsi, rsi
-		jz create_return ; No function for the thread to run
-
-		; Note: we allow rdx to be NULL; in that case the worst that can happen
-		; is a segmentation fault in the user's code (not really our problem).
-
-		;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-		;;;; Allocate a stack and guard page ;;;;
-		;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-		; Save these registers: we'll clobber them for the mmap call
-		mov rbx, rdi
-		push rdx
-		push rsi
-
-		; The mmap system call does many things, in this case allocate memory.
-		; See: man 2 mmap
-
-		; mmap: rdi = addr: address for mapping; 0 lets kernel choose
-		xor edi, edi
-		; mmap: rsi = length: size of buffer to allocate
-		mov esi, (STACK_SIZE + GUARD_PAGE)
-		; mmap: rdx = prot: mprotect-style access permissions
-		mov edx, (PROT_WRITE | PROT_READ)
-		; mmap: r10 = flags: configuration flags for mapping:
-		; - MAP_ANONYMOUS: there is no file backing this buffer
-		; - MAP_PRIVATE:   only this process can see thread's stack
-		; - MAP_STACK:     no-op; inform kernel that this is a stack
-		mov r10, (MAP_ANONYMOUS | MAP_PRIVATE | MAP_STACK)
-		; mmap: r8 = fd: ignored for MAP_ANONYMOUS, recommended -1
-		mov r8, -1
-		; mmap: r9 = offset: should be 0 when MAP_ANONYMOUS is used
-		xor r9, r9
-		; mmap: rax = system call ID
-		mov eax, SYS_MMAP
-		; mmap: rax = mmap(rdi, rsi, rdx, r10, r8, 9)
-		syscall
-
-		; Pop these now before we start branching. Those registers
-		; won't be used by the next system calls, so they're safe.
-		pop r8 ; function
-		pop r9 ; argument
-
-		; Check result of mmap: negative means failure,
-		; otherwise rax is the address of the new mapping.
-		test rax, rax
-		js create_return
-
-		;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-		;;;; Revoke guard page's R/W permissions ;;;;
-		;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-		; Keep in mind that stacks grow downward, so the guard page is at
-		; the lowest address of the newly-allocated buffer, i.e. at [rax].
-
-		; The mprotect system call changes the permissions of a memory region.
-		; See: man 2 mprotect
-
-		; mprotect: rdi = addr: lower address of region to control
-		mov rdi, rax
-		; mprotect: rsi = len: size of region, one page in this case
-		mov esi, GUARD_PAGE
-		; mprotect: rdx = prot: access permissions; zero for none
-		xor edx, edx
-		; mprotect: rax = system call ID
-		mov eax, SYS_MPROTECT
-		; mprotect: rax = mprotect(rdi, rsi, rdx)
-		syscall
-
-		; Check result of mprotect: nonzero means failure
-		test rax, rax
-		jnz create_return
-
-		;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-		;;;; Spawn a thread with the new stack ;;;;
-		;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-		; The clone system call spawns a new thread, cloned from a parent.
-		; Both threads end up running the same code, i.e. it returns "twice",
-		; once in the parent (0 if success) and once in the child (the TID).
-		; See: man 2 clone
-
-		; clone: rsi = stack
-		; Currently rdi points to the lowest byte of the stack area.
-		; Again, stacks grow downward, so we calculate the address of
-		; the top qword to use as the child thread's starting point.
-		lea rsi, [rdi + (STACK_SIZE + GUARD_PAGE - 8)]
-
-		; clone: rdi = flags: settings for cloned thread
-		; These flags make the parent and child share resources:
-		; - CLONE_VM:      memory address space
-		; - CLONE_FS:      filesystem information, e.g. working directory
-		; - CLONE_FILES:   file descriptor table
-		; - CLONE_IO:      I/O scheduler context
-		; - CLONE_SIGHAND: signal handlers
-		; - CLONE_PARENT:  parent process (implied by CLONE_THREAD?)
-		; - CLONE_THREAD:  shared PID, distinguish by TID instead (I think?)
-		; These flags are relevant for a threading API:
-		; - CLONE_CHILD_SETTID:   store child's TID at supplied address (in r10)
-		; - CLONE_CHILD_CLEARTID: set stored TID to zero when child finishes
-		;                         (this will be used for joining threads)
-		mov edi, (CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_IO \
-		        | CLONE_SIGHAND | CLONE_PARENT | CLONE_THREAD \
-		        | CLONE_CHILD_SETTID | CLONE_CHILD_CLEARTID)
-
-		; clone: rdx = parent_tid: ignored unless CLONE_PARENT_SETTID is used
-
-		; clone: r10 = child_tid: address to store new thread's TID
-		; We use "bottom" of stack (rsi), i.e. where child will start.
-		mov r10, rsi
-
-		; clone: r8 = tls: ignored unless CLONE_SETTLS is used
-
-		; clone: rax = system call ID
-		mov eax, SYS_CLONE
-		; clone: rax = clone(rdi, rsi, (rdx), r10, (r8));
-		syscall
-
-		; Ideally, both parent and new-born child are executing this code now.
-
-		; Check result of clone:
-		test rax, rax
-		js create_return   ; Negative means failure
-		jnz create_success ; Positive means we're in the parent thread
-		; Zero means we're in the child thread
-
-		;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-		;;;; Initialization in child thread ;;;;
-		;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-		; Best practice is to clear the frame pointer
-		xor ebp, ebp
-
-		; Move argument into place and call supplied function
-		mov rdi, r9
-		call r8
-
-		; Once done, leave function's return value lying around
-		push rax
-
-		; Exit the thread with return value 0
-		xor edi, edi
-		mov rax, SYS_EXIT
-		syscall ; (never returns)
-
-		;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-		;;;; Clean up in parent thread ;;;;
-		;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-	create_success:
-		; We use the highest dword of the child's stack buffer as a futex
-		; to detect when it has finished (see CLONE_CHILD_CLEARTID above).
-		; That dword's address also acts as a thread handle for our API,
-		; so we store it at the address the caller supplied (now in rbx).
-		mov [rbx], rsi
-
-		; We place a canary value in the unused dword at the top:
-		; checking this value tells us if a thread handle is valid.
-		mov dword [rsi + 4], 0xDEADBEEF
-
-		; "Sketch" of child's stack buffer's layout:
-		;
-		; (bottom of range allocated by mmap)
-		; 4 KiB: guard page, unused
-		; (bottom of usable buffer)
-		; ...
-		; ... Child is currently doing work here ...
-		; ...
-		; qword: return address of function called by child (from r8)
-		; dword: futex to detect when child has returned (address: rsi)
-		; dword: canary value to know if handle is valid (address: rsi + 4)
-		; (top of range allocated by mmap = top of usable buffer)
-
-		; Return 0 for success
-		xor eax, eax
-
-	create_return:
-		; Restore callee-save registers
-		pop rbx
-
-		ret
-
diff --git a/lib/thread_finish.asm b/lib/thread_finish.asm
deleted file mode 100644
index 860b0a4..0000000
--- a/lib/thread_finish.asm
+++ /dev/null
@@ -1,147 +0,0 @@
-; Under MIT license, see /LICENSE.txt
-
-
-; Cheat sheet for Linux' x86_64 calling convention:
-;
-; - free to overwrite (caller should save them):
-;       rax, rcx, rdx, rsi, rdi, r8-r11, xmm0-xmm15
-; - caller expects be kept (callee should save them):
-;       rbx, rbp, r12-r15
-;
-; - for passing paramters to functions:
-;       rdi, rsi, rdx, rcx, r8, r9, xmm0-xmm7
-; - for getting return values from functions:
-;       rax, rdx, xmm0
-;
-; - for passing parameters to syscalls:
-;       rax, rdi, rsi, rdx, r10, r8, r9
-; - for getting return values from syscalls:
-;       rax, rdx
-; - overwritten by syscalls (all others preserved):
-;       rcx, r11
-
-
-section .text
-
-
-; Relevant system call IDs
-%define SYS_MUNMAP  11
-%define SYS_FUTEX  202
-
-; Relevant operations for futex
-%define FUTEX_WAIT         0
-%define FUTEX_PRIVATE_FLAG 0x80
-
-
-%define STACK_SIZE 2097152 ; 2 MiB stack
-%define GUARD_PAGE    4096 ; 4 KiB guard page
-
-
-; Wait for thread to exit, save its return value, and clean up. Arguments:
-;	rdi: struct{u32,u32}* = handle of the thread to wait for
-;	rsi: void**           = where to put void* returned by thread
-; Returns zero on success, or a standard error code.
-global linen_thread_finish
-linen_thread_finish:
-		;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-		;;;; Check validity of arguments ;;;;
-		;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-		; Return EINVAL if rdi is NULL or otherwise invalid
-		mov eax, -22 ; (EINVAL = -22)
-
-		test rdi, rdi
-		jz finish_return ; rdi is NULL
-
-		; rdi is nonzero, so let's just assume it's a valid pointer;
-		; if that assumption is wrong we'll get a segmentation fault.
-		; But we don't yet trust that [rdi] is a valid thread handle!
-		; To verify this we check the canary value stored at [rdi + 4].
-		mov ecx, [rdi + 4]
-		cmp ecx, 0xDEADBEEF
-		jnz finish_return
-
-		;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-		;;;; Wait until thread is finished ;;;;
-		;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-		; We'll clobber rsi if we need to set up a futex call
-		mov r8, rsi
-
-	finish_retry:
-		; When spawning, we set CLONE_CHILD_SETTID and CLONE_CHILD_CLEARTID:
-		; [rdi] contains the child thread's TID, and will get automatically
-		; cleared (to 0) when the child exits; this is what we'll watch for.
-
-		; Atomically check whether the target thread is still running.
-		; if ([rdi] == 0) { goto finish_success; } else { eax = [rdi]; }
-		xor eax, eax
-		lock cmpxchg [rdi], eax
-		jz finish_success
-
-		; The thread is still busy, so block until it's done.
-		; The futex system call waits until the dword at an
-		; address (rdi) deviates from an expected value (eax).
-		; See: man 2 futex
-
-		; futex: rdi = uaddr: address of the dword to watch
-		; futex: rsi = futex_op: which futex operation we want:
-		; - FUTEX_WAIT:         block until the value at [rdi] changes
-		; - FUTEX_PRIVATE_FLAG: FIXME waits forever, I don't understand why
-		mov esi, FUTEX_WAIT
-		; futex: rdx = val: the expected value at [rdi] before it changes
-		mov edx, eax
-		; futex: r10 = timeout: in case we had a deadline (we don't)
-		xor r10, r10
-		; futex: r8 = uaddr2: ignored when FUTEX_WAIT is used
-		; futex: r9 = val3:   ignored when FUTEX_WAIT is used
-		; futex: rax = system call ID
-		mov eax, SYS_FUTEX
-		; futex: rax = futex(rdi, rsi, rdx, r10, (r8), (r9))
-		syscall
-
-		; Sometimes the thread exits after the "lock cmpxchg" instruction
-		; but before the futex call. In that case, futex returns EAGAIN.
-		cmp rax, -11 ; (EAGAIN = -11)
-		je finish_retry
-
-		; Any other nonzero return value means failure
-		test rax, rax
-		jnz finish_return
-
-		;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-		;;;; Clean up after thread's exit ;;;;
-		;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-	finish_success:
-		; The thread left its function return value on the stack, read it
-		mov rdx, [rdi - 8]
-
-		; The munmap system call destroys mappings created by mmap.
-		; In this case that means deallocating the stack buffer.
-		; See: man 2 munmap
-
-		; munmap: rdi = addr: lowest address of region to unmap
-		; Our rdi is near the buffer's top, so we must subtract
-		sub rdi, (STACK_SIZE + GUARD_PAGE - 8)
-		; munmap: rsi = length: size of region starting from rdi
-		mov esi, (STACK_SIZE + GUARD_PAGE)
-		; munmap: rax = system call ID
-		mov eax, SYS_MUNMAP
-		; munmap: rax = munmap(rdi, rsi)
-		syscall
-
-		; Check result of munmap: nonzero means failure
-		test rax, rax
-		jnz finish_return
-
-		; Check if caller gave a location (r8) to save the return value (rdx)
-		test r8, r8
-		jz finish_return ; caller doesn't care: gave NULL pointer
-		mov [r8], rdx
-		; Note: if munmap failed, the buffer is still there, so we
-		; can safely return an error without losing the return value.
-
-	finish_return:
-		ret
-