1 files changed, 147 insertions, 0 deletions
diff --git a/src/thread_finish.asm b/src/thread_finish.asm
new file mode 100644
index 0000000..860b0a4
--- /dev/null
+++ b/src/thread_finish.asm
@@ -0,0 +1,147 @@
+; Under MIT license, see /LICENSE.txt
+
+
+; Cheat sheet for Linux' x86_64 calling convention:
+;
+; - free to overwrite (caller should save them):
+;       rax, rcx, rdx, rsi, rdi, r8-r11, xmm0-xmm15
+; - caller expects these to be kept (callee should save them):
+;       rbx, rbp, r12-r15
+;
+; - for passing parameters to functions:
+;       rdi, rsi, rdx, rcx, r8, r9, xmm0-xmm7
+; - for getting return values from functions:
+;       rax, rdx, xmm0
+;
+; - for passing parameters to syscalls:
+;       rax, rdi, rsi, rdx, r10, r8, r9
+; - for getting return values from syscalls:
+;       rax only (a value in -4095..-1 is a negated errno)
+; - overwritten by syscalls (all others preserved):
+;       rcx, r11
+
+
+section .text
+
+
+; Linux x86_64 system call numbers used in this file
+%define SYS_MUNMAP 11
+%define SYS_FUTEX  202
+
+; futex_op values (see: man 2 futex)
+%define FUTEX_WAIT         0
+%define FUTEX_PRIVATE_FLAG 128
+
+; Size of a thread's stack and of its guard page, in bytes
+%define STACK_SIZE (2 * 1024 * 1024) ; 2 MiB stack
+%define GUARD_PAGE (4 * 1024)        ; 4 KiB guard page
+
+
+; Wait for thread to exit, save its return value, and clean up. Arguments:
+;	rdi: struct{u32,u32}* = handle of the thread to wait for
+;	rsi: void**           = where to put void* returned by thread (or NULL)
+; Returns zero on success, or a negated errno value: -EINVAL for a NULL or
+; corrupt handle, otherwise whatever error futex or munmap reported.
+; Clobbers rax, rcx, rdx, rsi, rdi, r8, r10, r11 and flags (all caller-saved).
+; The handle lies inside the region that gets unmapped here (it is the last
+; 8 bytes of it): never touch the handle again after a successful return.
+
+; Error codes (returned negated, the way the kernel reports them)
+%define EINTR   4
+%define EAGAIN 11
+%define EINVAL 22
+%define HANDLE_CANARY 0xDEADBEEF ; expected at [handle + 4]
+
+global linen_thread_finish
+linen_thread_finish:
+		;;;; Check validity of arguments ;;;;
+		; Return EINVAL (as a full 64-bit value) if rdi is NULL or invalid
+		mov rax, -EINVAL
+		test rdi, rdi
+		jz finish_return ; rdi is NULL
+
+		; rdi is nonzero, so let's just assume it's a valid pointer;
+		; if that assumption is wrong we'll get a segmentation fault.
+		; But we don't yet trust that [rdi] is a valid thread handle!
+		; To verify this we check the canary value stored at [rdi + 4].
+		cmp dword [rdi + 4], HANDLE_CANARY
+		jne finish_return
+
+		;;;; Wait until thread is finished ;;;;
+		; futex needs rsi, so park the result pointer in r8 (FUTEX_WAIT ignores it)
+		mov r8, rsi
+
+	finish_retry:
+		; When spawning, we set CLONE_CHILD_SETTID and CLONE_CHILD_CLEARTID:
+		; [rdi] contains the child thread's TID, and will get automatically
+		; cleared (to 0) when the child exits; this is what we'll watch for.
+
+		; Atomically check whether the target thread is still running.
+		; if ([rdi] == 0) { goto finish_success; } else { eax = [rdi]; }
+		xor eax, eax
+		lock cmpxchg [rdi], eax
+		jz finish_success
+
+		; The thread is still busy, so block until it's done. The futex
+		; system call sleeps, provided the dword at an address (rdi) still
+		; holds an expected value (eax), until woken. See: man 2 futex
+		; futex: rdi = uaddr: address of the dword to watch
+		; futex: rsi = futex_op: which futex operation we want:
+		; - FUTEX_WAIT: sleep until woken, if [rdi] still equals val
+		; - FUTEX_PRIVATE_FLAG must stay off: the kernel's wake-up for
+		;   CLONE_CHILD_CLEARTID is a shared (non-private) FUTEX_WAKE,
+		;   which private waiters never receive, so we'd wait forever
+		mov esi, FUTEX_WAIT
+		; futex: rdx = val: the value we expect [rdi] to still hold
+		mov edx, eax
+		xor r10d, r10d ; futex: r10 = timeout: NULL, we have no deadline
+		; futex: r8 = uaddr2, r9 = val3: both ignored by FUTEX_WAIT
+		mov eax, SYS_FUTEX ; futex: rax = system call ID
+		; futex: rax = futex(rdi, rsi, rdx, r10, (r8), (r9))
+		syscall
+
+		; A zero result only says we were woken, which may be spurious
+		; (see man 2 futex), so go back and look at [rdi] again.
+		test rax, rax
+		jz finish_retry
+		; Sometimes the thread exits after the "lock cmpxchg" instruction
+		; but before the futex call. In that case, futex returns EAGAIN.
+		cmp rax, -EAGAIN
+		je finish_retry
+		; A signal interrupted the wait (EINTR): simply wait again
+		cmp rax, -EINTR
+		je finish_retry
+		; Anything else is a real futex failure: return it as-is
+		ret
+
+		;;;; Clean up after thread's exit ;;;;
+	finish_success:
+		; The thread left its function return value on the stack, read it
+		; (into rdx, which the munmap system call below leaves untouched)
+		mov rdx, [rdi - 8]
+
+		; The munmap system call destroys mappings created by mmap; here that
+		; means deallocating the thread's stack buffer. See: man 2 munmap
+		; munmap: rdi = addr: lowest address of region to unmap
+		; Our rdi is near the buffer's top, so we must subtract
+		sub rdi, (STACK_SIZE + GUARD_PAGE - 8)
+		; munmap: rsi = length: size of region starting from rdi
+		mov esi, (STACK_SIZE + GUARD_PAGE)
+		mov eax, SYS_MUNMAP ; munmap: rax = system call ID
+		; munmap: rax = munmap(rdi, rsi)
+		syscall
+
+		; Check result of munmap: nonzero means failure
+		test rax, rax
+		jnz finish_return
+		; Note: if munmap failed, the buffer is still there, so we
+		; can safely return an error without losing the return value.
+
+		; Check if caller gave a location (r8) to save the return value (rdx)
+		test r8, r8
+		jz finish_return ; caller doesn't care: gave NULL pointer
+		mov [r8], rdx
+
+	finish_return:
+		ret
+