; Under MIT license, see /LICENSE.txt

; Syntax: NASM (Intel operand order).  Target: Linux x86_64, System V ABI.
;
; Cheat sheet for Linux' x86_64 calling convention:
;
;  - free to overwrite (caller should save them):
;        rax, rcx, rdx, rsi, rdi, r8-r11, xmm0-xmm15
;  - caller expects to be kept (callee should save them):
;        rbx, rbp, r12-r15
;
;  - for passing parameters to functions:
;        rdi, rsi, rdx, rcx, r8, r9, xmm0-xmm7
;  - for getting return values from functions:
;        rax, rdx, xmm0
;
;  - for passing parameters to syscalls:
;        rax, rdi, rsi, rdx, r10, r8, r9
;  - for getting return values from syscalls:
;        rax, rdx
;  - overwritten by syscalls (all others preserved):
;        rcx, r11

section .text

; Relevant system call IDs
%define SYS_MUNMAP 11
%define SYS_FUTEX 202

; Relevant operations for futex
%define FUTEX_WAIT 0
%define FUTEX_PRIVATE_FLAG 0x80

; Error numbers; system calls (and this routine) return them negated
%define EAGAIN 11
%define EINVAL 22

%define STACK_SIZE 2097152 ; 2 MiB stack
%define GUARD_PAGE 4096    ; 4 KiB guard page

; Layout around the thread handle (the pointer passed in rdi):
;   [handle - 8]  qword: void* returned by the thread's function
;   [handle + 0]  dword: thread's TID, zeroed by the kernel when it exits
;   [handle + 4]  dword: canary marking the handle as valid
%define HANDLE_RETVAL  (-8)
%define HANDLE_CANARY  4
%define CANARY_VALUE   0xDEADBEEF

; Size of the mapping holding the stack, and the distance from its
; lowest address up to the handle (the handle is its last 8 bytes).
%define MAPPING_SIZE   (STACK_SIZE + GUARD_PAGE)
%define HANDLE_TO_BASE (MAPPING_SIZE - 8)

;-----------------------------------------------------------------------
; linen_thread_finish
;
; Wait for thread to exit, save its return value, and clean up.
;
; In:    rdi: struct{u32,u32}* = handle of the thread to wait for
;        rsi: void**           = where to put void* returned by thread
;                                (may be NULL if the caller doesn't care)
; Out:   eax = 0 on success, otherwise a negated error number:
;          -EINVAL  rdi is NULL or the canary at [rdi + 4] is wrong
;          other    whatever futex or munmap reported (for example
;                   -EINTR if a signal interrupted the wait); in these
;                   cases the handle is left valid, so the call can
;                   simply be repeated.
; Clobb: rax, rcx, rdx, rsi, rdi, r8, r10, r11, flags
; Stack: not used (leaf routine, only raw system calls)
;-----------------------------------------------------------------------
global linen_thread_finish
linen_thread_finish:

    ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
    ;;;; Check validity of arguments ;;;;
    ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

    ; Return -EINVAL if rdi is NULL or otherwise invalid
    mov     eax, -EINVAL
    test    rdi, rdi
    jz      finish_return               ; rdi is NULL

    ; rdi is nonzero, so let's just assume it's a valid pointer;
    ; if that assumption is wrong we'll get a segmentation fault.
    ; But we don't yet trust that [rdi] is a valid thread handle!
    ; To verify this we check the canary value stored next to the TID.
    cmp     dword [rdi + HANDLE_CANARY], CANARY_VALUE
    jne     finish_return

    ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
    ;;;; Wait until thread is finished ;;;;
    ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

    ; rsi is needed for the futex operation, so park the caller's
    ; out-pointer in r8 (futex ignores r8 for FUTEX_WAIT, and system
    ; calls preserve it).
    mov     r8, rsi

finish_retry:
    ; When spawning, we set CLONE_CHILD_SETTID and CLONE_CHILD_CLEARTID:
    ; [rdi] contains the child thread's TID, and will get automatically
    ; cleared (to 0) when the child exits; this is what we'll watch for.

    ; Atomically check whether the target thread is still running.
    ; if ([rdi] == 0) { goto finish_success; } else { eax = [rdi]; }
    xor     eax, eax
    lock cmpxchg [rdi], eax
    jz      finish_success

    ; The thread is still busy, so block until it's done.
    ; The futex system call waits until the dword at an
    ; address (rdi) deviates from an expected value (edx).
    ; See: man 2 futex

    ; futex: rdi = uaddr: address of the dword to watch
    ; futex: rsi = futex_op: which futex operation we want.
    ; Do NOT add FUTEX_PRIVATE_FLAG here, it makes us wait forever.
    ; Presumably that is because the wake-up done for
    ; CLONE_CHILD_CLEARTID is a plain FUTEX_WAKE without the private
    ; flag (see man 2 set_tid_address), which does not match a
    ; private waiter.
    mov     esi, FUTEX_WAIT
    ; futex: rdx = val: the expected value at [rdi] before it changes
    mov     edx, eax
    ; futex: r10 = timeout: in case we had a deadline (we don't)
    xor     r10d, r10d
    ; futex: r8 = uaddr2: ignored when FUTEX_WAIT is used
    ; futex: r9 = val3: ignored when FUTEX_WAIT is used
    ; futex: rax = system call ID
    mov     eax, SYS_FUTEX
    ; futex: rax = futex(rdi, rsi, rdx, r10, (r8), (r9))
    syscall

    ; A return value of 0 means we were woken up, but futex(2) warns
    ; that this can be spurious: never trust it, re-read [rdi] instead.
    ; Unmapping the stack of a thread that is still running would be
    ; fatal, so always go back to the atomic check above.
    test    eax, eax
    jz      finish_retry

    ; Sometimes the thread exits after the "lock cmpxchg" instruction
    ; but before the futex call. In that case, futex returns -EAGAIN.
    cmp     eax, -EAGAIN
    je      finish_retry

    ; Any other return value is a failure; hand it to the caller.
    ; Nothing has been modified yet, so the handle is still valid.
    ret

    ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
    ;;;; Clean up after thread's exit ;;;;
    ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

finish_success:
    ; The thread left its function return value on the stack, read it
    ; now: this memory is about to disappear. rdx survives the syscall.
    mov     rdx, [rdi + HANDLE_RETVAL]

    ; Remove the canary value so the thread handle becomes invalid.
    ; We're about to deallocate this memory anyway, so it's optional,
    ; but it's cheap, so play it safe.
    mov     dword [rdi + HANDLE_CANARY], 0

    ; The munmap system call destroys mappings created by mmap.
    ; In this case that means deallocating the stack buffer.
    ; See: man 2 munmap

    ; munmap: rdi = addr: lowest address of region to unmap
    ; Our rdi is near the buffer's top, so we must subtract
    sub     rdi, HANDLE_TO_BASE
    ; munmap: rsi = length: size of region starting from rdi
    mov     esi, MAPPING_SIZE
    ; munmap: rax = system call ID
    mov     eax, SYS_MUNMAP
    ; munmap: rax = munmap(rdi, rsi)
    syscall

    ; Check result of munmap: nonzero means failure
    test    eax, eax
    jnz     finish_unmap_failed

    ; Check if caller gave a location (r8) to save the return value (rdx)
    test    r8, r8
    jz      finish_return               ; caller doesn't care: gave NULL
    mov     [r8], rdx

finish_return:
    ; eax = 0 when falling through from above, or -EINVAL when we
    ; jumped here from the argument checks.
    ret

finish_unmap_failed:
    ; munmap failed, so the buffer is assumed to be still mapped.
    ; Put the canary back so the handle is valid again and the caller
    ; can retry without losing the return value. rdi now holds the
    ; buffer's lowest address, hence the extra offset. eax keeps the
    ; error code from munmap.
    mov     dword [rdi + HANDLE_TO_BASE + HANDLE_CANARY], CANARY_VALUE
    ret