summaryrefslogtreecommitdiff
path: root/lib/thread_finish.asm
diff options
context:
space:
mode:
Diffstat (limited to 'lib/thread_finish.asm')
-rw-r--r--lib/thread_finish.asm161
1 files changed, 161 insertions, 0 deletions
diff --git a/lib/thread_finish.asm b/lib/thread_finish.asm
new file mode 100644
index 0000000..ee6d676
--- /dev/null
+++ b/lib/thread_finish.asm
@@ -0,0 +1,161 @@
+; MIT license:
+;
+; Copyright (c) 2023 Marcus R.A. Newman (prefetch@prefetch.eu)
+;
+; Permission is hereby granted, free of charge, to any person obtaining a copy
+; of this software and associated documentation files (the "Software"), to deal
+; in the Software without restriction, including without limitation the rights
+; to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+; copies of the Software, and to permit persons to whom the Software is
+; furnished to do so, subject to the following conditions:
+;
+; The above copyright notice and this permission notice shall be included in
+; all copies or substantial portions of the Software.
+;
+; THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+; OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+; SOFTWARE.
+
+
+; Cheat sheet for Linux' x86_64 calling convention:
+;
+; - free to overwrite; the caller should save them:
+; rax, rcx, rdx, rsi, rdi, r8-r11, xmm0-xmm15
+; - caller expects no change; callee should save them:
+; rbx, rbp, r12-r15
+;
+; - for passing parameters to functions:
+; rdi, rsi, rdx, rcx, r8, r9, xmm0-xmm7
+; - for getting return values from functions:
+; rax, rdx, xmm0
+;
+; - for passing parameters to syscalls:
+; rax, rdi, rsi, rdx, r10, r8, r9
+; - for getting return values from syscalls:
+; rax, rdx
+; - overwritten by syscalls (all others preserved):
+; rcx, r11
+
+
+; Everything below is executable code, so it is assembled into .text
+section .text
+
+
+; Relevant system call IDs (Linux x86_64 syscall numbers, passed in rax)
+%define SYS_MUNMAP 11
+%define SYS_FUTEX 202
+
+; Relevant flags for futex (values for the futex_op argument; see: man 2 futex)
+%define FUTEX_WAIT 0x00
+%define FUTEX_PRIVATE_FLAG 0x80
+
+
+; Size of the per-thread region that linen_thread_finish unmaps.
+; NOTE(review): presumably these must match the sizes used by the code
+; that spawns the thread and maps its stack -- confirm against that code.
+%define STACK_SIZE 2097152 ; 2 MiB stack
+%define GUARD_PAGE 4096 ; 4 KiB guard page
+
+
+; Wait for a thread to exit, save its return value, and clean up.
+;
+; ABI: System V AMD64 (Linux). Leaf function; uses no stack space of its own.
+; Arguments:
+; rdi: u32* = handle of the thread to wait for
+; rsi: void** = where to put the void* returned by the thread (may be NULL)
+; Returns (rax): zero on success, or a negative standard error code:
+; -EINVAL if rdi is NULL or does not carry the handle canary,
+; otherwise the error returned by the failing futex/munmap call.
+; Clobbers: rcx, rdx, rsi, rdi, r8, r10, r11, flags
+;
+; Memory layout this function relies on, relative to the handle in rdi:
+; [rdi - 8] qword: the void* left behind by the finished thread
+; [rdi] dword: the thread's TID, cleared to 0 by the kernel on exit
+; [rdi + 4] dword: canary marking a valid handle
+; The mapping to free starts at rdi - (STACK_SIZE + GUARD_PAGE - 8).
+
+; Error codes handled explicitly below (see: man 3 errno)
+%define EINTR 4
+%define EAGAIN 11
+%define EINVAL 22
+
+; Canary value expected at [rdi + 4] for a valid thread handle
+%define HANDLE_CANARY 0xDEADBEEF
+
+global linen_thread_finish
+linen_thread_finish:
+    ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+    ;;;; Check validity of arguments ;;;;
+    ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+    ; Return -EINVAL if rdi is NULL or otherwise invalid.
+    ; The full 64-bit rax is written so that this error is sign-extended
+    ; exactly like the negative values the system calls below return.
+    mov rax, -EINVAL
+
+    test rdi, rdi
+    jz .end ; rdi is NULL
+
+    ; rdi is nonzero, so let's just assume it's a valid pointer.
+    ; If that assumption is wrong we'll get a segmentation fault.
+    ; But we don't yet trust that [rdi] is a valid thread handle!
+    ; To verify this, we check the canary value stored at [rdi + 4].
+    mov ecx, [rdi + 4]
+    cmp ecx, HANDLE_CANARY
+    jne .end
+
+    ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+    ;;;; Wait until thread is finished ;;;;
+    ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+    ; rsi is needed for the futex call, so keep the caller's pointer in r8.
+    ; r8 survives both system calls: the kernel only clobbers rax, rcx, r11,
+    ; and futex ignores its r8 argument (uaddr2) for FUTEX_WAIT.
+    mov r8, rsi
+
+    ; When spawning, we set CLONE_CHILD_SETTID and CLONE_CHILD_CLEARTID:
+    ; [rdi] contains the child thread's TID, and will get automatically
+    ; cleared (to 0) when the child exits; this is what we'll watch for.
+
+  .wait:
+    ; Atomically check whether the target thread is still running.
+    ; if ([rdi] == 0) { goto .done; } else { eax = [rdi]; }
+    xor eax, eax
+    lock cmpxchg [rdi], eax
+    jz .done
+
+    ; The thread is still busy, so block until it's done.
+    ; The futex system call sleeps only if the dword at an address (rdi)
+    ; still holds the expected value (edx), until someone wakes it up.
+    ; See: man 2 futex
+
+    ; futex: rdi = uaddr: address of the dword to watch
+    ; futex: rsi = futex_op: which futex operation we want
+    ; - FUTEX_WAIT: block until woken, provided [rdi] still equals rdx
+    ; - FUTEX_PRIVATE_FLAG: must NOT be set here. The wake-up that the
+    ;   kernel performs for CLONE_CHILD_CLEARTID is a plain (non-private)
+    ;   FUTEX_WAKE (see: man 2 set_tid_address), which does not reach a
+    ;   waiter that used the private flag, so such a wait never returns.
+    mov esi, FUTEX_WAIT
+    ; futex: rdx = val: the TID we just observed at [rdi]
+    mov edx, eax
+    ; futex: r10 = timeout: NULL, because we have no deadline
+    xor r10d, r10d
+    ; futex: r8 = uaddr2: ignored when FUTEX_WAIT is used
+    ; futex: r9 = val3: ignored when FUTEX_WAIT is used
+    ; futex: rax = system call ID
+    mov eax, SYS_FUTEX
+    ; futex: rax = futex(rdi, rsi, rdx, r10, (r8), (r9))
+    syscall
+
+    ; None of the following results prove that the thread is gone or that
+    ; something went wrong, so go back and look at [rdi] again:
+    ; - 0: we were woken up, possibly spuriously
+    ; - -EAGAIN: [rdi] changed before we fell asleep, i.e. the thread
+    ;   exited between our check above and the system call
+    ; - -EINTR: the wait was interrupted by a signal
+    test rax, rax
+    jz .wait
+    cmp rax, -EAGAIN
+    je .wait
+    cmp rax, -EINTR
+    je .wait
+
+    ; Any other result is a genuine futex failure: hand it to the caller.
+    ; Nothing has been unmapped, so the handle is still intact.
+    ret
+
+    ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+    ;;;; Clean up after thread's exit ;;;;
+    ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+  .done:
+    ; The thread left its function return value on the stack, read it
+    ; now: after munmap succeeds that memory is no longer accessible.
+    ; rdx is preserved across the system call below.
+    mov rdx, [rdi - 8]
+
+    ; The munmap system call destroys mappings created by mmap.
+    ; In this case that means deallocating the stack buffer.
+    ; See: man 2 munmap
+
+    ; munmap: rdi = addr: lowest address of region to unmap
+    ; Our rdi is near the buffer's top, so we must subtract
+    sub rdi, (STACK_SIZE + GUARD_PAGE - 8)
+    ; munmap: rsi = length: size of region starting from rdi
+    mov esi, (STACK_SIZE + GUARD_PAGE)
+    ; munmap: rax = system call ID
+    mov eax, SYS_MUNMAP
+    ; munmap: rax = munmap(rdi, rsi)
+    syscall
+
+    ; Check result of munmap: nonzero means failure.
+    ; Note: if munmap failed, the buffer is still there, so we
+    ; can safely return an error without losing the return value.
+    test rax, rax
+    jnz .end
+
+    ; Check if caller gave a location (r8) to save the return value (rdx).
+    ; rax is already 0 here, which is our success return value.
+    test r8, r8
+    jz .end ; caller doesn't care: gave NULL pointer
+    mov [r8], rdx
+
+  .end:
+    ret
+