summaryrefslogtreecommitdiff
path: root/lib
diff options
context:
space:
mode:
Diffstat (limited to 'lib')
-rw-r--r--lib/lock_acquire.asm204
-rw-r--r--lib/lock_release.asm132
-rw-r--r--lib/thread_create.asm264
-rw-r--r--lib/thread_finish.asm147
4 files changed, 0 insertions, 747 deletions
diff --git a/lib/lock_acquire.asm b/lib/lock_acquire.asm
deleted file mode 100644
index f32ba6a..0000000
--- a/lib/lock_acquire.asm
+++ /dev/null
@@ -1,204 +0,0 @@
-; Under MIT license, see /LICENSE.txt
-
-
-; Cheat sheet for Linux' x86_64 calling convention:
-;
-; - free to overwrite (caller should save them):
-; rax, rcx, rdx, rsi, rdi, r8-r11, xmm0-xmm15
-; - caller expects these to be kept (callee should save them):
-; rbx, rbp, r12-r15
-;
-; - for passing parameters to functions:
-; rdi, rsi, rdx, rcx, r8, r9, xmm0-xmm7
-; - for getting return values from functions:
-; rax, rdx, xmm0
-;
-; - for passing parameters to syscalls:
-; rax, rdi, rsi, rdx, r10, r8, r9
-; - for getting return values from syscalls:
-; rax, rdx
-; - overwritten by syscalls (all others preserved):
-; rcx, r11
-
-
-section .text
-
-
-; Relevant system call IDs
-%define SYS_GETTID 186
-%define SYS_FUTEX 202
-
-; Relevant operations for futex
-%define FUTEX_LOCK_PI 6
-%define FUTEX_PRIVATE_FLAG 0x80
-
-; Relevant bits for futex dword
-%define FUTEX_TID_MASK 0x3fffffff
-%define FUTEX_OWNER_DIED 0x40000000
-%define FUTEX_WAITERS 0x80000000
-
-; Error numbers (used negated, as raw system calls report them)
-%define EAGAIN 11
-%define EINVAL 22
-
-; Layout of a lock handle, struct{u32,u32,u32}:
-;   LOCK_FUTEX:  futex dword (owner's TID plus kernel flag bits)
-;   LOCK_COUNT:  recursion depth of the current owner
-;   LOCK_CANARY: must contain LOCK_CANARY_VALUE for a valid handle
-%define LOCK_FUTEX 0
-%define LOCK_COUNT 4
-%define LOCK_CANARY 8
-%define LOCK_CANARY_VALUE 0xCAFEBABE
-
-; Number of user-space attempts before we go to sleep in the kernel
-%define SPIN_ATTEMPTS 10
-
-
-; Acquire a lock if possible, or wait until it gets released.
-; The lock is recursive: its owner may acquire it again, and must then
-; release it equally often.
-;
-; ABI:      System V AMD64 (Linux); leaf function, stack untouched
-; In:       rdi: struct{u32,u32,u32}* = handle of lock to acquire
-; Out:      eax = 0 on success, or a negative error code:
-;           -EINVAL if rdi is NULL or the handle's canary is wrong,
-;           otherwise the error reported by the futex system call
-; Clobbers: rcx, rdx, rsi, r10, r11, flags
-global linen_lock_acquire
-linen_lock_acquire:
-    ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-    ;;;; Check validity of argument ;;;;
-    ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-    ; Preload the error code so the early exits can return directly
-    mov eax, -EINVAL
-
-    test rdi, rdi
-    jz .return ; rdi is NULL
-
-    ; rdi is nonzero, so we assume it is a readable pointer (if it
-    ; isn't, the load below faults). A matching canary tells us that
-    ; it really points to a lock handle.
-    mov ecx, [rdi + LOCK_CANARY]
-    cmp ecx, LOCK_CANARY_VALUE
-    jne .return
-
-    ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-    ;;;; Check ownership of lock ;;;;
-    ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-    ; Lock owners are identified by their TID; gettid returns ours.
-    ; See: man 2 gettid
-    mov eax, SYS_GETTID
-    syscall
-
-    ; edx = our TID for the rest of the function (gettid cannot fail)
-    mov edx, eax
-
-    ; The futex dword at [rdi] tells us which situation we are in:
-    ; - Case 1: it is zero, so the lock is free.
-    ; - Case 2: its lower 30 bits contain our TID, so we already own
-    ;           the lock (recursive acquisition).
-    ; - Case 3: another thread owns it and a kernel flag bit is set,
-    ;           so kernel intervention is required.
-    ; - Case 4: another thread owns it and no flag bit is set, so we
-    ;           spin briefly before asking the kernel.
-
-    ; if ([rdi] == 0) { [rdi] = edx; ZF = 1; } else { eax = [rdi]; }
-    xor eax, eax
-    lock cmpxchg [rdi + LOCK_FUTEX], edx
-    je .acquired_fresh ; case 1
-
-    ; Ownership must be tested BEFORE the flag bits: if we own the lock
-    ; while FUTEX_WAITERS (or FUTEX_OWNER_DIED) is set, FUTEX_LOCK_PI
-    ; would refuse with EDEADLK instead of letting us recurse.
-    mov ecx, eax
-    and ecx, FUTEX_TID_MASK
-    cmp ecx, edx
-    je .acquired_again ; case 2
-
-    ; The following flags are maintained by the kernel:
-    ; - FUTEX_OWNER_DIED: the lock's owner died while holding it
-    ; - FUTEX_WAITERS: other threads are already sleeping on this lock
-    ; Either way user space cannot take the lock on its own.
-    test eax, (FUTEX_OWNER_DIED | FUTEX_WAITERS)
-    jnz .futex ; case 3
-
-    ; Case 4: system calls are expensive, so first retry a few times
-    ; in user space, hoping the owner releases the lock soon.
-    mov ecx, SPIN_ATTEMPTS
-.spin:
-    ; "pause" is the hint intended for spin-wait loops
-    pause
-
-    ; if ([rdi] == 0) { [rdi] = edx; ZF = 1; } else { eax = [rdi]; }
-    xor eax, eax
-    lock cmpxchg [rdi + LOCK_FUTEX], edx
-    je .acquired_fresh
-
-    dec ecx
-    jnz .spin
-
-    ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-    ;;;; Let the kernel handle it ;;;;
-    ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-.futex:
-    ; FUTEX_LOCK_PI blocks until the lock can be handed to us; see the
-    ; section on priority-inheritance futexes in: man 2 futex
-
-    ; futex: rdi = uaddr: address of the futex dword (LOCK_FUTEX is 0)
-    ; futex: rsi = futex_op:
-    ; - FUTEX_LOCK_PI: block until the owner uses FUTEX_UNLOCK_PI
-    ; - FUTEX_PRIVATE_FLAG: this lock isn't shared with another process
-    mov esi, (FUTEX_LOCK_PI | FUTEX_PRIVATE_FLAG)
-    ; futex: r10 = timeout: NULL, we wait without a deadline
-    xor r10d, r10d
-    ; futex: rdx = val, r8 = uaddr2, r9 = val3: ignored by FUTEX_LOCK_PI
-    mov eax, SYS_FUTEX
-    ; futex: rax = futex(rdi, rsi, (rdx), r10, (r8), (r9))
-    syscall
-
-    ; The futex dword may change between our last look at it and the
-    ; system call; the kernel then answers EAGAIN and we simply retry.
-    cmp rax, -EAGAIN
-    je .futex
-
-    ; Any other nonzero result is an error, returned to the caller as is
-    test rax, rax
-    jnz .return
-
-    ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-    ;;;; Update the recursion counter ;;;;
-    ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-.acquired_fresh:
-    ; We just became the owner, so the depth is exactly 1. Writing it
-    ; (instead of incrementing) also discards a stale counter left
-    ; behind by an owner that died. No atomics needed: only the owner
-    ; touches the counter.
-    mov dword [rdi + LOCK_COUNT], 1
-    xor eax, eax
-    ret
-
-.acquired_again:
-    ; We already owned the lock: one level deeper
-    inc dword [rdi + LOCK_COUNT]
-    xor eax, eax
-
-.return:
-    ret
diff --git a/lib/lock_release.asm b/lib/lock_release.asm
deleted file mode 100644
index f86caa2..0000000
--- a/lib/lock_release.asm
+++ /dev/null
@@ -1,132 +0,0 @@
-; Under MIT license, see /LICENSE.txt
-
-
-; Cheat sheet for Linux' x86_64 calling convention:
-;
-; - free to overwrite (caller should save them):
-; rax, rcx, rdx, rsi, rdi, r8-r11, xmm0-xmm15
-; - caller expects these to be kept (callee should save them):
-; rbx, rbp, r12-r15
-;
-; - for passing parameters to functions:
-; rdi, rsi, rdx, rcx, r8, r9, xmm0-xmm7
-; - for getting return values from functions:
-; rax, rdx, xmm0
-;
-; - for passing parameters to syscalls:
-; rax, rdi, rsi, rdx, r10, r8, r9
-; - for getting return values from syscalls:
-; rax, rdx
-; - overwritten by syscalls (all others preserved):
-; rcx, r11
-
-
-section .text
-
-
-; Relevant system call IDs
-%define SYS_GETTID 186
-%define SYS_FUTEX 202
-
-; Relevant operations for futex
-%define FUTEX_UNLOCK_PI 7
-%define FUTEX_PRIVATE_FLAG 0x80
-
-; Relevant bits for futex dword
-%define FUTEX_TID_MASK 0x3fffffff
-%define FUTEX_OWNER_DIED 0x40000000
-%define FUTEX_WAITERS 0x80000000
-
-; Error numbers (used negated)
-%define EPERM 1
-%define EINVAL 22
-
-; Layout of a lock handle, struct{u32,u32,u32}:
-;   LOCK_FUTEX:  futex dword (owner's TID plus kernel flag bits)
-;   LOCK_COUNT:  recursion depth of the current owner
-;   LOCK_CANARY: must contain LOCK_CANARY_VALUE for a valid handle
-%define LOCK_FUTEX 0
-%define LOCK_COUNT 4
-%define LOCK_CANARY 8
-%define LOCK_CANARY_VALUE 0xCAFEBABE
-
-
-; Undo one acquisition of a lock, provided the calling thread owns it.
-; The lock only becomes free once every recursive acquisition is undone.
-;
-; ABI:      System V AMD64 (Linux); leaf function, stack untouched
-; In:       rdi: struct{u32,u32,u32}* = handle of lock to release
-; Out:      eax = 0 on success, or a negative error code:
-;           -EINVAL if rdi is NULL or the handle's canary is wrong,
-;           -EPERM if the futex dword does not carry our TID,
-;           otherwise the error reported by the futex system call
-; Clobbers: rcx, rdx, rsi, r11, flags
-global linen_lock_release
-linen_lock_release:
-    ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-    ;;;; Check validity of argument ;;;;
-    ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-    ; Preload the error code for the two early exits below
-    mov eax, -EINVAL
-
-    test rdi, rdi
-    je .done ; NULL handle
-
-    ; A non-NULL rdi is assumed to be readable (otherwise we fault
-    ; here); the canary decides whether it is really a lock handle.
-    cmp dword [rdi + LOCK_CANARY], LOCK_CANARY_VALUE
-    jne .done
-
-    ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-    ;;;; Check that we're the owner ;;;;
-    ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-    ; Owners are identified by TID. See: man 2 gettid
-    mov eax, SYS_GETTID
-    syscall
-    ; edx = our TID (gettid cannot fail)
-    mov edx, eax
-
-    ; ecx = owner's TID = lowest 30 bits of the futex dword
-    mov ecx, [rdi + LOCK_FUTEX]
-    and ecx, FUTEX_TID_MASK
-
-    ; Someone else's lock (or nobody's): refuse with EPERM
-    mov eax, -EPERM
-    cmp ecx, edx
-    jne .done
-
-    ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-    ;;;; (Partially) release our lock ;;;;
-    ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-    ; One level of recursion less. While the counter is still nonzero
-    ; we keep the lock and are done already.
-    dec dword [rdi + LOCK_COUNT]
-    jne .ok
-
-    ; The counter reached 0: release for real by clearing the futex dword.
-    ; if ([rdi] == eax) { [rdi] = 0; ZF = 1; } else { eax = [rdi]; }
-    mov eax, edx ; expected value: exactly our TID, no flag bits
-    xor ecx, ecx ; new value: 0 = free
-    lock cmpxchg [rdi + LOCK_FUTEX], ecx
-    je .ok
-
-    ; The dword was not exactly our TID, although the TID bits matched
-    ; a moment ago, so a kernel flag bit (e.g. FUTEX_WAITERS) must be
-    ; set. Only the kernel can hand the lock over in that state.
-    ; See: man 2 futex
-
-    ; futex: rdi = uaddr: address of the futex dword (LOCK_FUTEX is 0)
-    ; futex: rsi = futex_op:
-    ; - FUTEX_UNLOCK_PI: wake up one thread sleeping via FUTEX_LOCK_PI
-    ; - FUTEX_PRIVATE_FLAG: this lock isn't shared with another process
-    mov esi, (FUTEX_UNLOCK_PI | FUTEX_PRIVATE_FLAG)
-    ; futex: rdx, r10, r8, r9: ignored by FUTEX_UNLOCK_PI
-    mov eax, SYS_FUTEX
-    ; futex: rax = futex(rdi, rsi, (rdx), (r10), (r8), (r9))
-    syscall
-
-    ; Nonzero means failure; hand the kernel's error to the caller
-    test rax, rax
-    jne .done
-
-.ok:
-    xor eax, eax
-
-.done:
-    ret
diff --git a/lib/thread_create.asm b/lib/thread_create.asm
deleted file mode 100644
index 9a6fe78..0000000
--- a/lib/thread_create.asm
+++ /dev/null
@@ -1,264 +0,0 @@
-; Under MIT license, see /LICENSE.txt
-
-
-; Cheat sheet for Linux' x86_64 calling convention:
-;
-; - free to overwrite (caller should save them):
-; rax, rcx, rdx, rsi, rdi, r8-r11, xmm0-xmm15
-; - caller expects these to be kept (callee should save them):
-; rbx, rbp, r12-r15
-;
-; - for passing parameters to functions:
-; rdi, rsi, rdx, rcx, r8, r9, xmm0-xmm7
-; - for getting return values from functions:
-; rax, rdx, xmm0
-;
-; - for passing parameters to syscalls:
-; rax, rdi, rsi, rdx, r10, r8, r9
-; - for getting return values from syscalls:
-; rax, rdx
-; - overwritten by syscalls (all others preserved):
-; rcx, r11
-
-
-section .text
-
-
-; Relevant system call IDs
-%define SYS_MMAP 9
-%define SYS_MPROTECT 10
-%define SYS_MUNMAP 11
-%define SYS_CLONE 56
-%define SYS_EXIT 60
-
-; Relevant flags for mmap
-%define MAP_SHARED 0x00001
-%define MAP_PRIVATE 0x00002
-%define MAP_ANONYMOUS 0x00020
-;%define MAP_GROWSDOWN 0x00100 ; Insecure, segfaults anyway
-%define MAP_LOCKED 0x02000
-%define MAP_POPULATE 0x08000
-%define MAP_STACK 0x20000
-
-; Relevant flags for mprotect
-%define PROT_READ 0x1
-%define PROT_WRITE 0x2
-
-; Relevant flags for clone
-%define CLONE_VM 0x00000100
-%define CLONE_FS 0x00000200
-%define CLONE_FILES 0x00000400
-%define CLONE_SIGHAND 0x00000800
-%define CLONE_PARENT 0x00008000
-%define CLONE_THREAD 0x00010000
-%define CLONE_SYSVSEM 0x00040000
-%define CLONE_SETTLS 0x00080000
-%define CLONE_PARENT_SETTID 0x00100000
-%define CLONE_CHILD_CLEARTID 0x00200000
-%define CLONE_CHILD_SETTID 0x01000000
-%define CLONE_IO 0x80000000
-
-; Error numbers (used negated)
-%define EINVAL 22
-
-
-%define STACK_SIZE 2097152 ; 2 MiB stack
-%define GUARD_PAGE 4096 ; 4 KiB guard page
-
-; Canary stored in the second dword of a thread handle
-%define THREAD_CANARY_VALUE 0xDEADBEEF
-
-
-; Create a new thread executing a given function.
-;
-; ABI:      System V AMD64 (Linux)
-; In:       rdi: struct{u32,u32}** = where to put the thread handle
-;           rsi: void* (*)(void*) = function to make the child run
-;           rdx: void* = single argument for function (may be NULL)
-; Out:      eax = 0 on success, or a negative error code:
-;           -EINVAL if rdi or rsi is NULL, otherwise the error
-;           reported by mmap, mprotect or clone. On failure nothing is
-;           stored through rdi and no memory stays allocated.
-; Clobbers: rcx, rdx, rsi, rdi, r8, r9, r10, r11, flags
-;
-; Layout of the buffer that backs the child's stack (low to high):
-;
-; (bottom of range allocated by mmap)
-; 4 KiB: guard page without any access rights
-; (bottom of usable buffer)
-; ...
-; ... Child's stack grows down into this area ...
-; ...
-; qword: return address pushed by our "call" of the thread function
-; qword: slot for the thread function's return value (handle - 8)
-; dword: TID of child, cleared by the kernel on exit (handle)
-; dword: canary value to know if handle is valid (handle + 4)
-; (top of range allocated by mmap = top of usable buffer)
-global linen_thread_create
-linen_thread_create:
-    ; rbx is callee-saved; we use it to keep rdi across system calls
-    push rbx
-
-    ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-    ;;;; Check validity of arguments ;;;;
-    ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-    mov eax, -EINVAL
-    test rdi, rdi
-    jz .return ; Nowhere to store the thread handle
-    test rsi, rsi
-    jz .return ; No function for the thread to run
-
-    ; Note: rdx may be NULL; what the thread function does with its
-    ; argument is the caller's business.
-
-    ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-    ;;;; Allocate a stack and guard page ;;;;
-    ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-    ; The argument registers are needed for mmap's own arguments
-    mov rbx, rdi
-    push rdx
-    push rsi
-
-    ; See: man 2 mmap
-
-    ; mmap: rdi = addr: 0 lets the kernel choose the address
-    xor edi, edi
-    ; mmap: rsi = length: stack plus guard page
-    mov esi, (STACK_SIZE + GUARD_PAGE)
-    ; mmap: rdx = prot: readable and writable
-    mov edx, (PROT_WRITE | PROT_READ)
-    ; mmap: r10 = flags:
-    ; - MAP_ANONYMOUS: there is no file backing this buffer
-    ; - MAP_PRIVATE: the mapping is private to this process
-    ; - MAP_STACK: inform the kernel that this is a stack
-    mov r10d, (MAP_ANONYMOUS | MAP_PRIVATE | MAP_STACK)
-    ; mmap: r8 = fd: ignored for MAP_ANONYMOUS, recommended -1
-    mov r8, -1
-    ; mmap: r9 = offset: should be 0 when MAP_ANONYMOUS is used
-    xor r9d, r9d
-    mov eax, SYS_MMAP
-    ; mmap: rax = mmap(rdi, rsi, rdx, r10, r8, r9)
-    syscall
-
-    ; Unstack before branching. r8 and r9 are not arguments of the
-    ; system calls that follow (clone's r8 is ignored without
-    ; CLONE_SETTLS), so the values survive until the child needs them.
-    pop r8 ; function
-    pop r9 ; argument
-
-    ; Negative means failure (nothing to clean up yet), otherwise rax
-    ; is the lowest address of the new mapping.
-    test rax, rax
-    js .return
-
-    ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-    ;;;; Revoke guard page's R/W permissions ;;;;
-    ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-    ; Stacks grow downward, so the guard page is the lowest page of
-    ; the buffer. See: man 2 mprotect
-
-    ; mprotect: rdi = addr: lowest address of the mapping
-    mov rdi, rax
-    ; mprotect: rsi = len: one page
-    mov esi, GUARD_PAGE
-    ; mprotect: rdx = prot: zero for no access at all
-    xor edx, edx
-    mov eax, SYS_MPROTECT
-    ; mprotect: rax = mprotect(rdi, rsi, rdx)
-    syscall
-
-    ; Nonzero means failure; rdi still holds the mapping's address
-    test rax, rax
-    jnz .unmap_and_fail
-
-    ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-    ;;;; Spawn a thread with the new stack ;;;;
-    ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-    ; clone returns "twice": in the parent with the child's TID (or a
-    ; negative error), and in the new child with 0.
-    ; See: man 2 clone
-
-    ; clone: rsi = stack: initial stack pointer of the child.
-    ; We pass the address of the top qword of the buffer; that qword
-    ; itself holds the TID and the canary and doubles as the handle.
-    lea rsi, [rdi + (STACK_SIZE + GUARD_PAGE - 8)]
-
-    ; clone: rdi = flags
-    ; These flags make the parent and child share resources:
-    ; - CLONE_VM: memory address space
-    ; - CLONE_FS: filesystem information, e.g. working directory
-    ; - CLONE_FILES: file descriptor table
-    ; - CLONE_IO: I/O scheduler context
-    ; - CLONE_SIGHAND: signal handlers
-    ; - CLONE_PARENT: child gets the same parent process as the caller
-    ; - CLONE_THREAD: child joins the caller's thread group
-    ; These flags maintain the TID dword of the handle:
-    ; - CLONE_PARENT_SETTID: store child's TID at rdx before clone
-    ;   returns in the parent. CLONE_CHILD_SETTID alone only promises
-    ;   the store before the CHILD resumes, so a parent that joins
-    ;   right away could still read 0 and take the thread for finished.
-    ; - CLONE_CHILD_SETTID: store child's TID at r10 (in the child)
-    ; - CLONE_CHILD_CLEARTID: zero that dword and wake its futex when
-    ;   the child exits (this is what joining waits for)
-    mov edi, (CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_IO \
-        | CLONE_SIGHAND | CLONE_PARENT | CLONE_THREAD \
-        | CLONE_PARENT_SETTID | CLONE_CHILD_SETTID \
-        | CLONE_CHILD_CLEARTID)
-
-    ; clone: rdx = parent_tid, r10 = child_tid: both the handle's dword
-    mov rdx, rsi
-    mov r10, rsi
-
-    ; clone: r8 = tls: ignored unless CLONE_SETTLS is used
-
-    mov eax, SYS_CLONE
-    ; clone: rax = clone(rdi, rsi, rdx, r10, (r8));
-    syscall
-
-    ; From here on both parent and child may be executing this code
-    test rax, rax
-    js .clone_failed ; Negative: no child exists, only the parent is here
-    jnz .parent ; Positive: child's TID, we're the parent
-    ; Zero: we're the child
-
-    ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-    ;;;; Initialization in child thread ;;;;
-    ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-    ; Best practice is to clear the frame pointer
-    xor ebp, ebp
-
-    ; rsp = handle, which is 8 modulo 16 (page-aligned top minus 8).
-    ; Reserving one qword gives the 16-byte alignment the ABI demands
-    ; at a call, and that qword is the slot for the return value.
-    sub rsp, 8
-
-    ; Move argument into place and call supplied function
-    mov rdi, r9
-    call r8
-
-    ; Leave the function's return value at [handle - 8] for the joiner
-    mov [rsp], rax
-
-    ; Exit this thread only, with status 0
-    xor edi, edi
-    mov eax, SYS_EXIT
-    syscall ; (never returns)
-
-    ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-    ;;;; Clean up in parent thread ;;;;
-    ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-.parent:
-    ; The address of the TID dword is the thread handle of our API;
-    ; store it where the caller asked us to (address saved in rbx).
-    mov [rbx], rsi
-
-    ; The dword above the TID is unused by the kernel; a canary there
-    ; lets the other API functions recognize a valid handle.
-    mov dword [rsi + 4], THREAD_CANARY_VALUE
-
-    ; Return 0 for success
-    xor eax, eax
-
-.return:
-    ; Restore callee-save registers
-    pop rbx
-    ret
-
-    ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-    ;;;; Failure after the stack was mapped ;;;;
-    ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-.clone_failed:
-    ; rdi was reused for clone's flags; recover the mapping's address
-    lea rdi, [rsi - (STACK_SIZE + GUARD_PAGE - 8)]
-
-.unmap_and_fail:
-    ; In: rax = error to report, rdi = lowest address of the mapping.
-    ; Give the stack back so that a failed call does not leak 2 MiB.
-    ; See: man 2 munmap
-    mov rdx, rax ; system calls preserve rdx
-    mov esi, (STACK_SIZE + GUARD_PAGE)
-    mov eax, SYS_MUNMAP
-    ; munmap: rax = munmap(rdi, rsi); result ignored, we report the
-    ; original error either way
-    syscall
-    mov rax, rdx
-
-    pop rbx
-    ret
-
diff --git a/lib/thread_finish.asm b/lib/thread_finish.asm
deleted file mode 100644
index 860b0a4..0000000
--- a/lib/thread_finish.asm
+++ /dev/null
@@ -1,147 +0,0 @@
-; Under MIT license, see /LICENSE.txt
-
-
-; Cheat sheet for Linux' x86_64 calling convention:
-;
-; - free to overwrite (caller should save them):
-; rax, rcx, rdx, rsi, rdi, r8-r11, xmm0-xmm15
-; - caller expects these to be kept (callee should save them):
-; rbx, rbp, r12-r15
-;
-; - for passing parameters to functions:
-; rdi, rsi, rdx, rcx, r8, r9, xmm0-xmm7
-; - for getting return values from functions:
-; rax, rdx, xmm0
-;
-; - for passing parameters to syscalls:
-; rax, rdi, rsi, rdx, r10, r8, r9
-; - for getting return values from syscalls:
-; rax, rdx
-; - overwritten by syscalls (all others preserved):
-; rcx, r11
-
-
-section .text
-
-
-; Relevant system call IDs
-%define SYS_MUNMAP 11
-%define SYS_FUTEX 202
-
-; Relevant operations for futex
-%define FUTEX_WAIT 0
-%define FUTEX_PRIVATE_FLAG 0x80
-
-; Error numbers (used negated, as raw system calls report them)
-%define EINTR 4
-%define EAGAIN 11
-%define EINVAL 22
-
-
-%define STACK_SIZE 2097152 ; 2 MiB stack
-%define GUARD_PAGE 4096 ; 4 KiB guard page
-
-; Canary stored in the second dword of a thread handle
-%define THREAD_CANARY_VALUE 0xDEADBEEF
-
-
-; Wait for thread to exit, save its return value, and clean up.
-;
-; ABI:      System V AMD64 (Linux); leaf function, stack untouched
-; In:       rdi: struct{u32,u32}* = handle of the thread to wait for
-;           rsi: void** = where to put void* returned by thread
-;                (may be NULL if the caller doesn't care)
-; Out:      eax = 0 on success, or a negative error code:
-;           -EINVAL if rdi is NULL or the handle's canary is wrong,
-;           otherwise the error reported by futex or munmap
-; Clobbers: rcx, rdx, rsi, rdi, r8, r10, r11, flags
-; On success the thread's stack, which contains the handle, is
-; unmapped: the handle must not be used again afterwards.
-global linen_thread_finish
-linen_thread_finish:
-    ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-    ;;;; Check validity of arguments ;;;;
-    ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-    ; Preload the error code for the two early exits below
-    mov eax, -EINVAL
-
-    test rdi, rdi
-    jz .return ; rdi is NULL
-
-    ; rdi is nonzero, so we assume it is a readable pointer (if it
-    ; isn't, the load below faults). The canary at [rdi + 4] tells us
-    ; whether it really is a thread handle.
-    mov ecx, [rdi + 4]
-    cmp ecx, THREAD_CANARY_VALUE
-    jne .return
-
-    ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-    ;;;; Wait until thread is finished ;;;;
-    ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-    ; rsi is needed for the futex call; r8 survives system calls
-    mov r8, rsi
-
-.check:
-    ; The thread was spawned with CLONE_CHILD_CLEARTID: [rdi] holds
-    ; its TID and is cleared (to 0) by the kernel when it exits.
-    ; That dword being 0 is the ONLY proof that the thread is gone,
-    ; so every path out of the wait below comes back here.
-
-    ; Atomically read the dword without changing it:
-    ; if ([rdi] == 0) { ZF = 1; } else { eax = [rdi]; }
-    xor eax, eax
-    lock cmpxchg [rdi], eax
-    jz .exited
-
-    ; The thread is still running, so sleep until [rdi] changes.
-    ; See: man 2 futex
-
-    ; futex: rdi = uaddr: address of the dword to watch
-    ; futex: rsi = futex_op:
-    ; - FUTEX_WAIT: sleep while [rdi] still equals rdx
-    ; - no FUTEX_PRIVATE_FLAG: the kernel's wake-up for
-    ;   CLONE_CHILD_CLEARTID is a shared (non-private) futex wake,
-    ;   which never reaches private waiters; a private wait would
-    ;   sleep forever.
-    mov esi, FUTEX_WAIT
-    ; futex: rdx = val: the value we just saw at [rdi]
-    mov edx, eax
-    ; futex: r10 = timeout: NULL, we wait without a deadline
-    xor r10d, r10d
-    ; futex: r8 = uaddr2, r9 = val3: ignored by FUTEX_WAIT
-    mov eax, SYS_FUTEX
-    ; futex: rax = futex(rdi, rsi, rdx, r10, (r8), (r9))
-    syscall
-
-    ; 0: we were woken, but wake-ups can be spurious - look again
-    test rax, rax
-    jz .check
-    ; EAGAIN: [rdi] changed before we fell asleep - look again
-    cmp rax, -EAGAIN
-    je .check
-    ; EINTR: a signal interrupted the wait - the thread may well be
-    ; running still, so keep waiting instead of failing the join
-    cmp rax, -EINTR
-    je .check
-
-    ; Anything else is a genuine failure, reported as is
-    ret
-
-    ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-    ;;;; Clean up after thread's exit ;;;;
-    ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-.exited:
-    ; The thread left its function's return value in the qword just
-    ; below the handle; fetch it while the stack still exists.
-    ; (rdx and r8 are preserved by the system call below.)
-    mov rdx, [rdi - 8]
-
-    ; Deallocate the thread's stack buffer. See: man 2 munmap
-
-    ; munmap: rdi = addr: lowest address of region to unmap.
-    ; The handle is the top qword of the buffer, so step back down.
-    sub rdi, (STACK_SIZE + GUARD_PAGE - 8)
-    ; munmap: rsi = length: stack plus guard page
-    mov esi, (STACK_SIZE + GUARD_PAGE)
-    mov eax, SYS_MUNMAP
-    ; munmap: rax = munmap(rdi, rsi)
-    syscall
-
-    ; Nonzero means failure. The buffer is then still mapped, so
-    ; returning the error loses nothing: the return value stays at
-    ; [handle - 8].
-    test rax, rax
-    jnz .return
-
-    ; Hand over the return value if the caller supplied a location;
-    ; rax is 0 here on either path.
-    test r8, r8
-    jz .return ; caller doesn't care: gave NULL pointer
-    mov [r8], rdx
-
-.return:
-    ret
-