From 7231a21b00028a52d7938131bfeca4d663d09071 Mon Sep 17 00:00:00 2001 From: Prefetch Date: Sat, 22 Jul 2023 19:22:06 +0200 Subject: Add locks, with some minor improvements to threads --- lib/lock_acquire.asm | 204 ++++++++++++++++++++++++++++++++++++++++++++++++++ lib/lock_release.asm | 132 ++++++++++++++++++++++++++++++++ lib/thread_create.asm | 24 +++--- lib/thread_finish.asm | 48 ++++++------ 4 files changed, 375 insertions(+), 33 deletions(-) create mode 100644 lib/lock_acquire.asm create mode 100644 lib/lock_release.asm (limited to 'lib') diff --git a/lib/lock_acquire.asm b/lib/lock_acquire.asm new file mode 100644 index 0000000..f32ba6a --- /dev/null +++ b/lib/lock_acquire.asm @@ -0,0 +1,204 @@ +; Under MIT license, see /LICENSE.txt + + +; Cheat sheet for Linux' x86_64 calling convention: +; +; - free to overwrite (caller should save them): +; rax, rcx, rdx, rsi, rdi, r8-r11, xmm0-xmm15 +; - caller expects be kept (callee should save them): +; rbx, rbp, r12-r15 +; +; - for passing paramters to functions: +; rdi, rsi, rdx, rcx, r8, r9, xmm0-xmm7 +; - for getting return values from functions: +; rax, rdx, xmm0 +; +; - for passing parameters to syscalls: +; rax, rdi, rsi, rdx, r10, r8, r9 +; - for getting return values from syscalls: +; rax, rdx +; - overwritten by syscalls (all others preserved): +; rcx, r11 + + +section .text + + +; Relevant system call IDs +%define SYS_GETTID 186 +%define SYS_FUTEX 202 + +; Relevant operations for futex +%define FUTEX_LOCK_PI 6 +%define FUTEX_PRIVATE_FLAG 0x80 + +; Relevant bits for futex dword +%define FUTEX_TID_MASK 0x3fffffff +%define FUTEX_OWNER_DIED 0x40000000 +%define FUTEX_WAITERS 0x80000000 + + +; Acquire a lock if possible, or wait until it gets released. Argument: +; rdi: struct{u32,u32,u32}* = handle of lock to acquire +; Returns zero on success, or a standard error code. +global linen_lock_acquire +linen_lock_acquire: + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ;;;; Check validity of argument ;;;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; Return EINVAL if rdi is NULL or otherwise invalid + mov eax, -22 ; (EINVAL = -22) + + test rdi, rdi + jz acquire_return ; rdi is NULL + + ; rdi is nonzero, so let's just assume it's a valid pointer; + ; if that assumption is wrong we'll get a segmentation fault. + ; But we don't yet trust that [rdi] is a valid lock handle! + ; To verify this we check the canary value stored at [rdi + 8]. + mov ecx, [rdi + 8] + cmp ecx, 0xCAFEBABE + jnz acquire_return + + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ;;;; Check ownership of lock ;;;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; Lock owners are identified by their TID; let's find ours. + ; The gettid system call simply returns our Linux thread ID. + ; See: man 2 gettid + + ; gettid: rax = system call ID + mov eax, SYS_GETTID + ; gettid: rax = gettid() + syscall + + ; Save a copy of our TID (no need for an error check) + mov edx, eax + + ; There are four possible ownership situations for the lock, + ; which we can distinguish based on the dword value at [rdi]: + ; - Case 1: if [rdi] contains zero, then the lock is available. + ; - Case 2: if [rdi] has any of its highest 2 bits set, then the + ; lock isn't free, and kernel intervention is required. + ; - Case 3: if the lower 30 bits of [rdi] contain our TID, + ; then we already own it (recursive acquisition). + ; - Case 4: if the lower 30 bits of [rdi] contain another TID + ; and the high-bit flags aren't set, then we just wait + ; until we can acquire the lock using atomic operations + ; or, optionally, a futex call (usually more efficient). + + ; Atomically check whether the lock is owned by another thread, + ; and if not, try to take ownership by writing our TID to [rdi]. + ; if ([rdi] == 0) { [rdi] = edx; goto acquire_success; } else { eax = [rdi]; } + xor eax, eax + lock cmpxchg [rdi], edx + jz acquire_success ; case 1 + + ; The lock isn't free, so let's check how "clean" its state is. + ; The following flags are set by the kernel (see futex below): + ; - FUTEX_OWNER_DIED: the lock's owner died, so it's actually free + ; (but first the kernel needs to clean up) + ; - FUTEX_WAITERS: we aren't the only one waiting for this lock + ; (so let's sleep until the kernel wakes us up) + ; Either way, we need the kernel's help, so jump to the futex call. + test eax, (FUTEX_OWNER_DIED | FUTEX_WAITERS) + jnz acquire_futex ; case 2 + + ; It seems someone has the lock, check who: it may already be us. + ; If so, this is a recursive acquisition, good, let's continue. + and eax, FUTEX_TID_MASK + cmp eax, edx + je acquire_success ; case 3 + + ; Someone else has the lock, but we're the only one waiting for it. + ; System calls are expensive, so let's try a short spin loop first, + ; hoping it'll get released soon. This is arguably unnecessary, as + ; it's only beneficial when two threads are more or less "in sync", + ; so in most real-world cases you can delete this with no downside. + + ; Loop counter + mov ecx, 10 + acquire_spinloop: + ; The "pause" instruction is specially designed for loops like this + ; and conserves power. It causes a small delay (makes sense here). + pause + + ; Atomically check whether the lock is owned by another thread, + ; and if not, try to take ownership by writing our TID to [rdi]. + ; if ([rdi] == 0) { [rdi] = edx; goto acquire_success; } else { eax = [rdi]; } + xor eax, eax + lock cmpxchg [rdi], edx + jz acquire_success + + ; Decrement loop counter until zero + dec ecx + jnz acquire_spinloop + + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ;;;; Let the kernel handle it ;;;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + acquire_futex: + ; The futex system call waits for the dword at an address (rdi) + ; changes in a certain way, as described above and in the futex + ; manual's section on so-called "priority-inheritance futexes". + ; See: man 2 futex + + ; futex: rdi = uaddr: address of the dword to watch + ; futex: rsi = futex_op: which futex operation we want: + ; - FUTEX_LOCK_PI: block until lock's owner uses FUTEX_UNLOCK_PI + ; - FUTEX_PRIVATE_FLAG: this lock isn't shared with another process + mov esi, (FUTEX_LOCK_PI | FUTEX_PRIVATE_FLAG) + ; futex: r10 = timeout: in case we had a deadline (we don't) + xor r10, r10 + ; futex: rdx = val: ignored when FUTEX_LOCK_PI is used + ; futex: r8 = uaddr2: ignored when FUTEX_LOCK_PI is used + ; futex: r9 = val3: ignored when FUTEX_LOCK_PI is used + ; futex: rax = system call ID + mov eax, SYS_FUTEX + ; futex: rax = futex(rdi, rsi, (rdx), r10, (r8), (r9)) + syscall + + ; Sometimes the lock is released after the "lock cmpxchg" instruction + ; but just before the futex call. In that case, futex returns EAGAIN. + cmp rax, -11 ; (-EAGAIN) + je acquire_futex + + ; Any other negative return value means failure + test rax, rax + jnz acquire_return + + ; Indicate that we made a futex call (see below for why) + xor edx, edx + + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ;;;; Update the recursion counter ;;;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + acquire_success: + ; Read the recursion counter (we have the lock: no need for atomics) + mov ecx, [rdi + 4] + + ; The value in edx depends on how we came to the acquire_success label: + ; 1) We jumped here after a successful "lock cmpxchg": edx has our TID + ; 2) We finished a successful futex call: edx was set to 0 (see above) + test edx, edx + ; Why do we care? Well, in the latter case, the futex call may have been + ; necessary because there was a problem (i.e. FUTEX_OWNER_DIED was set), + ; in which case the recursion counter is stale and hence must be reset. + ; In any other case, whoever released the lock should've reset it already. + cmovz ecx, edx ; ecx = 0 + + ; Increment the recursion counter and write it back to memory + ; (if the lock is being used non-recursively, it should be 1) + inc ecx + mov [rdi + 4], ecx + + ; Lock acquisition was successful, so we'll return 0. In most cases + ; eax is already 0; we only need this if the recursion counter > 1. + xor eax, eax + + acquire_return: + ret diff --git a/lib/lock_release.asm b/lib/lock_release.asm new file mode 100644 index 0000000..f86caa2 --- /dev/null +++ b/lib/lock_release.asm @@ -0,0 +1,132 @@ +; Under MIT license, see /LICENSE.txt + + +; Cheat sheet for Linux' x86_64 calling convention: +; +; - free to overwrite (caller should save them): +; rax, rcx, rdx, rsi, rdi, r8-r11, xmm0-xmm15 +; - caller expects be kept (callee should save them): +; rbx, rbp, r12-r15 +; +; - for passing paramters to functions: +; rdi, rsi, rdx, rcx, r8, r9, xmm0-xmm7 +; - for getting return values from functions: +; rax, rdx, xmm0 +; +; - for passing parameters to syscalls: +; rax, rdi, rsi, rdx, r10, r8, r9 +; - for getting return values from syscalls: +; rax, rdx +; - overwritten by syscalls (all others preserved): +; rcx, r11 + + +section .text + + +; Relevant system call IDs +%define SYS_GETTID 186 +%define SYS_FUTEX 202 + +; Relevant operations for futex +%define FUTEX_UNLOCK_PI 7 +%define FUTEX_PRIVATE_FLAG 0x80 + +; Relevant bits for futex dword +%define FUTEX_TID_MASK 0x3fffffff +%define FUTEX_OWNER_DIED 0x40000000 +%define FUTEX_WAITERS 0x80000000 + + +; Release an acquired lock if we're who acquired it. Argument: +; rdi: struct{u32,u32,u32}* = handle of lock to release +; Returns zero on success, or a standard error code. +global linen_lock_release +linen_lock_release: + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ;;;; Check validity of argument ;;;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; Return EINVAL if rdi is NULL or invalid + mov eax, -22 ; (EINVAL = -22) + + test rdi, rdi + jz release_return ; rdi is NULL + + ; rdi is nonzero, so let's just assume it's a valid pointer; + ; if that assumption is wrong we'll get a segmentation fault. + ; But we don't yet trust that [rdi] is a valid lock handle! + ; To verify this we check the canary value stored at [rdi + 8]. + mov ecx, [rdi + 8] + cmp ecx, 0xCAFEBABE + jnz release_return + + ; Lock owners are identified by their TID; let's find ours. + ; The gettid system call simply returns our Linux thread ID. + ; See: man 2 gettid + + ; gettid: rax = system call ID + mov eax, SYS_GETTID + ; gettid: rax = gettid() + syscall + + ; Save a copy of our TID (no need for an error check) + mov edx, eax + + ; Return EPERM if this lock currently doesn't belong to us + mov eax, -1 ; (EPERM = -1) + + ; Read the futex dword at [rdi] and keep its lowest 30 bits + mov ecx, [rdi] + and ecx, FUTEX_TID_MASK + ; Those bits contain the owner's TID; it should be our TID + cmp ecx, edx + jne release_return + + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ;;;; (Partially) release our lock ;;;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; Decrement the recursion counter. If it's still > 1, we're done here. + dec dword [rdi + 4] + jnz release_success + ; If it reaches 0, it's time for a full release by setting [rdi] to 0. + + ; Restore our saved TID to eax for "lock cmpxchg" below + mov eax, edx + + ; Atomically try to set the dword at [rdi] to 0 if it was equal to our TID. + ; if ([rdi] == eax]) { [rdi] = 0; goto release_success; } else { eax = [rdi]; } + xor ecx, ecx + lock cmpxchg [rdi], ecx + je release_success + + ; We failed because [rdi] wasn't equal to our TID. In theory, + ; that can mean only one thing: [rdi] = (edx | FUTEX_WAITERS). + ; In that case we need to ask the kernel to wake up the threads + ; who are waiting (via a futex system call) for [rdi] to change. + ; See: man 2 futex + + ; futex: rdi = uaddr: address of the dword to announce for + ; futex: rsi = futex_op: which futex operation we want: + ; - FUTEX_UNLOCK_PI: wake up one thread sleeping via FUTEX_LOCK_PI + ; - FUTEX_PRIVATE_FLAG: this lock isn't shared with another process + mov esi, (FUTEX_UNLOCK_PI | FUTEX_PRIVATE_FLAG) ; futex: futex_op + ; futex: rdx = val: ignored when FUTEX_UNLOCK_PI is used + ; futex: r10 = timeout: ignored when FUTEX_UNLOCK_PI is used + ; futex: r8 = uaddr2: ignored when FUTEX_UNLOCK_PI is used + ; futex: r9 = val3: ignored when FUTEX_UNLOCK_PI is used + ; futex: rax = system call ID + mov eax, SYS_FUTEX + ; futex: rax = futex(rdi, rsi, (rdx), (r10), (r8), (r9)) + syscall + + ; Check result of futex: nonzero means failure + test rax, rax + jnz release_return + + release_success: + xor eax, eax + + release_return: + ret diff --git a/lib/thread_create.asm b/lib/thread_create.asm index 2ccd1fa..9a6fe78 100644 --- a/lib/thread_create.asm +++ b/lib/thread_create.asm @@ -3,9 +3,9 @@ ; Cheat sheet for Linux' x86_64 calling convention: ; -; - free to overwrite; the caller should save them: +; - free to overwrite (caller should save them): ; rax, rcx, rdx, rsi, rdi, r8-r11, xmm0-xmm15 -; - caller expects no change; callee should save them: +; - caller expects be kept (callee should save them): ; rbx, rbp, r12-r15 ; ; - for passing paramters to functions: @@ -63,9 +63,9 @@ section .text ; Create a new thread executing a given function. Arguments: -; rdi: u32** = where to put the thread handle -; rsi: void* (*)(void*) = function to make the child run -; rdx: void* = single argument for function +; rdi: struct{u32,u32}** = where to put the thread handle +; rsi: void* (*)(void*) = function to make the child run +; rdx: void* = single argument for function ; Returns zero on success, or a standard error code. global linen_thread_create linen_thread_create: @@ -79,9 +79,9 @@ linen_thread_create: ; Return EINVAL if any argument is NULL mov eax, -22 ; (EINVAL = -22) test rdi, rdi - jz create_end ; Nowhere to store the thread handle + jz create_return ; Nowhere to store the thread handle test rsi, rsi - jz create_end ; No function for the thread to run + jz create_return ; No function for the thread to run ; Note: we allow rdx to be NULL; in that case the worst that can happen ; is a segmentation fault in the user's code (not really our problem). @@ -104,7 +104,7 @@ linen_thread_create: mov esi, (STACK_SIZE + GUARD_PAGE) ; mmap: rdx = prot: mprotect-style access permissions mov edx, (PROT_WRITE | PROT_READ) - ; mmap: r10 = flags: settings for mapping + ; mmap: r10 = flags: configuration flags for mapping: ; - MAP_ANONYMOUS: there is no file backing this buffer ; - MAP_PRIVATE: only this process can see thread's stack ; - MAP_STACK: no-op; inform kernel that this is a stack @@ -126,7 +126,7 @@ linen_thread_create: ; Check result of mmap: negative means failure, ; otherwise rax is the address of the new mapping. test rax, rax - js create_end + js create_return ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;; Revoke guard page's R/W permissions ;;;; @@ -151,7 +151,7 @@ linen_thread_create: ; Check result of mprotect: nonzero means failure test rax, rax - jnz create_end + jnz create_return ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;; Spawn a thread with the new stack ;;;; @@ -202,7 +202,7 @@ linen_thread_create: ; Check result of clone: test rax, rax - js create_end ; Negative means failure + js create_return ; Negative means failure jnz create_success ; Positive means we're in the parent thread ; Zero means we're in the child thread @@ -256,7 +256,7 @@ linen_thread_create: ; Return 0 for success xor eax, eax - create_end: + create_return: ; Restore callee-save registers pop rbx diff --git a/lib/thread_finish.asm b/lib/thread_finish.asm index 9392f1c..860b0a4 100644 --- a/lib/thread_finish.asm +++ b/lib/thread_finish.asm @@ -3,9 +3,9 @@ ; Cheat sheet for Linux' x86_64 calling convention: ; -; - free to overwrite; the caller should save them: +; - free to overwrite (caller should save them): ; rax, rcx, rdx, rsi, rdi, r8-r11, xmm0-xmm15 -; - caller expects no change; callee should save them: +; - caller expects be kept (callee should save them): ; rbx, rbp, r12-r15 ; ; - for passing paramters to functions: @@ -28,8 +28,8 @@ section .text %define SYS_MUNMAP 11 %define SYS_FUTEX 202 -; Relevant flags for futex -%define FUTEX_WAIT 0x00 +; Relevant operations for futex +%define FUTEX_WAIT 0 %define FUTEX_PRIVATE_FLAG 0x80 @@ -38,8 +38,8 @@ section .text ; Wait for thread to exit, save its return value, and clean up. Arguments: -; rdi: u32* = handle of the thread to wait for -; rsi: void** = where to put the void* returned by the thread +; rdi: struct{u32,u32}* = handle of the thread to wait for +; rsi: void** = where to put void* returned by thread ; Returns zero on success, or a standard error code. global linen_thread_finish linen_thread_finish: @@ -51,15 +51,15 @@ linen_thread_finish: mov eax, -22 ; (EINVAL = -22) test rdi, rdi - jz join_end ; rdi is NULL + jz finish_return ; rdi is NULL - ; rdi is nonzero, so let's just assume it's a valid pointer. - ; If that assumption is wrong we'll get a segmentation fault. + ; rdi is nonzero, so let's just assume it's a valid pointer; + ; if that assumption is wrong we'll get a segmentation fault. ; But we don't yet trust that [rdi] is a valid thread handle! - ; To verify this, we check the canary value stored at [rdi + 4]. + ; To verify this we check the canary value stored at [rdi + 4]. mov ecx, [rdi + 4] cmp ecx, 0xDEADBEEF - jnz join_end + jnz finish_return ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;; Wait until thread is finished ;;;; @@ -68,15 +68,16 @@ linen_thread_finish: ; We'll clobber rsi if we need to set up a futex call mov r8, rsi + finish_retry: ; When spawning, we set CLONE_CHILD_SETTID and CLONE_CHILD_CLEARTID: ; [rdi] contains the child thread's TID, and will get automatically ; cleared (to 0) when the child exits; this is what we'll watch for. ; Atomically check whether the target thread is still running. - ; if ([rdi] == 0) { goto join_done; } else { eax = [rdi]; } + ; if ([rdi] == 0) { goto finish_success; } else { eax = [rdi]; } xor eax, eax lock cmpxchg [rdi], eax - jz join_done + jz finish_success ; The thread is still busy, so block until it's done. ; The futex system call waits until the dword at an @@ -84,7 +85,7 @@ linen_thread_finish: ; See: man 2 futex ; futex: rdi = uaddr: address of the dword to watch - ; futex: rsi = futex_op: which futex operation we want + ; futex: rsi = futex_op: which futex operation we want: ; - FUTEX_WAIT: block until the value at [rdi] changes ; - FUTEX_PRIVATE_FLAG: FIXME waits forever, I don't understand why mov esi, FUTEX_WAIT @@ -93,21 +94,26 @@ linen_thread_finish: ; futex: r10 = timeout: in case we had a deadline (we don't) xor r10, r10 ; futex: r8 = uaddr2: ignored when FUTEX_WAIT is used - ; futex: r9 = val3: ignored when FUTEX WAIT is used + ; futex: r9 = val3: ignored when FUTEX_WAIT is used ; futex: rax = system call ID mov eax, SYS_FUTEX ; futex: rax = futex(rdi, rsi, rdx, r10, (r8), (r9)) syscall - ; Check result of futex: negative means failure + ; Sometimes the thread exits after the "lock cmpxchg" instruction + ; but before the futex call. In that case, futex returns EAGAIN. + cmp rax, -11 ; (EAGAIN = -11) + je finish_retry + + ; Any other nonzero return value means failure test rax, rax - jnz join_end + jnz finish_return ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;; Clean up after thread's exit ;;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - join_done: + finish_success: ; The thread left its function return value on the stack, read it mov rdx, [rdi - 8] @@ -127,15 +133,15 @@ linen_thread_finish: ; Check result of munmap: nonzero means failure test rax, rax - jnz join_end + jnz finish_return ; Check if caller gave a location (r8) to save the return value (rdx) test r8, r8 - jz join_end ; caller doesn't care: gave NULL pointer + jz finish_return ; caller doesn't care: gave NULL pointer mov [r8], rdx ; Note: if munmap failed, the buffer is still there, so we ; can safely return an error without losing the return value. - join_end: + finish_return: ret -- cgit v1.2.3