; Under MIT license, see /LICENSE.txt ; Cheat sheet for Linux' x86_64 calling convention: ; ; - free to overwrite (caller should save them): ; rax, rcx, rdx, rsi, rdi, r8-r11, xmm0-xmm15 ; - caller expects be kept (callee should save them): ; rbx, rbp, r12-r15 ; ; - for passing paramters to functions: ; rdi, rsi, rdx, rcx, r8, r9, xmm0-xmm7 ; - for getting return values from functions: ; rax, rdx, xmm0 ; ; - for passing parameters to syscalls: ; rax, rdi, rsi, rdx, r10, r8, r9 ; - for getting return values from syscalls: ; rax, rdx ; - overwritten by syscalls (all others preserved): ; rcx, r11 section .text ; Relevant system call IDs %define SYS_GETTID 186 %define SYS_FUTEX 202 ; Relevant operations for futex %define FUTEX_LOCK_PI 6 %define FUTEX_PRIVATE_FLAG 0x80 ; Relevant bits for futex dword %define FUTEX_TID_MASK 0x3fffffff %define FUTEX_OWNER_DIED 0x40000000 %define FUTEX_WAITERS 0x80000000 ; Acquire a lock if possible, or wait until it gets released. Argument: ; rdi: struct{u32,u32,u32}* = handle of lock to acquire ; Returns zero on success, or a standard error code. global linen_lock_acquire linen_lock_acquire: ; It's handy to have a register that's 0 during most of this function xor esi, esi ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;; Check validity of argument ;;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ; Return EINVAL (-22) if rdi is NULL or otherwise invalid lea eax, [rsi - 22] ; mov eax, -22 test rdi, rdi jz acquire_return ; rdi is NULL ; rdi is nonzero, so let's just assume it's a valid pointer; ; if that assumption is wrong we'll get a segmentation fault. ; But we don't yet trust that [rdi] is a valid lock handle! ; To verify this we check the canary value stored at [rdi + 8]. cmp dword [rdi + 8], 0xCAFEBABE ; Oh CISC... jnz acquire_return ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;; Check ownership of lock ;;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ; Lock owners are identified by their TID; let's find ours. ; The gettid system call simply returns our Linux thread ID. ; See: man 2 gettid ; gettid: rax = system call ID xor eax, eax mov al, SYS_GETTID ; gettid: rax = gettid() syscall ; Save a copy of our TID (no need for an error check) mov edx, eax ; There are four possible ownership situations for the lock, ; which we can distinguish based on the dword value at [rdi]: ; - Case 1: if [rdi] contains zero, then the lock is available. ; - Case 2: if [rdi] has any of its highest 2 bits set, then the ; lock isn't free, and kernel intervention is required. ; - Case 3: if the lower 30 bits of [rdi] contain our TID, ; then we already own it (recursive acquisition). ; - Case 4: if the lower 30 bits of [rdi] contain another TID ; and the high-bit flags aren't set, then we just wait ; until we can acquire the lock using atomic operations ; or, optionally, a futex call (usually more efficient). ; Atomically check whether the lock is owned by another thread, ; and if not, try to take ownership by writing our TID to [rdi]. ; if ([rdi] == 0) { [rdi] = edx; goto acquire_success; } else { eax = [rdi]; } xor eax, eax lock cmpxchg [rdi], edx jz acquire_success ; case 1 ; The lock isn't free, so let's check how "clean" its state is. ; The following flags are set by the kernel (see futex below): ; - FUTEX_OWNER_DIED: the lock's owner died, so it's actually free ; (but first the kernel needs to clean up) ; - FUTEX_WAITERS: we aren't the only one waiting for this lock ; (so let's sleep until the kernel wakes us up) ; Either way, we need the kernel's help, so jump to the futex call. test eax, (FUTEX_OWNER_DIED | FUTEX_WAITERS) jnz acquire_futex ; case 2 ; It seems someone has the lock, check who: it may already be us. ; If so, this is a recursive acquisition, good, let's continue. and eax, FUTEX_TID_MASK cmp eax, edx je acquire_success ; case 3 ; Someone else has the lock, but we're the only one waiting for it. ; System calls are expensive, so let's try a short spin loop first, ; hoping it'll get released soon. This is arguably unnecessary, as ; it's only beneficial when two threads are more or less "in sync", ; so in most real-world cases you can delete this with no downside. ; Loop counter mov sil, 10 acquire_spinloop: ; The "pause" instruction is specially designed for loops like this ; and conserves power. It causes a small delay (makes sense here). pause ; Atomically check whether the lock is owned by another thread, ; and if not, try to take ownership by writing our TID to [rdi]. ; if ([rdi] == 0) { [rdi] = edx; goto acquire_success; } else { eax = [rdi]; } xor eax, eax lock cmpxchg [rdi], edx jz acquire_success ; Decrement loop counter until zero dec esi jnz acquire_spinloop ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;; Let the kernel handle it ;;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; acquire_futex: ; The futex system call waits for the dword at an address (rdi) ; changes in a certain way, as described above and in the futex ; manual's section on so-called "priority-inheritance futexes". ; See: man 2 futex ; futex: rdi = uaddr: address of the dword to watch ; futex: rsi = futex_op: which futex operation we want: ; - FUTEX_LOCK_PI: block until lock's owner uses FUTEX_UNLOCK_PI ; - FUTEX_PRIVATE_FLAG: this lock isn't shared with another process mov sil, (FUTEX_LOCK_PI | FUTEX_PRIVATE_FLAG) ; futex: r10 = timeout: in case we had a deadline (we don't) xor r10, r10 ; futex: rdx = val: ignored when FUTEX_LOCK_PI is used ; futex: r8 = uaddr2: ignored when FUTEX_LOCK_PI is used ; futex: r9 = val3: ignored when FUTEX_LOCK_PI is used ; futex: rax = system call ID xor eax, eax mov al, SYS_FUTEX ; futex: rax = futex(rdi, rsi, (rdx), r10, (r8), (r9)) syscall ; Sometimes the lock is released after the "lock cmpxchg" instruction ; but just before the futex call. In that case, futex returns EAGAIN. cmp eax, -11 ; (-EAGAIN) je acquire_futex ; Any other negative return value means failure test eax, eax jnz acquire_return ; Indicate that we made a futex call (see below for why) xor edx, edx ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;; Update the recursion counter ;;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; acquire_success: ; Read the recursion counter (we have the lock: no need for atomics) mov ecx, [rdi + 4] ; The value in edx depends on how we came to the acquire_success label: ; 1) We jumped here after a successful "lock cmpxchg": edx has our TID ; 2) We finished a successful futex call: edx was set to 0 (see above) test edx, edx ; Why do we care? Well, in the latter case, the futex call may have been ; necessary because there was a problem (i.e. FUTEX_OWNER_DIED was set), ; in which case the recursion counter is stale and hence must be reset. ; In any other case, whoever released the lock should've reset it already. cmovz ecx, edx ; ecx = 0 ; Increment the recursion counter and write it back to memory ; (if the lock is being used non-recursively, it should be 1) inc ecx mov [rdi + 4], ecx ; Lock acquisition was successful, so we'll return 0. In most cases ; eax is already 0; we only need this if the recursion counter > 1. xor eax, eax acquire_return: ret