From 7231a21b00028a52d7938131bfeca4d663d09071 Mon Sep 17 00:00:00 2001
From: Prefetch
Date: Sat, 22 Jul 2023 19:22:06 +0200
Subject: Add locks, with some minor improvements to threads

---
 Makefile              |   2 +-
 lib/lock_acquire.asm  | 204 ++++++++++++++++++++++++++++++++++++++++++++++++++
 lib/lock_release.asm  | 132 ++++++++++++++++++++++++++++++++
 lib/thread_create.asm |  24 +++---
 lib/thread_finish.asm |  48 ++++++------
 linen.h               |  21 +++++-
 6 files changed, 396 insertions(+), 35 deletions(-)
 create mode 100644 lib/lock_acquire.asm
 create mode 100644 lib/lock_release.asm

diff --git a/Makefile b/Makefile
index 00f1816..33fe35b 100644
--- a/Makefile
+++ b/Makefile
@@ -1,4 +1,4 @@
-liblinen.so: lib/thread_create.o lib/thread_finish.o
+liblinen.so: lib/thread_create.o lib/thread_finish.o lib/lock_acquire.o lib/lock_release.o
 	gcc -shared -o $@ $^
 
 lib/%.o: lib/%.asm
diff --git a/lib/lock_acquire.asm b/lib/lock_acquire.asm
new file mode 100644
index 0000000..f32ba6a
--- /dev/null
+++ b/lib/lock_acquire.asm
@@ -0,0 +1,204 @@
+; Under MIT license, see /LICENSE.txt
+
+
+; Cheat sheet for Linux' x86_64 calling convention:
+;
+; - free to overwrite (caller should save them):
+;       rax, rcx, rdx, rsi, rdi, r8-r11, xmm0-xmm15
+; - caller expects be kept (callee should save them):
+;       rbx, rbp, r12-r15
+;
+; - for passing paramters to functions:
+;       rdi, rsi, rdx, rcx, r8, r9, xmm0-xmm7
+; - for getting return values from functions:
+;       rax, rdx, xmm0
+;
+; - for passing parameters to syscalls:
+;       rax, rdi, rsi, rdx, r10, r8, r9
+; - for getting return values from syscalls:
+;       rax, rdx
+; - overwritten by syscalls (all others preserved):
+;       rcx, r11
+
+
+section .text
+
+
+; Relevant system call IDs
+%define SYS_GETTID  186
+%define SYS_FUTEX   202
+
+; Relevant operations for futex
+%define FUTEX_LOCK_PI      6
+%define FUTEX_PRIVATE_FLAG 0x80
+
+; Relevant bits for futex dword
+%define FUTEX_TID_MASK   0x3fffffff
+%define FUTEX_OWNER_DIED 0x40000000
+%define FUTEX_WAITERS    0x80000000
+
+
+; Acquire a lock if possible, or wait until it gets released. Argument:
+;	rdi: struct{u32,u32,u32}* = handle of lock to acquire
+; Returns zero on success, or a standard error code.
+global linen_lock_acquire
+linen_lock_acquire:
+		;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+		;;;; Check validity of argument ;;;;
+		;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+		; Return EINVAL if rdi is NULL or otherwise invalid
+		mov eax, -22 ; (EINVAL = -22)
+
+		test rdi, rdi
+		jz acquire_return ; rdi is NULL
+
+		; rdi is nonzero, so let's just assume it's a valid pointer;
+		; if that assumption is wrong we'll get a segmentation fault.
+		; But we don't yet trust that [rdi] is a valid lock handle!
+		; To verify this we check the canary value stored at [rdi + 8].
+		mov ecx, [rdi + 8]
+		cmp ecx, 0xCAFEBABE
+		jnz acquire_return
+
+		;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+		;;;; Check ownership of lock ;;;;
+		;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+		; Lock owners are identified by their TID; let's find ours.
+		; The gettid system call simply returns our Linux thread ID.
+		; See: man 2 gettid
+
+		; gettid: rax = system call ID
+		mov eax, SYS_GETTID
+		; gettid: rax = gettid()
+		syscall
+
+		; Save a copy of our TID (no need for an error check)
+		mov edx, eax
+
+		; There are four possible ownership situations for the lock,
+		; which we can distinguish based on the dword value at [rdi]:
+		; - Case 1: if [rdi] contains zero, then the lock is available.
+		; - Case 2: if [rdi] has any of its highest 2 bits set, then the
+		;           lock isn't free, and kernel intervention is required.
+		; - Case 3: if the lower 30 bits of [rdi] contain our TID,
+		;           then we already own it (recursive acquisition).
+		; - Case 4: if the lower 30 bits of [rdi] contain another TID
+		;           and the high-bit flags aren't set, then we just wait
+		;           until we can acquire the lock using atomic operations
+		;           or, optionally, a futex call (usually more efficient).
+
+		; Atomically check whether the lock is owned by another thread,
+		; and if not, try to take ownership by writing our TID to [rdi].
+		; if ([rdi] == 0) { [rdi] = edx; goto acquire_success; } else { eax = [rdi]; }
+		xor eax, eax
+		lock cmpxchg [rdi], edx
+		jz acquire_success ; case 1
+
+		; The lock isn't free, so let's check how "clean" its state is.
+		; The following flags are set by the kernel (see futex below):
+		; - FUTEX_OWNER_DIED: the lock's owner died, so it's actually free
+		;                     (but first the kernel needs to clean up)
+		; - FUTEX_WAITERS:    we aren't the only one waiting for this lock
+		;                     (so let's sleep until the kernel wakes us up)
+		; Either way, we need the kernel's help, so jump to the futex call.
+		test eax, (FUTEX_OWNER_DIED | FUTEX_WAITERS)
+		jnz acquire_futex ; case 2
+
+		; It seems someone has the lock, check who: it may already be us.
+		; If so, this is a recursive acquisition, good, let's continue.
+		and eax, FUTEX_TID_MASK
+		cmp eax, edx
+		je acquire_success ; case 3
+
+		; Someone else has the lock, but we're the only one waiting for it.
+		; System calls are expensive, so let's try a short spin loop first,
+		; hoping it'll get released soon. This is arguably unnecessary, as
+		; it's only beneficial when two threads are more or less "in sync",
+		; so in most real-world cases you can delete this with no downside.
+
+		; Loop counter
+		mov ecx, 10
+	acquire_spinloop:
+		; The "pause" instruction is specially designed for loops like this
+		; and conserves power. It causes a small delay (makes sense here).
+		pause
+
+		; Atomically check whether the lock is owned by another thread,
+		; and if not, try to take ownership by writing our TID to [rdi].
+		; if ([rdi] == 0) { [rdi] = edx; goto acquire_success; } else { eax = [rdi]; }
+		xor eax, eax
+		lock cmpxchg [rdi], edx
+		jz acquire_success
+
+		; Decrement loop counter until zero
+		dec ecx
+		jnz acquire_spinloop
+
+		;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+		;;;; Let the kernel handle it ;;;;
+		;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+	acquire_futex:
+		; The futex system call waits for the dword at an address (rdi)
+		; changes in a certain way, as described above and in the futex
+		; manual's section on so-called "priority-inheritance futexes".
+		; See: man 2 futex
+
+		; futex: rdi = uaddr: address of the dword to watch
+		; futex: rsi = futex_op: which futex operation we want:
+		; - FUTEX_LOCK_PI:      block until lock's owner uses FUTEX_UNLOCK_PI
+		; - FUTEX_PRIVATE_FLAG: this lock isn't shared with another process
+		mov esi, (FUTEX_LOCK_PI | FUTEX_PRIVATE_FLAG)
+		; futex: r10 = timeout: in case we had a deadline (we don't)
+		xor r10, r10
+		; futex: rdx = val:   ignored when FUTEX_LOCK_PI is used
+		; futex: r8 = uaddr2: ignored when FUTEX_LOCK_PI is used
+		; futex: r9 = val3:   ignored when FUTEX_LOCK_PI is used
+		; futex: rax = system call ID
+		mov eax, SYS_FUTEX
+		; futex: rax = futex(rdi, rsi, (rdx), r10, (r8), (r9))
+		syscall
+ 
+		; Sometimes the lock is released after the "lock cmpxchg" instruction
+		; but just before the futex call. In that case, futex returns EAGAIN.
+		cmp rax, -11 ; (-EAGAIN)
+		je acquire_futex
+
+		; Any other negative return value means failure
+		test rax, rax
+		jnz acquire_return
+
+		; Indicate that we made a futex call (see below for why)
+		xor edx, edx
+
+		;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+		;;;; Update the recursion counter ;;;;
+		;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+	acquire_success:
+		; Read the recursion counter (we have the lock: no need for atomics)
+		mov ecx, [rdi + 4]
+
+		; The value in edx depends on how we came to the acquire_success label:
+		; 1) We jumped here after a successful "lock cmpxchg": edx has our TID
+		; 2) We finished a successful futex call: edx was set to 0 (see above)
+		test edx, edx
+		; Why do we care? Well, in the latter case, the futex call may have been
+		; necessary because there was a problem (i.e. FUTEX_OWNER_DIED was set),
+		; in which case the recursion counter is stale and hence must be reset.
+		; In any other case, whoever released the lock should've reset it already.
+		cmovz ecx, edx ; ecx = 0
+
+		; Increment the recursion counter and write it back to memory
+		; (if the lock is being used non-recursively, it should be 1)
+		inc ecx
+		mov [rdi + 4], ecx
+
+		; Lock acquisition was successful, so we'll return 0. In most cases
+		; eax is already 0; we only need this if the recursion counter > 1.
+		xor eax, eax
+
+	acquire_return:
+		ret
diff --git a/lib/lock_release.asm b/lib/lock_release.asm
new file mode 100644
index 0000000..f86caa2
--- /dev/null
+++ b/lib/lock_release.asm
@@ -0,0 +1,132 @@
+; Under MIT license, see /LICENSE.txt
+
+
+; Cheat sheet for Linux' x86_64 calling convention:
+;
+; - free to overwrite (caller should save them):
+;       rax, rcx, rdx, rsi, rdi, r8-r11, xmm0-xmm15
+; - caller expects be kept (callee should save them):
+;       rbx, rbp, r12-r15
+;
+; - for passing paramters to functions:
+;       rdi, rsi, rdx, rcx, r8, r9, xmm0-xmm7
+; - for getting return values from functions:
+;       rax, rdx, xmm0
+;
+; - for passing parameters to syscalls:
+;       rax, rdi, rsi, rdx, r10, r8, r9
+; - for getting return values from syscalls:
+;       rax, rdx
+; - overwritten by syscalls (all others preserved):
+;       rcx, r11
+
+
+section .text
+
+
+; Relevant system call IDs
+%define SYS_GETTID  186
+%define SYS_FUTEX   202
+
+; Relevant operations for futex
+%define FUTEX_UNLOCK_PI    7
+%define FUTEX_PRIVATE_FLAG 0x80
+
+; Relevant bits for futex dword
+%define FUTEX_TID_MASK   0x3fffffff
+%define FUTEX_OWNER_DIED 0x40000000
+%define FUTEX_WAITERS    0x80000000
+
+
+; Release an acquired lock if we're who acquired it. Argument:
+;	rdi: struct{u32,u32,u32}* = handle of lock to release
+; Returns zero on success, or a standard error code.
+global linen_lock_release
+linen_lock_release:
+		;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+		;;;; Check validity of argument ;;;;
+		;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+		; Return EINVAL if rdi is NULL or invalid
+		mov eax, -22 ; (EINVAL = -22)
+
+		test rdi, rdi
+		jz release_return ; rdi is NULL
+
+		; rdi is nonzero, so let's just assume it's a valid pointer;
+		; if that assumption is wrong we'll get a segmentation fault.
+		; But we don't yet trust that [rdi] is a valid lock handle!
+		; To verify this we check the canary value stored at [rdi + 8].
+		mov ecx, [rdi + 8]
+		cmp ecx, 0xCAFEBABE
+		jnz release_return
+
+		; Lock owners are identified by their TID; let's find ours.
+		; The gettid system call simply returns our Linux thread ID.
+		; See: man 2 gettid
+
+		; gettid: rax = system call ID
+		mov eax, SYS_GETTID
+		; gettid: rax = gettid()
+		syscall
+
+		; Save a copy of our TID (no need for an error check)
+		mov edx, eax
+
+		; Return EPERM if this lock currently doesn't belong to us
+		mov eax, -1 ; (EPERM = -1)
+
+		; Read the futex dword at [rdi] and keep its lowest 30 bits
+		mov ecx, [rdi]
+		and ecx, FUTEX_TID_MASK
+		; Those bits contain the owner's TID; it should be our TID
+		cmp ecx, edx
+		jne release_return
+
+		;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+		;;;; (Partially) release our lock ;;;;
+		;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+		; Decrement the recursion counter. If it's still > 1, we're done here.
+		dec dword [rdi + 4]
+		jnz release_success
+		; If it reaches 0, it's time for a full release by setting [rdi] to 0.
+
+		; Restore our saved TID to eax for "lock cmpxchg" below
+		mov eax, edx
+
+		; Atomically try to set the dword at [rdi] to 0 if it was equal to our TID.
+		; if ([rdi] == eax]) { [rdi] = 0; goto release_success; } else { eax = [rdi]; }
+		xor ecx, ecx
+		lock cmpxchg [rdi], ecx
+		je release_success
+
+		; We failed because [rdi] wasn't equal to our TID. In theory,
+		; that can mean only one thing: [rdi] = (edx | FUTEX_WAITERS).
+		; In that case we need to ask the kernel to wake up the threads
+		; who are waiting (via a futex system call) for [rdi] to change.
+		; See: man 2 futex
+
+		; futex: rdi = uaddr: address of the dword to announce for
+		; futex: rsi = futex_op: which futex operation we want:
+		; - FUTEX_UNLOCK_PI:    wake up one thread sleeping via FUTEX_LOCK_PI
+		; - FUTEX_PRIVATE_FLAG: this lock isn't shared with another process
+		mov esi, (FUTEX_UNLOCK_PI | FUTEX_PRIVATE_FLAG) ; futex: futex_op
+		; futex: rdx = val:     ignored when FUTEX_UNLOCK_PI is used
+		; futex: r10 = timeout: ignored when FUTEX_UNLOCK_PI is used
+		; futex: r8 = uaddr2:   ignored when FUTEX_UNLOCK_PI is used
+		; futex: r9 = val3:     ignored when FUTEX_UNLOCK_PI is used
+		; futex: rax = system call ID
+		mov eax, SYS_FUTEX
+		; futex: rax = futex(rdi, rsi, (rdx), (r10), (r8), (r9))
+		syscall
+
+		; Check result of futex: nonzero means failure
+		test rax, rax
+		jnz release_return
+
+	release_success:
+		xor eax, eax
+
+	release_return:
+		ret
diff --git a/lib/thread_create.asm b/lib/thread_create.asm
index 2ccd1fa..9a6fe78 100644
--- a/lib/thread_create.asm
+++ b/lib/thread_create.asm
@@ -3,9 +3,9 @@
 
 ; Cheat sheet for Linux' x86_64 calling convention:
 ;
-; - free to overwrite; the caller should save them:
+; - free to overwrite (caller should save them):
 ;       rax, rcx, rdx, rsi, rdi, r8-r11, xmm0-xmm15
-; - caller expects no change; callee should save them:
+; - caller expects be kept (callee should save them):
 ;       rbx, rbp, r12-r15
 ;
 ; - for passing paramters to functions:
@@ -63,9 +63,9 @@ section .text
 
 
 ; Create a new thread executing a given function. Arguments:
-;	rdi: u32**            = where to put the thread handle
-;	rsi: void* (*)(void*) = function to make the child run
-;	rdx: void*            = single argument for function
+;	rdi: struct{u32,u32}** = where to put the thread handle
+;	rsi: void* (*)(void*)  = function to make the child run
+;	rdx: void*             = single argument for function
 ; Returns zero on success, or a standard error code.
 global linen_thread_create
 linen_thread_create:
@@ -79,9 +79,9 @@ linen_thread_create:
 		; Return EINVAL if any argument is NULL
 		mov eax, -22 ; (EINVAL = -22)
 		test rdi, rdi
-		jz create_end ; Nowhere to store the thread handle
+		jz create_return ; Nowhere to store the thread handle
 		test rsi, rsi
-		jz create_end ; No function for the thread to run
+		jz create_return ; No function for the thread to run
 
 		; Note: we allow rdx to be NULL; in that case the worst that can happen
 		; is a segmentation fault in the user's code (not really our problem).
@@ -104,7 +104,7 @@ linen_thread_create:
 		mov esi, (STACK_SIZE + GUARD_PAGE)
 		; mmap: rdx = prot: mprotect-style access permissions
 		mov edx, (PROT_WRITE | PROT_READ)
-		; mmap: r10 = flags: settings for mapping
+		; mmap: r10 = flags: configuration flags for mapping:
 		; - MAP_ANONYMOUS: there is no file backing this buffer
 		; - MAP_PRIVATE:   only this process can see thread's stack
 		; - MAP_STACK:     no-op; inform kernel that this is a stack
@@ -126,7 +126,7 @@ linen_thread_create:
 		; Check result of mmap: negative means failure,
 		; otherwise rax is the address of the new mapping.
 		test rax, rax
-		js create_end
+		js create_return
 
 		;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 		;;;; Revoke guard page's R/W permissions ;;;;
@@ -151,7 +151,7 @@ linen_thread_create:
 
 		; Check result of mprotect: nonzero means failure
 		test rax, rax
-		jnz create_end
+		jnz create_return
 
 		;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 		;;;; Spawn a thread with the new stack ;;;;
@@ -202,7 +202,7 @@ linen_thread_create:
 
 		; Check result of clone:
 		test rax, rax
-		js create_end      ; Negative means failure
+		js create_return   ; Negative means failure
 		jnz create_success ; Positive means we're in the parent thread
 		; Zero means we're in the child thread
 
@@ -256,7 +256,7 @@ linen_thread_create:
 		; Return 0 for success
 		xor eax, eax
 
-	create_end:
+	create_return:
 		; Restore callee-save registers
 		pop rbx
 
diff --git a/lib/thread_finish.asm b/lib/thread_finish.asm
index 9392f1c..860b0a4 100644
--- a/lib/thread_finish.asm
+++ b/lib/thread_finish.asm
@@ -3,9 +3,9 @@
 
 ; Cheat sheet for Linux' x86_64 calling convention:
 ;
-; - free to overwrite; the caller should save them:
+; - free to overwrite (caller should save them):
 ;       rax, rcx, rdx, rsi, rdi, r8-r11, xmm0-xmm15
-; - caller expects no change; callee should save them:
+; - caller expects be kept (callee should save them):
 ;       rbx, rbp, r12-r15
 ;
 ; - for passing paramters to functions:
@@ -28,8 +28,8 @@ section .text
 %define SYS_MUNMAP  11
 %define SYS_FUTEX  202
 
-; Relevant flags for futex
-%define FUTEX_WAIT         0x00
+; Relevant operations for futex
+%define FUTEX_WAIT         0
 %define FUTEX_PRIVATE_FLAG 0x80
 
 
@@ -38,8 +38,8 @@ section .text
 
 
 ; Wait for thread to exit, save its return value, and clean up. Arguments:
-;	rdi: u32*   = handle of the thread to wait for
-;	rsi: void** = where to put the void* returned by the thread
+;	rdi: struct{u32,u32}* = handle of the thread to wait for
+;	rsi: void**           = where to put void* returned by thread
 ; Returns zero on success, or a standard error code.
 global linen_thread_finish
 linen_thread_finish:
@@ -51,15 +51,15 @@ linen_thread_finish:
 		mov eax, -22 ; (EINVAL = -22)
 
 		test rdi, rdi
-		jz join_end ; rdi is NULL
+		jz finish_return ; rdi is NULL
 
-		; rdi is nonzero, so let's just assume it's a valid pointer.
-		; If that assumption is wrong we'll get a segmentation fault.
+		; rdi is nonzero, so let's just assume it's a valid pointer;
+		; if that assumption is wrong we'll get a segmentation fault.
 		; But we don't yet trust that [rdi] is a valid thread handle!
-		; To verify this, we check the canary value stored at [rdi + 4].
+		; To verify this we check the canary value stored at [rdi + 4].
 		mov ecx, [rdi + 4]
 		cmp ecx, 0xDEADBEEF
-		jnz join_end
+		jnz finish_return
 
 		;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 		;;;; Wait until thread is finished ;;;;
@@ -68,15 +68,16 @@ linen_thread_finish:
 		; We'll clobber rsi if we need to set up a futex call
 		mov r8, rsi
 
+	finish_retry:
 		; When spawning, we set CLONE_CHILD_SETTID and CLONE_CHILD_CLEARTID:
 		; [rdi] contains the child thread's TID, and will get automatically
 		; cleared (to 0) when the child exits; this is what we'll watch for.
 
 		; Atomically check whether the target thread is still running.
-		; if ([rdi] == 0) { goto join_done; } else { eax = [rdi]; }
+		; if ([rdi] == 0) { goto finish_success; } else { eax = [rdi]; }
 		xor eax, eax
 		lock cmpxchg [rdi], eax
-		jz join_done
+		jz finish_success
 
 		; The thread is still busy, so block until it's done.
 		; The futex system call waits until the dword at an
@@ -84,7 +85,7 @@ linen_thread_finish:
 		; See: man 2 futex
 
 		; futex: rdi = uaddr: address of the dword to watch
-		; futex: rsi = futex_op: which futex operation we want
+		; futex: rsi = futex_op: which futex operation we want:
 		; - FUTEX_WAIT:         block until the value at [rdi] changes
 		; - FUTEX_PRIVATE_FLAG: FIXME waits forever, I don't understand why
 		mov esi, FUTEX_WAIT
@@ -93,21 +94,26 @@ linen_thread_finish:
 		; futex: r10 = timeout: in case we had a deadline (we don't)
 		xor r10, r10
 		; futex: r8 = uaddr2: ignored when FUTEX_WAIT is used
-		; futex: r9 = val3:   ignored when FUTEX WAIT is used
+		; futex: r9 = val3:   ignored when FUTEX_WAIT is used
 		; futex: rax = system call ID
 		mov eax, SYS_FUTEX
 		; futex: rax = futex(rdi, rsi, rdx, r10, (r8), (r9))
 		syscall
 
-		; Check result of futex: negative means failure
+		; Sometimes the thread exits after the "lock cmpxchg" instruction
+		; but before the futex call. In that case, futex returns EAGAIN.
+		cmp rax, -11 ; (EAGAIN = -11)
+		je finish_retry
+
+		; Any other nonzero return value means failure
 		test rax, rax
-		jnz join_end
+		jnz finish_return
 
 		;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 		;;;; Clean up after thread's exit ;;;;
 		;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
-	join_done:
+	finish_success:
 		; The thread left its function return value on the stack, read it
 		mov rdx, [rdi - 8]
 
@@ -127,15 +133,15 @@ linen_thread_finish:
 
 		; Check result of munmap: nonzero means failure
 		test rax, rax
-		jnz join_end
+		jnz finish_return
 
 		; Check if caller gave a location (r8) to save the return value (rdx)
 		test r8, r8
-		jz join_end ; caller doesn't care: gave NULL pointer
+		jz finish_return ; caller doesn't care: gave NULL pointer
 		mov [r8], rdx
 		; Note: if munmap failed, the buffer is still there, so we
 		; can safely return an error without losing the return value.
 
-	join_end:
+	finish_return:
 		ret
 
diff --git a/linen.h b/linen.h
index ef625d0..9062111 100644
--- a/linen.h
+++ b/linen.h
@@ -5,9 +5,28 @@
 	#error "Linen (libinen.so) only works on x86_64 Linux!"
 #endif
 
-typedef int* linen_thread_t;
+#include <stdint.h>
+
+
+typedef struct {
+	uint32_t futex;
+	uint32_t magic;
+} *linen_thread_t;
 
 extern int linen_thread_create(linen_thread_t* handle, void* (*func)(void*), void* arg);
 extern int linen_thread_finish(linen_thread_t  handle, void** retval);
 
+
+typedef struct {
+	uint32_t futex;
+	uint32_t count;
+	uint32_t magic;
+} linen_lock_t;
+
+#define LINEN_LOCK_INITIALIZER { 0, 0, 0xCAFEBABE }
+
+extern int linen_lock_acquire(linen_lock_t* handle);
+extern int linen_lock_release(linen_lock_t* handle);
+
+
 #endif /* _LINEN_H_ */
-- 
cgit v1.2.3