; Under MIT license, see /LICENSE.txt


; Cheat sheet for Linux' x86_64 calling convention:
;
; - free to overwrite (caller should save them):
;       rax, rcx, rdx, rsi, rdi, r8-r11, xmm0-xmm15
; - caller expects these to be kept (callee should save them):
;       rbx, rbp, r12-r15
;
; - for passing parameters to functions:
;       rdi, rsi, rdx, rcx, r8, r9, xmm0-xmm7
; - for getting return values from functions:
;       rax, rdx, xmm0
;
; - for passing parameters to syscalls:
;       rax, rdi, rsi, rdx, r10, r8, r9
; - for getting return values from syscalls:
;       rax
; - overwritten by syscalls (all others preserved):
;       rcx, r11


section .text


; Relevant system call IDs
%define SYS_GETTID  186
%define SYS_FUTEX   202

; Relevant operations for futex
%define FUTEX_UNLOCK_PI    7
%define FUTEX_PRIVATE_FLAG 0x80

; Relevant bits for futex dword
%define FUTEX_TID_MASK   0x3fffffff
%define FUTEX_OWNER_DIED 0x40000000
%define FUTEX_WAITERS    0x80000000


; Layout of the lock handle that rdi points to
%define LOCK_FUTEX_OFFSET     0 ; u32: futex dword (owner TID + flag bits)
%define LOCK_RECURSION_OFFSET 4 ; u32: recursion counter
%define LOCK_CANARY_OFFSET    8 ; u32: canary identifying a valid handle
%define LOCK_CANARY_VALUE     0xCAFEBABE

; Error codes we return (negated, just like raw system calls report them)
%define ERR_EPERM  (-1)
%define ERR_EINVAL (-22)


; Release an acquired lock if we're who acquired it.
;
; ABI:      System V AMD64 (Linux); leaf function, doesn't touch the stack
; Argument:
;	rdi: struct{u32,u32,u32}* = handle of lock to release
; Returns (in rax) zero on success, or a negated standard error code:
;	-EINVAL: rdi is NULL or [rdi] doesn't carry the canary value
;	-EPERM:  the lock isn't currently owned by the calling thread
;	others:  whatever the futex system call reported; in that case the
;	         lock is left exactly as it was before this call
; Clobbers: rcx, rdx, rsi, r11, flags (rdi is preserved)
global linen_lock_release
linen_lock_release:
		;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
		;;;; Check validity of argument ;;;;
		;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

		; Return EINVAL if rdi is NULL or invalid.
		; We load all 64 bits of rax so that our own error codes are
		; sign-extended the same way as those returned by system calls.
		mov rax, ERR_EINVAL

		test rdi, rdi
		jz .return ; rdi is NULL

		; rdi is nonzero, so let's just assume it's a valid pointer;
		; if that assumption is wrong we'll get a segmentation fault.
		; But we don't yet trust that [rdi] is a valid lock handle!
		; To verify this we check the canary value stored in the handle.
		cmp dword [rdi + LOCK_CANARY_OFFSET], LOCK_CANARY_VALUE
		jne .return

		; Lock owners are identified by their TID; let's find ours.
		; The gettid system call simply returns our Linux thread ID.
		; See: man 2 gettid

		; gettid: rax = system call ID
		mov eax, SYS_GETTID
		; gettid: rax = gettid()   (overwrites rcx and r11, keeps rdi)
		syscall

		; Save a copy of our TID (no need for an error check)
		mov edx, eax

		; Return EPERM if this lock currently doesn't belong to us
		mov rax, ERR_EPERM

		; Read the futex dword and keep its lowest 30 bits
		mov ecx, [rdi + LOCK_FUTEX_OFFSET]
		and ecx, FUTEX_TID_MASK
		; Those bits contain the owner's TID; it should be our TID
		cmp ecx, edx
		jne .return

		;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
		;;;; (Partially) release our lock ;;;;
		;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

		; From here on we know that we own the lock, so nobody else is
		; supposed to touch the recursion counter: no atomics needed for it.

		; Decrement the recursion counter. If it's still > 0, we're done here.
		dec dword [rdi + LOCK_RECURSION_OFFSET]
		jnz .success
		; If it reaches 0, it's time for a full release by zeroing the futex dword.

		; Restore our saved TID to eax for "lock cmpxchg" below
		mov eax, edx

		; Atomically try to set the futex dword to 0 if it was equal to our TID.
		; if ([rdi] == eax) { [rdi] = 0; goto .success; } else { eax = [rdi]; }
		xor ecx, ecx
		lock cmpxchg [rdi + LOCK_FUTEX_OFFSET], ecx
		je .success

		; We failed because [rdi] wasn't equal to our bare TID. In theory,
		; that means flag bits are set next to it, typically FUTEX_WAITERS:
		; [rdi] = (edx | FUTEX_WAITERS).
		; In that case we need to ask the kernel to wake up the threads
		; who are waiting (via a futex system call) for [rdi] to change.
		; See: man 2 futex

		; futex: rdi = uaddr: address of the dword to announce for.
		;        We pass the handle pointer itself, which is only correct
		;        because LOCK_FUTEX_OFFSET is 0.
		; futex: rsi = futex_op: which futex operation we want:
		; - FUTEX_UNLOCK_PI:    wake up one thread sleeping via FUTEX_LOCK_PI
		; - FUTEX_PRIVATE_FLAG: this lock isn't shared with another process
		mov esi, (FUTEX_UNLOCK_PI | FUTEX_PRIVATE_FLAG) ; futex: futex_op
		; futex: rdx = val:     ignored when FUTEX_UNLOCK_PI is used
		; futex: r10 = timeout: ignored when FUTEX_UNLOCK_PI is used
		; futex: r8 = uaddr2:   ignored when FUTEX_UNLOCK_PI is used
		; futex: r9 = val3:     ignored when FUTEX_UNLOCK_PI is used
		; futex: rax = system call ID
		mov eax, SYS_FUTEX
		; futex: rax = futex(rdi, rsi, (rdx), (r10), (r8), (r9))
		syscall

		; Check result of futex: nonzero means failure
		test rax, rax
		jz .success

		; The kernel refused to unlock, so we still own the lock. Undo the
		; decrement from above (0 -> 1), otherwise the counter would claim
		; that the lock is free and the next release attempt would wrap it
		; around to 0xFFFFFFFF instead of retrying the unlock.
		; ("inc" only touches flags; rax still holds the futex error code.)
		inc dword [rdi + LOCK_RECURSION_OFFSET]
		ret

	.success:
		xor eax, eax

	.return:
		ret