summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Prefetch, 2023-07-24 19:21:30 +0200
committer: Prefetch, 2023-07-24 19:21:30 +0200
commit: 94e3e3652f9f04810126ee754fa9a788289e2897 (patch)
tree: af5aac9f176039e97f1ebc46b497b5fd192a4fa2
parent: a211da8cfe9b0565881537cc81b09ae55c722111 (diff)
Reduce total code size by 53 bytes (big deal, right?) [HEAD, master]
-rw-r--r-- src/lock_acquire.asm | 26
-rw-r--r-- src/lock_release.asm | 32
-rw-r--r-- src/thread_create.asm | 52
-rw-r--r-- src/thread_finish.asm | 30
4 files changed, 84 insertions, 56 deletions
diff --git a/src/lock_acquire.asm b/src/lock_acquire.asm
index f32ba6a..8415d7f 100644
--- a/src/lock_acquire.asm
+++ b/src/lock_acquire.asm
@@ -43,12 +43,15 @@ section .text
; Returns zero on success, or a standard error code.
global linen_lock_acquire
linen_lock_acquire:
+ ; It's handy to have a register that's 0 during most of this function
+ xor esi, esi
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;; Check validity of argument ;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
- ; Return EINVAL if rdi is NULL or otherwise invalid
- mov eax, -22 ; (EINVAL = -22)
+ ; Return EINVAL (-22) if rdi is NULL or otherwise invalid
+ lea eax, [rsi - 22] ; mov eax, -22
test rdi, rdi
jz acquire_return ; rdi is NULL
@@ -57,8 +60,7 @@ linen_lock_acquire:
; if that assumption is wrong we'll get a segmentation fault.
; But we don't yet trust that [rdi] is a valid lock handle!
; To verify this we check the canary value stored at [rdi + 8].
- mov ecx, [rdi + 8]
- cmp ecx, 0xCAFEBABE
+ cmp dword [rdi + 8], 0xCAFEBABE ; Oh CISC...
jnz acquire_return
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@@ -70,7 +72,8 @@ linen_lock_acquire:
; See: man 2 gettid
; gettid: rax = system call ID
- mov eax, SYS_GETTID
+ xor eax, eax
+ mov al, SYS_GETTID
; gettid: rax = gettid()
syscall
@@ -119,7 +122,7 @@ linen_lock_acquire:
; so in most real-world cases you can delete this with no downside.
; Loop counter
- mov ecx, 10
+ mov sil, 10
acquire_spinloop:
; The "pause" instruction is specially designed for loops like this
; and conserves power. It causes a small delay (makes sense here).
@@ -133,7 +136,7 @@ linen_lock_acquire:
jz acquire_success
; Decrement loop counter until zero
- dec ecx
+ dec esi
jnz acquire_spinloop
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@@ -150,24 +153,25 @@ linen_lock_acquire:
; futex: rsi = futex_op: which futex operation we want:
; - FUTEX_LOCK_PI: block until lock's owner uses FUTEX_UNLOCK_PI
; - FUTEX_PRIVATE_FLAG: this lock isn't shared with another process
- mov esi, (FUTEX_LOCK_PI | FUTEX_PRIVATE_FLAG)
+ mov sil, (FUTEX_LOCK_PI | FUTEX_PRIVATE_FLAG)
; futex: r10 = timeout: in case we had a deadline (we don't)
xor r10, r10
; futex: rdx = val: ignored when FUTEX_LOCK_PI is used
; futex: r8 = uaddr2: ignored when FUTEX_LOCK_PI is used
; futex: r9 = val3: ignored when FUTEX_LOCK_PI is used
; futex: rax = system call ID
- mov eax, SYS_FUTEX
+ xor eax, eax
+ mov al, SYS_FUTEX
; futex: rax = futex(rdi, rsi, (rdx), r10, (r8), (r9))
syscall
; Sometimes the lock is released after the "lock cmpxchg" instruction
; but just before the futex call. In that case, futex returns EAGAIN.
- cmp rax, -11 ; (-EAGAIN)
+ cmp eax, -11 ; (-EAGAIN)
je acquire_futex
; Any other negative return value means failure
- test rax, rax
+ test eax, eax
jnz acquire_return
; Indicate that we made a futex call (see below for why)
diff --git a/src/lock_release.asm b/src/lock_release.asm
index f86caa2..2892cc3 100644
--- a/src/lock_release.asm
+++ b/src/lock_release.asm
@@ -43,12 +43,15 @@ section .text
; Returns zero on success, or a standard error code.
global linen_lock_release
linen_lock_release:
+ ; It's handy to have a register that's 0 during most of this function
+ xor esi, esi
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;; Check validity of argument ;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
- ; Return EINVAL if rdi is NULL or invalid
- mov eax, -22 ; (EINVAL = -22)
+ ; Return EINVAL (-22) if rdi is NULL or invalid
+ lea eax, [rsi - 22] ; mov eax, -22
test rdi, rdi
jz release_return ; rdi is NULL
@@ -57,8 +60,7 @@ linen_lock_release:
; if that assumption is wrong we'll get a segmentation fault.
; But we don't yet trust that [rdi] is a valid lock handle!
; To verify this we check the canary value stored at [rdi + 8].
- mov ecx, [rdi + 8]
- cmp ecx, 0xCAFEBABE
+ cmp dword [rdi + 8], 0xCAFEBABE ; Oh CISC...
jnz release_return
; Lock owners are identified by their TID; let's find ours.
@@ -66,17 +68,19 @@ linen_lock_release:
; See: man 2 gettid
; gettid: rax = system call ID
- mov eax, SYS_GETTID
+ xor eax, eax
+ mov al, SYS_GETTID
; gettid: rax = gettid()
syscall
; Save a copy of our TID (no need for an error check)
mov edx, eax
- ; Return EPERM if this lock currently doesn't belong to us
- mov eax, -1 ; (EPERM = -1)
+ ; Return EPERM (-1) if this lock currently doesn't belong to us
+ or eax, -1 ; mov eax, -1
- ; Read the futex dword at [rdi] and keep its lowest 30 bits
+ ; Read the futex dword at [rdi] and keep its lowest 30 bits.
+ ; A plain load is enough: if we do own this lock, its TID bits can't change under us.
mov ecx, [rdi]
and ecx, FUTEX_TID_MASK
; Those bits contain the owner's TID; it should be our TID
@@ -96,9 +100,8 @@ linen_lock_release:
mov eax, edx
; Atomically try to set the dword at [rdi] to 0 if it was equal to our TID.
- ; if ([rdi] == eax]) { [rdi] = 0; goto release_success; } else { eax = [rdi]; }
- xor ecx, ecx
- lock cmpxchg [rdi], ecx
+ ; if ([rdi] == eax) { [rdi] = 0; goto release_success; } else { eax = [rdi]; }
+ lock cmpxchg [rdi], esi ; esi = 0
je release_success
; We failed because [rdi] wasn't equal to our TID. In theory,
@@ -111,18 +114,19 @@ linen_lock_release:
; futex: rsi = futex_op: which futex operation we want:
; - FUTEX_UNLOCK_PI: wake up one thread sleeping via FUTEX_LOCK_PI
; - FUTEX_PRIVATE_FLAG: this lock isn't shared with another process
- mov esi, (FUTEX_UNLOCK_PI | FUTEX_PRIVATE_FLAG) ; futex: futex_op
+ mov sil, (FUTEX_UNLOCK_PI | FUTEX_PRIVATE_FLAG)
; futex: rdx = val: ignored when FUTEX_UNLOCK_PI is used
; futex: r10 = timeout: ignored when FUTEX_UNLOCK_PI is used
; futex: r8 = uaddr2: ignored when FUTEX_UNLOCK_PI is used
; futex: r9 = val3: ignored when FUTEX_UNLOCK_PI is used
; futex: rax = system call ID
- mov eax, SYS_FUTEX
+ xor eax, eax
+ mov al, SYS_FUTEX
; futex: rax = futex(rdi, rsi, (rdx), (r10), (r8), (r9))
syscall
; Check result of futex: nonzero means failure
- test rax, rax
+ test eax, eax
jnz release_return
release_success:
diff --git a/src/thread_create.asm b/src/thread_create.asm
index 9a6fe78..8dc8813 100644
--- a/src/thread_create.asm
+++ b/src/thread_create.asm
@@ -72,12 +72,16 @@ linen_thread_create:
; Callee-save registers
push rbx
+ ; It's handy to have a register that's 0 for a while
+ xor ecx, ecx
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;; Check validity of arguments ;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
- ; Return EINVAL if any argument is NULL
- mov eax, -22 ; (EINVAL = -22)
+ ; Return EINVAL (-22) if any argument is NULL
+ lea eax, [rcx - 22] ; mov eax, -22
+
test rdi, rdi
jz create_return ; Nowhere to store the thread handle
test rsi, rsi
@@ -91,9 +95,9 @@ linen_thread_create:
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Save these registers: we'll clobber them for the mmap call
- mov rbx, rdi
- push rdx
- push rsi
+ push rdx ; argument
+ push rsi ; function
+ push rdi ; thread handle destination
; The mmap system call does many things, in this case allocate memory.
; See: man 2 mmap
@@ -103,25 +107,26 @@ linen_thread_create:
; mmap: rsi = length: size of buffer to allocate
mov esi, (STACK_SIZE + GUARD_PAGE)
; mmap: rdx = prot: mprotect-style access permissions
- mov edx, (PROT_WRITE | PROT_READ)
+ lea edx, [rcx + 3] ; mov edx, (PROT_READ | PROT_WRITE)
; mmap: r10 = flags: configuration flags for mapping:
; - MAP_ANONYMOUS: there is no file backing this buffer
; - MAP_PRIVATE: only this process can see thread's stack
; - MAP_STACK: no-op; inform kernel that this is a stack
- mov r10, (MAP_ANONYMOUS | MAP_PRIVATE | MAP_STACK)
+ mov r10d, (MAP_ANONYMOUS | MAP_PRIVATE | MAP_STACK)
; mmap: r8 = fd: ignored for MAP_ANONYMOUS, recommended -1
- mov r8, -1
+ lea r8, [rcx - 1] ; mov r8, -1
; mmap: r9 = offset: should be 0 when MAP_ANONYMOUS is used
xor r9, r9
; mmap: rax = system call ID
- mov eax, SYS_MMAP
- ; mmap: rax = mmap(rdi, rsi, rdx, r10, r8, 9)
+ lea eax, [rcx + SYS_MMAP] ; mov eax, SYS_MMAP
+ ; mmap: rax = mmap(rdi, rsi, rdx, r10, (r8), (r9))
syscall
; Pop these now before we start branching. Those registers
; won't be used by the next system calls, so they're safe.
- pop r8 ; function
- pop r9 ; argument
+ pop rbx ; thread handle destination
+ pop r8 ; function
+ pop r9 ; argument
; Check result of mmap: negative means failure,
; otherwise rax is the address of the new mapping.
@@ -145,12 +150,13 @@ linen_thread_create:
; mprotect: rdx = prot: access permissions; zero for none
xor edx, edx
; mprotect: rax = system call ID
- mov eax, SYS_MPROTECT
+ xor eax, eax
+ mov al, SYS_MPROTECT
; mprotect: rax = mprotect(rdi, rsi, rdx)
syscall
; Check result of mprotect: nonzero means failure
- test rax, rax
+ test eax, eax
jnz create_return
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@@ -162,7 +168,7 @@ linen_thread_create:
; once in the parent (the child's TID if success) and once in the child (0).
; See: man 2 clone
- ; clone: rsi = stack
+ ; clone: rsi = stack: pointer for child's initial rsp
; Currently rdi points to the lowest byte of the stack area.
; Again, stacks grow downward, so we calculate the address of
; the top qword to use as the child thread's starting point.
@@ -194,14 +200,14 @@ linen_thread_create:
; clone: r8 = tls: ignored unless CLONE_SETTLS is used
; clone: rax = system call ID
- mov eax, SYS_CLONE
+ mov al, SYS_CLONE
; clone: rax = clone(rdi, rsi, (rdx), r10, (r8));
syscall
; Ideally, both parent and new-born child are executing this code now.
; Check result of clone:
- test rax, rax
+ test eax, eax
js create_return ; Negative means failure
jnz create_success ; Positive means we're in the parent thread
; Zero means we're in the child thread
@@ -220,10 +226,16 @@ linen_thread_create:
; Once done, leave function's return value lying around
push rax
- ; Exit the thread with return value 0
+ ; Exit the thread with status 0 using the exit system call.
+ ; See: man 2 exit
+
+ ; exit: rdi = status to report
xor edi, edi
- mov rax, SYS_EXIT
- syscall ; (never returns)
+ ; exit: rax = system call ID
+ xor eax, eax
+ mov al, SYS_EXIT
+ ; exit: call never returns
+ syscall
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;; Clean up in parent thread ;;;;
diff --git a/src/thread_finish.asm b/src/thread_finish.asm
index 860b0a4..c453d40 100644
--- a/src/thread_finish.asm
+++ b/src/thread_finish.asm
@@ -43,12 +43,15 @@ section .text
; Returns zero on success, or a standard error code.
global linen_thread_finish
linen_thread_finish:
+ ; It's handy to have a register that's 0 for a while
+ xor ecx, ecx
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;; Check validity of arguments ;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
- ; Return EINVAL if rdi is NULL or otherwise invalid
- mov eax, -22 ; (EINVAL = -22)
+ ; Return EINVAL (-22) if rdi is NULL or otherwise invalid
+ lea eax, [rcx - 22] ; mov eax, -22
test rdi, rdi
jz finish_return ; rdi is NULL
@@ -57,8 +60,7 @@ linen_thread_finish:
; if that assumption is wrong we'll get a segmentation fault.
; But we don't yet trust that [rdi] is a valid thread handle!
; To verify this we check the canary value stored at [rdi + 4].
- mov ecx, [rdi + 4]
- cmp ecx, 0xDEADBEEF
+ cmp dword [rdi + 4], 0xDEADBEEF ; Oh CISC...
jnz finish_return
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@@ -86,9 +88,9 @@ linen_thread_finish:
; futex: rdi = uaddr: address of the dword to watch
; futex: rsi = futex_op: which futex operation we want:
- ; - FUTEX_WAIT: block until the value at [rdi] changes
+ ; - FUTEX_WAIT = 0: block until the value at [rdi] changes
; - FUTEX_PRIVATE_FLAG: not set; FIXME with it we wait forever (presumably the kernel's thread-exit wake-up is a non-private futex op -- confirm)
- mov esi, FUTEX_WAIT
+ xor esi, esi ; mov esi, FUTEX_WAIT
; futex: rdx = val: the expected value at [rdi] before it changes
mov edx, eax
; futex: r10 = timeout: in case we had a deadline (we don't)
@@ -96,17 +98,18 @@ linen_thread_finish:
; futex: r8 = uaddr2: ignored when FUTEX_WAIT is used
; futex: r9 = val3: ignored when FUTEX_WAIT is used
; futex: rax = system call ID
- mov eax, SYS_FUTEX
+ xor eax, eax
+ mov al, SYS_FUTEX
; futex: rax = futex(rdi, rsi, rdx, r10, (r8), (r9))
syscall
; Sometimes the thread exits after the "lock cmpxchg" instruction
; but before the futex call. In that case, futex returns EAGAIN.
- cmp rax, -11 ; (EAGAIN = -11)
+ cmp eax, -11 ; (EAGAIN = -11)
je finish_retry
; Any other nonzero return value means failure
- test rax, rax
+ test eax, eax
jnz finish_return
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@@ -117,6 +120,11 @@ linen_thread_finish:
; The thread left its function return value on the stack, read it
mov rdx, [rdi - 8]
+ ; Remove the canary value so the thread handle becomes invalid. We're about
+ ; to deallocate this memory anyway, so it's optional, but maybe the address
+ ; becomes valid again later in the program's life, who knows? Play it safe.
+ mov [rdi + 4], eax ; eax = 0 for all paths
+
; The munmap system call destroys mappings created by mmap.
; In this case that means deallocating the stack buffer.
; See: man 2 munmap
@@ -127,12 +135,12 @@ linen_thread_finish:
; munmap: rsi = length: size of region starting from rdi
mov esi, (STACK_SIZE + GUARD_PAGE)
; munmap: rax = system call ID
- mov eax, SYS_MUNMAP
+ mov al, SYS_MUNMAP
; munmap: rax = munmap(rdi, rsi)
syscall
; Check result of munmap: nonzero means failure
- test rax, rax
+ test eax, eax
jnz finish_return
; Check if caller gave a location (r8) to save the return value (rdx)