diff options
author | Prefetch | 2023-07-16 15:04:44 +0200 |
---|---|---|
committer | Prefetch | 2023-07-16 15:04:44 +0200 |
commit | 1a2e93c752a6b835423cd8bee609b8ec6c3f1262 (patch) | |
tree | a1c1edccbb110edc9b74b2e7ecbf47196a14c418 |
Initial commit for publication
-rw-r--r-- | .gitignore | 3 | ||||
-rw-r--r-- | Makefile | 18 | ||||
-rw-r--r-- | lib/thread_create.asm | 284 | ||||
-rw-r--r-- | lib/thread_finish.asm | 161 | ||||
-rw-r--r-- | linen.h | 13 | ||||
-rw-r--r-- | tests/test01.c | 52 | ||||
-rw-r--r-- | tests/test02.c | 62 |
7 files changed, 593 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..fab3e1d
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,3 @@
+/lib/*.o
+/liblinen.so
+/tests/*.run
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..edef168
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,21 @@
+liblinen.so: lib/thread_create.o lib/thread_finish.o
+	gcc -shared -o $@ $^
+
+lib/%.o: lib/%.asm
+	nasm -f elf64 $<
+
+# Libraries go after the source file: the linker resolves symbols left to
+# right, so "-llinen" placed first can be dropped (e.g. with --as-needed).
+tests/%.run: tests/%.c liblinen.so linen.h
+	gcc -I . -o $@ $< -L . -llinen
+
+# "tests" is also a directory name, so it must be declared phony to run.
+tests: tests/test01.run tests/test02.run
+	LD_LIBRARY_PATH=. ./tests/test01.run
+	LD_LIBRARY_PATH=. ./tests/test02.run
+
+.PHONY: tests clean
+clean:
+	rm -f tests/*.run
+	rm -f liblinen.so
+	rm -f lib/*.o
diff --git a/lib/thread_create.asm b/lib/thread_create.asm
new file mode 100644
index 0000000..4163d29
--- /dev/null
+++ b/lib/thread_create.asm
@@ -0,0 +1,313 @@
+; MIT license:
+;
+; Copyright (c) 2023 Marcus R.A. Newman (prefetch@prefetch.eu)
+;
+; Permission is hereby granted, free of charge, to any person obtaining a copy
+; of this software and associated documentation files (the "Software"), to deal
+; in the Software without restriction, including without limitation the rights
+; to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+; copies of the Software, and to permit persons to whom the Software is
+; furnished to do so, subject to the following conditions:
+;
+; The above copyright notice and this permission notice shall be included in
+; all copies or substantial portions of the Software.
+;
+; THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+; OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+; SOFTWARE.
+
+
+; Cheat sheet for Linux' x86_64 calling convention:
+;
+; - free to overwrite; the caller should save them:
+;     rax, rcx, rdx, rsi, rdi, r8-r11, xmm0-xmm15
+; - caller expects no change; callee should save them:
+;     rbx, rbp, r12-r15
+;
+; - for passing parameters to functions:
+;     rdi, rsi, rdx, rcx, r8, r9, xmm0-xmm7
+; - for getting return values from functions:
+;     rax, rdx, xmm0
+;
+; - for passing parameters to syscalls:
+;     rax, rdi, rsi, rdx, r10, r8, r9
+; - for getting return values from syscalls:
+;     rax, rdx
+; - overwritten by syscalls (all others preserved):
+;     rcx, r11
+
+
+section .text
+
+
+; Relevant system call IDs
+%define SYS_MMAP     9
+%define SYS_MPROTECT 10
+%define SYS_MUNMAP   11
+%define SYS_CLONE    56
+%define SYS_EXIT     60
+
+; Relevant flags for mmap
+%define MAP_SHARED    0x00001
+%define MAP_PRIVATE   0x00002
+%define MAP_ANONYMOUS 0x00020
+;%define MAP_GROWSDOWN 0x00100 ; Insecure, segfaults anyway
+%define MAP_LOCKED    0x02000
+%define MAP_POPULATE  0x08000
+%define MAP_STACK     0x20000
+
+; Relevant flags for mprotect
+%define PROT_READ  0x1
+%define PROT_WRITE 0x2
+
+; Relevant flags for clone
+%define CLONE_VM             0x00000100
+%define CLONE_FS             0x00000200
+%define CLONE_FILES          0x00000400
+%define CLONE_SIGHAND        0x00000800
+%define CLONE_PARENT         0x00008000
+%define CLONE_THREAD         0x00010000
+%define CLONE_SYSVSEM        0x00040000
+%define CLONE_SETTLS         0x00080000
+%define CLONE_PARENT_SETTID  0x00100000
+%define CLONE_CHILD_CLEARTID 0x00200000
+%define CLONE_CHILD_SETTID   0x01000000
+%define CLONE_IO             0x80000000
+
+
+%define STACK_SIZE 2097152 ; 2 MiB stack
+%define GUARD_PAGE 4096    ; 4 KiB guard page
+
+
+; Create a new thread executing a given function. Arguments:
+;   rdi: u32** = where to put the thread handle
+;   rsi: void* (*)(void*) = function to make the child run
+;   rdx: void* = single argument for function
+; Returns zero on success, or a negated errno value (e.g. -22 for EINVAL).
+; On failure nothing is stored at [rdi] and the stack mapping is released.
+global linen_thread_create
+linen_thread_create:
+    ; Callee-save registers
+    push rbx
+
+    ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+    ;;;; Check validity of arguments ;;;;
+    ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+    ; Return -EINVAL if any required argument is NULL
+    mov eax, -22 ; (EINVAL = 22, returned negated)
+    test rdi, rdi
+    jz create_end ; Nowhere to store the thread handle
+    test rsi, rsi
+    jz create_end ; No function for the thread to run
+
+    ; Note: we allow rdx to be NULL; in that case the worst that can happen
+    ; is a segmentation fault in the user's code (not really our problem).
+
+    ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+    ;;;; Allocate a stack and guard page ;;;;
+    ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+    ; Save these registers: we'll clobber them for the mmap call
+    mov rbx, rdi
+    push rdx
+    push rsi
+
+    ; The mmap system call does many things, in this case allocate memory.
+    ; See: man 2 mmap
+
+    ; mmap: rdi = addr: address for mapping; 0 lets kernel choose
+    xor edi, edi
+    ; mmap: rsi = length: size of buffer to allocate
+    mov esi, (STACK_SIZE + GUARD_PAGE)
+    ; mmap: rdx = prot: mprotect-style access permissions
+    mov edx, (PROT_WRITE | PROT_READ)
+    ; mmap: r10 = flags: settings for mapping
+    ; - MAP_ANONYMOUS: there is no file backing this buffer
+    ; - MAP_PRIVATE: only this process can see thread's stack
+    ; - MAP_STACK: no-op; inform kernel that this is a stack
+    mov r10, (MAP_ANONYMOUS | MAP_PRIVATE | MAP_STACK)
+    ; mmap: r8 = fd: ignored for MAP_ANONYMOUS, recommended -1
+    mov r8, -1
+    ; mmap: r9 = offset: should be 0 when MAP_ANONYMOUS is used
+    xor r9, r9
+    ; mmap: rax = system call ID
+    mov eax, SYS_MMAP
+    ; mmap: rax = mmap(rdi, rsi, rdx, r10, r8, r9)
+    syscall
+
+    ; Pop these now before we start branching. Those registers
+    ; won't be used by the next system calls, so they're safe.
+    pop r8 ; function
+    pop r9 ; argument
+
+    ; Check result of mmap: negative means failure,
+    ; otherwise rax is the address of the new mapping.
+    test rax, rax
+    js create_end
+
+    ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+    ;;;; Revoke guard page's R/W permissions ;;;;
+    ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+    ; Keep in mind that stacks grow downward, so the guard page is at
+    ; the lowest address of the newly-allocated buffer, i.e. at [rax].
+
+    ; The mprotect system call changes the permissions of a memory region.
+    ; See: man 2 mprotect
+
+    ; mprotect: rdi = addr: lower address of region to control
+    mov rdi, rax
+    ; mprotect: rsi = len: size of region, one page in this case
+    mov esi, GUARD_PAGE
+    ; mprotect: rdx = prot: access permissions; zero for none
+    xor edx, edx
+    ; mprotect: rax = system call ID
+    mov eax, SYS_MPROTECT
+    ; mprotect: rax = mprotect(rdi, rsi, rdx)
+    syscall
+
+    ; Check result of mprotect: nonzero means failure. The mapping must
+    ; not leak, so release it first; rdi still holds its base address.
+    test rax, rax
+    jnz create_unmap
+
+    ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+    ;;;; Spawn a thread with the new stack ;;;;
+    ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+    ; The clone system call spawns a new thread, cloned from a parent.
+    ; Both threads end up running the same code, i.e. it returns "twice",
+    ; once in the parent (the child's TID) and once in the child (zero).
+    ; See: man 2 clone
+
+    ; clone: rsi = stack
+    ; Currently rdi points to the lowest byte of the stack area.
+    ; Again, stacks grow downward, so the child starts out near the top.
+    ; The top qword is reserved for the futex and canary (see r10 below);
+    ; starting one qword below it also leaves rsp 16-byte aligned at the
+    ; "call" in the child, as the x86_64 calling convention requires.
+    lea rsi, [rdi + (STACK_SIZE + GUARD_PAGE - 16)]
+
+    ; clone: rdi = flags: settings for cloned thread
+    ; These flags make the parent and child share resources:
+    ; - CLONE_VM: memory address space
+    ; - CLONE_FS: filesystem information, e.g. working directory
+    ; - CLONE_FILES: file descriptor table
+    ; - CLONE_IO: I/O scheduler context
+    ; - CLONE_SIGHAND: signal handlers
+    ; - CLONE_PARENT: parent process (implied by CLONE_THREAD?)
+    ; - CLONE_THREAD: shared PID, distinguish by TID instead (I think?)
+    ; These flags are relevant for a threading API:
+    ; - CLONE_PARENT_SETTID: store child's TID at supplied address (in rdx)
+    ;   before clone returns in the parent. CLONE_CHILD_SETTID would only
+    ;   guarantee the store once the child runs, so an early join could
+    ;   see a stale zero and free the stack of a thread that is alive.
+    ; - CLONE_CHILD_CLEARTID: set TID at address in r10 to zero when child
+    ;   finishes (this will be used for joining threads)
+    mov edi, (CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_IO \
+            | CLONE_SIGHAND | CLONE_PARENT | CLONE_THREAD \
+            | CLONE_PARENT_SETTID | CLONE_CHILD_CLEARTID)
+
+    ; clone: r10 = child_tid: address to clear when the thread exits
+    ; We use the top qword of the buffer, just above the child's stack.
+    lea r10, [rsi + 8]
+
+    ; clone: rdx = parent_tid: address to store new thread's TID
+    mov rdx, r10
+
+    ; clone: r8 = tls: ignored unless CLONE_SETTLS is used
+
+    ; clone: rax = system call ID
+    mov eax, SYS_CLONE
+    ; clone: rax = clone(rdi, rsi, rdx, r10, (r8));
+    syscall
+
+    ; Ideally, both parent and new-born child are executing this code now.
+
+    ; Check result of clone:
+    test rax, rax
+    jz create_child    ; Zero means we're in the child thread
+    jns create_success ; Positive means we're in the parent thread
+    ; Negative means failure
+
+    ; clone failed: recover the mapping's base address from rsi
+    lea rdi, [rsi - (STACK_SIZE + GUARD_PAGE - 16)]
+
+create_unmap:
+    ; Failure after mmap succeeded: rax = error code, rdi = mapping base.
+    ; Syscalls only clobber rax, rcx and r11, so park the error in rdx.
+    mov rdx, rax
+    ; munmap: rsi = length: size of region starting from rdi
+    mov esi, (STACK_SIZE + GUARD_PAGE)
+    ; munmap: rax = system call ID
+    mov eax, SYS_MUNMAP
+    ; munmap: rax = munmap(rdi, rsi); its result is deliberately ignored,
+    ; we report the original error instead
+    syscall
+    mov rax, rdx
+    jmp create_end
+
+    ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+    ;;;; Initialization in child thread ;;;;
+    ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+create_child:
+    ; Best practice is to clear the frame pointer
+    xor ebp, ebp
+
+    ; Move argument into place and call supplied function
+    mov rdi, r9
+    call r8
+
+    ; Once done, leave function's return value in the qword right below
+    ; the futex, where linen_thread_finish reads it (rsp points there)
+    mov [rsp], rax
+
+    ; Exit the thread with return value 0
+    xor edi, edi
+    mov eax, SYS_EXIT
+    syscall ; (never returns)
+
+    ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+    ;;;; Clean up in parent thread ;;;;
+    ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+create_success:
+    ; We use the low dword of the buffer's top qword as a futex to
+    ; detect when the child has finished (see CLONE_CHILD_CLEARTID above).
+    ; That dword's address (in r10) also acts as a thread handle for our
+    ; API, so we store it at the address the caller supplied (now in rbx).
+    mov [rbx], r10
+
+    ; We place a canary value in the unused dword at the top:
+    ; checking this value tells us if a thread handle is valid.
+    mov dword [r10 + 4], 0xDEADBEEF
+
+    ; "Sketch" of child's stack buffer's layout:
+    ;
+    ;   (bottom of range allocated by mmap)
+    ;   4 KiB: guard page, unused
+    ;   (bottom of usable buffer)
+    ;   ...
+    ;   ... Child is currently doing work here ...
+    ;   ...
+    ;   qword: return address of function called by child (from r8)
+    ;   qword: function's return value, stored by child (address: r10 - 8)
+    ;   dword: futex to detect when child has returned (address: r10)
+    ;   dword: canary value to know if handle is valid (address: r10 + 4)
+    ;   (top of range allocated by mmap = top of usable buffer)
+
+    ; Return 0 for success
+    xor eax, eax
+
+create_end:
+    ; Restore callee-save registers
+    pop rbx
+
+    ret
+
diff --git a/lib/thread_finish.asm b/lib/thread_finish.asm
new file mode 100644
index 0000000..ee6d676
--- /dev/null
+++ b/lib/thread_finish.asm
@@ -0,0 +1,161 @@
+; MIT license:
+;
+; Copyright (c) 2023 Marcus R.A. Newman (prefetch@prefetch.eu)
+;
+; Permission is hereby granted, free of charge, to any person obtaining a copy
+; of this software and associated documentation files (the "Software"), to deal
+; in the Software without restriction, including without limitation the rights
+; to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+; copies of the Software, and to permit persons to whom the Software is
+; furnished to do so, subject to the following conditions:
+;
+; The above copyright notice and this permission notice shall be included in
+; all copies or substantial portions of the Software.
+;
+; THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+; OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+; SOFTWARE.
+
+
+; Cheat sheet for Linux' x86_64 calling convention:
+;
+; - free to overwrite; the caller should save them:
+;     rax, rcx, rdx, rsi, rdi, r8-r11, xmm0-xmm15
+; - caller expects no change; callee should save them:
+;     rbx, rbp, r12-r15
+;
+; - for passing parameters to functions:
+;     rdi, rsi, rdx, rcx, r8, r9, xmm0-xmm7
+; - for getting return values from functions:
+;     rax, rdx, xmm0
+;
+; - for passing parameters to syscalls:
+;     rax, rdi, rsi, rdx, r10, r8, r9
+; - for getting return values from syscalls:
+;     rax, rdx
+; - overwritten by syscalls (all others preserved):
+;     rcx, r11
+
+
+section .text
+
+
+; Relevant system call IDs
+%define SYS_MUNMAP 11
+%define SYS_FUTEX  202
+
+; Relevant flags for futex
+%define FUTEX_WAIT         0x00
+%define FUTEX_PRIVATE_FLAG 0x80
+
+
+%define STACK_SIZE 2097152 ; 2 MiB stack
+%define GUARD_PAGE 4096    ; 4 KiB guard page
+
+
+; Wait for thread to exit, save its return value, and clean up. Arguments:
+;   rdi: u32* = handle of the thread to wait for
+;   rsi: void** = where to put the void* returned by the thread (or NULL)
+; Returns zero on success, or a negated errno value (e.g. -22 for EINVAL).
+global linen_thread_finish
+linen_thread_finish:
+    ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+    ;;;; Check validity of arguments ;;;;
+    ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+    ; Return -EINVAL if rdi is NULL or otherwise invalid
+    mov eax, -22 ; (EINVAL = 22, returned negated)
+    test rdi, rdi
+    jz join_end ; rdi is NULL
+
+    ; rdi is nonzero, so we just assume it's a readable pointer (if not,
+    ; we get a segmentation fault). That doesn't make it a valid thread
+    ; handle yet: to verify this, we check the canary value at [rdi + 4].
+    mov ecx, [rdi + 4]
+    cmp ecx, 0xDEADBEEF
+    jnz join_end
+
+    ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+    ;;;; Wait until thread is finished ;;;;
+    ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+    ; We'll clobber rsi if we need to set up a futex call
+    mov r8, rsi
+
+    ; When spawning, clone was told to store the child's TID at [rdi], and to
+    ; clear it (to 0) automatically when the child exits: we watch for that.
+
+join_wait:
+    ; Atomically check whether the target thread is still running.
+    ; if ([rdi] == 0) { goto join_done; } else { eax = [rdi]; }
+    xor eax, eax
+    lock cmpxchg [rdi], eax
+    jz join_done
+
+    ; The thread is still busy, so block until it's done. The futex system
+    ; call waits until the dword at an address (rdi) deviates from an
+    ; expected value (eax). See: man 2 futex
+
+    ; futex: rdi = uaddr: address of the dword to watch
+    ; futex: rsi = futex_op: which futex operation we want
+    ; - FUTEX_WAIT: block until the value at [rdi] changes
+    ; - FUTEX_PRIVATE_FLAG: not set; the kernel's exit wake-up isn't private
+    mov esi, FUTEX_WAIT
+    ; futex: rdx = val: the expected value at [rdi] before it changes
+    mov edx, eax
+    ; futex: r10 = timeout: in case we had a deadline (we don't)
+    xor r10, r10
+    ; futex: r8, r9 = uaddr2, val3: both ignored when FUTEX_WAIT is used
+    ; futex: rax = system call ID
+    mov eax, SYS_FUTEX
+    ; futex: rax = futex(rdi, rsi, rdx, r10, (r8), (r9))
+    syscall
+
+    ; Check result of futex: zero (woken up), EAGAIN ([rdi] had already
+    ; changed) and EINTR (signal) aren't failures, so re-check the TID
+    test rax, rax
+    jz join_wait
+    cmp rax, -11 ; (EAGAIN = 11)
+    je join_wait
+    cmp rax, -4 ; (EINTR = 4)
+    je join_wait
+    jmp join_end ; Anything else is a real error: return it
+
+    ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+    ;;;; Clean up after thread's exit ;;;;
+    ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+join_done:
+    ; The thread left its function return value on the stack, read it
+    mov rdx, [rdi - 8]
+
+    ; The munmap system call destroys mappings created by mmap, in this
+    ; case deallocating the stack buffer. See: man 2 munmap
+
+    ; munmap: rdi = addr: lowest address of region to unmap
+    ; Our rdi is near the buffer's top, so we must subtract
+    sub rdi, (STACK_SIZE + GUARD_PAGE - 8)
+    ; munmap: rsi = length: size of region starting from rdi
+    mov esi, (STACK_SIZE + GUARD_PAGE)
+    ; munmap: rax = system call ID
+    mov eax, SYS_MUNMAP
+    ; munmap: rax = munmap(rdi, rsi)
+    syscall
+
+    ; Check result of munmap: nonzero means failure
+    test rax, rax
+    jnz join_end
+
+    ; Check if caller gave a location (r8) to save the return value (rdx)
+    test r8, r8
+    jz join_end ; caller doesn't care: gave NULL pointer
+    mov [r8], rdx
+    ; Note: if munmap failed the buffer still exists, so no value is lost.
+
+join_end:
+    ret
+
diff --git a/linen.h b/linen.h
new file mode 100644
--- /dev/null
+++ b/linen.h
@@ -0,0 +1,20 @@
+#ifndef LINEN_H
+#define LINEN_H
+
+#if (__x86_64 != 1 || __linux__ != 1 || __LP64__ != 1)
+    #error "Linen (liblinen.so) only works on x86_64 Linux!"
+#endif
+
+/* Thread handle, filled in by linen_thread_create(). */
+typedef int* linen_thread_t;
+
+/* Spawn a thread running func(arg) and store its handle in *handle.
+ * Returns 0 on success, or a negated errno value (e.g. -22 for EINVAL). */
+extern int linen_thread_create(linen_thread_t* handle, void* (*func)(void*), void* arg);
+
+/* Wait for the thread to exit, store func's return value in *retval
+ * (unless retval is NULL), and free the thread's stack.
+ * Returns 0 on success, or a negated errno value (e.g. -22 for EINVAL). */
+extern int linen_thread_finish(linen_thread_t handle, void** retval);
+
+#endif /* LINEN_H */
diff --git a/tests/test01.c b/tests/test01.c
new file mode 100644
index 0000000..198dac7
--- /dev/null
+++ b/tests/test01.c
@@ -0,0 +1,60 @@
+#include "linen.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <time.h>
+
+
+#define NUM_THREADS 10
+
+
+/* Argument for thread function */
+typedef struct {
+    useconds_t delay;
+    int id;
+} f_arg_t;
+
+
+/* Function for threads to run: sleeps for arg's delay (in microseconds),
+ * prints arg's id, and returns a pointer to the delay. */
+static void* f(void* arg) {
+    f_arg_t* a = (f_arg_t*)arg;
+    usleep(a->delay);
+    printf(" Hello from thread #%d!\n", a->id);
+    return (void*)&(a->delay);
+}
+
+
+/* Spawns NUM_THREADS threads without joining them.
+ * Returns 0 if every thread was spawned, 1 otherwise. */
+int main(void) {
+    printf("\x1B[1mTEST 01: threads print after random delay:\x1B[0m\n");
+
+    /* Thread handles and arguments */
+    linen_thread_t ts[NUM_THREADS];
+    f_arg_t args[NUM_THREADS];
+    int failures = 0;
+
+    /* Set arguments: each delay is below one second */
+    srand((unsigned)time(NULL));
+    for (int i = 0; i < NUM_THREADS; i++) {
+        args[i].delay = (useconds_t)rand() % 1000000;
+        args[i].id = i;
+    }
+
+    /* Spawn threads */
+    for (int i = 0; i < NUM_THREADS; i++) {
+        int r = linen_thread_create(&ts[i], f, (void*)&args[i]);
+        if (r) {
+            printf(" Failed to spawn thread #%d with error %d\n", i, r);
+            failures++;
+        }
+    }
+
+    /* The threads are never joined here, so instead sleep for longer
+     * than the largest possible delay to let all of them print. */
+    usleep(1200000);
+
+    return failures ? 1 : 0;
+}
diff --git a/tests/test02.c b/tests/test02.c
new file mode 100644
index 0000000..1cd0c1a
--- /dev/null
+++ b/tests/test02.c
@@ -0,0 +1,80 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <time.h>
+#include <unistd.h>
+
+#include "linen.h"
+
+
+#define NUM_THREADS 10
+
+
+/* Argument for thread function */
+typedef struct {
+    useconds_t delay;
+    int id;
+} f_arg_t;
+
+
+/* Function for threads to run: sleeps for arg's delay (in microseconds),
+ * prints arg's id, and returns a pointer to the delay. */
+static void* f(void* arg) {
+    f_arg_t* a = (f_arg_t*)arg;
+    usleep(a->delay);
+    printf(" Hello from thread #%d!\n", a->id);
+    return (void*)&(a->delay);
+}
+
+
+/* Spawns NUM_THREADS threads, then joins them in spawn order.
+ * Returns 0 if every spawn and join succeeded, 1 otherwise. */
+int main(void) {
+    printf("\x1B[1mTEST 02: threads print after random delay "
+           "while getting joined consecutively:\x1B[0m\n");
+
+    /* Thread handles and arguments. Handles start out as NULL so
+     * that a failed spawn can be told apart from a live thread. */
+    linen_thread_t ts[NUM_THREADS] = {0};
+    f_arg_t args[NUM_THREADS];
+    int failures = 0;
+
+    /* Set arguments: each delay is below one second */
+    srand((unsigned)time(NULL));
+    for (int i = 0; i < NUM_THREADS; i++) {
+        args[i].delay = (useconds_t)rand() % 1000000;
+        args[i].id = i;
+    }
+
+    /* Spawn threads */
+    for (int i = 0; i < NUM_THREADS; i++) {
+        int r = linen_thread_create(&ts[i], f, (void*)&args[i]);
+        if (r) {
+            printf(" Failed to spawn thread #%d with error %d\n", i, r);
+            ts[i] = NULL;
+            failures++;
+        }
+    }
+
+    /* Wait for each thread to finish in order, and print its argument.
+     * The first message only prints once thread #0 is done, and so on. */
+    for (int i = 0; i < NUM_THREADS; i++) {
+        /* Never spawned: there is nothing to join */
+        if (!ts[i]) {
+            continue;
+        }
+
+        /* Receive the result as a void* and convert afterwards, rather
+         * than writing through a cast useconds_t** */
+        void* ret = NULL;
+        int r = linen_thread_finish(ts[i], &ret);
+        if (r) {
+            printf(" Failed to join thread #%d with error %d\n", i, r);
+            failures++;
+        } else {
+            const useconds_t* pd = ret;
+            printf(" Thread #%d slept for %ums\n", i, *pd / 1000);
+        }
+    }
+
+    return failures ? 1 : 0;
+}