1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
|
; Under MIT license, see /LICENSE.txt
; Cheat sheet for Linux' x86_64 calling convention:
;
; - free to overwrite (caller should save them):
; rax, rcx, rdx, rsi, rdi, r8-r11, xmm0-xmm15
; - caller expects these to be kept (callee should save them):
; rbx, rbp, r12-r15
;
; - for passing parameters to functions:
; rdi, rsi, rdx, rcx, r8, r9, xmm0-xmm7
; - for getting return values from functions:
; rax, rdx, xmm0
;
; - for passing parameters to syscalls:
; rax, rdi, rsi, rdx, r10, r8, r9
; - for getting return values from syscalls:
; rax, rdx
; - overwritten by syscalls (all others preserved):
; rcx, r11
section .text
; Relevant system call IDs (the value loaded into rax before `syscall`)
%define SYS_MUNMAP 11
%define SYS_FUTEX 202
; Relevant operations for futex (passed as the futex_op argument in rsi).
; FUTEX_PRIVATE_FLAG is defined for reference; the code visible here
; does not OR it into the operation.
%define FUTEX_WAIT 0
%define FUTEX_PRIVATE_FLAG 0x80
; Size of the region that is unmapped when a thread is cleaned up:
; STACK_SIZE + GUARD_PAGE bytes are released in a single munmap call.
%define STACK_SIZE 2097152 ; 2 MiB stack (2 * 1024 * 1024 bytes)
%define GUARD_PAGE 4096 ; 4 KiB guard page
;-----------------------------------------------------------------------
; linen_thread_finish
; Wait for a thread to exit, save its return value, and clean up.
;
; ABI:   System V AMD64 (Linux); leaf function, uses no stack space
; In:    rdi = struct{u32,u32}* = handle of the thread to wait for
;                [rdi]     = child's TID while it runs, 0 once it exited
;                [rdi + 4] = canary that marks the handle as valid
;        rsi = void** = where to put the void* returned by the thread
;                (may be NULL if the caller doesn't care)
; Out:   eax = 0 on success, otherwise a negated errno value:
;                -EINVAL if rdi is NULL or the canary doesn't match,
;                else the error reported by futex or munmap
; Clobb: rax, rcx, rdx, rsi, rdi, r8, r10, r11, flags
;-----------------------------------------------------------------------

; Error numbers this function produces or inspects (see: man 3 errno)
%define EINTR  4
%define EAGAIN 11
%define EINVAL 22
; Marker stored at [handle + 4] while the handle is valid
%define HANDLE_CANARY 0xDEADBEEF

global linen_thread_finish
linen_thread_finish:

        ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
        ;;;; Check validity of arguments ;;;;
        ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

        ; Preload the error code so both rejection paths can just return
        mov     eax, -EINVAL
        test    rdi, rdi
        jz      finish_return           ; rdi is NULL

        ; rdi is nonzero, so let's just assume it's a valid pointer;
        ; if that assumption is wrong we'll get a segmentation fault.
        ; But we don't yet trust that [rdi] is a valid thread handle!
        ; To verify this we check the canary value stored at [rdi + 4].
        cmp     dword [rdi + 4], HANDLE_CANARY
        jne     finish_return

        ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
        ;;;; Wait until thread is finished ;;;;
        ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

        ; rsi is needed for the futex call, so park the result pointer
        ; in r8 (futex ignores r8 for FUTEX_WAIT; syscalls preserve it)
        mov     r8, rsi

finish_retry:
        ; When spawning, we set CLONE_CHILD_SETTID and CLONE_CHILD_CLEARTID:
        ; [rdi] contains the child thread's TID, and will get automatically
        ; cleared (to 0) when the child exits; this is what we watch for.
        ;
        ; Atomically check whether the target thread is still running:
        ;   if ([rdi] == 0) { goto finish_success; } else { eax = [rdi]; }
        ; Invariant: finish_success is only ever reached from here, after
        ; we have observed [rdi] == 0 ourselves (and with eax == 0).
        xor     eax, eax
        lock cmpxchg [rdi], eax
        jz      finish_success

        ; The thread is still busy, so block until it's done.
        ; The futex system call waits until the dword at an
        ; address (rdi) deviates from an expected value (edx).
        ; See: man 2 futex
        ;
        ; futex: rdi = uaddr: address of the dword to watch
        ; futex: rsi = futex_op: FUTEX_WAIT, block until woken.
        ;   FUTEX_PRIVATE_FLAG is deliberately not set: with it the wait
        ;   was observed to block forever. Presumably the kernel's wake
        ;   on the CLONE_CHILD_CLEARTID address is a non-private
        ;   FUTEX_WAKE (see: man 2 set_tid_address), which would not
        ;   match a private waiter -- TODO confirm.
        mov     esi, FUTEX_WAIT
        ; futex: rdx = val: the TID we just read; sleep only if still there
        mov     edx, eax
        ; futex: r10 = timeout: NULL, we have no deadline
        xor     r10d, r10d
        ; futex: r8 = uaddr2, r9 = val3: ignored when FUTEX_WAIT is used
        ; futex: rax = system call ID
        mov     eax, SYS_FUTEX
        ; futex: rax = futex(rdi, rsi, rdx, r10, (r8), (r9))
        syscall

        ; None of the following results prove that the thread has exited,
        ; so each of them loops back to re-read [rdi] instead of assuming:
        ;   0       woken up, but wake-ups may be spurious (man 2 futex)
        ;   -EAGAIN [rdi] changed between the cmpxchg and the futex call
        ;   -EINTR  the wait was interrupted by a signal handler
        test    eax, eax
        jz      finish_retry
        cmp     eax, -EAGAIN
        je      finish_retry
        cmp     eax, -EINTR
        je      finish_retry

        ; Anything else is a genuine failure: hand the error to the caller
        ret

        ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
        ;;;; Clean up after thread's exit ;;;;
        ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

finish_success:
        ; The thread left its function return value on the stack, read it
        ; now, before the stack is unmapped (rdx survives the syscall)
        mov     rdx, [rdi - 8]

        ; Remove the canary value so the thread handle becomes invalid. We're
        ; about to deallocate this memory anyway, so it's optional, but maybe
        ; the address becomes valid again later in the program's life.
        mov     dword [rdi + 4], 0

        ; The munmap system call destroys mappings created by mmap.
        ; In this case that means deallocating the stack buffer.
        ; See: man 2 munmap
        ;
        ; munmap: rdi = addr: lowest address of region to unmap
        ; Our rdi is near the buffer's top, so we must subtract
        sub     rdi, (STACK_SIZE + GUARD_PAGE - 8)
        ; munmap: rsi = length: size of region starting from rdi
        mov     esi, (STACK_SIZE + GUARD_PAGE)
        ; munmap: rax = system call ID
        mov     eax, SYS_MUNMAP
        ; munmap: rax = munmap(rdi, rsi)
        syscall

        ; Check result of munmap: nonzero means failure.
        ; NOTE(review): on failure the buffer (and the return value in it)
        ; is still mapped, but the canary was already cleared above, so a
        ; second call on the same handle would be rejected with -EINVAL.
        test    eax, eax
        jnz     finish_return

        ; Check if caller gave a location (r8) to save the return value (rdx)
        test    r8, r8
        jz      finish_return           ; caller doesn't care: NULL pointer
        mov     [r8], rdx

finish_return:
        ret
|