xref: /aosp_15_r20/external/llvm-libc/src/__support/threads/linux/thread.cpp (revision 71db0c75aadcf003ffe3238005f61d7618a3fead)
1 //===--- Implementation of a Linux thread class -----------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "src/__support/threads/thread.h"
10 #include "config/app.h"
11 #include "src/__support/CPP/atomic.h"
12 #include "src/__support/CPP/string_view.h"
13 #include "src/__support/CPP/stringstream.h"
14 #include "src/__support/OSUtil/syscall.h" // For syscall functions.
15 #include "src/__support/common.h"
16 #include "src/__support/error_or.h"
17 #include "src/__support/macros/config.h"
18 #include "src/__support/threads/linux/futex_utils.h" // For FutexWordType
19 #include "src/errno/libc_errno.h"                    // For error macros
20 
21 #ifdef LIBC_TARGET_ARCH_IS_AARCH64
22 #include <arm_acle.h>
23 #endif
24 
25 #include "hdr/fcntl_macros.h"
26 #include <linux/param.h> // For EXEC_PAGESIZE.
27 #include <linux/prctl.h> // For PR_SET_NAME
28 #include <linux/sched.h> // For CLONE_* flags.
29 #include <stdint.h>
30 #include <sys/mman.h>    // For PROT_* and MAP_* definitions.
31 #include <sys/syscall.h> // For syscall numbers.
32 
33 namespace LIBC_NAMESPACE_DECL {
34 
// Syscall number used for creating memory mappings. SYS_mmap2 is chosen
// whenever the syscall headers define it; otherwise SYS_mmap is used. The
// build is rejected if neither is defined.
#if defined(SYS_mmap2)
static constexpr long MMAP_SYSCALL_NUMBER = SYS_mmap2;
#elif defined(SYS_mmap)
static constexpr long MMAP_SYSCALL_NUMBER = SYS_mmap;
#else
#error "mmap or mmap2 syscalls not available."
#endif
42 
// Capacity of a thread name buffer in bytes, counting the null terminator.
static constexpr size_t NAME_SIZE_MAX{16};
// Non-zero sentinel stored in a thread's clear-tid futex word when the thread
// is created; joiners wait on the word while it still holds this value.
static constexpr uint32_t CLEAR_TID_VALUE{0xABCD1234};
45 static constexpr unsigned CLONE_SYSCALL_FLAGS =
46     CLONE_VM        // Share the memory space with the parent.
47     | CLONE_FS      // Share the file system with the parent.
48     | CLONE_FILES   // Share the files with the parent.
49     | CLONE_SIGHAND // Share the signal handlers with the parent.
50     | CLONE_THREAD  // Same thread group as the parent.
51     | CLONE_SYSVSEM // Share a single list of System V semaphore adjustment
52                     // values
53     | CLONE_PARENT_SETTID  // Set child thread ID in |ptid| of the parent.
54     | CLONE_CHILD_CLEARTID // Let the kernel clear the tid address
55                            // wake the joining thread.
56     | CLONE_SETTLS;        // Setup the thread pointer of the new thread.
57 
58 #ifdef LIBC_TARGET_ARCH_IS_AARCH64
59 #define CLONE_RESULT_REGISTER "x0"
60 #elif defined(LIBC_TARGET_ARCH_IS_ANY_RISCV)
61 #define CLONE_RESULT_REGISTER "t0"
62 #elif defined(LIBC_TARGET_ARCH_IS_X86_64)
63 #define CLONE_RESULT_REGISTER "rax"
64 #else
65 #error "CLONE_RESULT_REGISTER not defined for your target architecture"
66 #endif
67 
add_no_overflow(size_t lhs,size_t rhs)68 static constexpr ErrorOr<size_t> add_no_overflow(size_t lhs, size_t rhs) {
69   if (lhs > SIZE_MAX - rhs)
70     return Error{EINVAL};
71   if (rhs > SIZE_MAX - lhs)
72     return Error{EINVAL};
73   return lhs + rhs;
74 }
75 
round_to_page(size_t v)76 static constexpr ErrorOr<size_t> round_to_page(size_t v) {
77   auto vp_or_err = add_no_overflow(v, EXEC_PAGESIZE - 1);
78   if (!vp_or_err)
79     return vp_or_err;
80 
81   return vp_or_err.value() & -EXEC_PAGESIZE;
82 }
83 
alloc_stack(size_t stacksize,size_t guardsize)84 LIBC_INLINE ErrorOr<void *> alloc_stack(size_t stacksize, size_t guardsize) {
85 
86   // Guard needs to be mapped with PROT_NONE
87   int prot = guardsize ? PROT_NONE : PROT_READ | PROT_WRITE;
88   auto size_or_err = add_no_overflow(stacksize, guardsize);
89   if (!size_or_err)
90     return Error{int(size_or_err.error())};
91   size_t size = size_or_err.value();
92 
93   // TODO: Maybe add MAP_STACK? Currently unimplemented on linux but helps
94   // future-proof.
95   long mmap_result = LIBC_NAMESPACE::syscall_impl<long>(
96       MMAP_SYSCALL_NUMBER,
97       0, // No special address
98       size, prot,
99       MAP_ANONYMOUS | MAP_PRIVATE, // Process private.
100       -1,                          // Not backed by any file
101       0                            // No offset
102   );
103   if (mmap_result < 0 && (uintptr_t(mmap_result) >= UINTPTR_MAX - size))
104     return Error{int(-mmap_result)};
105 
106   if (guardsize) {
107     // Give read/write permissions to actual stack.
108     // TODO: We are assuming stack growsdown here.
109     long result = LIBC_NAMESPACE::syscall_impl<long>(
110         SYS_mprotect, mmap_result + guardsize, stacksize,
111         PROT_READ | PROT_WRITE);
112 
113     if (result != 0)
114       return Error{int(-result)};
115   }
116   mmap_result += guardsize;
117   return reinterpret_cast<void *>(mmap_result);
118 }
119 
120 // This must always be inlined as we may be freeing the calling threads stack in
121 // which case a normal return from the top the stack would cause an invalid
122 // memory read.
123 [[gnu::always_inline]] LIBC_INLINE void
free_stack(void * stack,size_t stacksize,size_t guardsize)124 free_stack(void *stack, size_t stacksize, size_t guardsize) {
125   uintptr_t stackaddr = reinterpret_cast<uintptr_t>(stack);
126   stackaddr -= guardsize;
127   stack = reinterpret_cast<void *>(stackaddr);
128   LIBC_NAMESPACE::syscall_impl<long>(SYS_munmap, stack, stacksize + guardsize);
129 }
130 
131 struct Thread;
132 
133 // We align the start args to 16-byte boundary as we adjust the allocated
134 // stack memory with its size. We want the adjusted address to be at a
135 // 16-byte boundary to satisfy the x86_64 and aarch64 ABI requirements.
136 // If different architecture in future requires higher alignment, then we
137 // can add a platform specific alignment spec.
138 struct alignas(STACK_ALIGNMENT) StartArgs {
139   ThreadAttributes *thread_attrib;
140   ThreadRunner runner;
141   void *arg;
142 };
143 
144 // This must always be inlined as we may be freeing the calling threads stack in
145 // which case a normal return from the top the stack would cause an invalid
146 // memory read.
147 [[gnu::always_inline]] LIBC_INLINE void
cleanup_thread_resources(ThreadAttributes * attrib)148 cleanup_thread_resources(ThreadAttributes *attrib) {
149   // Cleanup the TLS before the stack as the TLS information is stored on
150   // the stack.
151   cleanup_tls(attrib->tls, attrib->tls_size);
152   if (attrib->owned_stack)
153     free_stack(attrib->stack, attrib->stacksize, attrib->guardsize);
154 }
155 
get_start_args_addr()156 [[gnu::always_inline]] LIBC_INLINE uintptr_t get_start_args_addr() {
157 // NOTE: For __builtin_frame_address to work reliably across compilers,
158 // architectures and various optimization levels, the TU including this file
159 // should be compiled with -fno-omit-frame-pointer.
160 #ifdef LIBC_TARGET_ARCH_IS_X86_64
161   return reinterpret_cast<uintptr_t>(__builtin_frame_address(0))
162          // The x86_64 call instruction pushes resume address on to the stack.
163          // Next, The x86_64 SysV ABI requires that the frame pointer be pushed
164          // on to the stack. So, we have to step past two 64-bit values to get
165          // to the start args.
166          + sizeof(uintptr_t) * 2;
167 #elif defined(LIBC_TARGET_ARCH_IS_AARCH64)
168   // The frame pointer after cloning the new thread in the Thread::run method
169   // is set to the stack pointer where start args are stored. So, we fetch
170   // from there.
171   return reinterpret_cast<uintptr_t>(__builtin_frame_address(1));
172 #elif defined(LIBC_TARGET_ARCH_IS_ANY_RISCV)
173   // The current frame pointer is the previous stack pointer where the start
174   // args are stored.
175   return reinterpret_cast<uintptr_t>(__builtin_frame_address(0));
176 #endif
177 }
178 
start_thread()179 [[gnu::noinline]] void start_thread() {
180   auto *start_args = reinterpret_cast<StartArgs *>(get_start_args_addr());
181   auto *attrib = start_args->thread_attrib;
182   self.attrib = attrib;
183   self.attrib->atexit_callback_mgr = internal::get_thread_atexit_callback_mgr();
184 
185   if (attrib->style == ThreadStyle::POSIX) {
186     attrib->retval.posix_retval =
187         start_args->runner.posix_runner(start_args->arg);
188     thread_exit(ThreadReturnValue(attrib->retval.posix_retval),
189                 ThreadStyle::POSIX);
190   } else {
191     attrib->retval.stdc_retval =
192         start_args->runner.stdc_runner(start_args->arg);
193     thread_exit(ThreadReturnValue(attrib->retval.stdc_retval),
194                 ThreadStyle::STDC);
195   }
196 }
197 
run(ThreadStyle style,ThreadRunner runner,void * arg,void * stack,size_t stacksize,size_t guardsize,bool detached)198 int Thread::run(ThreadStyle style, ThreadRunner runner, void *arg, void *stack,
199                 size_t stacksize, size_t guardsize, bool detached) {
200   bool owned_stack = false;
201   if (stack == nullptr) {
202     // TODO: Should we return EINVAL here? Should we have a generic concept of a
203     //       minimum stacksize (like 16384 for pthread).
204     if (stacksize == 0)
205       stacksize = DEFAULT_STACKSIZE;
206     // Roundup stacksize/guardsize to page size.
207     // TODO: Should be also add sizeof(ThreadAttribute) and other internal
208     //       meta data?
209     auto round_or_err = round_to_page(guardsize);
210     if (!round_or_err)
211       return round_or_err.error();
212     guardsize = round_or_err.value();
213 
214     round_or_err = round_to_page(stacksize);
215     if (!round_or_err)
216       return round_or_err.error();
217 
218     stacksize = round_or_err.value();
219     auto alloc = alloc_stack(stacksize, guardsize);
220     if (!alloc)
221       return alloc.error();
222     else
223       stack = alloc.value();
224     owned_stack = true;
225   }
226 
227   // Validate that stack/stacksize are validly aligned.
228   uintptr_t stackaddr = reinterpret_cast<uintptr_t>(stack);
229   if ((stackaddr % STACK_ALIGNMENT != 0) ||
230       ((stackaddr + stacksize) % STACK_ALIGNMENT != 0)) {
231     if (owned_stack)
232       free_stack(stack, stacksize, guardsize);
233     return EINVAL;
234   }
235 
236   TLSDescriptor tls;
237   init_tls(tls);
238 
239   // When the new thread is spawned by the kernel, the new thread gets the
240   // stack we pass to the clone syscall. However, this stack is empty and does
241   // not have any local vars present in this function. Hence, one cannot
242   // pass arguments to the thread start function, or use any local vars from
243   // here. So, we pack them into the new stack from where the thread can sniff
244   // them out.
245   //
246   // Likewise, the actual thread state information is also stored on the
247   // stack memory.
248 
249   static constexpr size_t INTERNAL_STACK_DATA_SIZE =
250       sizeof(StartArgs) + sizeof(ThreadAttributes) + sizeof(Futex);
251 
252   // This is pretty arbitrary, but at the moment we don't adjust user provided
253   // stacksize (or default) to account for this data as its assumed minimal. If
254   // this assert starts failing we probably should. Likewise if we can't bound
255   // this we may overflow when we subtract it from the top of the stack.
256   static_assert(INTERNAL_STACK_DATA_SIZE < EXEC_PAGESIZE);
257 
258   // TODO: We are assuming stack growsdown here.
259   auto adjusted_stack_or_err =
260       add_no_overflow(reinterpret_cast<uintptr_t>(stack), stacksize);
261   if (!adjusted_stack_or_err) {
262     cleanup_tls(tls.addr, tls.size);
263     if (owned_stack)
264       free_stack(stack, stacksize, guardsize);
265     return adjusted_stack_or_err.error();
266   }
267 
268   uintptr_t adjusted_stack =
269       adjusted_stack_or_err.value() - INTERNAL_STACK_DATA_SIZE;
270   adjusted_stack &= ~(uintptr_t(STACK_ALIGNMENT) - 1);
271 
272   auto *start_args = reinterpret_cast<StartArgs *>(adjusted_stack);
273 
274   attrib =
275       reinterpret_cast<ThreadAttributes *>(adjusted_stack + sizeof(StartArgs));
276   attrib->style = style;
277   attrib->detach_state =
278       uint32_t(detached ? DetachState::DETACHED : DetachState::JOINABLE);
279   attrib->stack = stack;
280   attrib->stacksize = stacksize;
281   attrib->guardsize = guardsize;
282   attrib->owned_stack = owned_stack;
283   attrib->tls = tls.addr;
284   attrib->tls_size = tls.size;
285 
286   start_args->thread_attrib = attrib;
287   start_args->runner = runner;
288   start_args->arg = arg;
289 
290   auto clear_tid = reinterpret_cast<Futex *>(
291       adjusted_stack + sizeof(StartArgs) + sizeof(ThreadAttributes));
292   clear_tid->set(CLEAR_TID_VALUE);
293   attrib->platform_data = clear_tid;
294 
295   // The clone syscall takes arguments in an architecture specific order.
296   // Also, we want the result of the syscall to be in a register as the child
297   // thread gets a completely different stack after it is created. The stack
298   // variables from this function will not be availalbe to the child thread.
299 #if defined(LIBC_TARGET_ARCH_IS_X86_64)
300   long register clone_result asm(CLONE_RESULT_REGISTER);
301   clone_result = LIBC_NAMESPACE::syscall_impl<long>(
302       SYS_clone, CLONE_SYSCALL_FLAGS, adjusted_stack,
303       &attrib->tid,    // The address where the child tid is written
304       &clear_tid->val, // The futex where the child thread status is signalled
305       tls.tp           // The thread pointer value for the new thread.
306   );
307 #elif defined(LIBC_TARGET_ARCH_IS_AARCH64) ||                                  \
308     defined(LIBC_TARGET_ARCH_IS_ANY_RISCV)
309   long register clone_result asm(CLONE_RESULT_REGISTER);
310   clone_result = LIBC_NAMESPACE::syscall_impl<long>(
311       SYS_clone, CLONE_SYSCALL_FLAGS, adjusted_stack,
312       &attrib->tid,   // The address where the child tid is written
313       tls.tp,         // The thread pointer value for the new thread.
314       &clear_tid->val // The futex where the child thread status is signalled
315   );
316 #else
317 #error "Unsupported architecture for the clone syscall."
318 #endif
319 
320   if (clone_result == 0) {
321 #ifdef LIBC_TARGET_ARCH_IS_AARCH64
322     // We set the frame pointer to be the same as the "sp" so that start args
323     // can be sniffed out from start_thread.
324 #ifdef __clang__
325     // GCC does not currently implement __arm_wsr64/__arm_rsr64.
326     __arm_wsr64("x29", __arm_rsr64("sp"));
327 #else
328     asm volatile("mov x29, sp");
329 #endif
330 #elif defined(LIBC_TARGET_ARCH_IS_ANY_RISCV)
331     asm volatile("mv fp, sp");
332 #endif
333     start_thread();
334   } else if (clone_result < 0) {
335     cleanup_thread_resources(attrib);
336     return static_cast<int>(-clone_result);
337   }
338 
339   return 0;
340 }
341 
join(ThreadReturnValue & retval)342 int Thread::join(ThreadReturnValue &retval) {
343   wait();
344 
345   if (attrib->style == ThreadStyle::POSIX)
346     retval.posix_retval = attrib->retval.posix_retval;
347   else
348     retval.stdc_retval = attrib->retval.stdc_retval;
349 
350   cleanup_thread_resources(attrib);
351 
352   return 0;
353 }
354 
detach()355 int Thread::detach() {
356   uint32_t joinable_state = uint32_t(DetachState::JOINABLE);
357   if (attrib->detach_state.compare_exchange_strong(
358           joinable_state, uint32_t(DetachState::DETACHED))) {
359     return int(DetachType::SIMPLE);
360   }
361 
362   // If the thread was already detached, then the detach method should not
363   // be called at all. If the thread is exiting, then we wait for it to exit
364   // and free up resources.
365   wait();
366 
367   cleanup_thread_resources(attrib);
368 
369   return int(DetachType::CLEANUP);
370 }
371 
wait()372 void Thread::wait() {
373   // The kernel should set the value at the clear tid address to zero.
374   // If not, it is a spurious wake and we should continue to wait on
375   // the futex.
376   auto *clear_tid = reinterpret_cast<Futex *>(attrib->platform_data);
377   // We cannot do a FUTEX_WAIT_PRIVATE here as the kernel does a
378   // FUTEX_WAKE and not a FUTEX_WAKE_PRIVATE.
379   while (clear_tid->load() != 0)
380     clear_tid->wait(CLEAR_TID_VALUE, cpp::nullopt, true);
381 }
382 
operator ==(const Thread & thread) const383 bool Thread::operator==(const Thread &thread) const {
384   return attrib->tid == thread.attrib->tid;
385 }
386 
387 static constexpr cpp::string_view THREAD_NAME_PATH_PREFIX("/proc/self/task/");
388 static constexpr size_t THREAD_NAME_PATH_SIZE =
389     THREAD_NAME_PATH_PREFIX.size() +
390     IntegerToString<int>::buffer_size() + // Size of tid
391     1 +                                   // For '/' character
392     5; // For the file name "comm" and the nullterminator.
393 
construct_thread_name_file_path(cpp::StringStream & stream,int tid)394 static void construct_thread_name_file_path(cpp::StringStream &stream,
395                                             int tid) {
396   stream << THREAD_NAME_PATH_PREFIX << tid << '/' << cpp::string_view("comm")
397          << cpp::StringStream::ENDS;
398 }
399 
set_name(const cpp::string_view & name)400 int Thread::set_name(const cpp::string_view &name) {
401   if (name.size() >= NAME_SIZE_MAX)
402     return ERANGE;
403 
404   if (*this == self) {
405     // If we are setting the name of the current thread, then we can
406     // use the syscall to set the name.
407     int retval =
408         LIBC_NAMESPACE::syscall_impl<int>(SYS_prctl, PR_SET_NAME, name.data());
409     if (retval < 0)
410       return -retval;
411     else
412       return 0;
413   }
414 
415   char path_name_buffer[THREAD_NAME_PATH_SIZE];
416   cpp::StringStream path_stream(path_name_buffer);
417   construct_thread_name_file_path(path_stream, attrib->tid);
418 #ifdef SYS_open
419   int fd =
420       LIBC_NAMESPACE::syscall_impl<int>(SYS_open, path_name_buffer, O_RDWR);
421 #else
422   int fd = LIBC_NAMESPACE::syscall_impl<int>(SYS_openat, AT_FDCWD,
423                                              path_name_buffer, O_RDWR);
424 #endif
425   if (fd < 0)
426     return -fd;
427 
428   int retval = LIBC_NAMESPACE::syscall_impl<int>(SYS_write, fd, name.data(),
429                                                  name.size());
430   LIBC_NAMESPACE::syscall_impl<long>(SYS_close, fd);
431 
432   if (retval < 0)
433     return -retval;
434   else if (retval != int(name.size()))
435     return EIO;
436   else
437     return 0;
438 }
439 
get_name(cpp::StringStream & name) const440 int Thread::get_name(cpp::StringStream &name) const {
441   if (name.bufsize() < NAME_SIZE_MAX)
442     return ERANGE;
443 
444   char name_buffer[NAME_SIZE_MAX];
445 
446   if (*this == self) {
447     // If we are getting the name of the current thread, then we can
448     // use the syscall to get the name.
449     int retval =
450         LIBC_NAMESPACE::syscall_impl<int>(SYS_prctl, PR_GET_NAME, name_buffer);
451     if (retval < 0)
452       return -retval;
453     name << name_buffer << cpp::StringStream::ENDS;
454     return 0;
455   }
456 
457   char path_name_buffer[THREAD_NAME_PATH_SIZE];
458   cpp::StringStream path_stream(path_name_buffer);
459   construct_thread_name_file_path(path_stream, attrib->tid);
460 #ifdef SYS_open
461   int fd =
462       LIBC_NAMESPACE::syscall_impl<int>(SYS_open, path_name_buffer, O_RDONLY);
463 #else
464   int fd = LIBC_NAMESPACE::syscall_impl<int>(SYS_openat, AT_FDCWD,
465                                              path_name_buffer, O_RDONLY);
466 #endif
467   if (fd < 0)
468     return -fd;
469 
470   int retval = LIBC_NAMESPACE::syscall_impl<int>(SYS_read, fd, name_buffer,
471                                                  NAME_SIZE_MAX);
472   LIBC_NAMESPACE::syscall_impl<long>(SYS_close, fd);
473   if (retval < 0)
474     return -retval;
475   if (retval == NAME_SIZE_MAX)
476     return ERANGE;
477   if (name_buffer[retval - 1] == '\n')
478     name_buffer[retval - 1] = '\0';
479   else
480     name_buffer[retval] = '\0';
481   name << name_buffer << cpp::StringStream::ENDS;
482   return 0;
483 }
484 
thread_exit(ThreadReturnValue retval,ThreadStyle style)485 void thread_exit(ThreadReturnValue retval, ThreadStyle style) {
486   auto attrib = self.attrib;
487 
488   // The very first thing we do is to call the thread's atexit callbacks.
489   // These callbacks could be the ones registered by the language runtimes,
490   // for example, the destructors of thread local objects. They can also
491   // be destructors of the TSS objects set using API like pthread_setspecific.
492   // NOTE: We cannot call the atexit callbacks as part of the
493   // cleanup_thread_resources function as that function can be called from a
494   // different thread. The destructors of thread local and TSS objects should
495   // be called by the thread which owns them.
496   internal::call_atexit_callbacks(attrib);
497 
498   uint32_t joinable_state = uint32_t(DetachState::JOINABLE);
499   if (!attrib->detach_state.compare_exchange_strong(
500           joinable_state, uint32_t(DetachState::EXITING))) {
501     // Thread is detached so cleanup the resources.
502     cleanup_thread_resources(attrib);
503 
504     // Set the CLEAR_TID address to nullptr to prevent the kernel
505     // from signalling at a non-existent futex location.
506     LIBC_NAMESPACE::syscall_impl<long>(SYS_set_tid_address, 0);
507     // Return value for detached thread should be unused. We need to avoid
508     // referencing `style` or `retval.*` because they may be stored on the stack
509     // and we have deallocated our stack!
510     LIBC_NAMESPACE::syscall_impl<long>(SYS_exit, 0);
511     __builtin_unreachable();
512   }
513 
514   if (style == ThreadStyle::POSIX)
515     LIBC_NAMESPACE::syscall_impl<long>(SYS_exit, retval.posix_retval);
516   else
517     LIBC_NAMESPACE::syscall_impl<long>(SYS_exit, retval.stdc_retval);
518   __builtin_unreachable();
519 }
520 
521 } // namespace LIBC_NAMESPACE_DECL
522