From 3154492dcf5e8176dff3df2b9caa2c50f898022d Mon Sep 17 00:00:00 2001 From: Jonas 'Sortie' Termansen Date: Sat, 10 Dec 2022 13:09:09 +0100 Subject: [PATCH] Fix deadlocks and lost wakeups in threading primitives. The futex and kutex implementations used the same linked list for waiting, however the futex implementation used kutexs and the same thread could be in the same list twice in the case of contention. This case corrupted the wait lists and led to deadlocks and lost wakeups. This change fixes the problem by having separate data structures for futexes and kutexes. Mutexes contended by multiple threads could lead to lost wakeups since only one contended thread was awoken and subsequent unlocks are unaware of the unawakened contended threads. This change fixes the problem with a temporary solution of waking all the contended threads until a better design is implemented. Additional details are tweaked to be more reliable and simpler. --- kernel/include/sortix/kernel/thread.h | 6 ++ kernel/kthread.cpp | 111 ++++++++++++++++---------- kernel/scheduler.cpp | 40 +++++----- kernel/thread.cpp | 4 + libc/pthread/pthread_mutex_unlock.c | 10 ++- 5 files changed, 106 insertions(+), 65 deletions(-) diff --git a/kernel/include/sortix/kernel/thread.h b/kernel/include/sortix/kernel/thread.h index 713cf78a..e6ef5303 100644 --- a/kernel/include/sortix/kernel/thread.h +++ b/kernel/include/sortix/kernel/thread.h @@ -62,6 +62,8 @@ enum yield_operation YIELD_OPERATION_NONE, YIELD_OPERATION_WAIT_FUTEX, YIELD_OPERATION_WAIT_FUTEX_SIGNAL, + YIELD_OPERATION_WAIT_KUTEX, + YIELD_OPERATION_WAIT_KUTEX_SIGNAL, }; class Thread @@ -99,10 +101,14 @@ public: Clock execute_clock; Clock system_clock; uintptr_t futex_address; + uintptr_t kutex_address; bool futex_woken; + bool kutex_woken; bool timer_woken; Thread* futex_prev_waiting; Thread* futex_next_waiting; + Thread* kutex_prev_waiting; + Thread* kutex_next_waiting; enum yield_operation yield_operation; public: diff --git a/kernel/kthread.cpp b/kernel/kthread.cpp index 20bec6d9..c82d7900 100644 --- a/kernel/kthread.cpp +++ b/kernel/kthread.cpp @@ -33,6 +33,10 @@ namespace Sortix { +static kthread_mutex_t kutex_lock = KTHREAD_MUTEX_INITIALIZER; +static Thread* kutex_first_waiting; +static Thread* kutex_last_waiting; + void kthread_yield() { Thread* thread = CurrentThread(); @@ -66,6 +70,28 @@ void kthread_wait_futex_signal() #endif } +static void kthread_wait_kutex() +{ + Thread* thread = CurrentThread(); + thread->yield_operation = YIELD_OPERATION_WAIT_KUTEX; +#if defined(__i386__) || defined(__x86_64__) + asm volatile ("int $129"); +#else +#error "kthread_wait_kutex needs to be implemented" +#endif +} + +static void kthread_wait_kutex_signal() +{ + Thread* thread = CurrentThread(); + thread->yield_operation = YIELD_OPERATION_WAIT_KUTEX_SIGNAL; +#if defined(__i386__) || defined(__x86_64__) + asm volatile ("int $129"); +#else +#error "kthread_wait_kutex needs to be implemented" +#endif +} + void kthread_wake_futex(Thread* thread) { Scheduler::SetThreadState(thread, ThreadState::RUNNABLE, true); @@ -96,68 +122,61 @@ static bool kutex_wait(int* address, int value, bool signal) { // TODO: Use a per-mutex wait queue instead. Thread* thread = CurrentThread(); - Process* kernel_process = Scheduler::GetKernelProcess(); bool was_enabled = Interrupt::SetEnabled(false); - kthread_spinlock_lock(&kernel_process->futex_lock); - thread->futex_address = (uintptr_t) address; - thread->futex_woken = false; - thread->futex_prev_waiting = kernel_process->futex_last_waiting; - thread->futex_next_waiting = NULL; - (kernel_process->futex_last_waiting ? - kernel_process->futex_last_waiting->futex_next_waiting : - kernel_process->futex_first_waiting) = thread; - kernel_process->futex_last_waiting = thread; - kthread_spinlock_unlock(&kernel_process->futex_lock); + kthread_spinlock_lock(&kutex_lock); + thread->kutex_address = (uintptr_t) address; + thread->kutex_woken = false; + thread->kutex_prev_waiting = kutex_last_waiting; + thread->kutex_next_waiting = NULL; + (kutex_last_waiting ? + kutex_last_waiting->kutex_next_waiting : + kutex_first_waiting) = thread; + kutex_last_waiting = thread; + kthread_spinlock_unlock(&kutex_lock); Interrupt::SetEnabled(was_enabled); thread->timer_woken = false; - bool result = true; if ( __atomic_load_n(address, __ATOMIC_SEQ_CST) == value ) { if ( signal ) - kthread_wait_futex(); + kthread_wait_kutex_signal(); else - kthread_wait_futex_signal(); + kthread_wait_kutex(); } Interrupt::SetEnabled(false); - kthread_spinlock_lock(&kernel_process->futex_lock); - if ( result && !thread->futex_woken ) - { - if ( signal && Signal::IsPending() ) - result = false; - } - thread->futex_address = 0; - thread->futex_woken = false; - (thread->futex_prev_waiting ? - thread->futex_prev_waiting->futex_next_waiting : - kernel_process->futex_first_waiting) = thread->futex_next_waiting; - (thread->futex_next_waiting ? - thread->futex_next_waiting->futex_prev_waiting : - kernel_process->futex_last_waiting) = thread->futex_prev_waiting; - thread->futex_prev_waiting = NULL; - thread->futex_next_waiting = NULL; - kthread_spinlock_unlock(&kernel_process->futex_lock); + kthread_spinlock_lock(&kutex_lock); + bool result = thread->kutex_woken || !signal || !Signal::IsPending(); + thread->kutex_address = 0; + thread->kutex_woken = false; + (thread->kutex_prev_waiting ? + thread->kutex_prev_waiting->kutex_next_waiting : + kutex_first_waiting) = thread->kutex_next_waiting; + (thread->kutex_next_waiting ? + thread->kutex_next_waiting->kutex_prev_waiting : + kutex_last_waiting) = thread->kutex_prev_waiting; + thread->kutex_prev_waiting = NULL; + thread->kutex_next_waiting = NULL; + kthread_spinlock_unlock(&kutex_lock); Interrupt::SetEnabled(was_enabled); return result; } static void kutex_wake(int* address, int count) { - Process* kernel_process = Scheduler::GetKernelProcess(); bool was_enabled = Interrupt::SetEnabled(false); - kthread_spinlock_lock(&kernel_process->futex_lock); - for ( Thread* waiter = kernel_process->futex_first_waiting; + kthread_spinlock_lock(&kutex_lock); + for ( Thread* waiter = kutex_first_waiting; 0 < count && waiter; - waiter = waiter->futex_next_waiting ) + waiter = waiter->kutex_next_waiting ) { - if ( waiter->futex_address == (uintptr_t) address ) + if ( waiter->kutex_address == (uintptr_t) address ) { - waiter->futex_woken = true; + waiter->kutex_woken = true; kthread_wake_futex(waiter); if ( count != INT_MAX ) count--; } } - kthread_spinlock_unlock(&kernel_process->futex_lock); + kthread_spinlock_unlock(&kutex_lock); Interrupt::SetEnabled(was_enabled); } @@ -214,8 +233,13 @@ bool kthread_mutex_lock_signal(kthread_mutex_t* mutex) void kthread_mutex_unlock(kthread_mutex_t* mutex) { + // TODO: Multiple threads could have caused the contention and this wakes + // only one such thread, but then the mutex returns to normal and the + // subsequent unlocks doesn't wake the other contended threads. Work + // around this bug by waking all of them instead, but it's better to + // instead count the number of waiting threads. if ( __atomic_exchange_n(mutex, UNLOCKED, __ATOMIC_SEQ_CST) == CONTENDED ) - kutex_wake(mutex, 1); + kutex_wake(mutex, INT_MAX); } // The kernel thread needs another stack to delete its own stack. @@ -271,8 +295,8 @@ void kthread_cond_wait(kthread_cond_t* cond, kthread_mutex_t* mutex) cond->first = &elem; cond->last = &elem; kthread_mutex_unlock(mutex); - while ( !__atomic_load_n(&elem.woken, __ATOMIC_SEQ_CST) && - kutex_wait(&elem.woken, 0, false) < 0 ); + while ( !__atomic_load_n(&elem.woken, __ATOMIC_SEQ_CST) ) + kutex_wait(&elem.woken, 0, false); kthread_mutex_lock(mutex); if ( !__atomic_load_n(&elem.woken, __ATOMIC_SEQ_CST) ) { @@ -302,10 +326,9 @@ bool kthread_cond_wait_signal(kthread_cond_t* cond, kthread_mutex_t* mutex) cond->last = &elem; kthread_mutex_unlock(mutex); bool result = true; - while ( !__atomic_load_n(&elem.woken, __ATOMIC_SEQ_CST) && - kutex_wait(&elem.woken, 0, false) < 0 ) + while ( !__atomic_load_n(&elem.woken, __ATOMIC_SEQ_CST) ) { - if ( Signal::IsPending() ) + if ( !kutex_wait(&elem.woken, 0, true) ) { result = false; break; diff --git a/kernel/scheduler.cpp b/kernel/scheduler.cpp index 5a8167e5..06a4ec61 100644 --- a/kernel/scheduler.cpp +++ b/kernel/scheduler.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2011, 2012, 2013, 2014, 2015, 2021 Jonas 'Sortie' Termansen. + * Copyright (c) 2011-2015, 2021-2022 Jonas 'Sortie' Termansen. * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above @@ -364,27 +364,29 @@ void Switch(struct interrupt_context* intctx) void InterruptYieldCPU(struct interrupt_context* intctx, void* /*user*/) { - if ( current_thread->yield_operation == YIELD_OPERATION_NONE ) - RealSwitch(intctx, true); - else if ( current_thread->yield_operation == YIELD_OPERATION_WAIT_FUTEX ) + bool wait = false; + switch ( current_thread->yield_operation ) { - if ( !current_thread->futex_woken && - !current_thread->timer_woken && - !asm_signal_is_pending ) - { - SetThreadState(current_thread, ThreadState::FUTEX_WAITING); - RealSwitch(intctx, false); - } + case YIELD_OPERATION_NONE: RealSwitch(intctx, true); return; + case YIELD_OPERATION_WAIT_FUTEX: + wait = !current_thread->futex_woken && !current_thread->timer_woken; + break; + case YIELD_OPERATION_WAIT_FUTEX_SIGNAL: + wait = !current_thread->futex_woken && !current_thread->timer_woken && + !asm_signal_is_pending; + break; + case YIELD_OPERATION_WAIT_KUTEX: + wait = !current_thread->kutex_woken && !current_thread->timer_woken; + break; + case YIELD_OPERATION_WAIT_KUTEX_SIGNAL: + wait = !current_thread->kutex_woken && !current_thread->timer_woken && + !asm_signal_is_pending; + break; } - else if ( current_thread->yield_operation == - YIELD_OPERATION_WAIT_FUTEX_SIGNAL ) + if ( wait ) { - if ( !current_thread->futex_woken && - !current_thread->timer_woken ) - { - SetThreadState(current_thread, ThreadState::FUTEX_WAITING); - RealSwitch(intctx, false); - } + SetThreadState(current_thread, ThreadState::FUTEX_WAITING); + RealSwitch(intctx, false); } } diff --git a/kernel/thread.cpp b/kernel/thread.cpp index 2d9dda73..df83ad2c 100644 --- a/kernel/thread.cpp +++ b/kernel/thread.cpp @@ -84,9 +84,13 @@ Thread::Thread() // system_clock initialized in member constructor. Time::InitializeThreadClocks(this); futex_address = 0; + kutex_address = 0; futex_woken = false; + kutex_woken = false; futex_prev_waiting = NULL; futex_next_waiting = NULL; + kutex_prev_waiting = NULL; + kutex_next_waiting = NULL; yield_operation = YIELD_OPERATION_NONE; } diff --git a/libc/pthread/pthread_mutex_unlock.c b/libc/pthread/pthread_mutex_unlock.c index 0d78f0b5..0d591be4 100644 --- a/libc/pthread/pthread_mutex_unlock.c +++ b/libc/pthread/pthread_mutex_unlock.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013, 2014, 2021 Jonas 'Sortie' Termansen. + * Copyright (c) 2013, 2014, 2021, 2022 Jonas 'Sortie' Termansen. * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above @@ -19,6 +19,7 @@ #include +#include #include #include @@ -33,8 +34,13 @@ int pthread_mutex_unlock(pthread_mutex_t* mutex) return 0; } mutex->owner = 0; + // TODO: Multiple threads could have caused the contention and this wakes + // only one such thread, but then the mutex returns to normal and the + // subsequent unlocks doesn't wake the other contended threads. Work + // around this bug by waking all of them instead, but it's better to + // instead count the number of waiting threads. if ( __atomic_exchange_n(&mutex->lock, UNLOCKED, __ATOMIC_SEQ_CST) == CONTENDED ) - futex(&mutex->lock, FUTEX_WAKE, 1, NULL); + futex(&mutex->lock, FUTEX_WAKE, INT_MAX, NULL); return 0; }