From ebce7b8e8350ab8799bd13aa54bdb2a562365442 Mon Sep 17 00:00:00 2001 From: fef Date: Thu, 4 Nov 2021 05:56:40 +0100 Subject: [PATCH] mutex: fix lock race conditions --- arch/x86/include/amd64/latom.h | 24 +++--- arch/x86/include/arch/atom.h | 53 +++++++----- arch/x86/include/i386/latom.h | 9 +- cmake/config.cmake | 2 + include/gay/cdefs.h | 24 ++++++ include/gay/config.h.in | 3 + include/gay/mutex.h | 15 +++- include/gay/systm.h | 20 +++++ kernel/mutex.c | 150 ++++++++++++++++++++------------- 9 files changed, 202 insertions(+), 98 deletions(-) diff --git a/arch/x86/include/amd64/latom.h b/arch/x86/include/amd64/latom.h index e9c172e..90f7751 100644 --- a/arch/x86/include/amd64/latom.h +++ b/arch/x86/include/amd64/latom.h @@ -12,27 +12,27 @@ #error "__LP64__ must be defined on amd64" #endif +static inline void latom_init(latom_t *latom, long val) +{ + latom->_value = val; +} + static inline long latom_read(const latom_t *latom) { return latom->_value; } -static inline long latom_write(latom_t *latom, long val) +static inline long latom_xchg(latom_t *latom, long val) { - long rax; - + /* the intel manual says XCHG is always atomic, and you don't need a LOCK prefix */ __asm__ volatile( -" movq (%2), %0 \n" /* rax = atom->_value */ -"1: lock \n" -" cmpxchgq %1, (%2) \n" /* if (latom->_value == rax) latom->_value = val */ -" pause \n" /* intel says you're supposed to do this in spin loops */ -" jne 1b \n" /* else goto 1 (rax updated to new latom->_value) */ - : "=a"(rax) - : "r"(val), "r"(&latom->_value) - : "cc", "memory" +" xchgq %0, (%1) \n" + : "+r"(val) + : "r"(&latom->_value) + : "memory" ); - return rax; + return val; } static inline long latom_cmp_xchg(latom_t *latom, long compare, long val) diff --git a/arch/x86/include/arch/atom.h b/arch/x86/include/arch/atom.h index 74a0a98..37ce1d6 100644 --- a/arch/x86/include/arch/atom.h +++ b/arch/x86/include/arch/atom.h @@ -6,11 +6,24 @@ #include #include +/** + * @brief Initialize an atom non-atomically. + * This function is **only** for initializing an atom, you should never use it + * in code that requires actual atomicity (i.e. everything except initializers). + * + * @param atom Atom to initialize + * @param val Initial value for the atom + */ +static inline void atom_init(atom_t *atom, int val) +{ + atom->_value = val; +} + /** * @brief Read an atom's current value. * You usually shouldn't need this function because all the other atomic * primitives return the value before the operation, and we are only really - * interested in how values *compare* between operations. + * interested in how values *compare* between operations in most cases. * Don't use `atom_read()` followed by another atomic operation, it defeats the * whole purpose of using atomics in the first place. 
* @@ -29,22 +42,17 @@ static inline int atom_read(const atom_t *atom) * @param val New value * @return The value of `atom` *before* the operation */ -static inline int atom_write(atom_t *atom, int val) +static inline int atom_xchg(atom_t *atom, int val) { - int eax; - + /* the intel manual says XCHG is always atomic, and you don't need a LOCK prefix */ __asm__ volatile( -" movl (%2), %0 \n" /* eax = atom->_value */ -"1: lock \n" -" cmpxchgl %1, (%2) \n" /* if (atom->_value == eax) atom->_value = val */ -" pause \n" /* intel says you're supposed to do this in spin loops */ -" jne 1b \n" /* else goto 1 (eax updated to new atom->_value) */ - : "=a"(eax) - : "r"(val), "r"(&atom->_value) - : "cc", "memory" +" xchgl %0, (%1) \n" + : "+r"(val) + : "r"(&atom->_value) + : "memory" ); - return eax; + return val; } /** @@ -164,16 +172,15 @@ static inline bool atom_dec(atom_t *atom) */ static inline int atom_and(atom_t *atom, int val) { - int eax; + int eax = atom->_value; __asm__ volatile( -" movl (%2), %0 \n" /* eax = atom->_value */ "1: andl %0, %1 \n" /* val &= eax */ " lock \n" " cmpxchgl %1, (%2) \n" /* if (atom->_value == eax) atom->_value = val */ " pause \n" /* intel says you're supposed to do this in spin loops */ " jne 1b \n" /* else goto 1 (eax updated to new atom->_value) */ - : "=a"(eax), "+r"(val) + : "+a"(eax), "+r"(val) : "r"(&atom->_value) : "cc", "memory" ); @@ -190,16 +197,15 @@ static inline int atom_and(atom_t *atom, int val) */ static inline int atom_or(atom_t *atom, int val) { - int eax; + int eax = atom->_value; __asm__ volatile( -" movl (%2), %0 \n" /* eax = atom->_value */ "1: orl %0, %1 \n" /* val |= eax */ " lock \n" " cmpxchgl %1, (%2) \n" /* if (atom->_value == eax) atom->_value = val */ " pause \n" /* intel says you're supposed to do this in spin loops */ " jne 1b \n" /* else goto 1 (eax updated to new atom->_value) */ - : "=a"(eax), "+r"(val) + : "+a"(eax), "+r"(val) : "r"(&atom->_value) : "cc", "memory" ); @@ -276,14 +282,19 @@ static inline bool atom_clr_bit(atom_t *atom, int pos) #error "sizeof(long) is expected equal sizeof(void *) on x86" #endif +static inline void patom_init(patom_t *patom, void *val) +{ + patom->_ptr = val; +} + static inline void *patom_read(const patom_t *patom) { return patom->_ptr; } -static __always_inline void *patom_write(patom_t *patom, void *val) +static __always_inline void *patom_xchg(patom_t *patom, void *val) { - return (void *)latom_write((latom_t *)patom, (long)val); + return (void *)latom_xchg((latom_t *)patom, (long)val); } static __always_inline void *patom_cmp_xchg(patom_t *patom, void *compare, void *val) diff --git a/arch/x86/include/i386/latom.h b/arch/x86/include/i386/latom.h index 8075dee..506777a 100644 --- a/arch/x86/include/i386/latom.h +++ b/arch/x86/include/i386/latom.h @@ -16,14 +16,19 @@ #error "__ILP32__ must be defined on i386" #endif +static inline void latom_init(latom_t *latom, long val) +{ + latom->_value = val; +} + static inline long latom_read(const latom_t *latom) { return latom->_value; } -static __always_inline long latom_write(latom_t *latom, long val) +static __always_inline long latom_xchg(latom_t *latom, long val) { - return atom_write((atom_t *)latom, val); + return atom_xchg((atom_t *)latom, val); } static __always_inline long latom_cmp_xchg(latom_t *latom, long compare, long val) diff --git a/cmake/config.cmake b/cmake/config.cmake index 0fc1af3..1326fd5 100644 --- a/cmake/config.cmake +++ b/cmake/config.cmake @@ -20,6 +20,8 @@ set(CFG_MAX_CPU "64" CACHE STRING "Maximum number of logical 
processors") option(CFG_DEBUG_IRQ "Debug IRQs" ${DEBUG}) +option(CFG_DEBUG_MTX "Debug mutexes" ${DEBUG}) + option(CFG_DEBUG_CLIST "Sanitize clist operations" ${DEBUG}) option(CFG_DEBUG_PAGE_ALLOCS "Debug page frame allocations" OFF) diff --git a/include/gay/cdefs.h b/include/gay/cdefs.h index 6eaeb1c..9f55fe5 100644 --- a/include/gay/cdefs.h +++ b/include/gay/cdefs.h @@ -106,6 +106,30 @@ #define typeof(expr) __typeof(expr) #endif +/* + * These are hints for clang's branch optimizer which will try to arrange the + * code to yield the best performance when a condition is true or false. + * + * - Use them sparingly and only in performance critical places because they + * come with a sometimes very significant code size overhead due to branches + * being rearranged and aligned + * - Only use them if you know *for sure* that a particular branch is *very* + * unlikely to be hit, for example when + * + * Use it sparingly and only in performance critical places because the overhead + * from rearranging and aligning the individual instructions can quickly make + * the kernel image too big. + * Also, only use it if you actually know *for sure* that a particular branch + * is *very* unlikely to be hit. Most modern CPUs have become pretty good at + * speculative execution (~~and intel even managed to make it buggy~~) and + * probably do a better job optimizing themselves at runtime than you can here. + * Finally, you don't need this when doing resource allocation failure checks, + * because those functions are annotated with __malloc_like anyway. + */ + +#define __predict_true(x) __builtin_expect(x, 1) +#define __predict_false(x) __builtin_expect(x, 0) + /* * This file is part of GayBSD. * Copyright (c) 2021 fef . diff --git a/include/gay/config.h.in b/include/gay/config.h.in index c75b4b9..9e735ec 100644 --- a/include/gay/config.h.in +++ b/include/gay/config.h.in @@ -40,6 +40,9 @@ /** @brief Debug IRQs */ #cmakedefine01 CFG_DEBUG_IRQ +/** @brief Debug mutexes */ +#cmakedefine01 CFG_DEBUG_MTX + /** @brief Sanitize clist operations */ #cmakedefine01 CFG_DEBUG_CLIST diff --git a/include/gay/mutex.h b/include/gay/mutex.h index e3c7fb0..6ccbce6 100644 --- a/include/gay/mutex.h +++ b/include/gay/mutex.h @@ -19,7 +19,7 @@ typedef struct { void spin_init(spin_t *spin); void spin_lock(spin_t *spin); -int spin_trylock(spin_t *spin); +bool spin_trylock(spin_t *spin); void spin_unlock(spin_t *spin); struct lock_waiter { @@ -27,6 +27,15 @@ struct lock_waiter { struct task *task; }; +/** + * @brief Yielding mutual exclusion lock. 
+ * + * Several rules apply when using a mutex: + * + * - No access from irq context or within a critical section + * - Must be unlocked from the same thread that locked it + * - No more than one consecutive unlock allowed + */ struct mtx { atom_t lock; /* 1 = free, 0 = locked, < 0 = locked and other threads are waiting */ spin_t wait_queue_lock; @@ -34,7 +43,7 @@ struct mtx { }; #define MTX_DEFINE(name) { \ - .lock = ATOM_DEFINE(0), \ + .lock = ATOM_DEFINE(1), \ .wait_queue_lock = SPIN_DEFINE, \ .wait_queue = CLIST_DEFINE((name).wait_queue), \ } @@ -44,7 +53,7 @@ struct mtx { void mtx_init(struct mtx *mutex); void mtx_lock(struct mtx *mutex); void mtx_unlock(struct mtx *mutex); -int mtx_trylock(struct mtx *mutex); +bool mtx_trylock(struct mtx *mutex); struct sem { atom_t count; diff --git a/include/gay/systm.h b/include/gay/systm.h index 1600948..d1a3e88 100644 --- a/include/gay/systm.h +++ b/include/gay/systm.h @@ -3,6 +3,7 @@ #pragma once #include +#include #include #include @@ -30,14 +31,31 @@ void panic(const char *fmt, ...) __noreturn __printflike(1, 2); #define KASSERT(x) do { } while (0) #endif +/** + * @brief Enter a critical section, meaning preemption gets disabled. + * Critical sections should be kept as short as possible for performance. + * Cannot be called from irq context because irqs always run atomically. + * + * @see `critical_leave()` + */ static inline void critical_enter(void) { + KASSERT(!in_irq()); + struct task *tsk = current; tsk->critnest++; } +/** + * @brief Leave a critical section. + * Cannot be called from irq context. + * + * @see `critical_enter()` + */ static inline void critical_leave(void) { + KASSERT(!in_irq()); + struct task *tsk = current; tsk->critnest--; @@ -46,6 +64,8 @@ static inline void critical_leave(void) static inline bool in_critical(void) { + if (in_irq()) + return true; return current->critnest > 0; } diff --git a/kernel/mutex.c b/kernel/mutex.c index 717edbb..74e2577 100644 --- a/kernel/mutex.c +++ b/kernel/mutex.c @@ -4,56 +4,58 @@ #include #include -#include -#include #include #include +#include +#include -#include +#if CFG_DEBUG_MTX +#define MTX_ASSERT(x) KASSERT(x) +#else +#define MTX_ASSERT(x) ({}) +#endif void spin_init(spin_t *spin) { - atom_write(&spin->lock, 0); + atom_init(&spin->lock, 0); } void spin_lock(spin_t *spin) { + MTX_ASSERT(in_critical()); + spin_loop { - if (atom_cmp_xchg(&spin->lock, 0, 1) == 0) + if (atom_xchg(&spin->lock, 1) == 0) break; } } -int spin_trylock(spin_t *spin) +bool spin_trylock(spin_t *spin) { - if (atom_cmp_xchg(&spin->lock, 0, 1) != 0) - return -EAGAIN; - return 0; + MTX_ASSERT(in_critical()); + + return atom_xchg(&spin->lock, 1) == 0; } void spin_unlock(spin_t *spin) { - atom_write(&spin->lock, 0); + MTX_ASSERT(in_critical()); + + atom_init(&spin->lock, 0); } void mtx_init(struct mtx *mtx) { - atom_write(&mtx->lock, 1); + atom_init(&mtx->lock, 1); spin_init(&mtx->wait_queue_lock); clist_init(&mtx->wait_queue); } void mtx_lock(struct mtx *mtx) { -# ifdef DEBUG - if (in_irq()) { - kprintf("mtx_lock() called from irq context!\n"); - spin_loop { - if (atom_cmp_xchg(&mtx->lock, 1, 0) == 1) - return; - } - } -# endif + MTX_ASSERT(!in_critical()); + + critical_enter(); /* * When the mutex is locked, its lock value goes to 0. @@ -61,79 +63,92 @@ void mtx_lock(struct mtx *mtx) * nonzero, meaning the lock value has become negative. 
*/ if (atom_dec(&mtx->lock)) { - struct task *task = current; - /* - * It might not be the smartest idea to allocate this thing on - * the stack because it's gonna blow up if the task somehow dies - * before returning here. Let's see how this turns out. - */ + struct task *this_task = current; struct lock_waiter waiter = { - .task = task, + .task = this_task, }; spin_lock(&mtx->wait_queue_lock); - clist_add(&mtx->wait_queue, &waiter.clink); - spin_unlock(&mtx->wait_queue_lock); + if (atom_cmp_xchg(&mtx->lock, 1, 0) == 1) { + /* mutex was unlocked after we failed to claim it, but + * before the other thread claimed wait_queue_lock */ + spin_unlock(&mtx->wait_queue_lock); - task->state = TASK_BLOCKED; - schedule(); + critical_leave(); + } else { + this_task->state = TASK_BLOCKED; + clist_add(&mtx->wait_queue, &waiter.clink); + spin_unlock(&mtx->wait_queue_lock); + + critical_leave(); + + schedule(); + } + } else { + critical_leave(); } } -int mtx_trylock(struct mtx *mtx) +bool mtx_trylock(struct mtx *mtx) { - if (atom_cmp_xchg(&mtx->lock, 1, 0) != 1) - return -EAGAIN; - return 0; + MTX_ASSERT(!in_critical()); + + return atom_cmp_xchg(&mtx->lock, 1, 0) == 1; } void mtx_unlock(struct mtx *mtx) { + MTX_ASSERT(!in_critical()); + + critical_enter(); + if (atom_add(&mtx->lock, 1) < 0) { spin_lock(&mtx->wait_queue_lock); - struct lock_waiter *waiter = - clist_del_first_entry(&mtx->wait_queue, typeof(*waiter), clink); + if (!clist_is_empty(&mtx->wait_queue)) { + struct lock_waiter *waiter = clist_del_first_entry( + &mtx->wait_queue, + typeof(*waiter), + clink + ); + waiter->task->state = TASK_READY; + } spin_unlock(&mtx->wait_queue_lock); - waiter->task->state = TASK_READY; } + + critical_leave(); } void sem_init(struct sem *sem, int initial_count) { - atom_write(&sem->count, initial_count); + atom_init(&sem->count, initial_count); spin_init(&sem->wait_queue_lock); clist_init(&sem->wait_queue); } int sem_down(struct sem *sem) { -# ifdef DEBUG - if (in_irq()) { - kprintf("sem_down() called from IRQ context!\n"); - spin_loop { - int old = atom_sub(&sem->count, 1); - if (old >= 0) - return old; - atom_inc(&sem->count); - } - } -# endif + MTX_ASSERT(!in_critical()); - int ret = atom_sub(&sem->count, 1); + critical_enter(); + int ret = atom_sub(&sem->count, 1); if (ret < 0) { - struct task *task = current; + struct task *this_task = current; struct lock_waiter waiter = { - .task = task, + .task = this_task, }; + this_task->state = TASK_BLOCKED; spin_lock(&sem->wait_queue_lock); clist_add(&sem->wait_queue, &waiter.clink); spin_unlock(&sem->wait_queue_lock); - task->state = TASK_BLOCKED; + critical_leave(); + schedule(); ret = 0; + } else { + critical_leave(); } return ret; @@ -141,30 +156,45 @@ int sem_down(struct sem *sem) int sem_up(struct sem *sem) { - int ret = atom_add(&sem->count, 1); + MTX_ASSERT(!in_critical()); + + critical_enter(); + int ret = atom_add(&sem->count, 1); if (ret <= 0) { spin_lock(&sem->wait_queue_lock); - struct lock_waiter *waiter = - clist_del_first_entry(&sem->wait_queue, typeof(*waiter), clink); + if (!clist_is_empty(&sem->wait_queue)) { + struct lock_waiter *waiter = clist_del_first_entry( + &sem->wait_queue, + typeof(*waiter), + clink + ); + waiter->task->state = TASK_READY; + } spin_unlock(&sem->wait_queue_lock); - waiter->task->state = TASK_READY; ret = 0; } + critical_leave(); + return ret; } int sem_trydown(struct sem *sem) { - int ret = atom_sub(&sem->count, 1); + MTX_ASSERT(!in_critical()); + critical_enter(); + + int ret = atom_sub(&sem->count, 1); if (ret < 0) { 
atom_inc(&sem->count); - ret = -EAGAIN; + ret = -1; } + critical_leave(); + return ret; }
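
Usage notes (illustrative only, not part of the diff):

With the rules documented in include/gay/mutex.h and include/gay/systm.h above, the
intended split is: mutexes are taken from regular, preemptible thread context and may
sleep, while spinlocks are only taken inside a critical section and should only guard
very short, non-blocking work.  A minimal sketch of that pattern, assuming the
<gay/mutex.h> and <gay/systm.h> include paths; example_lock, example_count,
example_list_lock and the two functions are made-up names:

#include <gay/mutex.h>          /* assumed path, header lives at include/gay/mutex.h */
#include <gay/systm.h>          /* assumed path, header lives at include/gay/systm.h */

static struct mtx example_lock = MTX_DEFINE(example_lock);
static int example_count;

/* mutexes: thread context only, never from irq context or a critical section */
void example_bump(void)
{
        mtx_lock(&example_lock);        /* may block and call schedule() if contended */
        example_count++;
        mtx_unlock(&example_lock);      /* must be unlocked by the locking thread */
}

static spin_t example_list_lock = SPIN_DEFINE;

/* spinlocks: only with preemption disabled, and only for very short sections */
void example_poke(void)
{
        critical_enter();               /* spin_lock() expects in_critical() to hold */
        spin_lock(&example_list_lock);
        /* ... short, non-blocking work ... */
        spin_unlock(&example_list_lock);
        critical_leave();
}

With CFG_DEBUG_MTX enabled, the MTX_ASSERT() checks added by this patch should catch
violations of these rules at runtime.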
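
In the same spirit, a small sketch of the new __predict_false() hint from
include/gay/cdefs.h; struct example_superblock, sb_magic and EXAMPLE_SB_MAGIC are
invented for illustration, and panic() is the existing routine declared in
include/gay/systm.h:

#define EXAMPLE_SB_MAGIC 0x47425344u    /* made-up magic value */

struct example_superblock {
        unsigned int sb_magic;
        /* ... */
};

static void example_check_sb(const struct example_superblock *sb)
{
        /* hint that the corruption path is almost never taken */
        if (__predict_false(sb->sb_magic != EXAMPLE_SB_MAGIC))
                panic("superblock corrupted");
}

As the comment in cdefs.h says, this is only worth it on genuinely cold paths; most
branches are better left to the CPU's own branch predictor.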
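
Finally, a sketch of the semaphore API touched by this patch, used as a simple item
counter; example_items and the three functions are invented, and the actual item queue
is elided.  Like mutexes, sem_down() and sem_up() are meant for preemptible thread
context:

static struct sem example_items;

void example_items_init(void)
{
        sem_init(&example_items, 0);    /* start with nothing available */
}

void example_produce(void)
{
        /* ... enqueue an item somewhere ... */
        sem_up(&example_items);         /* wakes one waiter, if any */
}

void example_consume(void)
{
        sem_down(&example_items);       /* blocks while the count is zero */
        /* ... dequeue the item ... */
}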