mutex: fix lock race conditions

Branch: main
Author: anna (3 years ago)
Parent: b6385aea0a
Commit: ebce7b8e83
Signed by: fef
GPG Key ID: EC22E476DC2D3D84
@@ -12,27 +12,27 @@
 #error "__LP64__ must be defined on amd64"
 #endif
 
+static inline void latom_init(latom_t *latom, long val)
+{
+	latom->_value = val;
+}
+
 static inline long latom_read(const latom_t *latom)
 {
 	return latom->_value;
 }
 
-static inline long latom_write(latom_t *latom, long val)
+static inline long latom_xchg(latom_t *latom, long val)
 {
-	long rax;
+	/* the intel manual says XCHG is always atomic, and you don't need a LOCK prefix */
 	__asm__ volatile(
-"	movq	(%2),	%0	\n" /* rax = atom->_value */
-"1:	lock			\n"
-"	cmpxchgq %1,	(%2)	\n" /* if (latom->_value == rax) latom->_value = val */
-"	pause			\n" /* intel says you're supposed to do this in spin loops */
-"	jne	1b		\n" /* else goto 1 (rax updated to new latom->_value) */
-	: "=a"(rax)
-	: "r"(val), "r"(&latom->_value)
-	: "cc", "memory"
+"	xchgq	%0,	(%1)	\n"
+	: "+r"(val)
+	: "r"(&latom->_value)
+	: "memory"
 	);
-	return rax;
+	return val;
 }
 
 static inline long latom_cmp_xchg(latom_t *latom, long compare, long val)

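Editor's note on the hunk above: the old `latom_write()` emulated a swap with a `lock cmpxchg` retry loop; the new `latom_xchg()` relies on the fact that `xchg` with a memory operand carries an implicit LOCK on x86, so a single instruction suffices. Purely for comparison (not part of this commit, and the kernel itself sticks to inline assembly), the same semantics can be sketched with the GCC/Clang `__atomic` builtins:

/* Illustrative only: equivalent behaviour to latom_xchg(), expressed with
 * compiler builtins instead of inline assembly.  The name is made up. */
static inline long latom_xchg_builtin_sketch(long *value, long newval)
{
	/* __ATOMIC_SEQ_CST mirrors the full-barrier behaviour of a LOCKed XCHG */
	return __atomic_exchange_n(value, newval, __ATOMIC_SEQ_CST);
}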
@@ -6,11 +6,24 @@
 #include <gay/cdefs.h>
 #include <gay/types.h>
 
+/**
+ * @brief Initialize an atom non-atomically.
+ * This function is **only** for initializing an atom, you should never use it
+ * in code that requires actual atomicity (i.e. everything except initializers).
+ *
+ * @param atom Atom to initialize
+ * @param val Initial value for the atom
+ */
+static inline void atom_init(atom_t *atom, int val)
+{
+	atom->_value = val;
+}
+
 /**
  * @brief Read an atom's current value.
  * You usually shouldn't need this function because all the other atomic
  * primitives return the value before the operation, and we are only really
- * interested in how values *compare* between operations.
+ * interested in how values *compare* between operations in most cases.
  * Don't use `atom_read()` followed by another atomic operation, it defeats the
  * whole purpose of using atomics in the first place.
  *
@@ -29,22 +42,17 @@ static inline int atom_read(const atom_t *atom)
  * @param val New value
  * @return The value of `atom` *before* the operation
  */
-static inline int atom_write(atom_t *atom, int val)
+static inline int atom_xchg(atom_t *atom, int val)
 {
-	int eax;
+	/* the intel manual says XCHG is always atomic, and you don't need a LOCK prefix */
 	__asm__ volatile(
-"	movl	(%2),	%0	\n" /* eax = atom->_value */
-"1:	lock			\n"
-"	cmpxchgl %1,	(%2)	\n" /* if (atom->_value == eax) atom->_value = val */
-"	pause			\n" /* intel says you're supposed to do this in spin loops */
-"	jne	1b		\n" /* else goto 1 (eax updated to new atom->_value) */
-	: "=a"(eax)
-	: "r"(val), "r"(&atom->_value)
-	: "cc", "memory"
+"	xchgl	%0,	(%1)	\n"
+	: "+r"(val)
+	: "r"(&atom->_value)
+	: "memory"
 	);
-	return eax;
+	return val;
 }
 
 /**
@@ -164,16 +172,15 @@ static inline bool atom_dec(atom_t *atom)
  */
 static inline int atom_and(atom_t *atom, int val)
 {
-	int eax;
+	int eax = atom->_value;
 	__asm__ volatile(
-"	movl	(%2),	%0	\n" /* eax = atom->_value */
 "1:	andl	%0,	%1	\n" /* val &= eax */
 "	lock			\n"
 "	cmpxchgl %1,	(%2)	\n" /* if (atom->_value == eax) atom->_value = val */
 "	pause			\n" /* intel says you're supposed to do this in spin loops */
 "	jne	1b		\n" /* else goto 1 (eax updated to new atom->_value) */
-	: "=a"(eax), "+r"(val)
+	: "+a"(eax), "+r"(val)
 	: "r"(&atom->_value)
 	: "cc", "memory"
 	);
@@ -190,16 +197,15 @@ static inline int atom_and(atom_t *atom, int val)
  */
 static inline int atom_or(atom_t *atom, int val)
 {
-	int eax;
+	int eax = atom->_value;
 	__asm__ volatile(
-"	movl	(%2),	%0	\n" /* eax = atom->_value */
 "1:	orl	%0,	%1	\n" /* val |= eax */
 "	lock			\n"
 "	cmpxchgl %1,	(%2)	\n" /* if (atom->_value == eax) atom->_value = val */
 "	pause			\n" /* intel says you're supposed to do this in spin loops */
 "	jne	1b		\n" /* else goto 1 (eax updated to new atom->_value) */
-	: "=a"(eax), "+r"(val)
+	: "+a"(eax), "+r"(val)
 	: "r"(&atom->_value)
 	: "cc", "memory"
 	);
@@ -276,14 +282,19 @@ static inline bool atom_clr_bit(atom_t *atom, int pos)
 #error "sizeof(long) is expected equal sizeof(void *) on x86"
 #endif
 
+static inline void patom_init(patom_t *patom, void *val)
+{
+	patom->_ptr = val;
+}
+
 static inline void *patom_read(const patom_t *patom)
 {
 	return patom->_ptr;
 }
 
-static __always_inline void *patom_write(patom_t *patom, void *val)
+static __always_inline void *patom_xchg(patom_t *patom, void *val)
 {
-	return (void *)latom_write((latom_t *)patom, (long)val);
+	return (void *)latom_xchg((latom_t *)patom, (long)val);
 }
 
 static __always_inline void *patom_cmp_xchg(patom_t *patom, void *compare, void *val)

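Editor's note: the `atom_and()`/`atom_or()` hunks above keep the `lock cmpxchg` retry loop but now seed `eax` with a plain read and use a `"+a"` constraint, so the comparand that `cmpxchg` writes back on failure survives into the next iteration. A rough C-level sketch of that loop shape, using compiler builtins purely for illustration (the function name is mine, not the kernel's API):

/* Illustrative fetch-and-AND via a compare-exchange retry loop; this mirrors
 * the structure of the inline asm in atom_and(), nothing more. */
static inline int atom_and_sketch(int *value, int mask)
{
	int expected = *value;	/* plain read seeds the loop */

	/* retry until nobody modified *value between the read and the CAS;
	 * on failure the builtin refreshes expected with the current value */
	while (!__atomic_compare_exchange_n(value, &expected, expected & mask,
					    false, __ATOMIC_SEQ_CST,
					    __ATOMIC_SEQ_CST))
		;

	return expected;	/* the value *before* the operation */
}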
@@ -16,14 +16,19 @@
 #error "__ILP32__ must be defined on i386"
 #endif
 
+static inline void latom_init(latom_t *latom, long val)
+{
+	latom->_value = val;
+}
+
 static inline long latom_read(const latom_t *latom)
 {
 	return latom->_value;
 }
 
-static __always_inline long latom_write(latom_t *latom, long val)
+static __always_inline long latom_xchg(latom_t *latom, long val)
 {
-	return atom_write((atom_t *)latom, val);
+	return atom_xchg((atom_t *)latom, val);
 }
 
 static __always_inline long latom_cmp_xchg(latom_t *latom, long compare, long val)

@@ -20,6 +20,8 @@ set(CFG_MAX_CPU "64" CACHE STRING "Maximum number of logical processors")
 
 option(CFG_DEBUG_IRQ "Debug IRQs" ${DEBUG})
 
+option(CFG_DEBUG_MTX "Debug mutexes" ${DEBUG})
+
 option(CFG_DEBUG_CLIST "Sanitize clist operations" ${DEBUG})
 
 option(CFG_DEBUG_PAGE_ALLOCS "Debug page frame allocations" OFF)

@@ -106,6 +106,30 @@
 #define typeof(expr) __typeof(expr)
 #endif
 
+/*
+ * These are hints for clang's branch optimizer which will try to arrange the
+ * code to yield the best performance when a condition is true or false.
+ *
+ * Use them sparingly and only in performance critical places because the
+ * overhead from rearranging and aligning the individual instructions can
+ * quickly make the kernel image too big.
+ * Also, only use them if you actually know *for sure* that a particular branch
+ * is *very* unlikely to be hit.  Most modern CPUs have become pretty good at
+ * speculative execution (~~and intel even managed to make it buggy~~) and
+ * probably do a better job optimizing themselves at runtime than you can here.
+ * Finally, you don't need this when doing resource allocation failure checks,
+ * because those functions are annotated with __malloc_like anyway.
+ */
+#define __predict_true(x)	__builtin_expect(x, 1)
+#define __predict_false(x)	__builtin_expect(x, 0)
+
 /*
  * This file is part of GayBSD.
  * Copyright (c) 2021 fef <owo@fef.moe>.

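Editor's note: a quick illustration of how the new hints are typically spelled at a call site. All names here are hypothetical and only show the shape of the usage, per the guidance in the comment above:

/* Hypothetical example of using the new branch prediction hints. */
#define BUFFER_MAX 4096		/* made-up limit for the sake of the example */

int copy_to_buffer(char *dst, const char *src, unsigned long len)
{
	/* the overflow case is assumed to be exceptionally rare, so tell the
	 * compiler to lay out the common path without a taken branch */
	if (__predict_false(len > BUFFER_MAX))
		return -1;

	for (unsigned long i = 0; i < len; i++)
		dst[i] = src[i];
	return 0;
}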
@@ -40,6 +40,9 @@
 /** @brief Debug IRQs */
 #cmakedefine01 CFG_DEBUG_IRQ
 
+/** @brief Debug mutexes */
+#cmakedefine01 CFG_DEBUG_MTX
+
 /** @brief Sanitize clist operations */
 #cmakedefine01 CFG_DEBUG_CLIST

@@ -19,7 +19,7 @@ typedef struct {
 void spin_init(spin_t *spin);
 void spin_lock(spin_t *spin);
-int spin_trylock(spin_t *spin);
+bool spin_trylock(spin_t *spin);
 void spin_unlock(spin_t *spin);
 
 struct lock_waiter {
@@ -27,6 +27,15 @@ struct lock_waiter {
 	struct task *task;
 };
 
+/**
+ * @brief Yielding mutual exclusion lock.
+ *
+ * Several rules apply when using a mutex:
+ *
+ * - No access from irq context or within a critical section
+ * - Must be unlocked from the same thread that locked it
+ * - No more than one consecutive unlock allowed
+ */
 struct mtx {
 	atom_t lock;	/* 1 = free, 0 = locked, < 0 = locked and other threads are waiting */
 	spin_t wait_queue_lock;
@@ -34,7 +43,7 @@ struct mtx {
 };
 
 #define MTX_DEFINE(name) {				\
-	.lock = ATOM_DEFINE(0),				\
+	.lock = ATOM_DEFINE(1),				\
 	.wait_queue_lock = SPIN_DEFINE,			\
 	.wait_queue = CLIST_DEFINE((name).wait_queue),	\
 }
@@ -44,7 +53,7 @@ struct mtx {
 void mtx_init(struct mtx *mutex);
 void mtx_lock(struct mtx *mutex);
 void mtx_unlock(struct mtx *mutex);
-int mtx_trylock(struct mtx *mutex);
+bool mtx_trylock(struct mtx *mutex);
 
 struct sem {
 	atom_t count;

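Editor's note: to make the new rules and the corrected static initializer concrete, here is a minimal usage sketch. The `foo` names are hypothetical; the relevant points are that `MTX_DEFINE` now starts the lock at 1 (free) and that `mtx_trylock()` returns a bool:

/* Hypothetical consumer of the mutex API; not part of this commit. */
static struct mtx foo_lock = MTX_DEFINE(foo_lock);
static int foo_count;

static void foo_add(int n)
{
	/* thread context only, and never while inside a critical section */
	mtx_lock(&foo_lock);
	foo_count += n;
	mtx_unlock(&foo_lock);	/* unlock from the same thread, exactly once */
}

static bool foo_try_add(int n)
{
	if (!mtx_trylock(&foo_lock))
		return false;	/* somebody else holds the lock, don't block */
	foo_count += n;
	mtx_unlock(&foo_lock);
	return true;
}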
@@ -3,6 +3,7 @@
 #pragma once
 
 #include <gay/cdefs.h>
+#include <gay/irq.h>
 #include <gay/sched.h>
 #include <gay/types.h>
@@ -30,14 +31,31 @@ void panic(const char *fmt, ...) __noreturn __printflike(1, 2);
 #define KASSERT(x) do { } while (0)
 #endif
 
+/**
+ * @brief Enter a critical section, meaning preemption gets disabled.
+ * Critical sections should be kept as short as possible for performance.
+ * Cannot be called from irq context because irqs always run atomically.
+ *
+ * @see `critical_leave()`
+ */
 static inline void critical_enter(void)
 {
+	KASSERT(!in_irq());
+
 	struct task *tsk = current;
 	tsk->critnest++;
 }
 
+/**
+ * @brief Leave a critical section.
+ * Cannot be called from irq context.
+ *
+ * @see `critical_enter()`
+ */
 static inline void critical_leave(void)
 {
+	KASSERT(!in_irq());
+
 	struct task *tsk = current;
 	tsk->critnest--;
@@ -46,6 +64,8 @@ static inline void critical_leave(void)
 
 static inline bool in_critical(void)
 {
+	if (in_irq())
+		return true;
 	return current->critnest > 0;
 }

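Editor's note: since `spin_lock()` now asserts `in_critical()` while the mutex and semaphore slow paths wrap themselves in `critical_enter()`/`critical_leave()`, the caller-side pattern for raw spinlocks looks roughly like the sketch below. The `bar` names are hypothetical; only the ordering matters:

/* Hypothetical example: a short critical section protecting a
 * spinlock-guarded list, following the pattern the assertions enforce. */
static spin_t bar_list_lock;	/* assumed to be set up with spin_init() */

static void bar_push(struct clist *head, struct clist *node)
{
	critical_enter();		/* disable preemption first ... */
	spin_lock(&bar_list_lock);	/* ... then take the spinlock */

	clist_add(head, node);

	spin_unlock(&bar_list_lock);
	critical_leave();		/* preemption may resume here */
}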
@@ -4,56 +4,58 @@
 
 #include <arch/cpufunc.h>
 
 #include <gay/clist.h>
-#include <gay/irq.h>
-#include <gay/kprintf.h>
 #include <gay/mutex.h>
 #include <gay/sched.h>
+#include <gay/systm.h>
+#include <gay/util.h>
 
-#include <errno.h>
+#if CFG_DEBUG_MTX
+#define MTX_ASSERT(x) KASSERT(x)
+#else
+#define MTX_ASSERT(x) ({})
+#endif
 
 void spin_init(spin_t *spin)
 {
-	atom_write(&spin->lock, 0);
+	atom_init(&spin->lock, 0);
 }
 
 void spin_lock(spin_t *spin)
 {
+	MTX_ASSERT(in_critical());
+
 	spin_loop {
-		if (atom_cmp_xchg(&spin->lock, 0, 1) == 0)
+		if (atom_xchg(&spin->lock, 1) == 0)
 			break;
 	}
 }
 
-int spin_trylock(spin_t *spin)
+bool spin_trylock(spin_t *spin)
 {
-	if (atom_cmp_xchg(&spin->lock, 0, 1) != 0)
-		return -EAGAIN;
-	return 0;
+	MTX_ASSERT(in_critical());
+
+	return atom_xchg(&spin->lock, 1) == 0;
 }
 
 void spin_unlock(spin_t *spin)
 {
-	atom_write(&spin->lock, 0);
+	MTX_ASSERT(in_critical());
+
+	atom_init(&spin->lock, 0);
 }
 
 void mtx_init(struct mtx *mtx)
 {
-	atom_write(&mtx->lock, 1);
+	atom_init(&mtx->lock, 1);
 	spin_init(&mtx->wait_queue_lock);
 	clist_init(&mtx->wait_queue);
 }
 
 void mtx_lock(struct mtx *mtx)
 {
-#	ifdef DEBUG
-	if (in_irq()) {
-		kprintf("mtx_lock() called from irq context!\n");
-		spin_loop {
-			if (atom_cmp_xchg(&mtx->lock, 1, 0) == 1)
-				return;
-		}
-	}
-#	endif
+	MTX_ASSERT(!in_critical());
+
+	critical_enter();
 
 	/*
 	 * When the mutex is locked, its lock value goes to 0.
@@ -61,79 +63,92 @@ void mtx_lock(struct mtx *mtx)
 	 * nonzero, meaning the lock value has become negative.
 	 */
 	if (atom_dec(&mtx->lock)) {
-		struct task *task = current;
+		struct task *this_task = current;
+		/*
+		 * It might not be the smartest idea to allocate this thing on
+		 * the stack because it's gonna blow up if the task somehow dies
+		 * before returning here.  Let's see how this turns out.
+		 */
 		struct lock_waiter waiter = {
-			.task = task,
+			.task = this_task,
 		};
 
 		spin_lock(&mtx->wait_queue_lock);
-		clist_add(&mtx->wait_queue, &waiter.clink);
-		spin_unlock(&mtx->wait_queue_lock);
-
-		task->state = TASK_BLOCKED;
-		schedule();
+		if (atom_cmp_xchg(&mtx->lock, 1, 0) == 1) {
+			/* mutex was unlocked after we failed to claim it, but
+			 * before the other thread claimed wait_queue_lock */
+			spin_unlock(&mtx->wait_queue_lock);
+			critical_leave();
+		} else {
+			this_task->state = TASK_BLOCKED;
+			clist_add(&mtx->wait_queue, &waiter.clink);
+			spin_unlock(&mtx->wait_queue_lock);
+			critical_leave();
+			schedule();
+		}
+	} else {
+		critical_leave();
 	}
 }
 
-int mtx_trylock(struct mtx *mtx)
+bool mtx_trylock(struct mtx *mtx)
 {
-	if (atom_cmp_xchg(&mtx->lock, 1, 0) != 1)
-		return -EAGAIN;
-	return 0;
+	MTX_ASSERT(!in_critical());
+
+	return atom_cmp_xchg(&mtx->lock, 1, 0) == 1;
 }
 
 void mtx_unlock(struct mtx *mtx)
 {
+	MTX_ASSERT(!in_critical());
+
+	critical_enter();
 	if (atom_add(&mtx->lock, 1) < 0) {
 		spin_lock(&mtx->wait_queue_lock);
-		struct lock_waiter *waiter =
-			clist_del_first_entry(&mtx->wait_queue, typeof(*waiter), clink);
+		if (!clist_is_empty(&mtx->wait_queue)) {
+			struct lock_waiter *waiter = clist_del_first_entry(
+				&mtx->wait_queue,
+				typeof(*waiter),
+				clink
+			);
+			waiter->task->state = TASK_READY;
+		}
 		spin_unlock(&mtx->wait_queue_lock);
-		waiter->task->state = TASK_READY;
 	}
+	critical_leave();
 }
 
 void sem_init(struct sem *sem, int initial_count)
 {
-	atom_write(&sem->count, initial_count);
+	atom_init(&sem->count, initial_count);
 	spin_init(&sem->wait_queue_lock);
 	clist_init(&sem->wait_queue);
 }
 
 int sem_down(struct sem *sem)
 {
-#	ifdef DEBUG
-	if (in_irq()) {
-		kprintf("sem_down() called from IRQ context!\n");
-		spin_loop {
-			int old = atom_sub(&sem->count, 1);
-			if (old >= 0)
-				return old;
-			atom_inc(&sem->count);
-		}
-	}
-#	endif
+	MTX_ASSERT(!in_critical());
 
-	int ret = atom_sub(&sem->count, 1);
+	critical_enter();
+	int ret = atom_sub(&sem->count, 1);
 	if (ret < 0) {
-		struct task *task = current;
+		struct task *this_task = current;
 		struct lock_waiter waiter = {
-			.task = task,
+			.task = this_task,
 		};
+		this_task->state = TASK_BLOCKED;
+
 		spin_lock(&sem->wait_queue_lock);
 		clist_add(&sem->wait_queue, &waiter.clink);
 		spin_unlock(&sem->wait_queue_lock);
-		task->state = TASK_BLOCKED;
+		critical_leave();
 		schedule();
 		ret = 0;
+	} else {
+		critical_leave();
 	}
 
 	return ret;
@@ -141,30 +156,45 @@ int sem_down(struct sem *sem)
 
 int sem_up(struct sem *sem)
 {
-	int ret = atom_add(&sem->count, 1);
+	MTX_ASSERT(!in_critical());
+
+	critical_enter();
+	int ret = atom_add(&sem->count, 1);
 	if (ret <= 0) {
 		spin_lock(&sem->wait_queue_lock);
-		struct lock_waiter *waiter =
-			clist_del_first_entry(&sem->wait_queue, typeof(*waiter), clink);
+		if (!clist_is_empty(&sem->wait_queue)) {
+			struct lock_waiter *waiter = clist_del_first_entry(
+				&sem->wait_queue,
+				typeof(*waiter),
+				clink
+			);
+			waiter->task->state = TASK_READY;
+		}
 		spin_unlock(&sem->wait_queue_lock);
-		waiter->task->state = TASK_READY;
 		ret = 0;
 	}
+	critical_leave();
 
 	return ret;
 }
 
 int sem_trydown(struct sem *sem)
 {
-	int ret = atom_sub(&sem->count, 1);
+	MTX_ASSERT(!in_critical());
+
+	critical_enter();
+	int ret = atom_sub(&sem->count, 1);
 	if (ret < 0) {
 		atom_inc(&sem->count);
-		ret = -EAGAIN;
+		ret = -1;
	}
+	critical_leave();
 
 	return ret;
 }

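Editor's note: to close out, a sketch of how the reworked semaphore is meant to be driven from thread context. The producer/consumer names are hypothetical; note that `sem_trydown()` now reports failure as -1 instead of -EAGAIN:

/* Hypothetical producer/consumer pair built on the reworked semaphore API. */
static struct sem item_count;	/* assumed: sem_init(&item_count, 0) at setup */

static void producer_put(void)
{
	/* ... enqueue an item somewhere ... */
	sem_up(&item_count);	/* wake one sleeping consumer, if any */
}

static void consumer_get(void)
{
	sem_down(&item_count);	/* blocks (TASK_BLOCKED) while the count is 0 */
	/* ... dequeue the item ... */
}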