mutex: fix lock race conditions

main
anna 3 years ago
parent b6385aea0a
commit ebce7b8e83
Signed by: fef
GPG Key ID: EC22E476DC2D3D84

@ -12,27 +12,27 @@
#error "__LP64__ must be defined on amd64"
#endif
static inline void latom_init(latom_t *latom, long val)
{
latom->_value = val;
}
static inline long latom_read(const latom_t *latom)
{
return latom->_value;
}
static inline long latom_write(latom_t *latom, long val)
static inline long latom_xchg(latom_t *latom, long val)
{
long rax;
/* the intel manual says XCHG is always atomic, and you don't need a LOCK prefix */
__asm__ volatile(
" movq (%2), %0 \n" /* rax = atom->_value */
"1: lock \n"
" cmpxchgq %1, (%2) \n" /* if (latom->_value == rax) latom->_value = val */
" pause \n" /* intel says you're supposed to do this in spin loops */
" jne 1b \n" /* else goto 1 (rax updated to new latom->_value) */
: "=a"(rax)
: "r"(val), "r"(&latom->_value)
: "cc", "memory"
" xchgq %0, (%1) \n"
: "+r"(val)
: "r"(&latom->_value)
: "memory"
);
return rax;
return val;
}
static inline long latom_cmp_xchg(latom_t *latom, long compare, long val)
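As a point of reference (not part of the commit), the new xchgq-based latom_xchg() above has the same semantics as the GCC/Clang __atomic_exchange_n() builtin; a minimal sketch, with latom_xchg_sketch() as a hypothetical name:

/* Illustrative only: atomically store newval and return the previous value,
 * which is what the single xchgq instruction above does. */
static inline long latom_xchg_sketch(long *value, long newval)
{
	return __atomic_exchange_n(value, newval, __ATOMIC_SEQ_CST);
}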

@ -6,11 +6,24 @@
#include <gay/cdefs.h>
#include <gay/types.h>
/**
* @brief Initialize an atom non-atomically.
* This function is **only** for initializing an atom; never use it in code
* that requires actual atomicity (i.e. everything except initializers).
*
* @param atom Atom to initialize
* @param val Initial value for the atom
*/
static inline void atom_init(atom_t *atom, int val)
{
atom->_value = val;
}
/**
* @brief Read an atom's current value.
* You usually shouldn't need this function because all the other atomic
* primitives return the value before the operation, and we are only really
* interested in how values *compare* between operations.
* interested in how values *compare* between operations in most cases.
* Don't use `atom_read()` followed by another atomic operation; it defeats the
* whole purpose of using atomics in the first place.
*
@ -29,22 +42,17 @@ static inline int atom_read(const atom_t *atom)
* @param val New value
* @return The value of `atom` *before* the operation
*/
static inline int atom_write(atom_t *atom, int val)
static inline int atom_xchg(atom_t *atom, int val)
{
int eax;
/* the intel manual says XCHG is always atomic, and you don't need a LOCK prefix */
__asm__ volatile(
" movl (%2), %0 \n" /* eax = atom->_value */
"1: lock \n"
" cmpxchgl %1, (%2) \n" /* if (atom->_value == eax) atom->_value = val */
" pause \n" /* intel says you're supposed to do this in spin loops */
" jne 1b \n" /* else goto 1 (eax updated to new atom->_value) */
: "=a"(eax)
: "r"(val), "r"(&atom->_value)
: "cc", "memory"
" xchgl %0, (%1) \n"
: "+r"(val)
: "r"(&atom->_value)
: "memory"
);
return eax;
return val;
}
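To illustrate the warning above about following atom_read() with another atomic operation, here is a sketch of the lost-update race and the atom_cmp_xchg() retry loop that avoids it. It is not part of the diff and purely illustrative, since the header already provides atom_add(); atomic_add_sketch() is a hypothetical name.

/*
 * Hypothetical illustration of why read-then-write is racy:
 *
 *     int old = atom_read(&counter);
 *     atom_xchg(&counter, old + 1);   // another thread may have updated
 *                                     // counter in between; its update is lost
 *
 * A compare-and-exchange retry loop closes that window:
 */
static inline int atomic_add_sketch(atom_t *atom, int delta)
{
	int old = atom_read(atom);
	int seen;

	/* atom_cmp_xchg() returns the value it found; if that is not the value
	 * we based our computation on, somebody raced us and we retry */
	while ((seen = atom_cmp_xchg(atom, old, old + delta)) != old)
		old = seen;

	return old;	/* value before the addition, like the other primitives */
}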
/**
@ -164,16 +172,15 @@ static inline bool atom_dec(atom_t *atom)
*/
static inline int atom_and(atom_t *atom, int val)
{
int eax;
int eax = atom->_value;
__asm__ volatile(
" movl (%2), %0 \n" /* eax = atom->_value */
"1: andl %0, %1 \n" /* val &= eax */
" lock \n"
" cmpxchgl %1, (%2) \n" /* if (atom->_value == eax) atom->_value = val */
" pause \n" /* intel says you're supposed to do this in spin loops */
" jne 1b \n" /* else goto 1 (eax updated to new atom->_value) */
: "=a"(eax), "+r"(val)
: "+a"(eax), "+r"(val)
: "r"(&atom->_value)
: "cc", "memory"
);
@ -190,16 +197,15 @@ static inline int atom_and(atom_t *atom, int val)
*/
static inline int atom_or(atom_t *atom, int val)
{
int eax;
int eax = atom->_value;
__asm__ volatile(
" movl (%2), %0 \n" /* eax = atom->_value */
"1: orl %0, %1 \n" /* val |= eax */
" lock \n"
" cmpxchgl %1, (%2) \n" /* if (atom->_value == eax) atom->_value = val */
" pause \n" /* intel says you're supposed to do this in spin loops */
" jne 1b \n" /* else goto 1 (eax updated to new atom->_value) */
: "=a"(eax), "+r"(val)
: "+a"(eax), "+r"(val)
: "r"(&atom->_value)
: "cc", "memory"
);
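Not part of the diff, but for comparison: the cmpxchg retry loops in atom_and() and atom_or() implement fetch-and-AND / fetch-and-OR, returning the value before the operation as the other primitives are documented to do. The GCC/Clang builtins express the same operation directly; fetch_and_sketch() and fetch_or_sketch() are hypothetical names.

/* Illustrative only: fetch-and-AND / fetch-and-OR on a plain int, returning
 * the previous value, as the cmpxchg loops above do. */
static inline int fetch_and_sketch(int *value, int mask)
{
	return __atomic_fetch_and(value, mask, __ATOMIC_SEQ_CST);
}

static inline int fetch_or_sketch(int *value, int mask)
{
	return __atomic_fetch_or(value, mask, __ATOMIC_SEQ_CST);
}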
@ -276,14 +282,19 @@ static inline bool atom_clr_bit(atom_t *atom, int pos)
#error "sizeof(long) is expected equal sizeof(void *) on x86"
#endif
static inline void patom_init(patom_t *patom, void *val)
{
patom->_ptr = val;
}
static inline void *patom_read(const patom_t *patom)
{
return patom->_ptr;
}
static __always_inline void *patom_write(patom_t *patom, void *val)
static __always_inline void *patom_xchg(patom_t *patom, void *val)
{
return (void *)latom_write((latom_t *)patom, (long)val);
return (void *)latom_xchg((latom_t *)patom, (long)val);
}
static __always_inline void *patom_cmp_xchg(patom_t *patom, void *compare, void *val)

@ -16,14 +16,19 @@
#error "__ILP32__ must be defined on i386"
#endif
static inline void latom_init(latom_t *latom, long val)
{
latom->_value = val;
}
static inline long latom_read(const latom_t *latom)
{
return latom->_value;
}
static __always_inline long latom_write(latom_t *latom, long val)
static __always_inline long latom_xchg(latom_t *latom, long val)
{
return atom_write((atom_t *)latom, val);
return atom_xchg((atom_t *)latom, val);
}
static __always_inline long latom_cmp_xchg(latom_t *latom, long compare, long val)

@ -20,6 +20,8 @@ set(CFG_MAX_CPU "64" CACHE STRING "Maximum number of logical processors")
option(CFG_DEBUG_IRQ "Debug IRQs" ${DEBUG})
option(CFG_DEBUG_MTX "Debug mutexes" ${DEBUG})
option(CFG_DEBUG_CLIST "Sanitize clist operations" ${DEBUG})
option(CFG_DEBUG_PAGE_ALLOCS "Debug page frame allocations" OFF)

@ -106,6 +106,30 @@
#define typeof(expr) __typeof(expr)
#endif
/*
* These are hints for clang's branch optimizer which will try to arrange the
* code to yield the best performance when a condition is true or false.
*
* - Use them sparingly and only in performance critical places because they
* come with a sometimes very significant code size overhead due to branches
* being rearranged and aligned
* - Only use them if you know *for sure* that a particular branch is *very*
* unlikely to be hit, for example when
*
* Use it sparingly and only in performance critical places because the overhead
* from rearranging and aligning the individual instructions can quickly make
* the kernel image too big.
* Also, only use it if you actually know *for sure* that a particular branch
* is *very* unlikely to be hit. Most modern CPUs have become pretty good at
* speculative execution (~~and intel even managed to make it buggy~~) and
* probably do a better job optimizing themselves at runtime than you can here.
* Finally, you don't need this when doing resource allocation failure checks,
* because those functions are annotated with __malloc_like anyway.
*/
#define __predict_true(x) __builtin_expect(x, 1)
#define __predict_false(x) __builtin_expect(x, 0)
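A minimal usage sketch for these macros (not from the commit; checked_divide() and its error branch are hypothetical):

/* Hypothetical example: the caller passing a zero divisor is expected to be
 * very rare, so mark that branch as unlikely and keep the hot path straight. */
static long checked_divide(long num, long den)
{
	if (__predict_false(den == 0))
		return 0;	/* cold, rarely taken error path */

	return num / den;
}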
/*
* This file is part of GayBSD.
* Copyright (c) 2021 fef <owo@fef.moe>.

@ -40,6 +40,9 @@
/** @brief Debug IRQs */
#cmakedefine01 CFG_DEBUG_IRQ
/** @brief Debug mutexes */
#cmakedefine01 CFG_DEBUG_MTX
/** @brief Sanitize clist operations */
#cmakedefine01 CFG_DEBUG_CLIST

@ -19,7 +19,7 @@ typedef struct {
void spin_init(spin_t *spin);
void spin_lock(spin_t *spin);
int spin_trylock(spin_t *spin);
bool spin_trylock(spin_t *spin);
void spin_unlock(spin_t *spin);
struct lock_waiter {
@ -27,6 +27,15 @@ struct lock_waiter {
struct task *task;
};
/**
* @brief Yielding mutual exclusion lock.
*
* Several rules apply when using a mutex:
*
* - No access from irq context or within a critical section
* - Must be unlocked from the same thread that locked it
* - No more than one consecutive unlock allowed
*/
struct mtx {
atom_t lock; /* 1 = free, 0 = locked, < 0 = locked and other threads are waiting */
spin_t wait_queue_lock;
@ -34,7 +43,7 @@ struct mtx {
};
#define MTX_DEFINE(name) { \
.lock = ATOM_DEFINE(0), \
.lock = ATOM_DEFINE(1), \
.wait_queue_lock = SPIN_DEFINE, \
.wait_queue = CLIST_DEFINE((name).wait_queue), \
}
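A hedged usage sketch of the rules from the struct mtx doc comment above (not part of the commit; example_lock, shared_counter, and bump_counter() are hypothetical names):

/* Hypothetical example honoring the mutex rules: lock and unlock from the
 * same thread, never from irq context or inside a critical section. */
static struct mtx example_lock = MTX_DEFINE(example_lock);
static int shared_counter;

static void bump_counter(void)
{
	mtx_lock(&example_lock);	/* may sleep; not irq safe */
	shared_counter++;
	mtx_unlock(&example_lock);	/* exactly one unlock, same thread */
}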
@ -44,7 +53,7 @@ struct mtx {
void mtx_init(struct mtx *mutex);
void mtx_lock(struct mtx *mutex);
void mtx_unlock(struct mtx *mutex);
int mtx_trylock(struct mtx *mutex);
bool mtx_trylock(struct mtx *mutex);
struct sem {
atom_t count;

@ -3,6 +3,7 @@
#pragma once
#include <gay/cdefs.h>
#include <gay/irq.h>
#include <gay/sched.h>
#include <gay/types.h>
@ -30,14 +31,31 @@ void panic(const char *fmt, ...) __noreturn __printflike(1, 2);
#define KASSERT(x) do { } while (0)
#endif
/**
* @brief Enter a critical section, meaning preemption gets disabled.
* Critical sections should be kept as short as possible for performance.
* Cannot be called from irq context because irqs always run atomically.
*
* @see `critical_leave()`
*/
static inline void critical_enter(void)
{
KASSERT(!in_irq());
struct task *tsk = current;
tsk->critnest++;
}
/**
* @brief Leave a critical section.
* Cannot be called from irq context.
*
* @see `critical_enter()`
*/
static inline void critical_leave(void)
{
KASSERT(!in_irq());
struct task *tsk = current;
tsk->critnest--;
@ -46,6 +64,8 @@ static inline void critical_leave(void)
static inline bool in_critical(void)
{
if (in_irq())
return true;
return current->critnest > 0;
}
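Since spin_lock() and spin_unlock() now assert in_critical() (see the mutex.c hunk further down), callers are expected to wrap the spinlock in a critical section; a minimal sketch with hypothetical names (list_lock, touch_protected_state()):

/* Hypothetical example: spinlocks are taken inside a critical section
 * because spin_lock()/spin_unlock() assert in_critical(). */
static spin_t list_lock;	/* assume spin_init(&list_lock) was called */

static void touch_protected_state(void)
{
	critical_enter();		/* disable preemption */
	spin_lock(&list_lock);

	/* ... short, non-sleeping work on the protected state ... */

	spin_unlock(&list_lock);
	critical_leave();		/* re-enable preemption */
}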

@ -4,56 +4,58 @@
#include <arch/cpufunc.h>
#include <gay/clist.h>
#include <gay/irq.h>
#include <gay/kprintf.h>
#include <gay/mutex.h>
#include <gay/sched.h>
#include <gay/systm.h>
#include <gay/util.h>
#include <errno.h>
#if CFG_DEBUG_MTX
#define MTX_ASSERT(x) KASSERT(x)
#else
#define MTX_ASSERT(x) ({})
#endif
void spin_init(spin_t *spin)
{
atom_write(&spin->lock, 0);
atom_init(&spin->lock, 0);
}
void spin_lock(spin_t *spin)
{
MTX_ASSERT(in_critical());
spin_loop {
if (atom_cmp_xchg(&spin->lock, 0, 1) == 0)
if (atom_xchg(&spin->lock, 1) == 0)
break;
}
}
int spin_trylock(spin_t *spin)
bool spin_trylock(spin_t *spin)
{
if (atom_cmp_xchg(&spin->lock, 0, 1) != 0)
return -EAGAIN;
return 0;
MTX_ASSERT(in_critical());
return atom_xchg(&spin->lock, 1) == 0;
}
void spin_unlock(spin_t *spin)
{
atom_write(&spin->lock, 0);
MTX_ASSERT(in_critical());
atom_init(&spin->lock, 0);
}
void mtx_init(struct mtx *mtx)
{
atom_write(&mtx->lock, 1);
atom_init(&mtx->lock, 1);
spin_init(&mtx->wait_queue_lock);
clist_init(&mtx->wait_queue);
}
void mtx_lock(struct mtx *mtx)
{
# ifdef DEBUG
if (in_irq()) {
kprintf("mtx_lock() called from irq context!\n");
spin_loop {
if (atom_cmp_xchg(&mtx->lock, 1, 0) == 1)
return;
}
}
# endif
MTX_ASSERT(!in_critical());
critical_enter();
/*
* When the mutex is locked, its lock value goes to 0.
@ -61,79 +63,92 @@ void mtx_lock(struct mtx *mtx)
* nonzero, meaning the lock value has become negative.
*/
if (atom_dec(&mtx->lock)) {
struct task *task = current;
/*
* It might not be the smartest idea to allocate this thing on
* the stack because it's gonna blow up if the task somehow dies
* before returning here. Let's see how this turns out.
*/
struct task *this_task = current;
struct lock_waiter waiter = {
.task = task,
.task = this_task,
};
spin_lock(&mtx->wait_queue_lock);
clist_add(&mtx->wait_queue, &waiter.clink);
spin_unlock(&mtx->wait_queue_lock);
if (atom_cmp_xchg(&mtx->lock, 1, 0) == 1) {
/* mutex was unlocked after we failed to claim it, but
* before the other thread claimed wait_queue_lock */
spin_unlock(&mtx->wait_queue_lock);
task->state = TASK_BLOCKED;
schedule();
critical_leave();
} else {
this_task->state = TASK_BLOCKED;
clist_add(&mtx->wait_queue, &waiter.clink);
spin_unlock(&mtx->wait_queue_lock);
critical_leave();
schedule();
}
} else {
critical_leave();
}
}
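To make the lock-value protocol concrete, here is a tiny single-threaded trace of the counter transitions described in the comment above (illustrative only; real contention involves two tasks and the wait queue):

#include <assert.h>

int main(void)
{
	int lock = 1;			/* 1 = free */

	assert(--lock == 0);		/* task A: decrement to 0, not negative -> acquired */
	assert(--lock == -1);		/* task B: decrement to -1, negative -> must block */
	assert(lock++ == -1);		/* task A unlocks: old value < 0 -> wake one waiter */
	assert(lock == 0);		/* task B now holds the mutex, value back at 0 */

	return 0;
}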
int mtx_trylock(struct mtx *mtx)
bool mtx_trylock(struct mtx *mtx)
{
if (atom_cmp_xchg(&mtx->lock, 1, 0) != 1)
return -EAGAIN;
return 0;
MTX_ASSERT(!in_critical());
return atom_cmp_xchg(&mtx->lock, 1, 0) == 1;
}
void mtx_unlock(struct mtx *mtx)
{
MTX_ASSERT(!in_critical());
critical_enter();
if (atom_add(&mtx->lock, 1) < 0) {
spin_lock(&mtx->wait_queue_lock);
struct lock_waiter *waiter =
clist_del_first_entry(&mtx->wait_queue, typeof(*waiter), clink);
if (!clist_is_empty(&mtx->wait_queue)) {
struct lock_waiter *waiter = clist_del_first_entry(
&mtx->wait_queue,
typeof(*waiter),
clink
);
waiter->task->state = TASK_READY;
}
spin_unlock(&mtx->wait_queue_lock);
waiter->task->state = TASK_READY;
}
critical_leave();
}
void sem_init(struct sem *sem, int initial_count)
{
atom_write(&sem->count, initial_count);
atom_init(&sem->count, initial_count);
spin_init(&sem->wait_queue_lock);
clist_init(&sem->wait_queue);
}
int sem_down(struct sem *sem)
{
# ifdef DEBUG
if (in_irq()) {
kprintf("sem_down() called from IRQ context!\n");
spin_loop {
int old = atom_sub(&sem->count, 1);
if (old >= 0)
return old;
atom_inc(&sem->count);
}
}
# endif
MTX_ASSERT(!in_critical());
int ret = atom_sub(&sem->count, 1);
critical_enter();
int ret = atom_sub(&sem->count, 1);
if (ret < 0) {
struct task *task = current;
struct task *this_task = current;
struct lock_waiter waiter = {
.task = task,
.task = this_task,
};
this_task->state = TASK_BLOCKED;
spin_lock(&sem->wait_queue_lock);
clist_add(&sem->wait_queue, &waiter.clink);
spin_unlock(&sem->wait_queue_lock);
task->state = TASK_BLOCKED;
critical_leave();
schedule();
ret = 0;
} else {
critical_leave();
}
return ret;
@ -141,30 +156,45 @@ int sem_down(struct sem *sem)
int sem_up(struct sem *sem)
{
int ret = atom_add(&sem->count, 1);
MTX_ASSERT(!in_critical());
critical_enter();
int ret = atom_add(&sem->count, 1);
if (ret <= 0) {
spin_lock(&sem->wait_queue_lock);
struct lock_waiter *waiter =
clist_del_first_entry(&sem->wait_queue, typeof(*waiter), clink);
if (!clist_is_empty(&sem->wait_queue)) {
struct lock_waiter *waiter = clist_del_first_entry(
&sem->wait_queue,
typeof(*waiter),
clink
);
waiter->task->state = TASK_READY;
}
spin_unlock(&sem->wait_queue_lock);
waiter->task->state = TASK_READY;
ret = 0;
}
critical_leave();
return ret;
}
int sem_trydown(struct sem *sem)
{
int ret = atom_sub(&sem->count, 1);
MTX_ASSERT(!in_critical());
critical_enter();
int ret = atom_sub(&sem->count, 1);
if (ret < 0) {
atom_inc(&sem->count);
ret = -EAGAIN;
ret = -1;
}
critical_leave();
return ret;
}
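Finally, a brief usage sketch of the semaphore API as it stands after this change (items, producer(), and consumer() are hypothetical; note that sem_trydown() now reports failure as -1 rather than -EAGAIN):

/* Hypothetical producer/consumer pairing built on the primitives above. */
static struct sem items;	/* assume sem_init(&items, 0) ran at startup */

static void producer(void)
{
	/* ... enqueue an item somewhere ... */
	sem_up(&items);		/* wake one waiting consumer, if any */
}

static void consumer(void)
{
	sem_down(&items);	/* blocks until producer() signals an item */
	/* ... dequeue and process the item ... */

	if (sem_trydown(&items) < 0) {
		/* non-blocking attempt failed: nothing else queued right now */
	}
}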
