/* Copyright (C) 2021,2022 fef.  All rights reserved. */

#pragma once

#ifndef _ARCH_ATOM_H_
#error "This file is not meant to be included directly, use <arch/atom.h> instead"
#endif

#include <arch/cpufunc.h>	/* assumed: X86_LOCK_PREFIX */
#include <gay/types.h>		/* assumed: latom_t, bool */

#ifndef __LP64__
#error "__LP64__ must be defined on amd64"
#endif

static inline void latom_init(latom_t *latom, long val)
{
	latom->_value = val;
}

static inline long latom_read(const latom_t *latom)
{
	/* naturally aligned 64-bit loads are atomic on amd64 */
	return latom->_value;
}

static inline long latom_xchg(latom_t *latom, long val)
{
	/* the intel manual says XCHG is always atomic; no LOCK prefix needed */
	__asm__ volatile(
"	xchgq	%0, (%1)	\n"
	: "+r"(val)
	: "r"(&latom->_value)
	: "memory"
	);

	return val;
}

static inline long latom_cmp_xchg(latom_t *latom, long compare, long val)
{
	long rax = compare;

	__asm__ volatile(
	X86_LOCK_PREFIX
"	cmpxchgq %1, (%2)	\n" /* if ((rax = latom->_value) == compare) latom->_value = val */
	: "+a"(rax)
	: "r"(val), "r"(&latom->_value)
	: "cc", "memory"
	);

	return rax;
}

/**
 * @brief Perform an atomic load/add/store.
 *
 * @param latom Atom to add to
 * @param val Value to add
 * @return The value of `latom` *before* the operation
 */
static inline long latom_add(latom_t *latom, long val)
{
	__asm__ volatile(
	X86_LOCK_PREFIX
"	xaddq	%0, (%1)	\n"
	: "+r"(val)
	: "r"(&latom->_value)
	: "cc", "memory"
	);

	return val;
}

static inline long latom_sub(latom_t *latom, long val)
{
	return latom_add(latom, -val);
}

static inline bool latom_inc(latom_t *latom)
{
	bool nonzero = false;

	__asm__ volatile(
	X86_LOCK_PREFIX
"	incq	(%1)		\n"
"	setne	%b0		\n"
	: "+r"(nonzero) /* read+write to ensure the initial value isn't optimized out */
	: "r"(&latom->_value)
	: "cc", "memory"
	);

	return nonzero;
}

static inline bool latom_dec(latom_t *latom)
{
	bool nonzero = false;

	__asm__ volatile(
	X86_LOCK_PREFIX
"	decq	(%1)		\n"
"	setne	%b0		\n"
	: "+r"(nonzero) /* read+write to ensure the initializer isn't optimized out */
	: "r"(&latom->_value)
	: "cc", "memory"
	);

	return nonzero;
}

static inline long latom_and(latom_t *latom, long val)
{
	long rax;
	long tmp;

	/*
	 * There is no fetch-and-AND instruction, so this is a CMPXCHG retry
	 * loop.  The proposed new value lives in a scratch register so a
	 * failed attempt doesn't clobber the caller's operand.
	 */
	__asm__ volatile(
"	movq	(%3), %0	\n" /* rax = latom->_value */
"1:	movq	%0, %1		\n" /* tmp = rax */
"	andq	%2, %1		\n" /* tmp &= val */
	X86_LOCK_PREFIX
"	cmpxchgq %1, (%3)	\n" /* if (latom->_value == rax) latom->_value = tmp */
"	jne	1b		\n" /* else retry (rax was updated to the current value) */
	: "=&a"(rax), "=&r"(tmp)
	: "r"(val), "r"(&latom->_value)
	: "cc", "memory"
	);

	return rax;
}

static inline long latom_or(latom_t *latom, long val)
{
	long rax;
	long tmp;

	/* same CMPXCHG retry loop as latom_and(), but for OR */
	__asm__ volatile(
"	movq	(%3), %0	\n" /* rax = latom->_value */
"1:	movq	%0, %1		\n" /* tmp = rax */
"	orq	%2, %1		\n" /* tmp |= val */
	X86_LOCK_PREFIX
"	cmpxchgq %1, (%3)	\n" /* if (latom->_value == rax) latom->_value = tmp */
"	jne	1b		\n" /* else retry (rax was updated to the current value) */
	: "=&a"(rax), "=&r"(tmp)
	: "r"(val), "r"(&latom->_value)
	: "cc", "memory"
	);

	return rax;
}

static inline long latom_xor(latom_t *latom, long val)
{
	long rax;
	long tmp;

	/* same CMPXCHG retry loop as latom_and(), but for XOR */
	__asm__ volatile(
"	movq	(%3), %0	\n" /* rax = latom->_value */
"1:	movq	%0, %1		\n" /* tmp = rax */
"	xorq	%2, %1		\n" /* tmp ^= val */
	X86_LOCK_PREFIX
"	cmpxchgq %1, (%3)	\n" /* if (latom->_value == rax) latom->_value = tmp */
"	jne	1b		\n" /* else retry (rax was updated to the current value) */
	: "=&a"(rax), "=&r"(tmp)
	: "r"(val), "r"(&latom->_value)
	: "cc", "memory"
	);

	return rax;
}

static inline bool latom_set_bit(latom_t *latom, int pos)
{
	bool ret = false;

	__asm__ volatile(
	X86_LOCK_PREFIX
"	btsq	%q1, (%2)	\n"
"	setc	%b0		\n"
	: "+r"(ret)
	: "r"(pos), "r"(&latom->_value)
	: "cc", "memory"
	);
	return ret;
}

static inline bool latom_clr_bit(latom_t *latom, int pos)
{
	bool ret = false;

	__asm__ volatile(
	X86_LOCK_PREFIX
"	btrq	%q1, (%2)	\n"
"	setc	%b0		\n"
	: "+r"(ret)
	: "r"(pos), "r"(&latom->_value)
	: "cc", "memory"
	);

	return ret;
}

static inline bool latom_flip_bit(latom_t *latom, int pos)
{
	bool ret = false;

	__asm__ volatile(
	X86_LOCK_PREFIX
"	btcq	%q1, (%2)	\n"
"	setc	%b0		\n"
	: "+r"(ret)
	: "r"(pos), "r"(&latom->_value)
	: "cc", "memory"
	);

	return ret;
}
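
/*
 * Usage sketch (illustration only, not part of this header): how a higher
 * level read-modify-write operation can be built from latom_read() and
 * latom_cmp_xchg().  latom_try_get() is a hypothetical refcount helper
 * invented for this example; it takes a reference only if the count has not
 * already dropped to zero, relying on latom_cmp_xchg() returning the value
 * it actually found.
 *
 *	static inline bool latom_try_get(latom_t *refcount)
 *	{
 *		long old = latom_read(refcount);
 *
 *		while (old != 0) {
 *			long found = latom_cmp_xchg(refcount, old, old + 1);
 *			if (found == old)
 *				return true;	// swap succeeded
 *			old = found;		// lost the race, retry with the new value
 *		}
 *
 *		return false;			// already dead, don't resurrect
 *	}
 */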