From 24ae60225f168aca08d5e3adcb58684057878c0b Mon Sep 17 00:00:00 2001 From: fef Date: Fri, 12 Nov 2021 06:13:10 +0100 Subject: [PATCH] amd64: add base 64-bit support files This has been brewing for quite some time now, and it still is nowhere near finished, but at least it compiles now. A lot has changed, and it's still quite messy (i386 is almost certainly broken now, i haven't even checked) --- arch/x86/boot/setup64.S | 346 +++++++++++++++++++++++++++++ arch/x86/config/kernel64.ld | 82 +++++++ arch/x86/config/toolchain.cmake | 4 +- arch/x86/include/amd64/interrupt.h | 44 ++++ arch/x86/include/amd64/latom.h | 4 +- arch/x86/include/amd64/page.h | 114 ++++++++++ arch/x86/include/amd64/sched.h | 51 +++++ arch/x86/include/amd64/trap.h | 46 ++++ arch/x86/include/amd64/vmparam.h | 38 ++++ arch/x86/include/arch/interrupt.h | 2 +- arch/x86/include/arch/sched.h | 7 - arch/x86/include/arch/segment.h | 17 +- arch/x86/include/i386/interrupt.h | 2 +- arch/x86/include/i386/sched.h | 7 + arch/x86/mm/amd64/page.c | 153 +++++++++++++ arch/x86/mm/{ => i386}/page.c | 0 arch/x86/mm/segment.S | 88 +++++--- arch/x86/sys/amd64/idt.S | 39 ++++ arch/x86/sys/amd64/irq.S | 105 +++++++++ arch/x86/sys/amd64/switch.S | 67 ++++++ arch/x86/sys/amd64/systm.c | 34 +++ arch/x86/sys/amd64/trap.S | 142 ++++++++++++ arch/x86/sys/i386/systm.c | 2 +- arch/x86/sys/interrupt.c | 2 +- cmake/config-x86.cmake | 2 +- cmake/config.cmake | 2 +- doc/amd64/memory.md | 50 +++++ include/gay/mm.h | 2 + 28 files changed, 1389 insertions(+), 63 deletions(-) create mode 100644 arch/x86/boot/setup64.S create mode 100644 arch/x86/config/kernel64.ld create mode 100644 arch/x86/include/amd64/interrupt.h create mode 100644 arch/x86/include/amd64/page.h create mode 100644 arch/x86/include/amd64/sched.h create mode 100644 arch/x86/include/amd64/trap.h create mode 100644 arch/x86/include/amd64/vmparam.h create mode 100644 arch/x86/mm/amd64/page.c rename arch/x86/mm/{ => i386}/page.c (100%) create mode 100644 arch/x86/sys/amd64/idt.S create mode 100644 arch/x86/sys/amd64/irq.S create mode 100644 arch/x86/sys/amd64/switch.S create mode 100644 arch/x86/sys/amd64/systm.c create mode 100644 arch/x86/sys/amd64/trap.S create mode 100644 doc/amd64/memory.md diff --git a/arch/x86/boot/setup64.S b/arch/x86/boot/setup64.S new file mode 100644 index 0000000..bafa047 --- /dev/null +++ b/arch/x86/boot/setup64.S @@ -0,0 +1,346 @@ +/* See the end of this file for copyright and license terms. */ + +#include + +#include +#include +#include + +/* + * Early boot sequence on amd64. + * + * This is loosely based on the 32-bit example code from the OSDev.org wiki: + * + * mixed with some of the Long Mode tutorial code: + * + * + * When entering from the bootloader, we are still in 32-bit protected mode, + * meaning we have to enable long mode ourselves. 
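+ *
+ * In C-like pseudocode, the mode switch itself boils down to this
+ * (an illustrative sketch only -- the code below works with raw hex
+ * constants, and has to be assembly because we cannot call C code
+ * before paging and the higher half mapping exist):
+ *
+ *	cr4 |= PSE | PAE | PGE;
+ *	cr3 = PADDR(_pml4);
+ *	IA32_EFER |= LME | NXE;		// via rdmsr/wrmsr
+ *	cr0 |= PG | WP;			// paging on -> long mode active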
+ *
+ * Our basic setup procedure is:
+ * - set up the stack (unlike on i386, where we do it only as the last step)
+ * - perform some sanity checks, like whether the CPU actually supports 64 bits
+ * - load the GDT at its *physical* address and use its 32-bit segments
+ * - populate the page maps and %cr3 with the 4-level 64-bit structures:
+ *   + the lowest 2 GB (including the entire kernel image) are identity mapped
+ *   + the same mapping is mirrored to KERNBASE (0xffffffff80000000)
+ *   + the page tables are mapped recursively at X86_PMAP_OFFSET
+ * - enable IA-32e mode by setting IA32_EFER.LME = 1 (see Intel SDM vol 4, tbl 2-2)
+ * - enable paging and jump to the higher half mapping
+ * - update rsp and rbp to also point to the high address
+ * - reload the GDT at its higher half address
+ * - discard the identity mapping of low memory
+ * - call _boot()
+ */
+
+	.code32	/* we enter in 32-bit protected mode */
+
+	/* from kernel64.ld */
+	.extern _image_start_phys
+	.extern _image_end_phys
+
+	.extern _boot	/* main boot routine -- see ./boot.c */
+
+	/* initial page maps -- see ../mm/amd64/page.c */
+	.extern _pml4
+	.extern _pdp0
+
+	/* GDT stuff -- see ../mm/segment.S */
+	.extern _x86_gdt_desc
+	.extern _x86_gdt_desc_phys
+	.extern _x86_kern_tss
+	.extern _x86_user_tss
+
+	.section .multiboot.text, "ax", @progbits
+
+/* C code is linked against high memory, this gets the physical address */
+#define PADDR(c_symbol) (c_symbol - KERNBASE)
+
+/*
+ * miscellaneous utility routines
+ */
+
+/* void fix_tss_base(u64 *gdt_entry, struct x86_tss *tss) */
+L_ENTRY(fix_tss_base)
+	movl 4(%esp), %edi
+	movl 8(%esp), %eax
+	movw %ax, 2(%edi)
+	shrl $16, %eax
+	movb %al, 4(%edi)
+	movb %ah, 7(%edi)
+	ret
+L_END(fix_tss_base)
+
+/* void err_and_hlt(const char *message) -- never returns */
+L_ENTRY(err_and_hlt)
+	mov $0x000b8000, %edx
+	mov 4(%esp), %ecx
+	movb $0x4f, %ah	/* VGA text attribute: white text on red background */
+
+1:	movb (%ecx), %al
+	testb %al, %al
+	jz 2f
+	movw %ax, (%edx)
+	addl $2, %edx
+	incl %ecx
+	jmp 1b
+
+2:	cli	/* interrupts should already be off, but it won't hurt */
+	hlt
+	jmp 2b
+L_END(err_and_hlt)
+
+/*
+ * actual setup routine
+ */
+
+ENTRY(_setup)
+	/*
+	 * set up the stack
+	 */
+	movl $PADDR(stack_top), %esp
+	pushl $0	/* CPU number -- see include/arch/smp.h */
+	pushl $0	/* two longwords to keep the stack 8 byte aligned */
+	movl %esp, %ebp
+
+	push %ebx	/* temporarily stash multiboot tag address away */
+
+	/*
+	 * the bootloader should have loaded the multiboot magic
+	 * value into eax, check if that is really the case
+	 */
+	cmp $MB2_BOOTLOADER_MAGIC, %eax
+	je 1f
+	pushl $errmsg_no_multiboot
+	call err_and_hlt	/* call rather than jmp, so the argument sits at 4(%esp) */
+
+	/*
+	 * check if the CPU supports the CPUID instruction
+	 * this is done by checking whether we can flip bit 21 in EFLAGS
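+	 * (bit 21 is the EFLAGS.ID flag: per the Intel SDM, the CPUID
+	 * instruction is supported if and only if software can toggle it.
+	 * In C terms, with hypothetical read_eflags()/write_eflags()
+	 * helpers, the check below amounts to:
+	 *
+	 *	u32 old = read_eflags();
+	 *	write_eflags(old ^ (1 << 21));
+	 *	bool has_cpuid = (read_eflags() != old);
+	 * )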
+	 */
+1:	pushf
+	pop %eax
+	movl %eax, %ecx
+	xorl $(1 << 21), %eax
+	push %eax
+	popf
+	pushf
+	pop %eax
+	push %ecx	/* restore original flags */
+	popf
+	cmp %eax, %ecx
+	jne 2f
+	pushl $errmsg_no_cpuid
+	call err_and_hlt
+
+	/*
+	 * check if the CPU supports extended CPUID addresses
+	 */
+2:	mov $0x80000000, %eax
+	cpuid
+	cmp $0x80000001, %eax
+	jae 3f
+	pushl $errmsg_no_ext_cpuid
+	call err_and_hlt
+
+	/*
+	 * check if the CPU supports IA-32e mode
+	 */
+3:	mov $0x80000001, %eax
+	cpuid
+	test $(1 << 29), %edx
+	jnz 4f
+	pushl $errmsg_no_ia32e
+	call err_and_hlt
+
+	/*
+	 * load the base values of the kernel and user TSS into the corresponding GDT entries
+	 */
+4:	pushl $PADDR(_x86_kern_tss)
+	pushl $PADDR(_x86_gdt + X86_KERN_TSS)
+	call fix_tss_base
+	pushl $PADDR(_x86_user_tss)
+	pushl $PADDR(_x86_gdt + X86_USER_TSS)
+	call fix_tss_base
+	addl $16, %esp
+
+	/*
+	 * load our own GDT (at its physical address) and its 32-bit segments
+	 */
+	lgdt _x86_gdt_desc_phys
+	ljmp $(X86_32_KERN_CS), $1f
+1:	movl $(X86_KERN_DS), %eax
+	movw %ax, %ds
+	movw %ax, %es
+	movw %ax, %fs
+	movw %ax, %gs
+	movw %ax, %ss
+
+	/* XXX do we really need to load a TSS? */
+	movl $X86_KERN_TSS, %eax
+	ltr %ax
+
+#if (KERNBASE % (1 << X86_PDP_SHIFT)) != 0
+#error "KERNBASE must be aligned to at least a PDP entry (1 GB)"
+#endif
+#if (X86_PMAP_OFFSET % (1 << X86_PML4_SHIFT)) != 0
+#error "X86_PMAP_OFFSET must be aligned to at least a PML4 entry (512 GB)"
+#endif
+
+#define V48 0xffff000000000000
+#define PDP_OFFSET(ptr)  (( (((ptr) - V48) >> X86_PDP_SHIFT)  % 512 ) * 8)
+#define PML4_OFFSET(ptr) (( (((ptr) - V48) >> X86_PML4_SHIFT) % 512 ) * 8)
+
+	/*
+	 * statically map the low 2 GB to itself and to the high kernel half
+	 */
+	/* for the identity mapping */
+	movl $0x00000083, PADDR(_pdp0)		/* present (0), write (1), huge (7) */
+	movl $0x40000083, PADDR(_pdp0 + 8)
+	/* for the -2GB at the end of virtual memory (we use the same PDP for both) */
+	movl $0x00000083, PADDR(_pdp0 + PDP_OFFSET(KERNBASE))
+	movl $0x40000083, PADDR(_pdp0 + PDP_OFFSET(KERNBASE) + 8)
+
+	movl $PADDR(_pdp0 + 0x003), PADDR(_pml4)	/* present (0), write (1) */
+	movl $PADDR(_pdp0 + 0x003), PADDR(_pml4 + PML4_OFFSET(KERNBASE))
+
+	/* map the PML4 to itself */
+	movl $PADDR(_pml4 + 0x003), PADDR(_pml4 + PML4_OFFSET(X86_PMAP_OFFSET))
+	movb $0x80, PADDR(_pml4 + PML4_OFFSET(X86_PMAP_OFFSET) + 7)	/* NX bit */
+
+	/*
+	 * ensure paging is disabled by clearing CR0.PG (bit 31)
+	 */
+	movl %cr0, %eax
+	andl $0x7fffffff, %eax
+	movl %eax, %cr0
+
+	/*
+	 * enable:
+	 *   CR4.PSE (Page Size Extensions, bit 4)
+	 *   CR4.PAE (Physical Address Extension, bit 5)
+	 *   CR4.PGE (Page Global Enable, bit 7)
+	 */
+	movl %cr4, %eax
+	orl $0x000000b0, %eax
+	movl %eax, %cr4
+
+	/* load cr3 with the PML4 */
+	movl $PADDR(_pml4), %eax
+	movl %eax, %cr3
+
+	/*
+	 * enable IA-32e by setting IA32_EFER.LME (bit 8)
+	 * (and also set No Execute Enable (bit 11) while we're at it)
+	 */
+	movl $0xc0000080, %ecx
+	rdmsr
+	orl $0x00000900, %eax
+	wrmsr
+
+	/*
+	 * enable:
+	 *   CR0.PG (Paging, bit 31)
+	 *   CR0.WP (Write Protect, bit 16)
+	 */
+	movl %cr0, %eax
+	orl $0x80010000, %eax
+	movl %eax, %cr0
+
+	/* remember when we pushed the multiboot tag address stored in ebx
+	 * like, 100 lines ago? Yeah we restore that now, and put it into
+	 * the right register to pass it as the first parameter to _boot().
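+	 * (For the record: the System V amd64 ABI passes the first integer
+	 * argument in %rdi. A sketch of the receiving end, with a purely
+	 * hypothetical tag struct name, would be
+	 *
+	 *	void _boot(struct mb2_tag *tags);
+	 *
+	 * -- the real prototype lives in ./boot.c.)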
*/ + pop %edi + + /* we can't jump to the high half of 64-bit memory directly since this is + * still a 32-bit instruction, so we need to add an intermediate step to + * a trampoline which can make the actual 64-bit far jump to high memory */ + ljmpl $X86_64_KERN_CS, $trampoline +END(_setup) + +/* + * native 64-bit code starts here + */ + + .code64 + +L_ENTRY(trampoline) + movabsq $_setup_highmem, %rcx + jmpq *%rcx +L_END(trampoline) + + .text + +ASM_ENTRY(_setup_highmem) + /* + * update all pointers to virtual address space + */ + movabsq $KERNBASE, %rax + addq %rax, %rsp + addq %rax, %rbp + addq %rax, %rdi /* multiboot tag */ + + /* + * reload the GDT, this time with the virtual mapping + */ + lgdt _x86_gdt_desc + /* data segments are ignored in 64-bit mode, load the null descriptor */ +1: xor %eax, %eax + movw %ax, %ds + movw %ax, %es + movw %ax, %fs + movw %ax, %gs + movw %ax, %ss + + /* reset RFLAGS */ + pushq $0 + popfq + + /* remove the low memory identity mapping and bonk the TLB */ + movl $0, _pdp0 + movl $0, _pdp0 + 8 + movl $0, _pml4 + movq %cr3, %rax + movq %rax, %cr3 + + callq _boot + + /* this should Never Be Reached(TM) */ + cli +2: hlt + jmp 2b +ASM_END(_setup_highmem) + + .section .multiboot.data, "a", @progbits + +L_DATA(errmsg_no_multiboot) + .asciz "Invalid Multiboot 2 magic number in %eax" +L_END(errmsg_no_multiboot) +L_DATA(errmsg_no_cpuid) + .asciz "CPUID instruction not supported" +L_END(errmsg_no_cpuid) +L_DATA(errmsg_no_ext_cpuid) + .asciz "No extended CPUID features available" +L_END(errmsg_no_ext_cpuid) +L_DATA(errmsg_no_ia32e) + .asciz "CPU does not appear to support IA-32e mode" +L_END(errmsg_no_ia32e) + + .section .bootstrap_stack, "aw", @nobits +stack_bottom: + .skip 16384 /* 16 K for the stack should be plenty for now */ +stack_top: + +/* + * This file is part of GayBSD. + * Copyright (c) 2021 fef . + * + * GayBSD is nonviolent software: you may only use, redistribute, and/or + * modify it under the terms of the Cooperative Nonviolent Public License + * (CNPL) as found in the LICENSE file in the source code root directory + * or at ; either version 7 + * of the license, or (at your option) any later version. + * + * GayBSD comes with ABSOLUTELY NO WARRANTY, to the extent + * permitted by applicable law. See the CNPL for details. + */ diff --git a/arch/x86/config/kernel64.ld b/arch/x86/config/kernel64.ld new file mode 100644 index 0000000..e7fe43e --- /dev/null +++ b/arch/x86/config/kernel64.ld @@ -0,0 +1,82 @@ +/* See the end of this file for copyright and license terms. */ + +OUTPUT_FORMAT("elf64-x86-64") + +/* not strictly needed because we produce a binary image but can't hurt */ +ENTRY(_start) + +KERNBASE = 0xffffffff80000000; +PAGE_SIZE = 4K; + +SECTIONS { + . = CFG_KERN_ORIGIN; + + _image_start = . + KERNBASE; + _image_start_phys = .; + _kernel_start = . + KERNBASE; + _kernel_start_phys = .; + + .multiboot.data : { + . = ALIGN(8); + KEEP(*(.multiboot.data)) + } + + .multiboot.text : { + . = ALIGN(8); + KEEP(*(.multiboot.text)) + } + + . 
+= KERNBASE; + + /* + * All sections from here on are page aligned so we can + * set different access permissions for each of them + */ + + .text ALIGN(PAGE_SIZE) : AT(ADDR(.text) - KERNBASE) { + _text_start = .; + *(.text .text.* .gnu.linkonce.t.*) + _text_end = .; + } + + .rodata ALIGN(PAGE_SIZE) : AT(ADDR(.rodata) - KERNBASE) { + _rodata_start = .; + *(.rodata .rodata.* .gnu.linkonce.r.*) + _rodata_end = .; + } + + .data ALIGN(PAGE_SIZE) : AT(ADDR(.data) - KERNBASE) { + _data_start = .; + *(.data .data.*) + _data_end = .; + } + + _kernel_end = .; + _kernel_end_phys = . - KERNBASE; + + .bss ALIGN(PAGE_SIZE) : AT(ADDR(.bss) - KERNBASE) { + _bss_start = .; + *(COMMON) + *(.bss) + _bss_end = .; + . = ALIGN(8); + *(.bootstrap_stack) + } + + _image_end = .; + _image_end_phys = . - KERNBASE; +} + +/* + * This file is part of GayBSD. + * Copyright (c) 2021 fef . + * + * GayBSD is nonviolent software: you may only use, redistribute, and/or + * modify it under the terms of the Cooperative Nonviolent Public License + * (CNPL) as found in the LICENSE file in the source code root directory + * or at ; either version 7 + * of the license, or (at your option) any later version. + * + * GayBSD comes with ABSOLUTELY NO WARRANTY, to the extent + * permitted by applicable law. See the CNPL for details. + */ diff --git a/arch/x86/config/toolchain.cmake b/arch/x86/config/toolchain.cmake index 963e67b..c1cf29a 100644 --- a/arch/x86/config/toolchain.cmake +++ b/arch/x86/config/toolchain.cmake @@ -9,7 +9,7 @@ if (CFG_X86_64) set(CMAKE_SYSTEM_PROCESSOR x86-64) set(_toolchain_common_flags "-m64 -march=x86-64") set(_toolchain_triple x86_64-pc-none-elf) - set(X86_TARGET "elf64-amd64") + set(_toolchain_bits 64) else() set(CMAKE_SYSTEM_PROCESSOR i686) set(_toolchain_common_flags "-m32 -march=i686") @@ -26,7 +26,7 @@ set(CMAKE_RANLIB ${TOOLCHAIN_PATH}/${CMAKE_EXECUTABLE_PREFIX}ranlib${CMAK set(CMAKE_SZE ${TOOLCHAIN_PATH}/${CMAKE_EXECUTABLE_PREFIX}szr${CMAKE_EXECUTABLE_SUFFIX} CACHE INTERNAL "") set(CMAKE_STRIP ${TOOLCHAIN_PATH}/${CMAKE_EXECUTABLE_PREFIX}strip${CMAKE_EXECUTABLE_SUFFIX} CACHE INTERNAL "") -set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${_toolchain_common_flags}") +set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${_toolchain_common_flags} -mcmodel=kernel -mno-red-zone -mno-mmx -mno-sse -msoft-float") set(CMAKE_ASM_FLAGS "${CMAKE_ASM_FLAGS} ${_toolchain_common_flags}") set(CMAKE_C_COMPILER_TARGET ${_toolchain_triple}) set(CMAKE_ASM_COMPILER_TARGET ${_toolchain_triple}) diff --git a/arch/x86/include/amd64/interrupt.h b/arch/x86/include/amd64/interrupt.h new file mode 100644 index 0000000..8e8e30f --- /dev/null +++ b/arch/x86/include/amd64/interrupt.h @@ -0,0 +1,44 @@ +/* See the end of this file for copyright and license terms. */ + +#pragma once + +#ifndef _ARCH_INTERRUPT_H_ +#error "This file is not meant to be included directly, use " +#endif + +/* + * XXX it's probably not necessary to declare this struct __packed and add + * paddings for the cs and ss members manually, because all members *should* + * get aligned to 8 bytes automatically. I'm absolutely not in the mood for + * verifying this right now tho, so we're playing it safe. + */ + +/** + * @brief A hardware stack frame on amd64. + * I have no idea whether x86 people are actually calling it hardware stack + * frame tbh, this is just the (ARM) terminology i'm used to. Either way, + * this is what gets pushed to the stack automatically when entering an ISR. 
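+ *
+ * For reference, the CPU pushes %ss, %rsp, %rflags, %cs, %rip in that
+ * order, each in its own 8-byte stack slot, so %rip (the first member
+ * below) ends up at the lowest address. A quick compile-time sanity
+ * check for the layout could be (illustrative, not in the build):
+ *
+ *	_Static_assert(sizeof(struct amd64_hw_frame) == 5 * 8, "");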
+ */ +struct amd64_hw_frame { + u64 rip; + u16 cs; u16 _pad0; u32 _pad1; + u64 rflags; + + /* unlike on i386, the amd64 *always* pushes %rsp and %ss */ + u64 rsp; + u16 ss; u16 _pad2; u32 _pad3; +} __packed; + +/* + * This file is part of GayBSD. + * Copyright (c) 2021 fef . + * + * GayBSD is nonviolent software: you may only use, redistribute, and/or + * modify it under the terms of the Cooperative Nonviolent Public License + * (CNPL) as found in the LICENSE file in the source code root directory + * or at ; either version 7 + * of the license, or (at your option) any later version. + * + * GayBSD comes with ABSOLUTELY NO WARRANTY, to the extent + * permitted by applicable law. See the CNPL for details. + */ diff --git a/arch/x86/include/amd64/latom.h b/arch/x86/include/amd64/latom.h index cd62d83..0528a22 100644 --- a/arch/x86/include/amd64/latom.h +++ b/arch/x86/include/amd64/latom.h @@ -173,7 +173,7 @@ static inline bool latom_set_bit(latom_t *latom, int pos) " btsq %1, (%2) \n" " setc %0 \n" : "+r"(ret) - : "r"(pos), "r"(&atom->_value) + : "r"(pos), "r"(&latom->_value) : "cc", "memory" ); @@ -189,7 +189,7 @@ static inline bool latom_clr_bit(latom_t *latom, int pos) " btrq %1, (%2) \n" " setc %b0 \n" : "+r"(ret) - : "r"(pos), "r"(&atom->_value) + : "r"(pos), "r"(&latom->_value) : "cc", "memory" ); diff --git a/arch/x86/include/amd64/page.h b/arch/x86/include/amd64/page.h new file mode 100644 index 0000000..80d1844 --- /dev/null +++ b/arch/x86/include/amd64/page.h @@ -0,0 +1,114 @@ +/* See the end of this file for copyright and license terms. */ + +#pragma once +#ifndef _ARCH_PAGE_H_ +#error "This file is not meant to be included directly, use " +#endif + +/** @brief Binary logarithm of `HUGEPAGE_SIZE`. */ +#define HUGEPAGE_SHIFT 21 + +#include + +#define X86_PT_SHIFT PAGE_SHIFT +#define X86_PD_SHIFT (X86_PT_SHIFT + 9) +#define X86_PDP_SHIFT (X86_PD_SHIFT + 9) +#define X86_PML4_SHIFT (X86_PDP_SHIFT + 9) + +#ifndef _ASM_SOURCE + +#include +#include + +/** + * @brief A single 64-bit Page Table Entry. + * The layout matches that of the Intel SDM, vol 3, sect 4.3, fig 4-4. + * Bits 9 and 10 (`slab` and `atomic`) are marked as AVL in the manual and + * ignored by the MMU. We only use them for `get_pflags()`/`set_pflags()`. 
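+ *
+ * As a worked example, the boot-time PDP entry 0x0000000040000083 from
+ * arch/x86/boot/setup64.S decodes to present (bit 0) and rw (bit 1) with
+ * huge (bit 7) set, i.e. a writable 1 GB mapping of physical address
+ * 0x40000000.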
+ */
+struct x86_page_flags {
+/*  0 */bool present:1;		/**< Page Fault on access if 0 */
+/*  1 */bool rw:1;		/**< Page Fault on write if 0 */
+/*  2 */bool user:1;		/**< Page Fault on user mode access if 0 */
+/*  3 */bool write_through:1;	/**< Enable write-through caching */
+/*  4 */bool cache_disabled:1;	/**< Disable caching in TLB */
+/*  5 */bool accessed:1;	/**< 1 if page has been accessed */
+/*  6 */bool dirty:1;		/**< 1 if page has been written to */
+/*  7 */bool huge:1;		/**< only valid for PDPTEs and PDEs */
+/*  8 */bool global:1;		/**< Don't update the TLB on table swap if 1 */
+/*  9 */bool slab:1;		/**< Used by the slab allocator */
+/* 10 */bool atomic:1;		/**< Allocated atomically */
+/* 11 */unsigned _unused:1;
+/* 12 */uintptr_t shifted_address:51;
+/* 63 */bool noexec:1;
+} __packed;
+
+#define __PFLAG_PRESENT		(1 << 0)
+#define __PFLAG_RW		(1 << 1)
+#define __PFLAG_USER		(1 << 2)
+#define __PFLAG_WRITE_THROUGH	(1 << 3)
+#define __PFLAG_NOCACHE		(1 << 4)
+#define __PFLAG_ACCESSED	(1 << 5)
+#define __PFLAG_DIRTY		(1 << 6)
+#define __PFLAG_HUGE		(1 << 7)
+#define __PFLAG_GLOBAL		(1 << 8)
+#define __PFLAG_SLAB		(1 << 9)
+#define __PFLAG_ATOMIC		(1 << 10)
+#define __PFLAG_NOEXEC		(1UL << 63)	/* must be a 64-bit constant */
+
+/*
+ * these types are deliberately not merged into one so that the
+ * compiler can catch accidental assignments to the wrong type
+ */
+
+#define __pmap_entry_union union {	\
+	struct x86_page_flags flags;	\
+	uintptr_t val;			\
+}
+typedef __pmap_entry_union x86_pte_t;
+typedef __pmap_entry_union x86_pde_t;
+typedef __pmap_entry_union x86_pdpe_t;
+typedef __pmap_entry_union x86_pml4e_t;
+
+typedef struct { x86_pte_t entries[512]; } __aligned(PAGE_SIZE) x86_pt_t;
+typedef struct { x86_pde_t entries[512]; } __aligned(PAGE_SIZE) x86_pd_t;
+typedef struct { x86_pdpe_t entries[512]; } __aligned(PAGE_SIZE) x86_pdp_t;
+typedef struct { x86_pml4e_t entries[512]; } __aligned(PAGE_SIZE) x86_pml4_t;
+
+#define X86_PMAP_MASK 0x7ffffffffffff000
+
+/* you aren't expected to understand any of these, they're just nasty offset calculations */
+
+/** @brief Get the linear 48-bit address */
+#define __V48ADDR(ptr) ((uintptr_t)(ptr) & 0x0000ffffffffffff)
+
+#define X86_PT_INDEX(ptr)	(( __V48ADDR(ptr) >> X86_PT_SHIFT  ) % 512)
+#define X86_PD_INDEX(ptr)	(( __V48ADDR(ptr) >> X86_PD_SHIFT  ) % 512)
+#define X86_PDP_INDEX(ptr)	(( __V48ADDR(ptr) >> X86_PDP_SHIFT ) % 512)
+#define X86_PML4_INDEX(ptr)	(  __V48ADDR(ptr) >> X86_PML4_SHIFT )
+
+/* byte offsets within the recursive map: one 8-byte entry per page, hence the * 8 */
+#define __PT_BASE	X86_PMAP_OFFSET
+#define __PD_BASE	(__PT_BASE  + ((__V48ADDR(X86_PMAP_OFFSET) >> X86_PT_SHIFT)  * 8))
+#define __PDP_BASE	(__PD_BASE  + ((__V48ADDR(X86_PMAP_OFFSET) >> X86_PD_SHIFT)  * 8))
+#define __PML4_BASE	(__PDP_BASE + ((__V48ADDR(X86_PMAP_OFFSET) >> X86_PDP_SHIFT) * 8))
+
+#define X86_PTE(ptr)	((x86_pte_t *)(   __PT_BASE   + ((__V48ADDR(ptr) >> X86_PT_SHIFT)   * 8) ))
+#define X86_PDE(ptr)	((x86_pde_t *)(   __PD_BASE   + ((__V48ADDR(ptr) >> X86_PD_SHIFT)   * 8) ))
+#define X86_PDPE(ptr)	((x86_pdpe_t *)(  __PDP_BASE  + ((__V48ADDR(ptr) >> X86_PDP_SHIFT)  * 8) ))
+#define X86_PML4E(ptr)	((x86_pml4e_t *)( __PML4_BASE + ((__V48ADDR(ptr) >> X86_PML4_SHIFT) * 8) ))
+
+#endif /* not _ASM_SOURCE */
+
+/*
+ * This file is part of GayBSD.
+ * Copyright (c) 2021 fef .
+ *
+ * GayBSD is nonviolent software: you may only use, redistribute, and/or
+ * modify it under the terms of the Cooperative Nonviolent Public License
+ * (CNPL) as found in the LICENSE file in the source code root directory
+ * or at ; either version 7
+ * of the license, or (at your option) any later version.
+ * + * GayBSD comes with ABSOLUTELY NO WARRANTY, to the extent + * permitted by applicable law. See the CNPL for details. + */ diff --git a/arch/x86/include/amd64/sched.h b/arch/x86/include/amd64/sched.h new file mode 100644 index 0000000..13b14fc --- /dev/null +++ b/arch/x86/include/amd64/sched.h @@ -0,0 +1,51 @@ +/* See the end of this file for copyright and license terms. */ + +#pragma once + +#ifndef _ARCH_SCHED_H_ +#error "This file is not meant to be included directly, use " +#endif + +/** + * @brief In-kernel context save for the x86_64. + * This precise structure layout is hardcoded in assembly, so don't forget to + * update `arch/x86/sys/amd64/switch.S` when changing it. + */ +struct amd64_context { + /* + * the register itself is %rsp, but it points to the %rip that was + * pushed by the function calling `arch_switch_to()` as a result of + * the CALL instruction + */ + union { + u64 rsp; + u64 *rip; + }; + u64 rbx; + u64 rbp; + u64 r12; + u64 r13; + u64 r14; + u64 r15; +} __packed; + +/** + * @brief Arch dependent Task Control Block (amd64 version). + * This is what's required for in-kernel task switching. + * Treat as a completely opaque type outside of the `arch/x86` directory. + */ +typedef struct amd64_context tcb_t; + +/* + * This file is part of GayBSD. + * Copyright (c) 2021 fef . + * + * GayBSD is nonviolent software: you may only use, redistribute, and/or + * modify it under the terms of the Cooperative Nonviolent Public License + * (CNPL) as found in the LICENSE file in the source code root directory + * or at ; either version 7 + * of the license, or (at your option) any later version. + * + * GayBSD comes with ABSOLUTELY NO WARRANTY, to the extent + * permitted by applicable law. See the CNPL for details. + */ diff --git a/arch/x86/include/amd64/trap.h b/arch/x86/include/amd64/trap.h new file mode 100644 index 0000000..8f1a40c --- /dev/null +++ b/arch/x86/include/amd64/trap.h @@ -0,0 +1,46 @@ +/* See the end of this file for copyright and license terms. */ + +#pragma once + +#ifndef _ARCH_TRAP_H_ +#error "This file is not meant to be included directly, use " +#endif + +/** + * @brief Complete context save on the amd64. + */ +struct amd64_trap_frame { + /* this doesn't need to be a pointer because unlike on + * i386, the hardware frame always has the same size */ + struct amd64_hw_frame hw_frame; + u64 rsi; + u64 rdi; + u64 rax; + u64 rbx; + u64 rcx; + u64 rdx; + u64 rbp; + u64 r8; + u64 r9; + u64 r10; + u64 r11; + u64 r12; + u64 r13; + u64 r14; + u64 r15; +} __packed; +typedef struct amd64_trap_frame trap_frame_t; + +/* + * This file is part of GayBSD. + * Copyright (c) 2021 fef . + * + * GayBSD is nonviolent software: you may only use, redistribute, and/or + * modify it under the terms of the Cooperative Nonviolent Public License + * (CNPL) as found in the LICENSE file in the source code root directory + * or at ; either version 7 + * of the license, or (at your option) any later version. + * + * GayBSD comes with ABSOLUTELY NO WARRANTY, to the extent + * permitted by applicable law. See the CNPL for details. + */ diff --git a/arch/x86/include/amd64/vmparam.h b/arch/x86/include/amd64/vmparam.h new file mode 100644 index 0000000..3a0cc07 --- /dev/null +++ b/arch/x86/include/amd64/vmparam.h @@ -0,0 +1,38 @@ +/* See the end of this file for copyright and license terms. 
*/ + +#pragma once +#ifndef _ARCH_VMPARAM_H_ +#error "This file is not meant to be included directly, use " +#endif + +/** @brief Userland memory region */ +#define USER_OFFSET 0x0000000000000000 /* +0 TB */ +#define USER_LENGTH 0x0000800000000000 /* 128 TB */ + +/** @brief Recursive Page Map Level 4 map */ +#define X86_PMAP_OFFSET 0xffff800000000000 /* -128 TB */ +#define X86_PMAP_LENGTH 0x0000004020101000 /* ~ 256.5 GB */ + +/** @brief Direct (contiguous) mapping of physical memory */ +#define DMAP_OFFSET 0xfffff80000000000 /* -8 TB */ +#define DMAP_LENGTH 0x0000040000000000 /* 4 TB */ + +/** @brief Kernel region (image, heap, etc) */ +#define KERN_OFFSET 0xfffffe0000000000 /* -2 TB */ +#define KERN_LENGTH 0x0000020000000000 /* 2 TB */ +/** @brief Where the kernel image is actually mapped to */ +#define KERNBASE 0xffffffff80000000 /* -2 GB */ + +/* + * This file is part of GayBSD. + * Copyright (c) 2021 fef . + * + * GayBSD is nonviolent software: you may only use, redistribute, and/or + * modify it under the terms of the Cooperative Nonviolent Public License + * (CNPL) as found in the LICENSE file in the source code root directory + * or at ; either version 7 + * of the license, or (at your option) any later version. + * + * GayBSD comes with ABSOLUTELY NO WARRANTY, to the extent + * permitted by applicable law. See the CNPL for details. + */ diff --git a/arch/x86/include/arch/interrupt.h b/arch/x86/include/arch/interrupt.h index 11b104a..4957fcb 100644 --- a/arch/x86/include/arch/interrupt.h +++ b/arch/x86/include/arch/interrupt.h @@ -79,7 +79,7 @@ */ struct x86_idt_entry { u16 offset0; /**< @brief ptr to handler, bits 0:15 */ - u16 selector; /**< @brief GDT selector, use `X86_KERN_CS` */ + u16 selector; /**< @brief GDT selector, use kernel code segment */ u8 _rsvd0; /**< @brief always 0 */ u8 attr; #define X86_IDT_GATE_TASK32 0x5u diff --git a/arch/x86/include/arch/sched.h b/arch/x86/include/arch/sched.h index 0148888..14e4e92 100644 --- a/arch/x86/include/arch/sched.h +++ b/arch/x86/include/arch/sched.h @@ -20,13 +20,6 @@ #include #endif -/** - * @brief Arch dependent Task Control Block (x86 version). - * This is what's required for in-kernel task switching. - * Treat as a completely opaque type outside of the `arch/x86` directory. - */ -typedef struct x86_context tcb_t; - /** * @brief Arch dependent low level task switch routine (x86 version). * `new` must not be equal to `old` or the whole thing probably blows up. diff --git a/arch/x86/include/arch/segment.h b/arch/x86/include/arch/segment.h index 5dde937..b9f8b5b 100644 --- a/arch/x86/include/arch/segment.h +++ b/arch/x86/include/arch/segment.h @@ -4,15 +4,14 @@ #include -/** @brief Kernel code segment GDT selector */ -#define X86_KERN_CS 0x08 -/** @brief Kernel data segment GDT selector */ -#define X86_KERN_DS 0x10 - -/** @brief Userland code segment GDT selector */ -#define X86_USER_CS 0x18 -/** @brief Userland data segment GDT selector */ -#define X86_USER_DS 0x20 +#define X86_32_KERN_CS 0x10 +#define X86_32_USER_CS 0x18 +#define X86_64_KERN_CS 0x20 +#define X86_64_USER_CS 0x28 +#define X86_KERN_DS 0x30 +#define X86_USER_DS 0x38 +#define X86_KERN_TSS 0x40 +#define X86_USER_TSS 0x50 #ifndef _ASM_SOURCE diff --git a/arch/x86/include/i386/interrupt.h b/arch/x86/include/i386/interrupt.h index a52411e..d93e329 100644 --- a/arch/x86/include/i386/interrupt.h +++ b/arch/x86/include/i386/interrupt.h @@ -20,7 +20,7 @@ struct i386_hw_frame { /* * On i386, these two are only pushed when entering from another * (i.e. 
lower) privilege level, i.e. only when the privilege level actually
	 * changes. This effectively means they don't
-	 * exist unless the CS above is equal to X86_USER_CS (arch/segment.h).
+	 * exist unless the CS above is equal to X86_32_USER_CS (arch/segment.h).
	 */
	u32 user_esp;
	u16 user_ss; u16 _pad1;
diff --git a/arch/x86/include/i386/sched.h b/arch/x86/include/i386/sched.h
index edbb743..ee69b83 100644
--- a/arch/x86/include/i386/sched.h
+++ b/arch/x86/include/i386/sched.h
@@ -26,6 +26,13 @@ struct x86_context {
 	register_t ebp;
 } __packed;
 
+/**
+ * @brief Arch dependent Task Control Block (i386 version).
+ * This is what's required for in-kernel task switching.
+ * Treat as a completely opaque type outside of the `arch/x86` directory.
+ */
+typedef struct x86_context tcb_t;
+
 /*
  * This file is part of GayBSD.
  * Copyright (c) 2021 fef .
diff --git a/arch/x86/mm/amd64/page.c b/arch/x86/mm/amd64/page.c
new file mode 100644
index 0000000..90f0dd6
--- /dev/null
+++ b/arch/x86/mm/amd64/page.c
@@ -0,0 +1,153 @@
+/* See the end of this file for copyright and license terms. */
+
+#include 
+#include 
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include 
+
+/* from linker script */
+extern void _image_start_phys;
+extern void _image_end_phys;
+
+__asmlink x86_pt_t _pt0;
+__asmlink x86_pd_t _pd0;
+__asmlink x86_pdp_t _pdp0;
+__asmlink x86_pml4_t _pml4;
+
+int map_page(uintptr_t phys, void *virt, enum pflags flags)
+{
+	flags |= P_PRESENT;
+	x86_pml4e_t *pml4e = X86_PML4E(virt);
+	if (!pml4e->flags.present) {
+		void *page = get_pages(0, M_ATOMIC);
+		if (page == nil)
+			return -ENOMEM;
+		pml4e->val = __p(page) | P_PRESENT | P_RW;
+	}
+
+	/* XXX unfinished: the PDP, PD and PT levels still need to be walked
+	 * like the PML4 above, and the final entry written as (phys | flags),
+	 * before this can claim to have actually mapped anything */
+	return 0;
+}
+
+/*
+ * The only difference between this and map_page() is that we can't allocate
+ * new pages using get_pages() but have to use __early_get_page() instead here.
+ * So, all we need to do is ensure that map_page() doesn't need to allocate new
+ * pages when we call it, which it only does if pflags does not have P_HUGE
+ * set and the page table doesn't exist (present bit in the page directory is
+ * clear). Therefore, we just need to make sure that, if P_HUGE is *not*
+ * set, the page table is already allocated and marked as present in the page
+ * directory.
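+ *
+ * A minimal sketch of that precondition (illustrative only; it assumes
+ * __early_get_page() hands out the physical address of a zeroed page):
+ *
+ *	x86_pde_t *pde = X86_PDE(virt);
+ *	if (!(pflags & P_HUGE) && !pde->flags.present)
+ *		pde->val = __early_get_page() | P_PRESENT | P_RW;
+ *	map_page(phys, virt, pflags);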
+ */
+void __early_map_page(uintptr_t phys, void *virt, enum pflags pflags)
+{
+	/* TODO: not implemented yet, see the comment above */
+}
+
+uintptr_t unmap_page(void *virt)
+{
+	/* TODO: not implemented yet */
+	return 0;
+}
+
+enum pflags get_pflags(void *page)
+{
+	/* TODO: not implemented yet */
+	return 0;
+}
+
+int set_pflags(void *page, enum pflags pflags)
+{
+	/* TODO: not implemented yet */
+	return 0;
+}
+
+void x86_isr_page_fault(trap_frame_t *frame, u32 error_code)
+{
+	void *address;
+	__asm__ volatile(
+"	mov %%cr2, %0	\n"
+	: "=r"(address)
+	:
+	);
+
+	const char *space;
+	if (error_code & X86_PF_USER)
+		space = "user";
+	else
+		space = "kernel";
+
+	const char *rwx;
+	if (error_code & X86_PF_WRITE)
+		rwx = "write to";
+	else if (error_code & X86_PF_INSTR)
+		rwx = "exec at";
+	else
+		rwx = "read from";
+
+	const char *present;
+	if (error_code & X86_PF_PRESENT)
+		present = "";
+	else
+		present = " non-mapped";
+
+	kprintf("\n########## B O N K ##########\n");
+	kprintf("Illegal %s %s%s address %p!\n", space, rwx, present, address);
+	print_regs(frame);
+	panic("Page fault");
+}
+
+uintptr_t vtophys(void *virt)
+{
+	x86_pml4e_t *pml4e = X86_PML4E(virt);
+	if (!pml4e->flags.present)
+		return 0;
+
+	x86_pdpe_t *pdpe = X86_PDPE(virt);
+	if (!pdpe->flags.present)
+		return 0;
+	if (pdpe->flags.huge) {
+		uintptr_t phys_base = pdpe->val & X86_PMAP_MASK;
+		return phys_base + ((uintptr_t)virt % (1 << X86_PDP_SHIFT));
+	}
+
+	x86_pde_t *pde = X86_PDE(virt);
+	if (!pde->flags.present)
+		return 0;
+	if (pde->flags.huge) {
+		uintptr_t phys_base = pde->val & X86_PMAP_MASK;
+		return phys_base + ((uintptr_t)virt % (1 << X86_PD_SHIFT));
+	}
+
+	x86_pte_t *pte = X86_PTE(virt);
+	if (!pte->flags.present)
+		return 0;
+	uintptr_t phys_base = pte->val & X86_PMAP_MASK;
+	return phys_base + ((uintptr_t)virt % (1 << X86_PT_SHIFT));
+}
+
+void vm_flush(void)
+{
+	register_t tmp;
+	__asm__ volatile(
+"	mov %%cr3, %0	\n"
+"	mov %0, %%cr3	\n"
+	: "=r"(tmp)
+	:
+	: "memory"
+	);
+}
+
+/*
+ * This file is part of GayBSD.
+ * Copyright (c) 2021 fef .
+ *
+ * GayBSD is nonviolent software: you may only use, redistribute, and/or
+ * modify it under the terms of the Cooperative Nonviolent Public License
+ * (CNPL) as found in the LICENSE file in the source code root directory
+ * or at ; either version 7
+ * of the license, or (at your option) any later version.
+ *
+ * GayBSD comes with ABSOLUTELY NO WARRANTY, to the extent
+ * permitted by applicable law. See the CNPL for details.
+ */
diff --git a/arch/x86/mm/page.c b/arch/x86/mm/i386/page.c
similarity index 100%
rename from arch/x86/mm/page.c
rename to arch/x86/mm/i386/page.c
diff --git a/arch/x86/mm/segment.S b/arch/x86/mm/segment.S
index bffb9f1..9a9faeb 100644
--- a/arch/x86/mm/segment.S
+++ b/arch/x86/mm/segment.S
@@ -1,54 +1,68 @@
 /* See the end of this file for copyright and license terms.
*/ #include +#include #include -ASM_ENTRY(x86_replace_gdt) - push %ebp - mov %esp, %ebp + .section .multiboot.data, "a", @progbits - lgdt x86_gdt_desc - - ljmp $(X86_KERN_CS), $1f -1: movl $(X86_KERN_DS), %eax - movw %ax, %ds - movw %ax, %es - movw %ax, %fs - movw %ax, %gs - movw %ax, %ss - - pop %ebp - ret -ASM_END(x86_replace_gdt) + .align 8 + .long 0 + .word 0 +DATA(_x86_gdt_desc_phys) + .word _x86_gdt_end - _x86_gdt - 1 /* limit */ + .quad _x86_gdt - KERNBASE /* base */ +END(_x86_gdt_desc_phys) .data - .align 4 - .word 0 /* padding */ -x86_gdt_desc: - .word x86_gdt_end - x86_gdt - 1 /* limit */ - .long x86_gdt /* base */ + .align 8 + .long 0 /* padding */ + .word 0 /* another padding :) */ +DATA(_x86_gdt_desc) + .word _x86_gdt_end - _x86_gdt - 1 /* limit */ + .quad _x86_gdt /* base */ +END(_x86_gdt_desc) /* - * TODO: The GDT entry structure is so fundamentally fucked up that i gave - * up writing an encoder for it half way through, so we just use these - * hardcoded values for now. They were generated using the code on - * - * and assign the entire 4 GiB region for both code and data as well as - * kernel and user mode. Even the Intel manual says you're not supposed - * to use segmentation anymore and just rely on paging for memory - * protection instead, so we gladly accept their advice. + * These entries were generated using the GDT generator tool in + * arch/x86/tools/gdt.c and are the same on both i386 and amd64. + * + * AMD did the only sane thing and reduced segmentation to its minimum + * in 64-bit mode, where the base and limit values are ignored (64-bit + * segments always refer to the entire linear memory). They still have + * their base and limit set to the entire 32-bit address space though, + * because the CPU makes one last 32-bit segment check when jumping + * from 32-bit to 64-bit mode. + * + * The base values for the two TSS entries are inserted in the assembly + * setup routine (see arch/x86/boot/setup{32,64}.S). */ .align 8 -x86_gdt: - .quad 0x0000000000000000 /* 0x00 null descriptor */ - .quad 0x00cf9a000000ffff /* 0x08 kernel code, full 4 GiB */ - .quad 0x00cf92000000ffff /* 0x10 kernel data, full 4 GiB */ - .quad 0x00cffa000000ffff /* 0x18 user code, full 4 GiB */ - .quad 0x00cff2000000ffff /* 0x20 user data, full 4 GiB */ -x86_gdt_end: - .size x86_gdt, . - x86_gdt +DATA(_x86_gdt) + .quad 0x0000000000000000 /* 0x00 null descriptor */ + .quad 0x0000000000000000 /* 0x08 unused */ + .quad 0x00cf9a000000ffff /* 0x10 kernel code 32-bit */ + .quad 0x00cffa000000ffff /* 0x18 user code 32-bit */ + .quad 0x00af9a000000ffff /* 0x20 kernel code 64-bit */ + .quad 0x00affa000000ffff /* 0x28 user code 64-bit */ + .quad 0x00cf92000000ffff /* 0x30 kernel data */ + .quad 0x00cff2000000ffff /* 0x38 user data */ + .quad 0x0040890000000068 /* 0x40 kernel TSS */ + .quad 0x0000000000000000 /* 0x48 .. pad for 64-bit */ + .quad 0x0040e90000000068 /* 0x50 user TSS */ + .quad 0x0000000000000000 /* 0x58 .. pad for 64-bit */ +END(_x86_gdt) +DATA(_x86_gdt_end) +END(_x86_gdt_end) + +DATA(_x86_kern_tss) + .skip 0x68 +END(_x86_kern_tss) +DATA(_x86_user_tss) + .skip 0x68 +END(_x86_user_tss) /* * This file is part of GayBSD. diff --git a/arch/x86/sys/amd64/idt.S b/arch/x86/sys/amd64/idt.S new file mode 100644 index 0000000..d6c84e7 --- /dev/null +++ b/arch/x86/sys/amd64/idt.S @@ -0,0 +1,39 @@ +/* See the end of this file for copyright and license terms. 
*/ + +#include + +#include + + .text + +/* void x86_load_idt(void) */ +ASM_ENTRY(x86_load_idt) + lidt x86_idt_desc + ret +ASM_END(x86_load_idt) + + .data + + .extern x86_idt + + .align 8 + .long 0 /* padding */ + .word 0 /* ... and another padding */ +x86_idt_desc: + .word X86_INTR_COUNT * 16 - 1 /* limit (size in bytes - 1) */ + .quad x86_idt /* base */ + .size x86_idt_desc, . - x86_idt_desc + +/* + * This file is part of GayBSD. + * Copyright (c) 2021 fef . + * + * GayBSD is nonviolent software: you may only use, redistribute, and/or + * modify it under the terms of the Cooperative Nonviolent Public License + * (CNPL) as found in the LICENSE file in the source code root directory + * or at ; either version 7 + * of the license, or (at your option) any later version. + * + * GayBSD comes with ABSOLUTELY NO WARRANTY, to the extent + * permitted by applicable law. See the CNPL for details. + */ diff --git a/arch/x86/sys/amd64/irq.S b/arch/x86/sys/amd64/irq.S new file mode 100644 index 0000000..d92dd6a --- /dev/null +++ b/arch/x86/sys/amd64/irq.S @@ -0,0 +1,105 @@ +/* See the end of this file for copyright and license terms. */ + +#include + +#include + +#include + +/* + * XXX We should probably move to exclusively using the APIC on amd64 + * (and i386 too, because the APIC was introduced with the 486 and we don't + * support anything below 686 anyway) + */ + + .data + + /* void (*irq_table[NUM_IRQ])(void); */ + .extern irq_table + + /* there is probably a fancy CPU feature for this, but idk */ +irq_count: + .long 0 + + .text + +/* bool in_irq(void); */ +ASM_ENTRY(in_irq) + movabsq $irq_count, %rdx + xor %eax, %eax + mov %eax, %ecx + not %ecx + testl %ecx, (%rdx) + setne %al + retq +ASM_END(in_irq) + +.macro gen_irq num +ASM_ENTRY(_x86_isr_irq\num ) + push %rax + push %rcx + push %rdx + push %rdi + push %rsi + push %r8 + push %r9 + push %r10 + push %r11 + movabsq $irq_count, %rax + incl (%rax) +#if CFG_DEBUG_IRQ + movl $\num, %edi +#endif + movabsq $(irq_table + \num * 8), %rax + callq *%rax + jmp leave_irq +ASM_END(_x86_isr_irq\num ) +.endm + + gen_irq 0 + gen_irq 1 + /* IRQ 2 is for cascading from PIC2 to PIC1 */ + gen_irq 3 + gen_irq 4 + gen_irq 5 + gen_irq 6 + gen_irq 7 + gen_irq 8 + gen_irq 9 + gen_irq 10 + gen_irq 11 + gen_irq 12 + gen_irq 13 + gen_irq 14 + gen_irq 15 + + .align 4 +leave_irq: + movabsq $irq_count, %rax + decl (%rax) + pop %r11 + pop %r10 + pop %r9 + pop %r8 + pop %rsi + pop %rdi + pop %rdx + pop %rcx + pop %rax + iretq + + .size leave_irq, . - leave_irq + +/* + * This file is part of GayBSD. + * Copyright (c) 2021 fef . + * + * GayBSD is nonviolent software: you may only use, redistribute, and/or + * modify it under the terms of the Cooperative Nonviolent Public License + * (CNPL) as found in the LICENSE file in the source code root directory + * or at ; either version 7 + * of the license, or (at your option) any later version. + * + * GayBSD comes with ABSOLUTELY NO WARRANTY, to the extent + * permitted by applicable law. See the CNPL for details. + */ diff --git a/arch/x86/sys/amd64/switch.S b/arch/x86/sys/amd64/switch.S new file mode 100644 index 0000000..b034b3d --- /dev/null +++ b/arch/x86/sys/amd64/switch.S @@ -0,0 +1,67 @@ +/* See the end of this file for copyright and license terms. 
*/
+
+#include 
+
+/*
+ * Alright, a lot of stuff that is not immediately obvious to someone who hasn't
+ * done this sort of thing before is going on here, and since i'm totally new to
+ * x86 myself, here is some excessive documentation of the entire process (which
+ * will hopefully also help other newcomers in understanding the actual switching
+ * mechanism). I think the main reason this particular function might seem a
+ * little confusing is that it returns to a different place than where it came
+ * from, which is kind of the whole point if you think about it.
+ *
+ * This routine is called from within kernel space, and will perform a switch to
+ * another task that also runs in kernel space. So, this has nothing to do with
+ * changing ring levels. When another task switches back to the original task,
+ * that original task just returns to where it called this function as if
+ * nothing happened in the meantime.
+ * As per the System V amd64 ABI, the two arguments `new' and `old' are passed
+ * using the scratch registers %rdi and %rsi respectively, and the return
+ * address is stored on the stack.
+ *
+ * What we need to do now is store all callee-saved registers (which critically
+ * include the stack pointer) into `old', set their values to the ones from
+ * `new' (again, including the stack pointer) and then just return.
+ * The new stack pointer will point to the same kind of stack layout, but this
+ * time that of the new task we are going to switch to. Since the stack also
+ * includes the %rip from when the new task called arch_switch_to(), we
+ * automatically switch to that task when returning.
+ */
+
+	.text
+
+/* void arch_switch_to(tcb_t *new, tcb_t *old); */
+ASM_ENTRY(arch_switch_to)
+	movq %rsp, (%rsi)	/* old->rsp = %rsp */
+	movq %rbx, 8(%rsi)	/* old->rbx = %rbx */
+	movq %rbp, 16(%rsi)	/* ... */
+	movq %r12, 24(%rsi)
+	movq %r13, 32(%rsi)
+	movq %r14, 40(%rsi)
+	movq %r15, 48(%rsi)
+
+	movq (%rdi), %rsp	/* %rsp = new->rsp */
+	movq 8(%rdi), %rbx	/* %rbx = new->rbx */
+	movq 16(%rdi), %rbp	/* ... */
+	movq 24(%rdi), %r12
+	movq 32(%rdi), %r13
+	movq 40(%rdi), %r14
+	movq 48(%rdi), %r15
+
+	retq
+ASM_END(arch_switch_to)
+
+/*
+ * This file is part of GayBSD.
+ * Copyright (c) 2021 fef .
+ *
+ * GayBSD is nonviolent software: you may only use, redistribute, and/or
+ * modify it under the terms of the Cooperative Nonviolent Public License
+ * (CNPL) as found in the LICENSE file in the source code root directory
+ * or at ; either version 7
+ * of the license, or (at your option) any later version.
+ *
+ * GayBSD comes with ABSOLUTELY NO WARRANTY, to the extent
+ * permitted by applicable law. See the CNPL for details.
+ */
diff --git a/arch/x86/sys/amd64/systm.c b/arch/x86/sys/amd64/systm.c
new file mode 100644
index 0000000..bea0b8c
--- /dev/null
+++ b/arch/x86/sys/amd64/systm.c
@@ -0,0 +1,34 @@
+/* See the end of this file for copyright and license terms.
*/
+
+#include 
+
+#include 
+#include 
+
+void print_regs(const struct amd64_trap_frame *ctx)
+{
+	kprintf("RIP    = %#x:%#016lx\n", ctx->hw_frame.cs, ctx->hw_frame.rip);
+	kprintf("RFLAGS = %#016lx\n", ctx->hw_frame.rflags);
+	kprintf("RAX = %#016lx    RDI = %#016lx\n", ctx->rax, ctx->rdi);
+	kprintf("RBX = %#016lx    RSI = %#016lx\n", ctx->rbx, ctx->rsi);
+	kprintf("RCX = %#016lx    RSP = %#016lx\n", ctx->rcx, ctx->hw_frame.rsp);
+	kprintf("RDX = %#016lx    RBP = %#016lx\n", ctx->rdx, ctx->rbp);
+	kprintf("R8  = %#016lx    R12 = %#016lx\n", ctx->r8, ctx->r12);
+	kprintf("R9  = %#016lx    R13 = %#016lx\n", ctx->r9, ctx->r13);
+	kprintf("R10 = %#016lx    R14 = %#016lx\n", ctx->r10, ctx->r14);
+	kprintf("R11 = %#016lx    R15 = %#016lx\n", ctx->r11, ctx->r15);
+}
+
+/*
+ * This file is part of GayBSD.
+ * Copyright (c) 2021 fef .
+ *
+ * GayBSD is nonviolent software: you may only use, redistribute, and/or
+ * modify it under the terms of the Cooperative Nonviolent Public License
+ * (CNPL) as found in the LICENSE file in the source code root directory
+ * or at ; either version 7
+ * of the license, or (at your option) any later version.
+ *
+ * GayBSD comes with ABSOLUTELY NO WARRANTY, to the extent
+ * permitted by applicable law. See the CNPL for details.
+ */
diff --git a/arch/x86/sys/amd64/trap.S b/arch/x86/sys/amd64/trap.S
new file mode 100644
index 0000000..2fb9c97
--- /dev/null
+++ b/arch/x86/sys/amd64/trap.S
@@ -0,0 +1,142 @@
+/* See the end of this file for copyright and license terms. */
+
+#include 
+
+#include 
+
+	.text
+
+/*
+ * push all registers except %rsi to the stack, and store a pointer to the
+ * struct amd64_trap_frame in %rdi (%rsi is assumed to have already been pushed,
+ * right after the hardware frame).
+ * The pointer is stored in %rdi because that's where you put the first
+ * parameter for function calls as per the amd64 System V ABI.
+ */
+.macro prepare_trap_entry
+	pushq %rdi
+	movq %rsp, %rdi
+	/* 16 bytes for %rsi and %rdi, plus sizeof(struct amd64_hw_frame) */
+	subq $56, %rdi
+
+	pushq %rax
+	pushq %rbx
+	pushq %rcx
+	pushq %rdx
+	pushq %rbp
+	pushq %r8
+	pushq %r9
+	pushq %r10
+	pushq %r11
+	pushq %r12
+	pushq %r13
+	pushq %r14
+	pushq %r15
+.endm
+
+.macro prepare_trap_leave
+	popq %r15
+	popq %r14
+	popq %r13
+	popq %r12
+	popq %r11
+	popq %r10
+	popq %r9
+	popq %r8
+	popq %rbp
+	popq %rdx
+	popq %rcx
+	popq %rbx
+	popq %rax
+	popq %rdi
+.endm
+
+/*
+ * Low level trap entry points, this is what gets put into the IDT.
+ *
+ * Hardware automatically pushes %ss, %rsp, RFLAGS, %cs, %rip, and (optionally)
+ * an error code to the stack. The rest is pushed by software, one register at
+ * a time (amd64 has no pushal). I wanted to use the same context save struct
+ * for both kinds of exceptions (the ones that push an error code and the ones
+ * that don't), so the entry stubs below massage the stack into the same layout
+ * either way before it is interpreted as a struct amd64_trap_frame.
+ */
+
+.macro gen_isr_noerror name
+	.extern x86_isr_\name
+ASM_ENTRY(_x86_isr_\name )
+	cld
+	pushq %rsi
+	prepare_trap_entry
+
+	callq x86_isr_\name
+
+	prepare_trap_leave
+	popq %rsi
+	iretq
+ASM_END(_x86_isr_\name )
+.endm
+
+/*
+ * This is for traps that additionally push an error code to the stack before
+ * entering the handler. The thing with that error code is that it is *not*
+ * popped from the stack when leaving again, so we need to account for that
+ * difference ourselves.
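+ *
+ * To illustrate, right at the entry stub the stack looks like this
+ * (higher addresses first, one 8-byte slot per line):
+ *
+ *	without error code	with error code
+ *	%ss			%ss
+ *	%rsp			%rsp
+ *	RFLAGS			RFLAGS
+ *	%cs			%cs
+ *	%rip			%rip
+ *	(saved %rsi)		error code	<- %rsp points here
+ *
+ * In the left column the stub pushes %rsi itself; in the right column the
+ * xchgq below stores %rsi in the error code's slot and moves the error code
+ * into %rsi, which doubles as the second argument register of the System V
+ * amd64 ABI.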
+ */ +.macro gen_isr_error name + .extern x86_isr_\name +ASM_ENTRY(_x86_isr_\name ) + cld + + /* + * Rather than pushing %rsi, we exchange it with the error code that + * was pushed by hardware in addition to the regular stack frame. + * This produces the same stack layout as the ISRs that don't push an + * error code, and additionally moves said code into the appropriate + * register for passing it as the second argument to the handler. + */ + xchgq %rsi, (%rsp) + prepare_trap_entry + + callq x86_isr_\name + + prepare_trap_leave + popq %rsi + iretq +ASM_END(_x86_isr_\name ) +.endm + +gen_isr_noerror divide_error +gen_isr_noerror debug_exception +gen_isr_noerror nmi +gen_isr_noerror breakpoint +gen_isr_noerror overflow +gen_isr_noerror bound_range_exceeded +gen_isr_noerror invalid_opcode +gen_isr_noerror device_not_available +gen_isr_error double_fault +gen_isr_error invalid_tss +gen_isr_error segment_not_present +gen_isr_error stack_segment_fault +gen_isr_error general_protection +gen_isr_error page_fault +gen_isr_noerror x87_fpu_error +gen_isr_error alignment_check +gen_isr_noerror machine_check +gen_isr_noerror simd_floating_point_exception +gen_isr_noerror virtualization_exception +gen_isr_error control_protection_exception + +/* + * This file is part of GayBSD. + * Copyright (c) 2021 fef . + * + * GayBSD is nonviolent software: you may only use, redistribute, and/or + * modify it under the terms of the Cooperative Nonviolent Public License + * (CNPL) as found in the LICENSE file in the source code root directory + * or at ; either version 7 + * of the license, or (at your option) any later version. + * + * GayBSD comes with ABSOLUTELY NO WARRANTY, to the extent + * permitted by applicable law. See the CNPL for details. + */ diff --git a/arch/x86/sys/i386/systm.c b/arch/x86/sys/i386/systm.c index e32a21f..b23e0d8 100644 --- a/arch/x86/sys/i386/systm.c +++ b/arch/x86/sys/i386/systm.c @@ -9,7 +9,7 @@ void print_regs(const struct i386_trap_frame *ctx) { u32 esp; - if (ctx->hw_frame->cs == X86_USER_CS) + if (ctx->hw_frame->cs == X86_32_USER_CS) esp = ctx->hw_frame->user_esp; else esp = ctx->esp - 3 * 4; /* eip, cs, eflags */ diff --git a/arch/x86/sys/interrupt.c b/arch/x86/sys/interrupt.c index 244a734..4681547 100644 --- a/arch/x86/sys/interrupt.c +++ b/arch/x86/sys/interrupt.c @@ -45,7 +45,7 @@ void x86_setup_interrupts(void) void x86_set_gate(u8 vector, void (*handler)(void), u8 flags) { struct x86_idt_entry *entry = &x86_idt[vector]; - entry->selector = X86_KERN_CS; + entry->selector = X86_32_KERN_CS; entry->_rsvd0 = 0; entry->attr = flags; diff --git a/cmake/config-x86.cmake b/cmake/config-x86.cmake index 33b84f0..c314adb 100644 --- a/cmake/config-x86.cmake +++ b/cmake/config-x86.cmake @@ -5,7 +5,7 @@ set_property(CACHE BOOT_TYPE PROPERTY STRINGS "bios" ) -option(CFG_X86_64 "64-bit kernel (32-bit support is fundamentally broken)" OFF) +option(CFG_X86_64 "64-bit kernel (32-bit support is fundamentally broken)" ON) if(CFG_X86_64) set(X86_ARCH amd64) else() diff --git a/cmake/config.cmake b/cmake/config.cmake index 1326fd5..0d0a6b5 100644 --- a/cmake/config.cmake +++ b/cmake/config.cmake @@ -8,7 +8,7 @@ set_property(CACHE ARCH PROPERTY STRINGS ) include("${CMAKE_CURRENT_LIST_DIR}/config-${ARCH}.cmake") -set(CFG_KERN_ORIGIN "0x00100000" CACHE STRING "Physical address where the kernel is loaded (don't touch this)") +set(CFG_KERN_ORIGIN "0x00400000" CACHE STRING "Physical address where the kernel is loaded (don't touch this)") option(CFG_POISON_PAGES "Poison pages after allocate and free" 
ON)
diff --git a/doc/amd64/memory.md b/doc/amd64/memory.md
new file mode 100644
index 0000000..5f843f8
--- /dev/null
+++ b/doc/amd64/memory.md
@@ -0,0 +1,50 @@
+# Virtual Memory Layout on amd64
+
+GayBSD's virtual memory map is based on the one from FreeBSD.
+The only difference is that areas for features which aren't implemented (yet)
+are unused.
+We do this because even though GayBSD is supposed to be an OS written mostly
+from scratch, my gut feeling somehow tells me it's better that way.
+Also, i trust the FreeBSD people have put way more thought into this than i
+probably could have ever done with my limited knowledge about the x86.
+
+The size specifiers here are powers of two (1 KB = 1024 B).
+
+ start address      | offset       | end address         | size       | description
+:------------------:|-------------:|:-------------------:|-----------:|:----------------------------
+`00000000 00000000` | +0           | `00007fff ffffffff` | 128 TB     | userland area
+`00008000 00000000` | +128 TB      | `ffff7fff ffffffff` | ~ 16.8M TB | **huge ass hole**
+`ffff8000 00000000` | -128 TB      | `ffff8040 20100fff` | ~ 0.25 TB  | recursive page table
+`ffff8040 20101000` | ~ -127.75 TB | `fffff7ff ffffffff` | ~ 119 TB   | unused
+`fffff800 00000000` | -8 TB        | `fffffbff ffffffff` | 4 TB       | linear physical memory
+`fffffc00 00000000` | -4 TB        | `fffffdff ffffffff` | 2 TB       | unused
+`fffffe00 00000000` | -2 TB        | `ffffffff ffffffff` | 2 TB       | kernel area
+
+Kernel address space starts counting from the end of virtual memory space
+downwards, therefore the offsets are negative.
+Likewise, user space starts from the beginning, meaning positive offsets.
+
+The **huge ass hole** between user and kernel space is specified in _million TB_
+because i think it gives a better overview of the size ratios between these
+individual areas than just writing EB.
+It also kind of makes you appreciate the sheer vastness of 64-bit address space.
+
+Kernel space addresses start at `0xffff800000000000` because the MMU "only"
+supports 48-bit linear addresses.
+The way i've understood it, the Intel spec says the 17 MSBs of virtual
+addresses must all be the same (i.e. copies of bit 47), but are otherwise
+ignored during translation.
+So, as far as the MMU is concerned, the huge hole doesn't even exist:
+Userspace ranges from `0x000000000000~0x7fffffffffff`,
+and everything belonging to the kernel from `0x800000000000~0xffffffffffff`
+(note how the leading 0's/f's are missing, these are 48-bit values).
+
+The linear physical memory is a direct mapping of physical RAM, which is
+required because `kmalloc()` needs to be able to allocate *physically*
+contiguous memory for DMA transfers.
+
+The kernel image itself is loaded into physical memory at `0x00400000` by
+default, and the entire low 2 GB of physical memory are statically mapped to
+the end of virtual memory (-2 GB). That way, we can use `-mcmodel=kernel`,
+which lets the compiler reach every kernel symbol with sign-extended 32-bit
+immediates instead of full 64-bit address loads, yielding smaller and faster
+code.
+All kernel code resides within the -2 GB region.
\ No newline at end of file
diff --git a/include/gay/mm.h b/include/gay/mm.h
index 4045c1c..ef01c2a 100644
--- a/include/gay/mm.h
+++ b/include/gay/mm.h
@@ -22,6 +22,7 @@
 
 #include 
 #include 
+#include 
 #include 
 
 /**
@@ -214,6 +215,7 @@ static inline void *__v(uintptr_t phys)
 	if (phys > phys_end) {
 		kprintf("__v(%p): phys ptr out of range!\n", (void *)phys);
 		return nil;
+	}
 # endif
 	return (void *)phys + DMAP_OFFSET;
 }
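As a closing note on doc/amd64/memory.md: the canonical address rule described
there (the 17 MSBs must all be the same) can be expressed as a small helper
along these lines (an illustrative sketch, not part of the patch itself):

	#include <stdbool.h>
	#include <stdint.h>

	/* true iff addr is canonical under 48-bit virtual addressing */
	static inline bool is_canonical(uint64_t addr)
	{
		/* shift bits 47:0 up, then sign-extend bit 47 back down */
		return (uint64_t)((int64_t)(addr << 16) >> 16) == addr;
	}

For example, is_canonical(0xffff800000000000) and is_canonical(0x00007fffffffffff)
both hold, while is_canonical(0x0000800000000000) does not -- which is exactly
why the huge hole in the middle of the table above can never be addressed.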