Skip to content

Instantly share code, notes, and snippets.

@nuta
Created January 18, 2020 19:38
Show Gist options
  • Select an option

  • Save nuta/c8bc72e69e0082e9fe288dc5a5626a11 to your computer and use it in GitHub Desktop.

Select an option

Save nuta/c8bc72e69e0082e9fe288dc5a5626a11 to your computer and use it in GitHub Desktop.
diff --git a/kernel/arch/x64/arch.h b/kernel/arch/x64/arch.h
index eccd5bb..d766707 100644
--- a/kernel/arch/x64/arch.h
+++ b/kernel/arch/x64/arch.h
@@ -43,4 +43,16 @@ static inline paddr_t into_paddr(void *addr) {
return ((vaddr_t) addr - KERNEL_BASE_ADDR);
}
+static inline unsigned get_cpu_id(void) {
+ return *((volatile uint32_t *) from_paddr(0xfee00020)) >> 24;
+}
+
+static inline bool is_bsp_cpu(void) {
+ return get_cpu_id() == 0;
+}
+
+// FIXME:
+#define IDLE_TASK (&rdgsbase()->idle_task)
+#define CURRENT (rdgsbase()->current_task)
+
#endif
diff --git a/kernel/arch/x64/arch.mk b/kernel/arch/x64/arch.mk
index 818f1e8..de6872d 100644
--- a/kernel/arch/x64/arch.mk
+++ b/kernel/arch/x64/arch.mk
@@ -1,3 +1,3 @@
kernel_objs += arch/x64/task.o arch/x64/vm.o arch/x64/printchar.o
kernel_objs += arch/x64/boot.o arch/x64/init.o arch/x64/interrupt.o
-kernel_objs += arch/x64/trap.o
+kernel_objs += arch/x64/trap.o arch/x64/mp.o
diff --git a/kernel/arch/x64/boot.S b/kernel/arch/x64/boot.S
index 1410235..96b9472 100644
--- a/kernel/arch/x64/boot.S
+++ b/kernel/arch/x64/boot.S
@@ -87,6 +87,51 @@ write_pd_entry:
loop write_pd_entry
jmp enable_long_mode
+//
+// AP boot code. mpboot is located at 0x5000 (copied to the address by the
+// kernel later).
+//
+.code16
+.global mpboot, mpboot_end
+mpboot:
+ cli
+ mov ax, 0
+ mov ds, ax
+ mov es, ax
+
+ // Load GDT and enable Protected Mode.
+ lgdt [0x5f00 /* FIXME: Define a macro instead. */]
+ mov eax, cr0
+ or eax, 1
+ mov cr0, eax
+
+ // jmp 24:mpboot32
+ .byte 0xea
+ .word mpboot32 - mpboot + 0x5000
+ .byte 0x18, 0x00
+
+.code32
+mpboot32:
+ mov ax, 16
+ mov ds, ax
+ mov es, ax
+ mov ss, ax
+
+ //
+ // Set the boot (later reused for the cpu-local idle thread) stack:
+ //
+ // ESP = 0xa00000 + (cpu_id * PAGE_SIZE).
+ //
+ mov eax, [0xfee00020] // Get the Local APIC ID.
+ shr eax, 24
+ shl eax, 14 // FIXME: shl 14 multiplies by 16384; the formula above needs * PAGE_SIZE (4096, i.e. shl 12)
+ add eax, 0xa04000 // FIXME:
+ xchg bx,bx
+ mov esp, eax
+
+ lea eax, [enable_long_mode]
+ jmp eax
+mpboot_end:
//
// Common boot code for both BSP and APs.
@@ -123,6 +168,7 @@ enable_long_mode:
// Temporary GDTR/GDT entries. This must be located in the .boot section as its
// address (gdt) must be physical to load.
.align 16
+.global boot_gdtr
boot_gdtr:
.word gdt_end - gdt - 1
.quad gdt
@@ -157,11 +203,23 @@ long_mode_in_low_address:
.code64
.text
long_mode:
+ // Determine the current CPU is BSP or AP.
+ mov edi, 0xfee00020
+ mov eax, [edi]
+ shr eax, 24
+ test eax, eax
+ jz setup_bsp
+setup_ap:
+ lea rax, [rip + mpinit]
+ call rax
+ jmp halt
+
+setup_bsp:
// The kernel no longer access a virtual address around 0x0000_0000. Unmap
// the area to catch bugs (especially NULL pointer dereferences in the
// kernel).
mov rdi, 0x0700000
- mov dword ptr [rdi], 0
+ // FIXME: we still need this on APs! mov dword ptr [rdi], 0
// Clear .bss section
mov al, 0x00
diff --git a/kernel/arch/x64/init.c b/kernel/arch/x64/init.c
index e1288c0..705ce00 100644
--- a/kernel/arch/x64/init.c
+++ b/kernel/arch/x64/init.c
@@ -6,46 +6,45 @@
#include "trap.h"
#include "x64.h"
-static struct gdt gdt;
-static struct idt idt;
-struct tss tss;
-
static void gdt_init(void) {
- uint64_t tss_addr = (uint64_t) &tss;
- gdt.null = 0x0000000000000000;
- gdt.kernel_cs = 0x00af9a000000ffff;
- gdt.kernel_ds = 0x00af92000000ffff;
- gdt.user_cs32 = 0x0000000000000000;
- gdt.user_cs64 = 0x00affa000000ffff;
- gdt.user_ds = 0x008ff2000000ffff;
- gdt.tss_low =
+ uint64_t tss_addr = (uint64_t) &rdgsbase()->tss;
+ struct gdt *gdt = &rdgsbase()->gdt;
+ gdt->null = 0x0000000000000000;
+ gdt->kernel_cs = 0x00af9a000000ffff;
+ gdt->kernel_ds = 0x00af92000000ffff;
+ gdt->user_cs32 = 0x0000000000000000;
+ gdt->user_cs64 = 0x00affa000000ffff;
+ gdt->user_ds = 0x008ff2000000ffff;
+ gdt->tss_low =
0x0000890000000000 | sizeof(struct tss) | ((tss_addr & 0xffff) << 16)
| (((tss_addr >> 16) & 0xff) << 32) | (((tss_addr >> 24) & 0xff) << 56);
- gdt.tss_high = tss_addr >> 32;
+ gdt->tss_high = tss_addr >> 32;
// Update GDTR
struct gdtr gdtr;
- gdtr.laddr = (uint64_t) &gdt;
- gdtr.len = sizeof(gdt) - 1;
+ gdtr.laddr = (uint64_t) gdt;
+ gdtr.len = sizeof(*gdt) - 1;
asm_lgdt((uint64_t) &gdtr);
}
static void idt_init(void) {
+ struct idt *idt = &rdgsbase()->idt;
+
// Initialize IDT entries.
for (int i = 0; i < IDT_DESC_NUM; i++) {
uint64_t handler = (uint64_t) &interrupt_handlers[i];
- idt.descs[i].offset1 = handler & 0xffff;
- idt.descs[i].seg = KERNEL_CS;
- idt.descs[i].ist = IST_RSP0;
- idt.descs[i].info = IDT_INT_HANDLER;
- idt.descs[i].offset2 = (handler >> 16) & 0xffff;
- idt.descs[i].offset3 = (handler >> 32) & 0xffffffff;
- idt.descs[i].reserved = 0;
+ idt->descs[i].offset1 = handler & 0xffff;
+ idt->descs[i].seg = KERNEL_CS;
+ idt->descs[i].ist = IST_RSP0;
+ idt->descs[i].info = IDT_INT_HANDLER;
+ idt->descs[i].offset2 = (handler >> 16) & 0xffff;
+ idt->descs[i].offset3 = (handler >> 32) & 0xffffffff;
+ idt->descs[i].reserved = 0;
}
struct idtr idtr;
- idtr.laddr = (uint64_t) &idt;
- idtr.len = sizeof(idt) - 1;
+ idtr.laddr = (uint64_t) idt;
+ idtr.len = sizeof(*idt) - 1;
asm_lidt((uint64_t) &idtr);
}
@@ -66,11 +65,10 @@ static void pic_init(void) {
}
static void tss_init(void) {
- // Set RSP0 to 0 until the first context switch so that we can notice a
- // bug which occurs an exception during the boot.
- tss.rsp0 = 0;
- tss.iomap_offset = offsetof(struct tss, iomap);
- tss.iomap_last_byte = 0xff;
+ struct tss *tss = &rdgsbase()->tss;
+ tss->rsp0 = 0;
+ tss->iomap_offset = offsetof(struct tss, iomap);
+ tss->iomap_last_byte = 0xff;
asm_ltr(TSS_SEG);
}
@@ -107,6 +105,8 @@ static void calibrate_apic_timer(void) {
// Calibrate the APIC timer interval to invoke the timer interrupt every
// 1/TICK_HZ seconds.
uint64_t counts_per_tick = init_count - read_apic(APIC_REG_TIMER_CURRENT);
+ DBG("counts_per_tick = %d", counts_per_tick);
+ counts_per_tick = 10000; // FIXME:
write_apic(APIC_REG_TIMER_INITCNT, counts_per_tick);
}
@@ -129,37 +129,44 @@ static void apic_init(void) {
}
static void common_setup(void) {
- struct gsbase *gsbase = kmalloc(sizeof(*gsbase));
- ASSERT(gsbase);
+ struct gsbase *gsbase = from_paddr(0xd00000 + get_cpu_id() * ALIGN_UP(sizeof(*gsbase), PAGE_SIZE)); // FIXME: kmalloc
+ INFO("gsbase = %p, rbp = %p", gsbase, __builtin_frame_address(0));
- // Activate RDGSBASE/WRGSBASE instructions.
- asm_write_cr4(asm_read_cr4() | CR4_FSGSBASE);
- // Set RDGSBASE to enable the CPUVAR macro.
- asm_wrgsbase((uint64_t) gsbase);
- // Enable XSAVE/XRSTOR instructions.
- asm_write_cr4(asm_read_cr4() | CR4_OSXSAVE);
+ // Activate RDGSBASE/WRGSBASE and XSAVE/XRSTOR instructions.
+ asm_write_cr4(asm_read_cr4() | CR4_FSGSBASE | CR4_OSXSAVE);
// Set TS flag to issue an interrupt on use of floating-point registers
// during the kernel initialization.
asm_write_cr0(asm_read_cr0() | CR0_TS);
+ // Set RDGSBASE to enable the CPUVAR macro.
+ asm_wrgsbase((uint64_t) gsbase);
apic_init();
gdt_init();
tss_init();
idt_init();
apic_timer_init();
- pic_init();
syscall_init();
}
void init(void) {
+ lock();
serial_init();
+ pic_init();
+ common_setup();
kmain();
}
-void arch_init(void) {
+void mpinit(void) {
+ lock();
+ INFO("Booting CPU #%d...", get_cpu_id());
common_setup();
+ mpmain();
}
void arch_idle(void) {
+ // FIXME:
+ unlock();
asm_stihlt();
+ asm_cli();
+ lock();
}
diff --git a/kernel/arch/x64/interrupt.c b/kernel/arch/x64/interrupt.c
index f2ad1c3..8f54aa0 100644
--- a/kernel/arch/x64/interrupt.c
+++ b/kernel/arch/x64/interrupt.c
@@ -1,7 +1,9 @@
#include <memory.h>
#include <printk.h>
+#include <syscall.h>
#include <task.h>
#include "interrupt.h"
+#include "mp.h"
#include "task.h"
#include "trap.h"
#include "x64.h"
@@ -88,7 +90,13 @@ static void dump_regs(struct interrupt_regs *regs) {
printf("R14 = %p R15 = %p ERR = %p\n", regs->r14, regs->r15, regs->error);
}
-void handle_interrupt(uint8_t vec, struct interrupt_regs *regs) {
+void x64_handle_interrupt(uint8_t vec, struct interrupt_regs *regs) {
+ if (vec == VECTOR_IPI_HALT) {
+ halt_cpu();
+ }
+
+ lock();
+
switch (vec) {
case EXP_PAGE_FAULT: {
vaddr_t addr = asm_read_cr2();
@@ -114,6 +122,9 @@ void handle_interrupt(uint8_t vec, struct interrupt_regs *regs) {
switch_fpu();
break;
+ case VECTOR_IPI_RESCHEDULE:
+ task_switch();
+ break;
default:
if (vec <= 20) {
printf("Exception #%d\n", vec);
@@ -132,6 +143,16 @@ void handle_interrupt(uint8_t vec, struct interrupt_regs *regs) {
PANIC("Unexpected interrupt #%d", vec);
}
}
+
+ unlock();
+}
+
+uintmax_t x64_handle_syscall(uintmax_t arg1, uintmax_t arg2, uintmax_t arg3,
+ uintmax_t arg4, uintmax_t arg5, uintmax_t type) {
+ lock();
+ uint64_t ret = handle_syscall(arg1, arg2, arg3, arg4, arg5, type);
+ unlock();
+ return ret;
}
void interrupt_init(void) {
diff --git a/kernel/arch/x64/interrupt.h b/kernel/arch/x64/interrupt.h
index 1e18bb2..e9b4f55 100644
--- a/kernel/arch/x64/interrupt.h
+++ b/kernel/arch/x64/interrupt.h
@@ -30,7 +30,9 @@ struct interrupt_regs {
uint64_t ss;
} PACKED;
-void handle_interrupt(uint8_t vec, struct interrupt_regs *regs);
+void x64_handle_interrupt(uint8_t vec, struct interrupt_regs *regs);
+uintmax_t x64_handle_syscall(uintmax_t arg1, uintmax_t arg2, uintmax_t arg3,
+ uintmax_t arg4, uintmax_t arg5, uintmax_t type);
struct task;
error_t irq_set_owner(unsigned irq, struct task *task);
diff --git a/kernel/arch/x64/mp.c b/kernel/arch/x64/mp.c
new file mode 100644
index 0000000..ebdebb4
--- /dev/null
+++ b/kernel/arch/x64/mp.c
@@ -0,0 +1,235 @@
+#include "mp.h"
+#include <printk.h>
+#include <string.h>
+#include <x64.h>
+
+// Note: these symbols point to **physical** addresses.
+extern char boot_gdtr[];
+extern char mpboot[];
+extern char mpboot_end[];
+
+static unsigned num_cpus = 1;
+
+unsigned get_num_cpus(void) {
+ return num_cpus;
+}
+
+static void udelay(int usec) {
+ while (usec-- > 0) {
+ asm_in8(0x80);
+ }
+}
+
+static void send_ipi(uint8_t vector, enum ipi_dest dest, uint8_t dest_apic_id,
+ enum ipi_mode mode) {
+ uint64_t data =
+ ((uint64_t) dest_apic_id << 56)
+ | ((uint64_t) dest << 18)
+ | (1ULL << 14) // Level: assert
+ | ((uint64_t) mode << 8)
+ | vector;
+
+ write_apic(APIC_REG_ICR_HIGH, data >> 32);
+ write_apic(APIC_REG_ICR_LOW, data & 0xffffffff);
+}
+
+static struct mp_float_ptr *look_for_floatptr_table(paddr_t start,
+ paddr_t end) {
+ vaddr_t end_vaddr = (vaddr_t) from_paddr(end);
+ for (uint32_t *p = from_paddr(start); (vaddr_t) p < end_vaddr; p++) {
+ if (*p == MP_FLOATPTR_SIGNATURE) {
+ return (struct mp_float_ptr *) p;
+ }
+ }
+
+ return NULL;
+}
+
+static paddr_t ioapic_paddr;
+
+static uint32_t ioapic_read(uint8_t reg) {
+ *((uint32_t *) from_paddr(ioapic_paddr)) = reg;
+ return *((uint32_t *) from_paddr(ioapic_paddr + 0x10));
+}
+
+static void ioapic_write(uint8_t reg, uint32_t data) {
+ *((uint32_t *) from_paddr(ioapic_paddr)) = reg;
+ *((uint32_t *) from_paddr(ioapic_paddr + 0x10)) = data;
+}
+
+static void ioapic_enable_irq(uint8_t vector, uint8_t irq) {
+ ioapic_write(IOAPIC_REG_NTH_IOREDTBL_HIGH(irq), 0);
+ ioapic_write(IOAPIC_REG_NTH_IOREDTBL_LOW(irq), vector);
+}
+
+void enable_irq(uint8_t irq) {
+ ioapic_enable_irq(VECTOR_IRQ_BASE + irq, irq);
+}
+
+void ack_irq(void) {
+ write_apic(APIC_REG_EOI, 0);
+}
+
+static void ioapic_init(paddr_t ioapic_addr) {
+ int max;
+ ioapic_paddr = ioapic_addr;
+
+ // symmetric I/O mode
+ asm_out8(0x22, 0x70);
+ asm_out8(0x23, 0x01);
+
+ // get the maximum number of entries in IOREDTBL
+ max = (int) (ioapic_read(IOAPIC_REG_IOAPICVER) >> 16) + 1;
+
+ // disable all hardware interrupts
+ for (int i = 0; i < max; i++) {
+ ioapic_write(IOAPIC_REG_NTH_IOREDTBL_HIGH(i), 0);
+ ioapic_write(IOAPIC_REG_NTH_IOREDTBL_LOW(i), 1 << 16 /* masked */);
+ }
+}
+
+static void read_mp_table(void) {
+ struct mp_table_header *mptblhdr;
+ struct mp_ioapic_entry *ioapic_entry;
+ void *entry_ptr;
+
+ struct mp_float_ptr *mpfltptr = look_for_floatptr_table(0xf0000, 0x100000);
+ if (mpfltptr == NULL) {
+ PANIC("MP table not found");
+ return;
+ }
+
+ mptblhdr = (struct mp_table_header *) from_paddr(
+ (paddr_t) mpfltptr->mptable_header_addr);
+ if (mptblhdr->signature != MP_MPTABLE_SIGNATURE) {
+ PANIC("invalid MP table");
+ return;
+ }
+
+ entry_ptr = (void *) ((paddr_t) mptblhdr + sizeof(struct mp_table_header));
+ for (int i = 0; i < mptblhdr->entry_count; i++) {
+ size_t size;
+ uint8_t type = *((uint8_t *) entry_ptr);
+ switch (type) {
+ case MP_BASETABLE_IOAPIC_ENTRY:
+ ioapic_entry = (struct mp_ioapic_entry *) entry_ptr;
+ size = sizeof(struct mp_ioapic_entry);
+ if (ioapic_entry->ioapic_flags != 0) {
+ ioapic_init(ioapic_entry->memmaped_ioapic_addr);
+ }
+ break;
+ case MP_BASETABLE_BUS_ENTRY:
+ size = sizeof(struct mp_bus_entry);
+ break;
+ case MP_BASETABLE_PROCESSOR_ENTRY:
+ size = sizeof(struct mp_processor_entry);
+ struct mp_processor_entry *entry = (void *) entry_ptr;
+ if (entry->localapic_id != get_cpu_id()) {
+ num_cpus++;
+ }
+ break;
+ case MP_BASETABLE_IOINT_ASSIGN_ENTRY:
+ size = sizeof(struct mp_ioint_assign_entry);
+ break;
+ case MP_BASETABLE_LOCALINT_ASSIGN_ENTRY:
+ size = sizeof(struct mp_localint_assign_entry);
+ break;
+ default:
+ PANIC("unknown mp table entry: %d", type);
+ }
+
+ entry_ptr = (void *) ((paddr_t) entry_ptr + (paddr_t) size);
+ }
+}
+
+static void start_aps(void) {
+ // we need to copy into the lower address...
+ memcpy(from_paddr(0x5f00), from_paddr((paddr_t) boot_gdtr), sizeof(struct gdtr));
+
+ memcpy(from_paddr(AP_BOOT_CODE_PADDR),
+ from_paddr((paddr_t) mpboot),
+ (size_t) mpboot_end - (size_t) mpboot);
+
+ for (unsigned apic_id = 1; apic_id < get_num_cpus(); apic_id++) {
+ INFO("starting CPU #%d...", apic_id);
+
+ send_ipi(0, IPI_DEST_UNICAST, apic_id, IPI_MODE_INIT);
+ udelay(20000);
+ send_ipi(AP_BOOT_CODE_PADDR >> 12, IPI_DEST_UNICAST,
+ apic_id, IPI_MODE_STARTUP);
+ udelay(4000);
+ }
+}
+
+void mp_start(void) {
+ read_mp_table();
+ start_aps();
+}
+
+volatile int giant_lock = 8899;
+volatile unsigned lock_owner = 0xffff;
+
+void lock(void) {
+ uint64_t rflags;
+ __asm__ __volatile__("pushfq; popq %%rax" : "=a"(rflags));
+ if (rflags & 0x200) {
+ __asm__ __volatile__("cli");
+ WARN("STI'ed");
+ } else {
+// OOPS("locking...");
+ }
+
+ if (giant_lock == 12345 && lock_owner == get_cpu_id()) {
+ PANIC("CPU #%d: already locked!", get_cpu_id());
+ }
+
+ while (!__sync_bool_compare_and_swap(&giant_lock, 8899, 12345)) {
+ __asm__ __volatile__("pause");
+ }
+ lock_owner = get_cpu_id();
+}
+
+void unlock(void) {
+// OOPS("unlock!");
+ if (giant_lock != 12345) {
+ lock();
+ PANIC("unlock failed %d owner=%d: %d: %s",
+ giant_lock, lock_owner, get_cpu_id(), CURRENT->name);
+ }
+
+ lock_owner = 0xffff;
+ __sync_bool_compare_and_swap(&giant_lock, 12345, 8899);
+}
+
+
+void checklock(void) {
+ if (CURRENT != IDLE_TASK) {
+ if (giant_lock != 12345) {
+ PANIC("%d: %s: invalid lock %x: owner=%d",
+ get_cpu_id(), CURRENT->name,
+ giant_lock, lock_owner);
+ }
+
+ uint64_t rflags;
+ __asm__ __volatile__("pushfq; popq %%rax" : "=a"(rflags));
+ if (rflags & 0x200) {
+ __asm__ __volatile__("cli");
+ WARN("STI'ed");
+ }
+ }
+}
+
+void mp_reschedule(unsigned cpu) {
+ send_ipi(VECTOR_IPI_RESCHEDULE, IPI_DEST_UNICAST, cpu, IPI_MODE_FIXED);
+}
+
+void halt_cpu(void) {
+ while (true) {
+ __asm__ __volatile__("cli; hlt");
+ }
+}
+
+void halt(void) {
+ send_ipi(VECTOR_IPI_HALT, IPI_DEST_ALL_BUT_SELF, 0, IPI_MODE_FIXED);
+ halt_cpu();
+}
diff --git a/kernel/arch/x64/mp.h b/kernel/arch/x64/mp.h
new file mode 100644
index 0000000..db96fff
--- /dev/null
+++ b/kernel/arch/x64/mp.h
@@ -0,0 +1,111 @@
+#ifndef __X64_MP_H__
+#define __X64_MP_H__
+
+#include <types.h>
+
+//
+// MP Table
+//
+#define MP_FLOATPTR_SIGNATURE 0x5f504d5f /* "_MP_" */
+#define MP_MPTABLE_SIGNATURE 0x504d4350 /* "PCMP" */
+#define MP_BASETABLE_PROCESSOR_ENTRY 0
+#define MP_BASETABLE_BUS_ENTRY 1
+#define MP_BASETABLE_IOAPIC_ENTRY 2
+#define MP_BASETABLE_IOINT_ASSIGN_ENTRY 3
+#define MP_BASETABLE_LOCALINT_ASSIGN_ENTRY 4
+
+struct mp_float_ptr {
+ uint32_t signature;
+ uint32_t mptable_header_addr;
+ uint8_t length;
+ uint8_t spec_rev;
+ uint8_t checksum;
+ uint8_t info1;
+ uint8_t info2;
+ uint8_t info3[3];
+} PACKED;
+
+struct mp_table_header {
+ uint32_t signature;
+ uint16_t base_table_length;
+ uint8_t spec_rev;
+ uint8_t checksum;
+ uint8_t oem_id[8];
+ uint8_t product_id[12];
+ uint32_t oem_table_pointer;
+ uint16_t oem_table_size;
+ uint16_t entry_count;
+ uint32_t memmaped_localapic_addr;
+ uint16_t extended_table_length;
+ uint8_t extended_table_checksum;
+ uint8_t reserved;
+} PACKED;
+
+struct mp_processor_entry {
+ uint8_t type; // 0
+ uint8_t localapic_id;
+ uint8_t localapic_ver;
+ uint8_t cpu_flags;
+ uint32_t cpu_signature;
+ uint32_t feature_flags;
+ uint32_t reserved1;
+ uint32_t reserved2;
+} PACKED;
+
+struct mp_bus_entry {
+ uint8_t type; // 1
+ uint8_t id;
+ uint8_t type_str[6];
+} PACKED;
+
+struct mp_ioapic_entry {
+ uint8_t type; // 2
+ uint8_t ioapic_id;
+ uint8_t ioapic_ver;
+ uint8_t ioapic_flags;
+ uint32_t memmaped_ioapic_addr;
+} PACKED;
+
+struct mp_ioint_assign_entry {
+ uint8_t type; // 3
+ uint8_t int_type;
+ uint16_t int_flags;
+ uint8_t src_bus_id;
+ uint8_t src_bus_irq;
+ uint8_t dest_ioapic_id;
+ uint8_t dest_ioapic_intin;
+} PACKED;
+
+struct mp_localint_assign_entry {
+ uint8_t type; // 4
+ uint8_t int_type;
+ uint16_t int_flags;
+ uint8_t src_bus_id;
+ uint8_t src_bus_irq;
+ uint8_t dest_localapic_id;
+ uint8_t dest_localapic_intin;
+} PACKED;
+
+//
+// IPI
+//
+enum ipi_dest {
+ IPI_DEST_UNICAST = 0,
+ IPI_DEST_SELF = 1,
+ IPI_DEST_ALL_BUT_SELF = 2,
+ IPI_DEST_ALL = 3,
+};
+
+enum ipi_mode {
+ IPI_MODE_FIXED = 0,
+ IPI_MODE_INIT = 5,
+ IPI_MODE_STARTUP = 6,
+};
+
+
+void mp_start(void);
+void mp_init(void);
+void mp_halt(void);
+void halt_cpu(void);
+
+#endif
diff --git a/kernel/arch/x64/task.c b/kernel/arch/x64/task.c
index 70b19fa..e4b41ff 100644
--- a/kernel/arch/x64/task.c
+++ b/kernel/arch/x64/task.c
@@ -9,8 +9,9 @@
static struct task *current_fpu_owner = NULL;
static void update_tss_iomap(struct task *task) {
+ struct tss *tss = &rdgsbase()->tss;
bool allowed = task->regs.ioport;
- memset(tss.iomap, (allowed) ? 0x00 : 0xff, TSS_IOMAP_SIZE);
+ memset(tss->iomap, (allowed) ? 0x00 : 0xff, TSS_IOMAP_SIZE);
}
error_t arch_task_create(struct task *task, vaddr_t ip) {
@@ -65,14 +66,21 @@ void arch_task_destroy(struct task *task) {
kfree(task->regs.xsave);
}
+
+extern volatile unsigned lock_owner;//FIXME
void arch_task_switch(struct task *prev, struct task *next) {
// Disable interrupts in case they're not yet disabled.
asm_cli();
+
+ if (prev == IDLE_TASK && lock_owner != get_cpu_id()) {
+ lock();
+ }
+
// Switch the page table.
asm_write_cr3(next->vm.pml4);
// Update the kernel stack for syscall/interrupt/exception handlers.
rdgsbase()->rsp0 = next->regs.rsp0;
- tss.rsp0 = next->regs.rsp0;
+ rdgsbase()->tss.rsp0 = next->regs.rsp0;
// Update the I/O bitmap.
update_tss_iomap(next);
// Set TS flag for lazy FPU context switching.
diff --git a/kernel/arch/x64/trap.S b/kernel/arch/x64/trap.S
index 125b8be..c6a0059 100644
--- a/kernel/arch/x64/trap.S
+++ b/kernel/arch/x64/trap.S
@@ -43,9 +43,10 @@ syscall_entry:
push r14
push r15
+ cli
mov r9, rax // syscall type
mov rcx, r10 // arg3
- call handle_syscall
+ call x64_handle_syscall
// Restore callee-saved registers.
pop r15
@@ -110,7 +111,7 @@ interrupt_handlers:
.set i, i + 1
.endr
-.extern handle_interrupt
+.extern x64_handle_interrupt
interrupt_common:
//
// The current stack frame:
@@ -158,8 +159,7 @@ interrupt_common:
push rax
mov rsi, rsp
-
- call handle_interrupt
+ call x64_handle_interrupt
pop rax
pop rbx
diff --git a/kernel/arch/x64/vm.c b/kernel/arch/x64/vm.c
index 09ed7cc..2bba5a6 100644
--- a/kernel/arch/x64/vm.c
+++ b/kernel/arch/x64/vm.c
@@ -53,6 +53,8 @@ error_t vm_create(struct vm *vm) {
}
memcpy(pml4, from_paddr(KERNEL_PML4_PADDR), PAGE_SIZE);
+ //FIXME:
+ pml4[0] = 0;
vm->pml4 = into_paddr(pml4);
return OK;
}
diff --git a/kernel/arch/x64/x64.h b/kernel/arch/x64/x64.h
index 6b6b22b..5c01e38 100644
--- a/kernel/arch/x64/x64.h
+++ b/kernel/arch/x64/x64.h
@@ -4,19 +4,6 @@
#include <arch.h>
#include <task.h>
-//
-// CPU-local variables. Accessible through GS segment in kernel mode.
-//
-struct gsbase {
- uint64_t rsp0;
-} PACKED;
-
-static inline struct gsbase *rdgsbase(void) {
- uint64_t gsbase;
- __asm__ __volatile__("rdgsbase %0" : "=r"(gsbase));
- return (struct gsbase *) gsbase;
-}
-
//
// Global Descriptor Table (GDT)
//
@@ -90,18 +77,6 @@ struct idtr {
#define MSR_FMASK 0xc0000084
#define EFER_SCE 0x01
-//
-// SYSCALL/SYSRET
-//
-
-// SYSRET constraints.
-STATIC_ASSERT(USER_CS32 + 8 == USER_DS);
-STATIC_ASSERT(USER_CS32 + 16 == USER_CS64);
-
-// Clear IF bit to disable interrupts when we enter the syscall handler
-// or an interrupt occurs before doing SWAPGS.
-#define SYSCALL_RFLAGS_MASK 0x200
-
//
// Task State Segment (TSS)
//
@@ -121,8 +96,6 @@ struct tss {
uint8_t iomap_last_byte;
} PACKED;
-extern struct tss tss;
-
//
// Page Table
//
@@ -158,7 +131,9 @@ extern struct tss tss;
#define APIC_REG_TIMER_DIV 0xfee003e0
#define IOAPIC_IOREGSEL_OFFSET 0x00
#define IOAPIC_IOWIN_OFFSET 0x10
-#define VECTOR_IRQ_BASE 32
+#define VECTOR_IPI_RESCHEDULE 32
+#define VECTOR_IPI_HALT 33
+#define VECTOR_IRQ_BASE 48
#define IOAPIC_ADDR 0xfec00000
#define IOAPIC_REG_IOAPICVER 0x01
#define IOAPIC_REG_NTH_IOREDTBL_LOW(n) (0x10 + ((n) *2))
@@ -172,10 +147,6 @@ static inline void write_apic(paddr_t addr, uint32_t data) {
*((volatile uint32_t *) from_paddr(addr)) = data;
}
-static inline unsigned get_cpu_id(void) {
- return read_apic(APIC_REG_ID) >> 24;
-}
-
//
// APIC Timer.
//
@@ -183,6 +154,43 @@ static inline unsigned get_cpu_id(void) {
#define APIC_TIMER_DIV 0x03
#define TICK_HZ 1000
+//
+// CPU-local variables. Accessible through GS segment in kernel mode.
+//
+struct gsbase {
+ uint64_t rsp0;
+ struct task *current_task;
+ struct task idle_task;
+ struct gdt gdt;
+ struct idt idt;
+ struct tss tss;
+} PACKED;
+
+static inline struct gsbase *rdgsbase(void) {
+ uint64_t gsbase;
+ __asm__ __volatile__("rdgsbase %0" : "=r"(gsbase));
+ return (struct gsbase *) gsbase;
+}
+
+//
+// MP
+//
+#define AP_BOOT_CODE_PADDR 0x5000 // TODO: kmalloc
+void lock(void);
+void unlock(void);
+
+//
+// SYSCALL/SYSRET
+//
+
+// SYSRET constraints.
+STATIC_ASSERT(USER_CS32 + 8 == USER_DS);
+STATIC_ASSERT(USER_CS32 + 16 == USER_CS64);
+
+// Clear IF bit to disable interrupts when we enter the syscall handler
+// or an interrupt occurs before doing SWAPGS.
+#define SYSCALL_RFLAGS_MASK 0x200
+
//
// Inline assembly.
//
diff --git a/kernel/main.c b/kernel/main.c
index 08cf41a..0e96eb0 100644
--- a/kernel/main.c
+++ b/kernel/main.c
@@ -7,23 +7,33 @@
/// Initializes the kernel and starts the first task.
void kmain(void) {
printf("\nBooting Resea...\n");
- set_stack_canary();
memory_init();
- arch_init();
task_init();
+ mp_start();
// Create the first userland task (init).
struct task *task = get_task_by_tid(INIT_TASK_TID);
ASSERT(task);
task_create(task, "init", INITFS_ADDR, 0, CAP_ALL);
- // Do a very first context switch on this CPU
+ mpmain();
+}
+
+void mpmain(void) {
+ set_stack_canary();
+
+ // Initialize the idle task for this CPU.
+ IDLE_TASK->tid = 0;
+ task_create(IDLE_TASK, "(idle)", 0, 0, CAP_IPC);
+ CURRENT = IDLE_TASK;
+
+ // Do the very first context switch on this CPU.
+ INFO("Booted CPU #%d", get_cpu_id());
task_switch();
// We're now in the current CPU's idle task.
while (true) {
- // Halt the current CPU until an interrupt occurrs.
+ // Halt the CPU until an interrupt arrives...
arch_idle();
- task_switch();
}
}
diff --git a/kernel/main.h b/kernel/main.h
index e02d856..0e91dde 100644
--- a/kernel/main.h
+++ b/kernel/main.h
@@ -2,9 +2,10 @@
#define __MAIN_H__
void kmain(void);
+void mpmain(void);
// Implemented in arch.
-void arch_init(void);
+void mp_start(void);
void arch_idle(void);
#endif
diff --git a/kernel/syscall.c b/kernel/syscall.c
index a84cc91..0d47550 100644
--- a/kernel/syscall.c
+++ b/kernel/syscall.c
@@ -68,6 +68,16 @@ tid_t sys_ipc(tid_t dst, tid_t src, userptr_t m, userptr_t r, uint32_t flags) {
}
}
+ // Copy the message into the receiver task's buffer and resume it.
+ // Copy first to cause page fault here...
+ size_t send_len = IPC_SEND_LEN(flags);
+ ASSERT(send_len <= sizeof(CURRENT->buffer));//FIXME:
+ if (flags & IPC_KERNEL) {
+ memcpy(&CURRENT->buffer, (const void *) m, send_len);
+ } else {
+ copy_from_user(&CURRENT->buffer, m, send_len);
+ }
+
// Wait until the destination (receiver) task gets ready for receiving.
while (true) {
if (dst_task->state == TASK_BLOCKED
@@ -92,21 +102,22 @@ tid_t sys_ipc(tid_t dst, tid_t src, userptr_t m, userptr_t r, uint32_t flags) {
}
}
- // Ensure that the message is not too large.
- size_t send_len = IPC_SEND_LEN(flags);
- if (send_len > dst_task->buffer_len) {
- return ERR_TOO_LARGE_MESSAGE;
- }
- // Copy the message into the receiver task's buffer and resume it.
+ // Try again since it can be overwritten. FIXME:
if (flags & IPC_KERNEL) {
- memcpy(&dst_task->buffer, (const void *) m, send_len);
+ memcpy(&CURRENT->buffer, (const void *) m, send_len);
} else {
- copy_from_user(&dst_task->buffer, m, send_len);
+ copy_from_user(&CURRENT->buffer, m, send_len);
+ }
+
+ // Ensure that the message is not too large.
+ if (send_len > dst_task->buffer_len) {
+ return ERR_TOO_LARGE_MESSAGE;
}
dst_task->buffer_len = send_len;
dst_task->src = (flags & IPC_KERNEL) ? 0 : CURRENT->tid;
+ memcpy(&dst_task->buffer, (const void *) &CURRENT->buffer, send_len);
task_resume(dst_task);
}
@@ -122,6 +133,7 @@ tid_t sys_ipc(tid_t dst, tid_t src, userptr_t m, userptr_t r, uint32_t flags) {
return ERR_TOO_LARGE_MESSAGE;
}
+retry_recv:
// Check if there're pending events.
if (!src && CURRENT->events) {
if (recv_len < sizeof(struct events_msg)) {
@@ -137,15 +149,18 @@ tid_t sys_ipc(tid_t dst, tid_t src, userptr_t m, userptr_t r, uint32_t flags) {
return OK;
}
+ CURRENT->receiving = true;
CURRENT->src = src;
CURRENT->buffer_len = recv_len;
task_block(CURRENT);
// Resume the sender tasks.
+// WARN("recving at %s", CURRENT->name);
LIST_FOR_EACH (task, &CURRENT->senders, struct task, sender_next) {
task_resume(task);
list_remove(&task->sender_next);
}
+// WARN("done %s", CURRENT->name);
// Notify the waiter tasks that this task is now waiting for a message.
LIST_FOR_EACH (task, &CURRENT->waiters, struct task, waiter_next) {
@@ -155,6 +170,13 @@ tid_t sys_ipc(tid_t dst, tid_t src, userptr_t m, userptr_t r, uint32_t flags) {
// Sleep until a sender task resumes this task...
task_switch();
+ CURRENT->receiving = false;
+
+ // FIXME:
+ if (CURRENT->events & EVENT_ABORTED) {
+ CURRENT->events &= ~EVENT_ABORTED;
+ goto retry_recv;
+ }
// We've received a message in the buffer. Copy it into the kernel/user
// buffer. The size of a message is checked in the sender.
diff --git a/kernel/task.c b/kernel/task.c
index 3b6f023..6af763b 100644
--- a/kernel/task.c
+++ b/kernel/task.c
@@ -7,10 +7,8 @@
#include "syscall.h"
#include "x64.h"
-struct task idle_task;
-struct task *current_task;
-static struct task tasks[PROCS_MAX];
static struct list_head runqueue;
+static struct task tasks[PROCS_MAX];
/// Returns the task structfor the task ID. It returns NULL if the ID is
/// invalid.
@@ -42,14 +40,16 @@ error_t task_create(struct task *task, const char *name, vaddr_t ip,
}
// Initialize fields.
- INFO("new task #%d: %s", task->tid, name);
+ INFO("new task #%d: %s (%p)", task->tid, name, task);
task->state = TASK_BLOCKED;
+ task->receiving = false;
task->destroyed = false;
task->caps = caps;
task->events = EVENT_NONE;
task->pager = pager;
task->timeout = 0;
task->quantum = 0;
+ task->cpu = task->tid % get_num_cpus();
strncpy(task->name, name, sizeof(task->name));
list_invalidate(&task->runqueue_next);
list_invalidate(&task->sender_next);
@@ -132,26 +132,16 @@ void task_resume(struct task *task) {
task->state = TASK_RUNNABLE;
list_push_back(&runqueue, &task->runqueue_next);
+ // FIXME:
+ mp_reschedule(task->cpu);
}
-// Notifys events to the task.
+// Notifies events to the task.
void task_notify(struct task *task, events_t events) {
task->events |= events;
-
- // Try sending a message to the task if it's waiting for a message. If
- // failed, keep the events in `task->events`.
- if (task->state == TASK_BLOCKED) {
- struct events_msg m;
- m.type = M_EVENTS;
- m.events = task->events;
-
- error_t err = sys_ipc(
- task->tid, 0, (userptr_t) &m, 0,
- IPC_FLAGS(IPC_SEND | IPC_NOBLOCK | IPC_KERNEL, sizeof(m), 0, 0));
- if (err == OK) {
- // Successfully sent the event message. Clear the pending events.
- task->events = EVENT_NONE;
- }
+ if (task->receiving) {
+ task->events |= EVENT_ABORTED;
+ task_resume(task);
}
}
@@ -161,13 +151,14 @@ static struct task *scheduler(struct task *current) {
list_push_back(&runqueue, &current->runqueue_next);
}
- struct list_head *next = list_pop_front(&runqueue);
- if (!next) {
- // No runnable threads. Enter the idle thread.
- return IDLE_TASK;
+ LIST_FOR_EACH(task, &runqueue, struct task, runqueue_next) {
+ if (task->cpu == get_cpu_id()) {
+ list_remove(&task->runqueue_next);
+ return task;
+ }
}
- return LIST_CONTAINER(next, struct task, runqueue_next);
+ return IDLE_TASK;
}
/// Do a context switch: save the current register state on the stack and
@@ -193,16 +184,23 @@ void task_switch(void) {
/// 1/TICK_HZ second.
void handle_timer_interrupt(void) {
// Handle task timeouts.
- for (int i = 0; i < PROCS_MAX; i++) {
- struct task *task = &tasks[i];
- if (task->state == TASK_INACTIVE || !task->timeout) {
- continue;
+ if (is_bsp_cpu()) {
+ for (int i = 0; i < PROCS_MAX; i++) {
+ struct task *task = &tasks[i];
+ if (task->state == TASK_INACTIVE || !task->timeout) {
+ continue;
+ }
+
+ task->timeout--;
+ if (!task->timeout) {
+ task_notify(task, EVENT_TIMER);
+ }
}
+ }
- task->timeout--;
- if (!task->timeout) {
- task_notify(task, EVENT_TIMER);
- }
+ // FIXME:
+ if (CURRENT == IDLE_TASK) {
+ task_switch();
}
// Switch the context if the current task has spend its time slice.
@@ -220,8 +218,4 @@ void task_init(void) {
tasks[i].state = TASK_INACTIVE;
tasks[i].tid = i + 1;
}
-
- IDLE_TASK->tid = 0;
- task_create(IDLE_TASK, "(idle)", 0, 0, CAP_IPC);
- CURRENT = IDLE_TASK;
}
diff --git a/kernel/task.h b/kernel/task.h
index 67720ab..121ccf7 100644
--- a/kernel/task.h
+++ b/kernel/task.h
@@ -6,7 +6,7 @@
#include <types.h>
#include "memory.h"
-#define TASK_TIME_SLICE 20 /* in milliseconds */
+#define TASK_TIME_SLICE 10 /* in milliseconds */
#define PROCS_MAX 32
#define TASK_NAME_MAX 16
@@ -19,11 +19,13 @@ struct task {
struct regs regs;
tid_t tid;
int state;
+ bool receiving;
bool destroyed;
char name[TASK_NAME_MAX];
caps_t caps;
struct vm vm;
tid_t pager;
+ unsigned cpu;
unsigned quantum;
struct message buffer;
@@ -39,11 +41,6 @@ struct task {
struct list_head waiter_next;
};
-extern struct task idle_task;
-extern struct task *current_task;
-#define IDLE_TASK (&idle_task)
-#define CURRENT (current_task)
-
error_t task_create(struct task *task, const char *name, vaddr_t ip,
tid_t pager, caps_t caps);
error_t task_destroy(struct task *task);
@@ -57,8 +54,13 @@ void handle_timer_interrupt(void);
void task_init(void);
// Implemented in arch.
+unsigned get_cpu_id(void);
+unsigned get_num_cpus(void);
+void mp_reschedule(unsigned cpu);
error_t arch_task_create(struct task *task, vaddr_t ip);
void arch_task_destroy(struct task *task);
void arch_task_switch(struct task *prev, struct task *next);
+#include <x64.h> // FIXME:
+
#endif
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment