diff options
Diffstat (limited to 'src/arm/PL1')
30 files changed, 2435 insertions, 0 deletions
diff --git a/src/arm/PL1/PL1_common/global.h b/src/arm/PL1/PL1_common/global.h new file mode 100644 index 0000000..4e17b44 --- /dev/null +++ b/src/arm/PL1/PL1_common/global.h @@ -0,0 +1,38 @@ +#ifndef GLOBAL_H +#define GLOBAL_H + +#include <stdint.h> + +// board type, raspi2 +#define RASPI 2 + +// conditionally #define PERIF_BASE +#if RASPI == 4 + +#define PERIF_BASE 0xFE000000 + +#elif RASPI == 3 || RASPI == 2 + +#define PERIF_BASE 0x3F000000 + +#else // if RASPI == 1 + +#define PERIF_BASE 0x20000000 + +#endif + +// GPIO_BASE is #define'd in terms of PERIF_BASE +// (as in sane kernels - like linux, not like in wiki.osdev codes...) +#define GPIO_BASE (PERIF_BASE + 0x200000) + +inline static uint32_t rd32(uint32_t addr) +{ + return *(uint32_t volatile*) addr; +} + +inline static void wr32(uint32_t addr, uint32_t value) +{ + *(uint32_t volatile*) addr = value; +} + +#endif // GLOBAL_H diff --git a/src/arm/PL1/PL1_common/uart.c b/src/arm/PL1/PL1_common/uart.c new file mode 100644 index 0000000..4dd1c2b --- /dev/null +++ b/src/arm/PL1/PL1_common/uart.c @@ -0,0 +1,103 @@ +#include <stddef.h> +#include <stdint.h> +#include "uart.h" +#include "global.h" + +// Loop <delay> times in a way that the compiler won't optimize away +static inline void delay(int32_t count) +{ + asm volatile("__delay_%=: subs %[count], %[count], #1; bne __delay_%=\n" + : "=r"(count): [count]"0"(count) : "cc"); +} + +void uart_init() +{ + // Disable PL011_UART. + wr32(PL011_UART_CR, 0); + + // Setup the GPIO pin 14 && 15. + + // Disable pull up/down for all GPIO pins & delay for 150 cycles. + wr32(GPPUD, 0); + delay(150); + + // Disable pull up/down for pin 14,15 & delay for 150 cycles. + wr32(GPPUDCLK0, (1 << 14) | (1 << 15)); + delay(150); + + // Write 0 to GPPUDCLK0 to make it take effect. + wr32(GPPUDCLK0, 0); + + // Set integer & fractional part of baud rate. 
+ // Divider = UART_CLOCK/(16 * Baud) + // Fraction part register = (Fractional part * 64) + 0.5 + // UART_CLOCK = 3000000; Baud = 115200. + + // Divider = 3000000 / (16 * 115200) = 1.627 = ~1. + wr32(PL011_UART_IBRD, 1); + // Fractional part register = (.627 * 64) + 0.5 = 40.6 = ~40. + wr32(PL011_UART_FBRD, 40); + + // Set 8 bit data transmission (1 stop bit, no parity) + // and disable FIFO to be able to receive interrupt every received + // char, not every 2 chars + wr32(PL011_UART_LCRH, (1 << 5) | (1 << 6)); + + // set interrupt to come when transmit FIFO becomes ≤ 1/8 full + // or receive FIFO becomes ≥ 1/8 full + // (not really matters, since we disabled FIFOs) + wr32(PL011_UART_IFLS, 0); + + // Enable PL011_UART, receive & transfer part of UART.2 + wr32(PL011_UART_CR, (1 << 0) | (1 << 8) | (1 << 9)); + + // At first, it's probably safer to disable interrupts :) + uart_irq_disable(); + + // The above disables the entire uart irq; + // Also disable single sources within it + wr32(PL011_UART_IMSC, 0); +} + +inline static _Bool can_transmit(void) +{ + return !(rd32(PL011_UART_FR) & (1 << 5)); +} + +inline static _Bool can_receive(void) +{ + return !(rd32(PL011_UART_FR) & (1 << 4)); +} + +void putchar(char c) +{ + while (!can_transmit()); + + wr32(PL011_UART_DR, c); +} + +char getchar(void) +{ + while (!can_receive()); + + return rd32(PL011_UART_DR); +} + +_Bool putchar_non_blocking(char c) +{ + if (can_transmit()) + { + wr32(PL011_UART_DR, c); + return 0; + } + + return 1; +} + +int getchar_non_blocking(void) +{ + if (can_receive()) + return rd32(PL011_UART_DR); + + return -1; +} diff --git a/src/arm/PL1/PL1_common/uart.h b/src/arm/PL1/PL1_common/uart.h new file mode 100644 index 0000000..96f3634 --- /dev/null +++ b/src/arm/PL1/PL1_common/uart.h @@ -0,0 +1,106 @@ +#ifndef UART_H +#define UART_H + +#include <stdint.h> +#include "global.h" +#include "interrupts.h" + +// The offsets for reach register. + +// Controls actuation of pull up/down to ALL GPIO pins. 
+#define GPPUD (GPIO_BASE + 0x94) + +// Controls actuation of pull up/down for specific GPIO pin. +#define GPPUDCLK0 (GPIO_BASE + 0x98) + +// The base address for UART. +#define PL011_UART_BASE (GPIO_BASE + 0x1000) + +// The offsets for reach register for the UART. +#define PL011_UART_DR (PL011_UART_BASE + 0x00) +#define PL011_UART_RSRECR (PL011_UART_BASE + 0x04) +#define PL011_UART_FR (PL011_UART_BASE + 0x18) +#define PL011_UART_ILPR (PL011_UART_BASE + 0x20) +#define PL011_UART_IBRD (PL011_UART_BASE + 0x24) +#define PL011_UART_FBRD (PL011_UART_BASE + 0x28) +#define PL011_UART_LCRH (PL011_UART_BASE + 0x2C) +#define PL011_UART_CR (PL011_UART_BASE + 0x30) +#define PL011_UART_IFLS (PL011_UART_BASE + 0x34) +#define PL011_UART_IMSC (PL011_UART_BASE + 0x38) +#define PL011_UART_RIS (PL011_UART_BASE + 0x3C) +#define PL011_UART_MIS (PL011_UART_BASE + 0x40) +#define PL011_UART_ICR (PL011_UART_BASE + 0x44) +#define PL011_UART_DMACR (PL011_UART_BASE + 0x48) +#define PL011_UART_ITCR (PL011_UART_BASE + 0x80) +#define PL011_UART_ITIP (PL011_UART_BASE + 0x84) +#define PL011_UART_ITOP (PL011_UART_BASE + 0x88) +#define PL011_UART_TDR (PL011_UART_BASE + 0x8C) + +void uart_init(); +void putchar(char c); +char getchar(void); +_Bool putchar_non_blocking(char c); +int getchar_non_blocking(void); + +// TODO experiment to see if this gives us raw uart irq or the uart +// irq bit or'd with it's enable bit (not crucial for now, sice in our +// code this function only gets called when this irq is enabled) +static inline _Bool uart_irq_pending(void) +{ + return + ((uint32_t) 1 << 25) & rd32(ARM_IRQ_PENDING_2); +} + +static inline void uart_irq_disable(void) +{ + // Mask uart in arm peripheral interrupts + wr32(ARM_DISABLE_IRQS_2, ((uint32_t) 1) << 25); +} + +static inline void uart_irq_enable(void) +{ + // Unmask uart in arm peripheral interrupts + wr32(ARM_ENABLE_IRQS_2, ((uint32_t) 1) << 25); +} + +static inline _Bool uart_recv_irq_pending(void) +{ + return (1 << 4) & rd32(PL011_UART_MIS); 
+} + +static inline void uart_recv_irq_disable(void) +{ + wr32(PL011_UART_IMSC, rd32(PL011_UART_IMSC) & ~(1 << 4)); +} + +static inline void uart_recv_irq_enable(void) +{ + wr32(PL011_UART_IMSC, rd32(PL011_UART_IMSC) | (1 << 4)); +} + +static inline void uart_clear_recv_irq(void) +{ + wr32(PL011_UART_ICR, (1 << 4)); +} + +static inline _Bool uart_send_irq_pending(void) +{ + return (1 << 5) & rd32(PL011_UART_MIS); +} + +static inline void uart_send_irq_disable(void) +{ + wr32(PL011_UART_IMSC, rd32(PL011_UART_IMSC) & ~(1 << 5)); +} + +static inline void uart_send_irq_enable(void) +{ + wr32(PL011_UART_IMSC, rd32(PL011_UART_IMSC) | (1 << 5)); +} + +static inline void uart_clear_send_irq(void) +{ + wr32(PL011_UART_ICR, (1 << 5)); +} + +#endif // UART_H diff --git a/src/arm/PL1/kernel/armclock.h b/src/arm/PL1/kernel/armclock.h new file mode 100644 index 0000000..0792ad3 --- /dev/null +++ b/src/arm/PL1/kernel/armclock.h @@ -0,0 +1,76 @@ +#ifndef ARMCLOCK_H +#define ARMCLOCK_H + +#include <stdint.h> + +#include "global.h" +#include "interrupts.h" +#define ARMCLK_LOAD (ARM_BASE + 0x400) +#define ARMCLK_VALUE (ARM_BASE + 0x404) +#define ARMCLK_CONTROL (ARM_BASE + 0x408) +#define ARMCLK_IRQ_CLR_ACK (ARM_BASE + 0x40C) +#define ARMCLK_RAW_IRQ (ARM_BASE + 0x410) +#define ARMCLK_MASKED_IRQ (ARM_BASE + 0x414) +#define ARMCLK_RELOAD (ARM_BASE + 0x418) +#define ARMCLK_PRE_DRIVER (ARM_BASE + 0x41C) +#define ARMCLK_FREE_RUNNING_COUNTER (ARM_BASE + 0x420) + +typedef union armclk_control +{ + uint32_t raw; + struct + { + uint32_t one_shot_mode : 1; // bit 0; unused in RPi + uint32_t counter_23bit : 1; // bit 1 + uint32_t pre_scale : 2; // bits 3:2 + uint32_t bit_4 : 1; // bit 4 + uint32_t interrupt_enable : 1; // bit 5 + uint32_t periodic_mode : 1; // bit 6; unused in RPi + uint32_t timer_enable : 1; // bit 7 + uint32_t halt_in_debug : 1; // bit 8 + uint32_t free_running_enable : 1; // bit 9 + uint32_t bits_15_10 : 6; // bits 15:10 + uint32_t free_running_pre_scaler : 8; // bits 23:16 + 
uint32_t bits_31_24 : 8; // bits 31:24 + } fields; +} armclk_control_t; + +static inline void armclk_init(void) +{ + armclk_control_t ctrl = (armclk_control_t) (uint32_t) 0; + ctrl.fields.timer_enable = 1; + ctrl.fields.interrupt_enable = 1; + ctrl.fields.counter_23bit = 1; + wr32(ARMCLK_CONTROL, ctrl.raw); +} + +static inline void armclk_enable_timer_irq(void) +{ + armclk_control_t ctrl = (armclk_control_t) rd32(ARMCLK_CONTROL); + ctrl.fields.interrupt_enable = 1; + wr32(ARMCLK_CONTROL, ctrl.raw); + + wr32(ARM_ENABLE_BASIC_IRQS, 1); +} + +static inline void armclk_disable_timer_irq(void) +{ + armclk_control_t ctrl = (armclk_control_t) rd32(ARMCLK_CONTROL); + ctrl.fields.interrupt_enable = 0; + wr32(ARMCLK_CONTROL, ctrl.raw); + + wr32(ARM_DISABLE_BASIC_IRQS, 1); +} + +static inline void armclk_irq_settimeout(uint32_t timeout) +{ + wr32(ARMCLK_IRQ_CLR_ACK, 0); + wr32(ARMCLK_LOAD, timeout); +} + +static inline _Bool armclk_irq_pending(void) +{ + return rd32(ARM_IRQ_BASIC_PENDING) & 1; +} + +#endif // ARMCLOCK_H diff --git a/src/arm/PL1/kernel/atags.c b/src/arm/PL1/kernel/atags.c new file mode 100644 index 0000000..e2e6a24 --- /dev/null +++ b/src/arm/PL1/kernel/atags.c @@ -0,0 +1,103 @@ +#include "atags.h" +#include "io.h" + +static inline struct atag_header *next_tag(struct atag_header *tag) +{ + return (struct atag_header*) (((uint32_t*) tag) + tag->size); +} + +#define TAG_CONTENTS_FUN(tagname) \ + static inline struct atag_##tagname *tagname##_tag_contents \ + (struct atag_header *tag) \ + { \ + return (struct atag_##tagname*) (tag + 1); \ + } + +TAG_CONTENTS_FUN(header) +TAG_CONTENTS_FUN(core) +TAG_CONTENTS_FUN(mem) +TAG_CONTENTS_FUN(videotext) +TAG_CONTENTS_FUN(ramdisk) +TAG_CONTENTS_FUN(initrd2) +TAG_CONTENTS_FUN(serialnr) +TAG_CONTENTS_FUN(revision) +TAG_CONTENTS_FUN(videolfb) +TAG_CONTENTS_FUN(cmdline) + +uint32_t find_memory_size(struct atag_header *atags) +{ + // we silently assume there will only be one mem atag + while (atags->tag != ATAG_MEM && 
atags->tag != ATAG_NONE) + atags = next_tag(atags); + + if (atags->tag == ATAG_NONE) + return 0; + + struct atag_mem *mem_tag = mem_tag_contents(atags); + + // our design assumes address 0x0 is available, so we reject mem + // atag saying otherwise + if (mem_tag->start != 0) + { + puts("ignoring information about memory, " + "that doesn't start at 0x0"); + return 0; + } + + return mem_tag->size; +} + +void print_tag(struct atag_header *tag) +{ +#define TAG_CASE(tagname_upcase, tagname_locase, instructions) \ + case ATAG_##tagname_upcase: \ + puts("ATAG_" #tagname_upcase ":"); \ + { \ + struct atag_##tagname_locase *contents = \ + tagname_locase##_tag_contents(tag); \ + instructions; \ + } \ + break + + switch (tag->tag) + { + TAG_CASE(CORE, core, + prints(" flags: 0x"); + printhex(contents->flags); puts(""); + prints(" page size: "); + printdec(contents->pagesize); puts(""); + prints(" root device: "); + printdec(contents->rootdev); puts("");); + TAG_CASE(MEM, mem, + prints(" memory size: 0x"); + printhex(contents->size); puts(""); + prints(" memory start: 0x"); + printhex(contents->start); puts("");); + // the rest are unimportant for now, + // as they're not passed by qemu + TAG_CASE(VIDEOTEXT, videotext, (void) contents;); + TAG_CASE(RAMDISK, ramdisk, (void) contents;); + TAG_CASE(INITRD2, initrd2, (void) contents;); + TAG_CASE(SERIAL, serialnr, (void) contents;); + TAG_CASE(REVISION, revision, (void) contents;); + TAG_CASE(VIDEOLFB, videolfb, (void) contents;); + TAG_CASE(CMDLINE, cmdline, (void) contents;); + + case ATAG_NONE: + puts("ATAG_NONE"); + break; + default: + prints("!! 
unknown tag: 0x"); printhex(tag->tag); puts(" !!"); + } +} + +void print_atags(struct atag_header *atags) +{ + while (atags->tag != ATAG_NONE) + { + print_tag(atags); + atags = next_tag(atags); + } + + print_tag(atags); // also print ATAG_NONE +} diff --git a/src/arm/PL1/kernel/atags.h b/src/arm/PL1/kernel/atags.h new file mode 100644 index 0000000..4b6879f --- /dev/null +++ b/src/arm/PL1/kernel/atags.h @@ -0,0 +1,102 @@ +#ifndef ATAGS_H +#define ATAGS_H + +#include <stdint.h> + +#define ATAG_NONE 0x00000000 +#define ATAG_CORE 0x54410001 +#define ATAG_MEM 0x54410002 +#define ATAG_VIDEOTEXT 0x54410003 +#define ATAG_RAMDISK 0x54410004 +#define ATAG_INITRD2 0x54420005 +#define ATAG_SERIAL 0x54410006 +#define ATAG_REVISION 0x54410007 +#define ATAG_VIDEOLFB 0x54410008 +#define ATAG_CMDLINE 0x54410009 + +struct atag_header +{ + uint32_t size; + uint32_t tag; +}; + +struct atag_core +{ + uint32_t flags; + uint32_t pagesize; + uint32_t rootdev; +}; + +struct atag_mem +{ + uint32_t size; + uint32_t start; +}; + +struct atag_videotext +{ + uint8_t x; + uint8_t y; + uint16_t video_page; + uint8_t video_mode; + uint8_t video_cols; + uint16_t video_ega_bx; + uint8_t video_lines; + uint8_t video_isvga; + uint16_t video_points; +}; + +struct atag_ramdisk +{ + uint32_t flags; + uint32_t size; + uint32_t start; +}; + +struct atag_initrd2 +{ + uint32_t start; + uint32_t size; +}; + +struct atag_serialnr +{ + uint32_t low; + uint32_t high; +}; + +struct atag_revision +{ + uint32_t rev; +}; + +struct atag_videolfb +{ + uint16_t lfb_width; + uint16_t lfb_height; + uint16_t lfb_depth; + uint16_t lfb_linelength; + uint32_t lfb_base; + uint32_t lfb_size; + uint8_t red_size; + uint8_t red_pos; + uint8_t green_size; + uint8_t green_pos; + uint8_t blue_size; + uint8_t blue_pos; + uint8_t rsvd_size; + uint8_t rsvd_pos; +}; + +struct atag_cmdline +{ + char cmdline[1]; +}; + +uint32_t find_memory_size(struct atag_header *atags); + +void print_tag(struct atag_header *tag); + +void 
print_atags(struct atag_header *atags); + +#endif // ATAGS_H diff --git a/src/arm/PL1/kernel/bcmclock.h b/src/arm/PL1/kernel/bcmclock.h new file mode 100644 index 0000000..462346d --- /dev/null +++ b/src/arm/PL1/kernel/bcmclock.h @@ -0,0 +1,35 @@ +#ifndef BCMCLOCK_H +#define BCMCLOCK_H + +#include <stdint.h> +#include "interrupts.h" +#include "global.h" + +#define ST_BASE (PERIF_BASE + 0x3000) // System Timer + +#define ST_CS (ST_BASE + 0x0) // System Timer Control/Status +#define ST_CLO (ST_BASE + 0x4) // System Timer Counter Lower 32 bits +#define ST_CHI (ST_BASE + 0x8) // System Timer Counter Higher 32 bits +#define ST_C0 (ST_BASE + 0xC) // System Timer Compare 0 +#define ST_C1 (ST_BASE + 0x10) // System Timer Compare 1 +#define ST_C2 (ST_BASE + 0x14) // System Timer Compare 2 +#define ST_C3 (ST_BASE + 0x18) // System Timer Compare 3 + +static inline void bcmclk_enable_timer_irq(void) +{ + wr32(ARM_ENABLE_IRQS_1, 1 << 3); +} + +static inline void bcmclk_disable_timer_irq(void) +{ + wr32(ARM_DISABLE_IRQS_1, 1 << 3); +} + +static inline void bcmclk_irq_settimeout(uint32_t timeout) +{ + uint32_t clock_now = rd32(ST_CLO); + wr32(ST_C3, clock_now + timeout); + wr32(ST_CS, 1 << 3); +} + +#endif // BCMCLOCK_H diff --git a/src/arm/PL1/kernel/cp_regs.h b/src/arm/PL1/kernel/cp_regs.h new file mode 100644 index 0000000..e5e7063 --- /dev/null +++ b/src/arm/PL1/kernel/cp_regs.h @@ -0,0 +1,114 @@ +#ifndef CP_REGS_H +#define CP_REGS_H + +#include <stdint.h> + +// SCTLR - System Control Register + +// Wandering why I didn't typedef this struct with fields? 
+// That's because
+typedef union
+{
+  uint32_t raw; // the whole register as a single 32-bit word
+  struct // raw's bits by name (bit numbers assume LSB-first bit-field allocation)
+  {
+    uint32_t M        : 1; // bit 0
+    uint32_t A        : 1; // bit 1
+    uint32_t C        : 1; // bit 2
+    uint32_t Bits_4_3 : 2; // bits 4:3
+    uint32_t CP15BEN  : 1; // bit 5
+    uint32_t Bit_6    : 1; // bit 6
+    uint32_t B        : 1; // bit 7
+    uint32_t Bits_9_8 : 2; // bits 9:8
+    uint32_t SW       : 1; // bit 10
+    uint32_t Z        : 1; // bit 11
+    uint32_t I        : 1; // bit 12
+    uint32_t V        : 1; // bit 13
+    uint32_t RR       : 1; // bit 14
+    uint32_t Bit_15   : 1; // bit 15
+    uint32_t Bit_16   : 1; // bit 16
+    uint32_t HA       : 1; // bit 17
+    uint32_t Bit_18   : 1; // bit 18
+    uint32_t WXN      : 1; // bit 19
+    uint32_t UWXN     : 1; // bit 20
+    uint32_t FI       : 1; // bit 21
+    uint32_t U        : 1; // bit 22
+    uint32_t Bit_23   : 1; // bit 23
+    uint32_t VE       : 1; // bit 24
+    uint32_t EE       : 1; // bit 25
+    uint32_t Bit_26   : 1; // bit 26
+    uint32_t NMFI     : 1; // bit 27
+    uint32_t TRE      : 1; // bit 28
+    uint32_t AFE      : 1; // bit 29
+    uint32_t TE       : 1; // bit 30
+    uint32_t Bit_31   : 1; // bit 31
+  } fields;
+} SCTLR_t;
+
+// DACR - Domain Access Control Register
+// DACR holds 16 pairs of bits; each pair represents access
+// permissions to a respective memory domain. There's no point
+// declaring a union for this.
+typedef uint32_t DACR_t;
+// Return the 2-bit access field of `domain` (0..15) from a DACR value.
+inline static uint8_t domain_permissions(DACR_t DACR_contents,
+                                         int domain)
+{
+  return (DACR_contents << (30 - 2 * domain)) >> 30;
+}
+// Return DACR_contents with the 2-bit field of `domain` (0..15) replaced.
+inline static DACR_t set_domain_permissions(DACR_t DACR_contents,
+                                            int domain,
+                                            uint8_t permissions)
+{
+  // shift in uint32_t (int 0b11 << 30 overflows); keep only 2 permission bits
+  uint32_t clear_domain_permissions_mask = ~((uint32_t) 0b11 << (2 * domain));
+  uint32_t new_domain_permissions_mask =
+    ((uint32_t) permissions & 0b11) << (2 * domain);
+  return (DACR_contents & clear_domain_permissions_mask)
+    | new_domain_permissions_mask;
+}
+
+#define DOMAIN_NO_ACCESS      0b00
+#define DOMAIN_CLIENT_ACCESS  0b01
+#define DOMAIN_RESERVED       0b10
+#define DOMAIN_MANAGER_ACCESS 0b11
+
+// TTBR - Translation Table Base Register (there are 2 of them with
+// (almost) the same structure)
+
+// A field in TTBCR determines how long the address field is in TTBR0,
+// but here we'll ignore this and just assume the greatest possible
+// length of this field (18 bits). In TTBR1 it's always 18 bits.
+typedef union
+{
+  uint32_t raw;
+  struct
+  {
+
+    uint32_t C          : 1;  // bit 0
+    uint32_t S          : 1;  // bit 1
+    uint32_t IMP        : 1;  // bit 2
+    uint32_t RGN        : 2;  // bits 4:3
+    uint32_t NOS        : 1;  // bit 5
+    uint32_t IRGN_0     : 1;  // bit 6
+    uint32_t Bits_13_6  : 7;  // bits 13:7 (name is off by one; kept as is for existing users)
+    uint32_t Bits_31_14 : 18; // bits 31:14
+    // with multiprocessing extensions the cacheable bit becomes
+    // upper IRGN bit
+#define IRGN_1 C
+
+    // NOTE(review): IRGN most likely stands for 'inner region' (inner
+    // cacheability), not 'interprocess region' - names kept for compatibility
+#define TTBR_CACHEABLE_BIT                  C
+#define TTBR_INTERPROCESS_REGION_BITS_1     IRGN_1
+#define TTBR_SHAREABLE_BIT                  S
+#define TTBR_IMPLEMENTATION_DEFINED_BIT     IMP
+#define TTBR_REGION_BITS_1_0                RGN
+#define TTBR_INTERPROCESS_REGION_BITS_0     IRGN_0
+#define TTBR_NON_OUTER_SHAREABLE_BIT        NOS
+#define TTBR_TRANSLATION_TABLE_BASE_ADDRESS Bits_31_14
+  } fields;
+} TTBR_t;
+
+#endif // CP_REGS_H
diff --git a/src/arm/PL1/kernel/demo_functionality.c b/src/arm/PL1/kernel/demo_functionality.c
new file mode 100644
index 
0000000..217a858
--- /dev/null
+++ b/src/arm/PL1/kernel/demo_functionality.c
@@ -0,0 +1,129 @@
+#include "io.h"
+#include "psr.h"
+#include "memory.h"
+#include "translation_table_descriptors.h"
+#include "ramfs.h"
+#include "strings.h"
+#include "paging.h"
+#include "armclock.h"
+#include "scheduler.h"
+
+// Print the kind of paging (VMSA) support reported by the low 4 bits of
+// the ID_MMFR0 coprocessor register.
+void demo_paging_support(void)
+{
+  uint32_t ID_MMFR0;
+  // get contents of coprocessor register to check for paging support
+  asm("mrc p15, 0, %0, c0, c1, 4" : "=r" (ID_MMFR0));
+
+  char *paging;
+
+  switch(ID_MMFR0 & 0xf) /* lowest 4 bits indicate VMSA support */
+    {
+    case 0 : paging = "no paging"; break;
+    case 1 : paging = "implementation defined paging"; break;
+    case 2 : paging = "VMSAv6, with cache and TLB type registers"; break;
+    case 3 : paging = "VMSAv7, with support for remapping and access flag"; break;
+    case 4 : paging = "VMSAv7 with PXN bit supported"; break;
+    case 5 : paging = "VMSAv7, PXN and long format descriptors. EPAE is supported."; break;
+    default : paging = "?_? unknown paging ?_?";
+    }
+
+  puts(paging);
+}
+
+// Print the name of the processor mode encoded in the current CPSR.
+void demo_current_mode(void)
+{
+
+  // get content of current program status register to check the current
+  // processor mode (should be system, as we set it in boot.S)
+  PSR_t CPSR = read_CPSR();
+
+  char *mode_name;
+
+  switch(CPSR.fields.PSR_MODE_4_0)
+    {
+    case MODE_USER       : mode_name = "User (PL0)"; break;
+    case MODE_FIQ        : mode_name = "FIQ (PL1)"; break;
+    case MODE_IRQ        : mode_name = "IRQ (PL1)"; break;
+    case MODE_SUPERVISOR : mode_name = "Supervisor (PL1)"; break;
+    case MODE_MONITOR    : mode_name = "Monitor (PL1)"; break;
+    case MODE_ABORT      : mode_name = "Abort (PL1)"; break;
+    case MODE_HYPERVISOR : mode_name = "Hyp (PL2)"; break;
+    case MODE_UNDEFINED  : mode_name = "Undefined (PL1)"; break;
+    case MODE_SYSTEM     : mode_name = "System (PL1)"; break;
+    default              : mode_name = "Unknown mode"; break;
+    }
+
+  prints("current mode: ");
+  puts(mode_name);
+}
+
+#define TRANSLATION_TABLE \
+  ((short_section_descriptor_t volatile*) TRANSLATION_TABLE_BASE)
+
+extern char
+  _binary_ramfs_img_start,
+  _binary_ramfs_img_end,
+  _binary_ramfs_img_size;
+
+// Copy PL_0_test.img from the embedded ramfs into a newly claimed and
+// mapped section, then pass that section's start and end to schedule_new()
+// as the new pc and sp. Hangs if the image or a section is unavailable.
+void __attribute__((noreturn)) demo_setup_PL0(void)
+{
+  // find PL_0_test.img in ramfs
+  struct ramfile PL_0_test_img;
+
+  if (find_file(&_binary_ramfs_img_start, "PL_0_test.img",
+                &PL_0_test_img))
+    {
+      puts("PL_0_test.img not found :(");
+      // nothing to run - halt here instead of carrying on with an
+      // unfilled PL_0_test_img (a single wfi returns on the next interrupt)
+      while (1) asm volatile ("wfi");
+    }
+
+  // dummy value 5 for now, as we haven't implemented processes yet
+  uint16_t physical_section_number = claim_and_map_section
+    ((void*) 5, PL0_SECTION_NUMBER, AP_2_0_MODEL_RW_ALL);
+
+  if (physical_section_number == SECTION_NULL)
+    {
+      puts("Couldn't claim memory section for unprivileged code :(");
+      while(1);
+    }
+
+  size_t physical_section_start =
+    (((size_t) physical_section_number) << 20);
+
+  // check that translation works... by copying a string using one
+  // mapping and reading it using other :D
+  char str_part1[] = "mapped section for PL0 code (0x";
+  char str_part2[] = " -> 0x";
+  char str_part3[] = ")";
+
+  char *string_end = (char*) physical_section_start;
+
+  memcpy(string_end, str_part1, sizeof(str_part1) - 1);
+  uint32_to_hex(VIRTUAL_PL0_MEMORY_START,
+                string_end += sizeof(str_part1) - 1);
+  memcpy(string_end += 8, str_part2, sizeof(str_part2) - 1);
+  uint32_to_hex(physical_section_start,
+                string_end += sizeof(str_part2) - 1);
+  memcpy(string_end += 8, str_part3, sizeof(str_part3));
+
+  puts((char*) VIRTUAL_PL0_MEMORY_START);
+
+  // now paste a userspace program to that section
+  memcpy((void*) VIRTUAL_PL0_MEMORY_START,
+         PL_0_test_img.file_contents, PL_0_test_img.file_size);
+
+  puts("copied PL0 code to it's section");
+
+  puts("All ready! scheduling!");
+
+  schedule_new(VIRTUAL_PL0_MEMORY_START, // the new pc
+               VIRTUAL_PL0_MEMORY_END);  // the new sp
+}
diff --git a/src/arm/PL1/kernel/demo_functionality.h b/src/arm/PL1/kernel/demo_functionality.h
new file mode 100644
index 0000000..a338c71
--- /dev/null
+++ b/src/arm/PL1/kernel/demo_functionality.h
@@ -0,0 +1,16 @@
+#ifndef DEMO_FUNCTIONALITY_H
+#define DEMO_FUNCTIONALITY_H
+
+void demo_paging_support(void);
+
+void demo_current_mode(void);
+
+//void demo_setup_libkernel(void);
+
+void __attribute__((noreturn)) demo_setup_PL0(void); // matches the definition
+
+//void demo_go_unprivileged(void);
+
+//void demo_setup_interrupts(void);
+
+#endif // DEMO_FUNCTIONALITY_H
diff --git a/src/arm/PL1/kernel/interrupt_vector.S b/src/arm/PL1/kernel/interrupt_vector.S
new file mode 100644
index 0000000..1ec80f7
--- /dev/null
+++ b/src/arm/PL1/kernel/interrupt_vector.S
@@ -0,0 +1,56 @@
+_interrupt_vectors:
+        b reset_handler_caller
+        b undef_handler_caller
+        b svc_handler_caller
+        b abort_handler_caller
+        b abort_handler_caller
+        b generic_handler_caller
+        b irq_handler_caller
+        b fiq_handler_caller
+
+reset_handler_caller:
+        ldr sp, =_supervisor_stack_top
+        ldr r5, =reset_handler
+        bx r5
+ +undef_handler_caller: + ldr sp, =_supervisor_stack_top + ldr r5, =undefined_instruction_vector + bx r5 + +svc_handler_caller: + ldr sp, =_supervisor_stack_top + push {r0-r12, lr} + mov r0, sp + ldr r5, =supervisor_call_handler + blx r5 + ldm sp!, {r0-r12, pc} ^ + +abort_handler_caller: + ldr sp, =_supervisor_stack_top + ldr r5, =abort_handler + bx r5 + +generic_handler_caller: + ldr sp, =_supervisor_stack_top + ldr r5, =generic_handler + bx r5 + +irq_handler_caller: + ldr sp, =_irq_stack_top + sub lr, #4 + push {r0-r12, lr} + mov r0, sp + ldr r3, =irq_handler + blx r3 + ldm sp!, {r0-r12, pc} ^ + +fiq_handler_caller: + ldr sp, =_fiq_stack_top + ldr r5, =fiq_handler + bx r5 + +irq: + mov sp, #0x8000 + ldr r5, =abort_handler + subs pc,lr,#4 diff --git a/src/arm/PL1/kernel/interrupts.c b/src/arm/PL1/kernel/interrupts.c new file mode 100644 index 0000000..121d79c --- /dev/null +++ b/src/arm/PL1/kernel/interrupts.c @@ -0,0 +1,135 @@ +#include "io.h" +#include "uart.h" +#include "svc_interface.h" +#include "armclock.h" +#include "scheduler.h" +/** + @brief The undefined instruction interrupt handler +**/ + + +void __attribute__((noreturn)) setup(void); + +// from what I've heard, reset is never used on the Pi; +// in our case it should run once - when stage1 of the kernel +// jumps to stage2 +void reset_handler(void) +{ + setup(); +} + +void undefined_instruction_vector(void) +{ + error("Undefined instruction occured"); +} + +uint32_t supervisor_call_handler(uint32_t regs[14]) +{ + switch(regs[0]) { + case UART_PUTCHAR: + if (putchar_non_blocking(regs[1])) + schedule_wait_for_output(regs, regs[1]); + break; + case UART_GETCHAR: + { + int c; + if ((c = getchar_non_blocking()) == -1) + schedule_wait_for_input(regs); + + regs[0] = c; + break; + } + case UART_WRITE: + error("UART_WRITE not implemented!!!!!"); + break; + default: + // perhaps we should kill the process now? 
+ error("unknown supervisor call type!!!!!"); + } + + return 0; // a dummy value +} + +void abort_handler(void) +{ + // TODO maybe dump registers here? + error("re-entered system due to data/prefetch abort"); +} + +void generic_handler(void) +{ + error("something weird happened"); +} + +void irq_handler(uint32_t regs[14]) +{ + if (armclk_irq_pending()) + { + write_SPSR(PL1_PSR); + asm volatile("mov r0, %[context]\n\r" + "mov lr, %[return_func]\n\r" + "subs pc, lr, #0" :: + [context]"r" (regs), + [return_func]"r" (schedule_save_context) : + "memory"); + } + else if (uart_irq_pending()) + { + if (uart_recv_irq_pending()) + { + uart_clear_recv_irq(); + scheduler_try_input(); + } + if (uart_send_irq_pending()) + { + uart_clear_send_irq(); + scheduler_try_output(); + } + + if (read_SPSR().fields.PSR_MODE_4_0 != MODE_USER) + { + write_SPSR(PL1_PSR); + asm volatile("mov lr, %0\n\r" + "subs pc, lr, #0" :: + "r" (schedule) : "memory"); + } + } + else + error("unknown irq"); + + // important - don't allow this handler to return if irq came from + // PL1 (likely supervisor, because we don't really use system) mode +} + +void fiq_handler(void) +{ + error("fiq happened"); +} + + +/* Here is your interrupt function */ +//void +//__attribute__((interrupt("IRQ"))) +//__attribute__((section(".interrupt_vectors.text"))) +//irq_handler2(void) { +// /* You code goes here */ +//// uart_puts("GOT INTERRUPT!\r\n"); +// +// local_timer_clr_reload_reg_t temp = { .IntClear = 1, .Reload = 1 }; +// QA7->TimerClearReload = temp; // Clear interrupt & reload +//} + +///* here is your main */ +//int enable_timer(void) { +// +// QA7->TimerRouting.Routing = LOCALTIMER_TO_CORE0_IRQ; // Route local timer IRQ to Core0 +// QA7->TimerControlStatus.ReloadValue = 100; // Timer period set +// QA7->TimerControlStatus.TimerEnable = 1; // Timer enabled +// QA7->TimerControlStatus.IntEnable = 1; // Timer IRQ enabled +// QA7->TimerClearReload.IntClear = 1; // Clear interrupt +// QA7->TimerClearReload.Reload = 
1; // Reload now +// QA7->Core0TimerIntControl.nCNTPNSIRQ_IRQ = 1; // We are in NS EL1 so enable IRQ to core0 that level +// QA7->Core0TimerIntControl.nCNTPNSIRQ_FIQ = 0; // Make sure FIQ is zero +//// uart_puts("Enabled Timer\r\n"); +// return(0); +//}
\ No newline at end of file
diff --git a/src/arm/PL1/kernel/interrupts.h b/src/arm/PL1/kernel/interrupts.h
new file mode 100644
index 0000000..c2818ee
--- /dev/null
+++ b/src/arm/PL1/kernel/interrupts.h
@@ -0,0 +1,48 @@
+#ifndef RPI_MMU_EXAMPLE_INTERRUPTS_H
+#define RPI_MMU_EXAMPLE_INTERRUPTS_H
+
+#include <stdint.h>
+#include "global.h" // PERIF_BASE, used by every address below
+
+// ARM control block
+// called "base address for the ARM interrupt register" elsewhere
+#define ARM_BASE (PERIF_BASE + 0xB000)
+#define ARM_IRQ_BASIC_PENDING  (ARM_BASE + 0x200)
+#define ARM_IRQ_PENDING_1      (ARM_BASE + 0x204)
+#define ARM_IRQ_PENDING_2      (ARM_BASE + 0x208)
+#define ARM_FIQ_CONTROL        (ARM_BASE + 0x20C)
+#define ARM_ENABLE_IRQS_1      (ARM_BASE + 0x210)
+#define ARM_ENABLE_IRQS_2      (ARM_BASE + 0x214)
+#define ARM_ENABLE_BASIC_IRQS  (ARM_BASE + 0x218)
+#define ARM_DISABLE_IRQS_1     (ARM_BASE + 0x21C)
+#define ARM_DISABLE_IRQS_2     (ARM_BASE + 0x220)
+#define ARM_DISABLE_BASIC_IRQS (ARM_BASE + 0x224)
+// peripheral base + offset of the interrupt controller's first addressable register
+#define RPI_INTERRUPT_CONTROLLER_BASE ( PERIF_BASE + 0xB200UL )
+// Bits in the Enable_Basic_IRQs register to enable various interrupts.
+// According to the BCM2835 ARM Peripherals manual, section 7.5
+#define RPI_BASIC_ARM_TIMER_IRQ         (1 << 0)
+#define RPI_BASIC_ARM_MAILBOX_IRQ       (1 << 1)
+#define RPI_BASIC_ARM_DOORBELL_0_IRQ    (1 << 2)
+#define RPI_BASIC_ARM_DOORBELL_1_IRQ    (1 << 3)
+#define RPI_BASIC_GPU_0_HALTED_IRQ      (1 << 4)
+#define RPI_BASIC_GPU_1_HALTED_IRQ      (1 << 5)
+#define RPI_BASIC_ACCESS_ERROR_1_IRQ    (1 << 6)
+#define RPI_BASIC_ACCESS_ERROR_0_IRQ    (1 << 7)
+
+// @brief The interrupt controller memory mapped register set
+typedef struct {
+  volatile uint32_t IRQ_basic_pending;
+  volatile uint32_t IRQ_pending_1;
+  volatile uint32_t IRQ_pending_2;
+  volatile uint32_t FIQ_control;
+  volatile uint32_t Enable_IRQs_1;
+  volatile uint32_t Enable_IRQs_2;
+  volatile uint32_t Enable_Basic_IRQs;
+  volatile uint32_t Disable_IRQs_1;
+  volatile uint32_t Disable_IRQs_2;
+  volatile uint32_t Disable_Basic_IRQs;
+} rpi_irq_controller_t;
+
+extern rpi_irq_controller_t* RPI_GetIrqController(void);
+#endif //RPI_MMU_EXAMPLE_INTERRUPTS_H
diff --git a/src/arm/PL1/kernel/kernel_stage1.S b/src/arm/PL1/kernel/kernel_stage1.S
new file mode 100644
index 0000000..e770513
--- /dev/null
+++ b/src/arm/PL1/kernel/kernel_stage1.S
@@ -0,0 +1,171 @@
+/* arm mode, cortex-a7 compatibility
+ *
+ * _boot is entry point for the kernel.
+ *
+ * Kernel copies its embedded stage 2 to address 0x0 and jumps to
+ * it (to the reset handler). Registers r0 - r2 are arguments for
+ * the kernel, but we're not using them for now.
+ *
+ * This file is based on (and almost identical with) loader_stage1.S
+ */
+
+.global _boot
+_boot:
+        // Only let the first core execute
+        mrc p15, 0, r3, c0, c0, 5
+        and r3, r3, #3
+        cmp r3, #0
+        beq proceed
+        // park the other cores; wfe returns on any event, so retry it in a
+        // loop instead of falling through into the data word below. Races can
+        // theoretically still occur when the main core overwrites this memory
+park_secondary_core:
+        wfe
+        b park_secondary_core
+
+        // we'll use the size of stage1 to determine where we have free
+        // space after it. 
We'll then copy our atags/fdt there, so + // it doesn't get overwritten by stage2 we deploy at 0x0 +atags_magic: + .word 0x54410001 + +proceed: + // load the second word of structure passed to us through r2; + // if it's atags, it's second word should be the magic number + // Btw, location of ATAGS is always 0x100. + ldr r3, [r2, #4] + adr r4, atags_magic + ldr r4, [r4] + + // compare second word of assumed atags with magic number + // to see, if it's really atags and not sth else (i.e. fdt) + cmp r3, r4 + + // normally at start r0 contains value 0; + // value 3 in r0 would tell stage2 code, we found no atags :( + movne r0, #3 + bne stage2_blob_copying + + // if atags was found, copying of it takes place here + + // the following loop finds, where atags ends + // r3 shall point to currently looked-at tag + mov r3, r2 + +find_end_of_atags_loop: + // load first word of tag header to r4 (it contains tag size) + ldr r4, [r3] + // make r3 point at the next tag (by adding 4*tag_size to it) + add r3, r4, lsl #2 + + // load second word of tag header to r5 (it contains tag type) + ldr r5, [r3, #4] + + // if tag value is 0, it is the last tag + cmp r5, #0 + bne find_end_of_atags_loop + + add r3, #8 // make r3 point at the end of last tag + sub r3, r2 // get atags size in r3 + + // at this pont r2 and r3 point at start and size of atags, + // respectively; now we'll compute, where we're going to have + // free space to put atags in; we want to put atags either + // right after our blob or, if if it doesn't fit between + // blob end and the address stage1 is loaded at, after stage1 + + // get blob size to r5 + adr r5, blob_size + ldr r5, [r5] + + // we could only copy atags to a 4-aligned address + mov r6, #4 + bl aling_r5_to_r6 + + // compute where atags copied right after blob would end + add r6, r5, r3 + // we can only overwrite stuff before the copying loop + adr r7, copy_atags_loop + cmp r6, r7 + ble copy_atags + + // atags wouldn't fit - use memory after stage1 as 
destination + adr r5, _boot + adr r6, stage1_size + ldr r6, [r6] + add r5, r6 + mov r6, #4 + bl aling_r5_to_r6 + +copy_atags: + // now copy atags (r2 - atags start; r3 - atags size; + // r5 - destination; r4 - iterator; r6 - buffor) + mov r4, #0 + +copy_atags_loop: + ldr r6, [r2, r4] + str r6, [r5, r4] + add r4, #4 + cmp r4, r3 + blo copy_atags_loop + + mov r2, r5 // place the new atags address in r2 + b stage2_blob_copying // atags stuff done; proceed + +// mini-function, that does what the label says; clobbers r7 +aling_r5_to_r6: + sub r5, #1 + sub r7, r6, #1 + bic r5, r7 + add r5, r6 + mov pc, lr + + +stage2_blob_copying: // copy stage2 of the kernel to address 0x0 + + // first, load address of stage2_start to r3 (a PIC way) + adr r3, stage2_start + + // load destination address for stage2 code to r4 + mov r4, #0 + + // load blob size to r5 + // The size might get too big for an immediate value, so + // we load it from memory. + adr r5, blob_size + ldr r5, [r5] + + // r6 is the counter - counts the bytes copied + mov r6, #0 + + // This initial piece of code might get overwritten when we + // copy stage2, so the actual copying loop shall be after + // stage2 blob. We want this asm code to be PIC, so we're + // computing address of stage2_end into r7. + add r7, r3, r5 + bx r7 + +blob_size: + .word stage2_end - stage2_start +stage1_size: + .word stage1_end - _boot + +.align 4 +stage2_start: + .incbin "kernel_stage2.img" +stage2_end: + + // each word of the blob is loaded to r7 and stored + // from r7 to it's destination in a loop +loop: + ldr r7, [r3, r6] + str r7, [r4, r6] + add r6, r6, #4 + cmp r6, r5 + blo loop + + // Call stage2 of the kernel (branch to 0x0, + // which is the reset handler). 
+ bx r4 + +stage1_end: diff --git a/src/arm/PL1/kernel/kernel_stage1.ld b/src/arm/PL1/kernel/kernel_stage1.ld new file mode 100644 index 0000000..3130634 --- /dev/null +++ b/src/arm/PL1/kernel/kernel_stage1.ld @@ -0,0 +1,27 @@ +ENTRY(_boot) /* defined in boot.S; qemu needs it to run elf file */ + +/* Code starts at 0x8000 - that's where RPis in 32-bit mode load + * kernel at. My experiments do, however, show, that qemu emulating + * RPi2 loads the kernel at 0x10000! (took some pain to find out). + * rpi-open-firmware, on the other hand, loads kernel at 0x2000000! + * This is not really a problem, since: + * 1. We can use our bootloader to load the kernel at 0x8000 + * 2. We've rewritten stage 1 of both bootloader and kernel in + * careful assembly, so that they should work regardless of + * where they are loaded. + * 3. In qemu, we can load kernel.elf instead of raw binary + * (qemu will do the right thing then) + */ + +SECTIONS +{ + + . = 0x8000; + + __start = .; + .kernel_stage1 : + { + KEEP(kernel_stage1.o) + } + __end = .; +} diff --git a/src/arm/PL1/kernel/kernel_stage2.ld b/src/arm/PL1/kernel/kernel_stage2.ld new file mode 100644 index 0000000..9411ca2 --- /dev/null +++ b/src/arm/PL1/kernel/kernel_stage2.ld @@ -0,0 +1,80 @@ +/* This sesond stage of the kernel is run from address 0x0 */ + +TRANSLATION_TABLE_SIZE = 4096 * 4; +SECTIONS_LIST_SIZE = 4096 * 8; +MMU_SECTION_SIZE = 1 << 20; + +SECTIONS +{ + + . = 0x0; + + __start = .; + .interrupt_vector : + { + KEEP(interrupt_vector.o) + } + . = ALIGN(4); + .embedded_ramfs : + { + ramfs_embeddable.o + } + .rest_of_kernel : + { + *(.text) + *(.data) + *(.rodata) + *(.bss) + *(/COMMON/) + *(*) + } + __end = .; + + . = ALIGN(1 << 14); + + .translation_table (NOLOAD) : + { + _translation_table_start = .; + + . = . + TRANSLATION_TABLE_SIZE; + + _translation_table_end = .; + } + + .sections_list (NOLOAD) : + { + _sections_list_start = .; + + . = . + SECTIONS_LIST_SIZE; + + _sections_list_end = .; + } + + . 
= ALIGN(1 << 20); + . = . + MMU_SECTION_SIZE; + + .stack (NOLOAD) : + { + _stack_start = .; + + _fiq_stack_start = .; + + . = . + (1 << 18); + + _fiq_stack_top = .; + + _irq_stack_start = .; + + . = . + (1 << 18); + + _irq_stack_top = .; + + _supervisor_stack_start = .; + + . = . + (1 << 19); + + _supervisor_stack_top = .; + + _stack_end = .; + } +} diff --git a/src/arm/PL1/kernel/memory.h b/src/arm/PL1/kernel/memory.h new file mode 100644 index 0000000..bdeba52 --- /dev/null +++ b/src/arm/PL1/kernel/memory.h @@ -0,0 +1,72 @@ +#ifndef MEMORY_H +#define MEMORY_H + +#include <stddef.h> + +// These macros were heavily used b4 I moved all the address +// computation to the linker script. Now I'm just keeping them +// in case they're needed for something else :) +#define POWER_OF_2(EXP) (((size_t) 1) << EXP) + +#define ALIGN_POWER_OF_2(ADDR, EXP) \ + (((ADDR - 1) & ~(POWER_OF_2(EXP) - 1)) + POWER_OF_2(EXP)) + +#define SECTION_SIZE POWER_OF_2(20) + +#define ALIGN_SECTION(ADDR) ALIGN_POWER_OF_2(ADDR, 20) + + +// memory layout + +#define INTERRUPT_VECTOR_TABLE_START ((uint32_t) 0x0) + +// all those symbols are defined in the linker script +extern char __end; +extern char __start; +extern char _translation_table_start; +extern char _translation_table_end; +extern char _sections_list_start; +extern char _sections_list_end; +extern char _stack_start; +extern char _fiq_stack_start; +extern char _fiq_stack_top; +extern char _irq_stack_start; +extern char _irq_stack_top; +extern char _supervisor_stack_start; +extern char _supervisor_stack_top; +extern char _stack_end; + +#define KERNEL_START ((size_t) &__start) // this is 0x0 +#define KERNEL_END ((size_t) &__end) + +// first 2^14 aligned address after the kernel +#define TRANSLATION_TABLE_BASE ((size_t) &_translation_table_start) +#define TRANSLATION_TABLE_END ((size_t) &_translation_table_end) + +// another 32KB after the translation table are used for sections list +#define SECTIONS_LIST_START ((size_t) &_sections_list_start) 
+#define SECTIONS_LIST_END ((size_t) &_sections_list_end) + +// first section after the translation table is left unused; +// the next section is used as the stack +#define STACK_START ((size_t) &_stack_start) +#define FIQ_STACK_START ((size_t) &_fiq_stack_start) +#define FIQ_STACK_END ((size_t) &_fiq_stack_top) +#define IRQ_STACK_START ((size_t) &_irq_stack_start) +#define IRQ_STACK_END ((size_t) &_irq_stack_top) +#define SUPERVISOR_STACK_START ((size_t) &_supervisor_stack_start) +#define SUPERVISOR_STACK_END ((size_t) &_supervisor_stack_top) +#define STACK_END ((size_t) &_stack_end) + +#define PRIVILEGED_MEMORY_END STACK_END + + +// the following describes the virtual section for our PL0 programs +#define PL0_SECTION_NUMBER ((size_t) 0xaaa) + +#define VIRTUAL_PL0_MEMORY_START (PL0_SECTION_NUMBER << 20) +#define VIRTUAL_PL0_MEMORY_END \ + (VIRTUAL_PL0_MEMORY_START + SECTION_SIZE) + +#endif // MEMORY_H + diff --git a/src/arm/PL1/kernel/paging.c b/src/arm/PL1/kernel/paging.c new file mode 100644 index 0000000..771c681 --- /dev/null +++ b/src/arm/PL1/kernel/paging.c @@ -0,0 +1,249 @@ +#include "cp_regs.h" +#include "strings.h" +#include "memory.h" +#include "translation_table_descriptors.h" +#include "io.h" + +#include "paging.h" + +void setup_flat_map(void) +{ + // compute translation table base address + // translation table shall start at first 2^14-bytes aligned + // address after the kernel image + + prints("chosen lvl1 translation table address: 0x"); + printhex(TRANSLATION_TABLE_BASE); + puts(""); + + // flat map all memory + puts("preparing translation table"); + short_descriptor_lvl1_t volatile *translation_table = + (short_descriptor_lvl1_t*) TRANSLATION_TABLE_BASE; + + for (uint32_t i = 0; i < 4096; i++) + translation_table[i].section_fields = + (short_section_descriptor_t) { + .SECTION_BASE_ADDRESS_31_20 = i, + .SECTION_OR_SUPERSECTION_BIT = DESCRIBES_SECTION, + .ACCESS_PERMISSIONS_2 = AP_2_0_MODEL_RW_PL1 >> 2, + .ACCESS_PERMISSIONS_1_0 = 
AP_2_0_MODEL_RW_PL1 & 0b011, + .DESCRIPTOR_TYPE_1 = + SHORT_DESCRIPTOR_SECTION_OR_SUPERSECTION >> 1, + // rest of fields are 0s + }; + + // meddle with domain settings + puts("setting domain0 to client access and blocking other domains"); + + DACR_t DACR = 0; + DACR = set_domain_permissions(DACR, 0, DOMAIN_CLIENT_ACCESS); + for (int i = 1; i < 16; i++) + DACR = set_domain_permissions(DACR, i, DOMAIN_NO_ACCESS); + + // the above should do the same as this: + // DACR = 1; + + asm("mcr p15, 0, %0, c3, c0, 0" :: "r" (DACR)); + + // meddle with SCTLR, which determines how some bits in + // table descriptors work and also controls caches + // we don't want to use access flag, so we set AFE to 0 + // we don't want TEX remap, so we set TRE to 0 + // we also disable data and instruction caches and the MMU + + // some of this is redundant (i.e. MMU should already be disabled) + puts("setting C, I, AFE and TRE to 0 in SCTLR"); + + SCTLR_t SCTLR; + asm("mrc p15, 0, %0, c1, c0, 0" : "=r" (SCTLR.raw)); + + SCTLR.fields.M = 0; // disable MMU + SCTLR.fields.C = 0; // disable data cache + SCTLR.fields.I = 0; // disable instruction cache + SCTLR.fields.TRE = 0; // disable TEX remap + SCTLR.fields.AFE = 0; // disable access flag usage + asm("mcr p15, 0, %0, c1, c0, 0\n\r" + "isb" :: "r" (SCTLR.raw) : "memory"); + + // TODO: move invalidation instructions to some header as inlines + + puts("invalidating instruction cache, branch prediction," + " and entire main TLB"); + + // invalidate instruction cache + asm("mcr p15, 0, r0, c7, c5, 0\n\r" // r0 gets ignored + "isb" ::: "memory"); + + // invalidate branch-prediction + asm("mcr p15, 0, r0, c7, c5, 6\n\r" // r0 - same as above + "isb" ::: "memory"); + + // invalidate main Translation Lookup Buffer + asm("mcr p15, 0, %0, c8, c7, 0\n\r" + "isb" :: "r" (0) : "memory"); + + // now set TTBCR to use TTBR0 exclusively + puts("Setting TTBCR.N to 0, so that TTBR0 is used everywhere"); + + uint32_t TTBCR = 0; + asm("mcr p15, 0, %0, c2, c0, 2" :: 
"r" (TTBCR)); + + // Now do stuff with TTBR0 + TTBR_t TTBR0; + TTBR0.raw = 0; + TTBR0.fields.TTBR_TRANSLATION_TABLE_BASE_ADDRESS = + TRANSLATION_TABLE_BASE >> 14; + // rest of TTBR0 remains 0s + + asm("mcr p15, 0, %0, c2, c0, 0" :: "r" (TTBR0.raw)); + + // enable MMU + puts("enabling the MMU"); + + // redundant - we already have SCTLR contents in the variable + // asm("mrc p15, 0, %0, c1, c0, 0" : "=r" (SCTLR.raw)); + + SCTLR.fields.M = 1; + + asm("mcr p15, 0, %0, c1, c0, 0\n\r" + "isb" :: "r" (SCTLR.raw) : "memory"); +} + +#define OWNER_FREE ((void*) 0) +#define OWNER_KERNEL ((void*) 1) +#define OWNER_SPLIT ((void*) 2) + +// we want to maintain a list of free and used physical sections +struct section_node +{ + // we're going to add processes, process management and + // struct process. Then, owner will be struct process*. + void *owner; // 0 if free, 1 if used by kernel, 2 if split to pages + + // it's actually a 2-directional lists; + // end of list is marked by reference to SECTION_NULL; + // we use offsets into sections_list array instead of pointers; + uint16_t prev, next; +}; + +static struct section_node volatile *sections_list; + +static uint16_t + all_sections_count, kernel_sections_count, + split_sections_count, free_sections_count; + +// those are SECTION_NULL when the corresponding count is 0; +static uint16_t + first_free_section, first_kernel_section, first_split_section; + +void setup_pager_structures(uint32_t available_mem) +{ + all_sections_count = available_mem / SECTION_SIZE; + kernel_sections_count = PRIVILEGED_MEMORY_END / SECTION_SIZE; + free_sections_count = all_sections_count - kernel_sections_count; + split_sections_count = 0; + + sections_list = (struct section_node*) SECTIONS_LIST_START; + + first_split_section = SECTION_NULL; + + for (uint16_t i = 0; i < kernel_sections_count; i++) + sections_list[i] = (struct section_node) { + .owner = OWNER_KERNEL, + .prev = i == 0 ? SECTION_NULL : i - 1, + .next = i == kernel_sections_count - 1 ? 
SECTION_NULL : i + 1 + }; + + first_kernel_section = 0; + + for (uint16_t i = kernel_sections_count; + i < all_sections_count; i++) + sections_list[i] = (struct section_node) { + .owner = OWNER_FREE, + .prev = i == kernel_sections_count ? SECTION_NULL : i - 1, + .next = i == all_sections_count - 1 ? SECTION_NULL : i + 1 + }; + + first_free_section = kernel_sections_count; + + puts("Initialized kernel's internal structures for paging"); + prints("We have "); printdect(free_sections_count); + puts(" free sections left for use"); +} + +// return section number or SECTION_NULL in case of failure +static uint16_t claim_section(void *owner) +{ + if (!free_sections_count) + return SECTION_NULL; // failure + + uint16_t section = first_free_section; + + if (--free_sections_count) + { + uint16_t next; + + next = sections_list[section].next; + sections_list[next].prev = SECTION_NULL; + + first_free_section = next; + } + else + first_free_section = SECTION_NULL; + + if (owner == OWNER_KERNEL) + { + sections_list[first_kernel_section].prev = section; + + sections_list[section] = (struct section_node) { + .owner = owner, + .prev = SECTION_NULL, + .next = first_kernel_section + }; + + kernel_sections_count++; + + first_kernel_section = section; + } + else + sections_list[section] = (struct section_node) { + .owner = owner, + .prev = SECTION_NULL, + .next = SECTION_NULL + }; + + return section; +} + +// return values like claim_section() +uint16_t claim_and_map_section +(void *owner, uint16_t where_to_map, uint8_t access_permissions) +{ + uint16_t section = claim_section(owner); + + if (section == SECTION_NULL) + return section; + + short_section_descriptor_t volatile *section_entry = + &((short_section_descriptor_t*) + TRANSLATION_TABLE_BASE)[where_to_map]; + + short_section_descriptor_t descriptor = *section_entry; + + // set up address of section + descriptor.SECTION_BASE_ADDRESS_31_20 = section; + + // set requested permissions on section + descriptor.ACCESS_PERMISSIONS_2 = 
access_permissions >> 2; + descriptor.ACCESS_PERMISSIONS_1_0 = access_permissions & 0b011; + + // write modified descriptor to the table + *section_entry = descriptor; + + // invalidate main Translation Lookup Buffer + asm("mcr p15, 0, r1, c8, c7, 0\n\r" + "isb" ::: "memory"); + + return section; +} diff --git a/src/arm/PL1/kernel/paging.h b/src/arm/PL1/kernel/paging.h new file mode 100644 index 0000000..4ac8efa --- /dev/null +++ b/src/arm/PL1/kernel/paging.h @@ -0,0 +1,14 @@ +#ifndef PAGING_H +#define PAGING_H + +void setup_flat_map(void); + +void setup_pager_structures(uint32_t available_mem); + +#define SECTION_NULL 0xffff + +// returns section number or SECTION_NULL in case of failure +uint16_t claim_and_map_section +(void *owner, uint16_t where_to_map, uint8_t access_permissions); + +#endif // PAGING_H diff --git a/src/arm/PL1/kernel/psr.h b/src/arm/PL1/kernel/psr.h new file mode 100644 index 0000000..f300a7a --- /dev/null +++ b/src/arm/PL1/kernel/psr.h @@ -0,0 +1,88 @@ +#ifndef PSR_H +#define PSR_H + +#include <stdint.h> + +enum execution_mode { + MODE_USER = 0b10000, + MODE_FIQ = 0b10001, + MODE_IRQ = 0b10010, + MODE_SUPERVISOR = 0b10011, + MODE_MONITOR = 0b10110, + MODE_ABORT = 0b10111, + MODE_HYPERVISOR = 0b11010, + MODE_UNDEFINED = 0b11011, + MODE_SYSTEM = 0b11111, +}; + +typedef union +{ + uint32_t raw; + struct + { + uint32_t M_4_0 : 5; // bits 4:0 + uint32_t T : 1; // bit 5 + uint32_t F : 1; // bit 6 + uint32_t I : 1; // bit 7 + uint32_t A : 1; // bit 8 + uint32_t E : 1; // bit 9 + uint32_t IT_7_2 : 6; // bits 15:10 + uint32_t GE_3_0 : 4; // bits 19:16 + uint32_t Bits_23_20 : 4; // bits 23:20 + uint32_t J : 1; // bit 24 + uint32_t IT_1_0 : 2; // bits 26:25 + uint32_t Q : 1; // bit 27 + uint32_t V : 1; // bit 28 + uint32_t C : 1; // bit 29 + uint32_t Z : 1; // bit 30 + uint32_t N : 1; // bit 31 +#define PSR_MODE_4_0 M_4_0 +#define PSR_THUMB_BIT T +#define PSR_FIQ_MASKK_BIT F +#define PSR_IRQ_MASK_BIT I +#define PSR_ASYNC_ABORT_MASK_BIT A +#define 
PSR_ENDIANNESS_BIT E +#define PSR_IF_THEN_STATE_7_2 IT_7_2 +#define PSR_GREATER_THAN_OR_EQUAL_FLAGS GE_3_0 + // bits 23:20 are reserved +#define PSR_JAZELLE_BIT J +#define PSR_IF_THEN_STATE_1_0 IT_1_0 +#define PSR_CUMULATIVE_SATURATION_BIT Q +#define PSR_OVERFLOW_CONDITION_BIT V +#define PSR_CARRY_CONDITION_BIT C +#define PSR_ZERO_CONDITION_BIT Z +#define PSR_NEGATIVE_CONDITION_BIT N + } fields; +} PSR_t; + +inline static PSR_t read_CPSR(void) +{ + PSR_t CPSR; + // get content of current program status register + asm("mrs %0, cpsr" : "=r" (CPSR.raw) :: "memory"); + + return CPSR; +} + +inline static void write_CPSR(PSR_t CPSR) +{ + // set content of current program status register + asm("msr cpsr, %0" :: "r" (CPSR.raw) : "memory"); +} + +inline static PSR_t read_SPSR(void) +{ + PSR_t SPSR; + // get content of saved program status register + asm("mrs %0, spsr" : "=r" (SPSR.raw) :: "memory"); + + return SPSR; +} + +inline static void write_SPSR(PSR_t SPSR) +{ + // set content of saved program status register + asm("msr spsr, %0" :: "r" (SPSR.raw)); +} + +#endif // PSR_H diff --git a/src/arm/PL1/kernel/ramfs.c b/src/arm/PL1/kernel/ramfs.c new file mode 100644 index 0000000..cc66b4c --- /dev/null +++ b/src/arm/PL1/kernel/ramfs.c @@ -0,0 +1,65 @@ +// driver for the read-only ramfs +// see makefs.c for details + +#include <stdint.h> +#include "ramfs.h" + +static int strcmp(char const *str1, char const *str2) +{ + while (1) + { + int c1 = (unsigned char) *str1, c2 = (unsigned char) *str2; + + if (!c1 && !c2) + return 0; + + if (c1 != c2) + return c1 - c2; + + str1++; str2++; + } +} + +static uint32_t strlen(char const *str1) +{ + uint32_t len = 0; + + while (str1[len]) + len++; + + return len; +} + +static inline char *align4(char *addr) +{ + return (char*) (((uint32_t) addr - 1) & ~0b11) + 4; +} + +int find_file(void *ramfs, char *filename, struct ramfile *buf) +{ + char *fs_file = ramfs; + + while (*fs_file) + { + uint32_t *fs_file_size = (uint32_t*) + align4(fs_file + 
strlen(fs_file) + 1); + + char *fs_file_contents = (char*) (fs_file_size + 1); + + if (!strcmp(fs_file, filename)) + { + buf->file_size = *fs_file_size; + + buf->file_name = fs_file; + + buf->file_contents = fs_file_contents; + + return 0; + } + + // move to the next file in ramfs + fs_file = align4(fs_file_contents + *fs_file_size); + } + + return -1; // reached end of ramfs; file not found +} diff --git a/src/arm/PL1/kernel/ramfs.h b/src/arm/PL1/kernel/ramfs.h new file mode 100644 index 0000000..cf45736 --- /dev/null +++ b/src/arm/PL1/kernel/ramfs.h @@ -0,0 +1,16 @@ +#ifndef RAMFS_H +#define RAMFS_H + +struct ramfile +{ + char *file_name; + uint32_t file_size; + char *file_contents; +}; + +// search for file named filename in ramfs; +// If found - return 0 and fill buf fields with file's info. +// Otherwise return a non-zero value. +int find_file(void *ramfs, char *filename, struct ramfile *buf); + +#endif // RAMFS_H diff --git a/src/arm/PL1/kernel/scheduler.c b/src/arm/PL1/kernel/scheduler.c new file mode 100644 index 0000000..141ba1d --- /dev/null +++ b/src/arm/PL1/kernel/scheduler.c @@ -0,0 +1,156 @@ +#include "scheduler.h" +#include "uart.h" +#include "strings.h" +#include "armclock.h" +#include "memory.h" +#include "io.h" + +// for now we only have 1 process in "queue" +// later there is going to be an actual queue +uint32_t PL0_regs[14] = {0}; // contains r0-r12, pc +uint32_t PL0_sp; +uint32_t PL0_lr; + +PSR_t PL0_PSR; // to be put into spsr when jumping to user mode + +PSR_t PL1_PSR; + +// when set, it means process used GETCHAR system call and once we get +// a char, we have to return it +_Bool waiting_for_input = 0; + +// when set, it means process used PUTCHAR system call and once we +// manage to put the char, we can return to process +_Bool waiting_for_output = 0; +char waiting_output; + +// 0 is kernel code in system mode is being run +// 1 if our process is being run +// later when we have many processes and this will hold process id +uint32_t 
current_process; + +void setup_scheduler_structures(void) +{ + PL1_PSR = read_CPSR(); +} + +void scheduler_try_output(void) +{ + if (waiting_for_output) + if (!putchar_non_blocking(waiting_output)) + { + waiting_for_output = 0; + uart_send_irq_disable(); + } +} + +void scheduler_try_input(void) +{ + if (waiting_for_input) + if ((PL0_regs[0] = getchar_non_blocking()) != (uint32_t) (-1)) + { + waiting_for_input = 0; + uart_recv_irq_disable(); + } +} + +void __attribute__((noreturn)) +schedule_new(uint32_t pc, uint32_t sp) +{ + PL0_regs[13] = pc; + PL0_sp = sp; + PL0_lr = 0; + + PL0_PSR = read_CPSR(); + PL0_PSR.fields.PSR_MODE_4_0 = MODE_USER; + PL0_PSR.fields.PSR_IRQ_MASK_BIT = 0; + + schedule(); +} + +void __attribute__((noreturn)) +schedule_wait_for_output(uint32_t regs[14], char c) +{ + if (current_process == 0) + error("SYSTEM tried waiting for output!"); + + waiting_for_output = 1; + waiting_output = c; + uart_send_irq_enable(); + + schedule_save_context(regs); +} + +void __attribute__((noreturn)) +schedule_wait_for_input(uint32_t regs[14]) +{ + if (current_process == 0) + error("SYSTEM tried waiting for input!"); + + waiting_for_input = 1; + uart_recv_irq_enable(); + + schedule_save_context(regs); +} + +void __attribute__((noreturn)) +schedule_save_context(uint32_t regs[14]) +{ + memcpy(PL0_regs, regs, sizeof(PL0_regs)); + + PL0_PSR = read_SPSR(); + + asm volatile("cps %[sysmode]\n\r" + "isb\n\r" + "mov %[sp_transfer], sp\n\r" + "mov %[lr_transfer], lr\n\r" + "cps %[supmode]\n\r" + "isb\n\r" : + [sp_transfer]"=r" (PL0_sp), + [lr_transfer]"=r" (PL0_lr): + [sysmode]"I" (MODE_SYSTEM), + [supmode]"I" (MODE_SUPERVISOR) : "memory"); + + schedule(); +} + +void __attribute__((noreturn)) schedule(void) +{ + current_process = 0; + armclk_disable_timer_irq(); + + if (waiting_for_input || waiting_for_output) + { + PSR_t new_CPSR = PL1_PSR; + new_CPSR.fields.PSR_IRQ_MASK_BIT = 0; + + write_CPSR(new_CPSR); + + asm volatile("wfi"); + + __builtin_unreachable(); + } + + 
current_process = 1; + + asm volatile("cps %[sysmode]\n\r" + "isb\n\r" + "mov sp, %[stackaddr]\n\r" + "mov lr, %[linkaddr]\n\r" + "cps %[supmode]\n\r" + "isb" :: + [sysmode]"I" (MODE_SYSTEM), + [supmode]"I" (MODE_SUPERVISOR), + [stackaddr]"r" (PL0_sp), + [linkaddr]"r" (PL0_lr) : "memory"); + + armclk_irq_settimeout(0x00100000); + armclk_enable_timer_irq(); + + write_SPSR(PL0_PSR); + + asm volatile("ldm %0, {r0 - r12, pc} ^" :: + "r" (PL0_regs) : "memory"); + + __builtin_unreachable(); +} diff --git a/src/arm/PL1/kernel/scheduler.h b/src/arm/PL1/kernel/scheduler.h new file mode 100644 index 0000000..8c0f569 --- /dev/null +++ b/src/arm/PL1/kernel/scheduler.h @@ -0,0 +1,32 @@ +#ifndef SCHEDULER_H +#define SCHEDULER_H + +#include <stdint.h> + +#include "psr.h" + +extern PSR_t PL1_PSR; + +void setup_scheduler_structures(void); + +// to be called by irq handler when respective uart interrupt happens +void scheduler_try_output(void); + +// to be called by irq handler when respective uart interrupt happens +void scheduler_try_input(void); + +void __attribute__((noreturn)) +schedule_wait_for_output(uint32_t regs[14], char c); + +void __attribute__((noreturn)) +schedule_wait_for_input(uint32_t regs[14]); + +void __attribute__((noreturn)) +schedule_save_context(uint32_t regs[14]); + +void __attribute__((noreturn)) schedule(void); + +void __attribute__((noreturn)) +schedule_new(uint32_t pc, uint32_t sp); + +#endif diff --git a/src/arm/PL1/kernel/setup.c b/src/arm/PL1/kernel/setup.c new file mode 100644 index 0000000..bf7c9a1 --- /dev/null +++ b/src/arm/PL1/kernel/setup.c @@ -0,0 +1,116 @@ +#include "uart.h" +#include "io.h" +#include "demo_functionality.h" +#include "paging.h" +#include "atags.h" +// for POWER_OF_2() macro... 
perhaps the macro should be moved +#include "memory.h" +#include "armclock.h" +#include "scheduler.h" + +void setup(uint32_t r0, uint32_t machine_type, + struct atag_header *atags) +{ + uart_init(); + + // When we attach screen session after loading kernel with socat + // we miss kernel's greeting... So we'll make the kernel wait for + // one char we're going to send from within screen + getchar(); + + puts("Hello, kernel World!"); + + prints("ARM machine type: 0x"); printhext(machine_type); puts(""); + + uint32_t memory_size = 0; + + // value 3 introduced by stage1 code means no atags was found + if (r0 == 3) + { + puts ("No ATAGS was found!"); + } + else + { + prints("ATAGS copied to 0x"); + printhex((uint32_t) atags); puts(""); + + puts("__ ATAGS contents __"); + + print_atags(atags); + + puts("__ end of ATAGS contents __"); + + memory_size = find_memory_size(atags); + } + + if (memory_size) + { + char *unit; + uint32_t size_in_unit; + + if (memory_size % POWER_OF_2(10)) + { + unit = "B"; + size_in_unit = memory_size; + } + else if (memory_size % POWER_OF_2(20)) + { + unit = "KB"; + size_in_unit = memory_size / POWER_OF_2(10); + } + else if (memory_size % POWER_OF_2(30)) + { + unit = "MB"; + size_in_unit = memory_size / POWER_OF_2(20); + } + else + { + unit = "GB"; + size_in_unit = memory_size / POWER_OF_2(30); + } + + prints ("memory available: "); + printdect (size_in_unit); + puts (unit); + } + else + { + // Most Pis have more, but qemu might give us little + puts("Couldn't determine available memory - assuming 192MB"); + memory_size = 192 * POWER_OF_2(20); + } + + // assume we need at least one section for PL0 + if (memory_size < PRIVILEGED_MEMORY_END + SECTION_SIZE) + { + puts("Not enough memory to continue"); + while (1); + } + + // prints some info + demo_paging_support(); + + // prints some info + demo_current_mode(); + + setup_pager_structures(memory_size); + + // prints some info and sets upp translation table, turns on MMU + setup_flat_map(); + + 
puts("Initializing clock"); + // sets some general settings for arm timer + armclk_init(); + + puts("Setting up scheduler's internal structures"); + setup_scheduler_structures(); + + puts("Switching uart to use irqs"); + + // note, that kernel's puts() is still going to use blocking io + uart_irq_enable(); + + // prints some info and sets up a section for PL0 code, loads a blob + // there, then runs scheduler... never, ever, ever returns + demo_setup_PL0(); +} diff --git a/src/arm/PL1/kernel/translation_table_descriptors.h b/src/arm/PL1/kernel/translation_table_descriptors.h new file mode 100644 index 0000000..981c3c7 --- /dev/null +++ b/src/arm/PL1/kernel/translation_table_descriptors.h @@ -0,0 +1,180 @@ +#ifndef TRANSLATION_TABLE_DESCRIPTORS_H +#define TRANSLATION_TABLE_DESCRIPTORS_H + +#include <stdint.h> + +// ARM lets you choose between 32-bit abd 64-bit translation table +// descriptors (called short and long descriptors respectively). +// The format of the descriptor differs depending on what it describes +// (section, supersection, a page table, etc...) and table of which +// level of lookup it belongs to. + +// Even in case of descriptor of a specified type (e.g. short-format +// section descriptor), a given field inside it may have different +// meanings depending on settings in coprocessor registers... (yeah, ARM +// looks a bit messy... 
all for backward compatibility, i guess) + + +////// Here are the definitions for short-format descriptors + +//// short-format page table descriptor + +typedef struct +{ + uint32_t Bits_1_0 : 1; // bits 1:0 + uint32_t PXN : 1; // bit 2 + uint32_t NS : 1; // bit 3 + uint32_t SBZ : 1; // bit 4 + uint32_t Domain_3_0 : 4; // bits 8:5 + uint32_t Bit_9 : 1; // bit 9 + uint32_t Bits_31_10 : 22; // bits 31:10 +#define DESCRIPTOR_TYPE_1_0 Bits_1_0 +#define PRIVILEGED_EXECUTE_NEVER_BIT PXN +#define NON_SECURE_BIT NS + // me thinks SBZ means "should be zero", + // but me sees no point #defining it +#define DOMAIN_3_0 Domain_3_0 +#define IMPLEMENTATION_DEFINED_BIT Bit_9 +#define PAGE_TABLE_BASE_ADDRESS_31_10 Bits_31_10 +} short_page_table_descriptor_t; + + +//// short-format section descriptor + +typedef struct +{ + uint32_t PXN : 1; // bit 0 + uint32_t Bit_1 : 1; // bit 1 + uint32_t B : 1; // bit 2 + uint32_t C : 1; // bit 3 + uint32_t XN : 1; // bit 4 + uint32_t Domain_3_0 : 4; // bits 8:5 + uint32_t Bit_9 : 1; // bit 9 + uint32_t AP_1_0 : 2; // bit 11:10 + uint32_t TEX_2_0 : 3; // bits 14:12 + uint32_t AP_2 : 1; // bit 15 + uint32_t S : 1; // bit 16 + uint32_t nG : 1; // bit 17 + uint32_t Bit_18 : 1; // bit 18 + uint32_t NS : 1; // bit 19 + uint32_t PA_31_20 : 12; // bits 31:20 + // some of these are already defined the same for page table + //#define PRIVILEGED_EXECUTE_NEVER_BIT PXN +#define DESCRIPTOR_TYPE_1 Bit_1 +#define BUFFERABLE_BIT B +#define CACHEABLE_BIT C +#define EXECUTE_NEVER_BIT XN + //#define DOMAIN_3_0 Domain_3_0 + //#define IMPLEMENTATION_DEFINED_BIT Bit_9 +#define ACCESS_PERMISSIONS_1_0 AP_1_0 +#define TYPE_EXTENSION_2_0 TEX_2_0 +#define ACCESS_PERMISSIONS_2 AP_2 +#define SHAREABLE_BIT S +#define NON_GLOBAL_BIT nG +#define SECTION_OR_SUPERSECTION_BIT Bit_18 + //#define NON_SECURE_BIT NS +#define SECTION_BASE_ADDRESS_31_20 PA_31_20 +} short_section_descriptor_t; + + +//// short-format supersection descriptor + +typedef struct +{ + uint32_t PXN : 1; // bit 
0 + uint32_t Bit_1 : 1; // bit 1 + uint32_t B : 1; // bit 2 + uint32_t C : 1; // bit 3 + uint32_t XN : 1; // bit 4 + uint32_t PA_39_36 : 4; // bits 8:5 + uint32_t Bit_9 : 1; // bit 9 + uint32_t AP_1_0 : 2; // bit 11:10 + uint32_t TEX_2_0 : 3; // bits 14:12 + uint32_t AP_2 : 1; // bit 15 + uint32_t S : 1; // bit 16 + uint32_t nG : 1; // bit 17 + uint32_t Bit_18 : 1; // bit 18 + uint32_t NS : 1; // bit 19 + uint32_t PA_35_32 : 4; // bits 23:20 + uint32_t PA_31_24 : 8; // bits 31:24 + // most of these are already defined the same for section + //#define PRIVILEGED_EXECUTE_NEVER_BIT PXN + //#define DESCRIPTOR_TYPE_1 Bit_1 + //#define BUFFERABLE_BIT B + //#define CACHEABLE_BIT C + //#define EXECUTE_NEVER_BIT XN +#define SUPERSECTION_BASE_ADDRESS_39_36 PA_39_36 + //#define IMPLEMENTATION_DEFINED_BIT Bit_9 + //#define ACCESS_PERMISSIONS_1_0 AP_1_0 + //#define TYPE_EXTENSION_2_0 TEX_2_0 + //#define ACCESS_PERMISSIONS_2 AP_2 + //#define SHAREABLE_BIT S + //#define NON_GLOBAL_BIT nG + //#define SECTION_OR_SUPERSECTION_BIT Bit_18 + //#define NON_SECURE_BIT NS +#define SUPERSECTION_BASE_ADDRESS_35_32 PA_35_32 +#define SUPERSECTION_BASE_ADDRESS_31_24 PA_31_24 +} short_supersection_descriptor_t; + + +//// possible access permission field values + +// How AP[2:0] is used depends on settings in SCTLR.AFE + +// Meaning of #define'd names below: +// RW - read-write +// RO - read-only +// PL1 - a given permission applies to privilege level PL1 +// PL2 - a given permission applies to privilege level PL2 +// ALL - a given permission applies to both privilege levels +// If only a permission for one privilege level is given in the name, +// it means the other one has no access. 
+ +// When SCTLR.AFE is 0 (access flag not used) and short-format +// descritor table is used, the following access permission control +// schema for AP[2:0] is used: +#define AP_2_0_MODEL_NO_ACCESS 0b000 +#define AP_2_0_MODEL_RW_PL1 0b001 +#define AP_2_0_MODEL_RW_PL1_RO_PL0 0b010 +#define AP_2_0_MODEL_RW_ALL 0b011 +#define AP_2_0_MODEL_RESERVED 0b100 +#define AP_2_0_MODEL_RO_PL1 0b101 +#define AP_2_0_MODEL_RO_ALL_DEPRECATED 0b110 // use 0b111 instead +#define AP_2_0_MODEL_RO_ALL 0b111 // reserved in VMSAv6 +// TODO: the #define's of RO_ALL and reserved could be done +// conditionally depending on the VMSA version available (either give +// the programmer #including this the possibility to #define their +// VMSA version or assume the VMSA version respective to the ARM +// version we're compiling against) + + +//// Values for bit18, that determines whether a descriptor describes +// section or supersection: +#define DESCRIBES_SECTION 0b0 +#define DESCRIBES_SUPERSECTION 0b1 + + +//// short-format descriptor generic type + +typedef union +{ + uint32_t raw; + uint8_t descriptor_type : 2; + + short_page_table_descriptor_t page_table_fields; + short_section_descriptor_t section_fields; + short_supersection_descriptor_t supersection_fields; +} short_descriptor_lvl1_t; + + +//// possible values of descriptor_type field: + +#define SHORT_DESCRIPTOR_INVALID 0b00 +#define SHORT_DESCRIPTOR_PAGE_TABLE 0b01 +#define SHORT_DESCRIPTOR_SECTION_OR_SUPERSECTION 0b10 +#define SHORT_DESCRIPTOR_SECTION_OR_SUPERSECTION_PXN 0b11 +// on an implementation that does not support the PXN attribute +// 0b11 should not be used +#define SHORT_DESCRIPTOR_RESERVED 0b11 + +#endif // TRANSLATION_TABLE_DESCRIPTORS_H diff --git a/src/arm/PL1/loader/loader_stage1.S b/src/arm/PL1/loader/loader_stage1.S new file mode 100644 index 0000000..69d78c5 --- /dev/null +++ b/src/arm/PL1/loader/loader_stage1.S @@ -0,0 +1,55 @@ +/* arm mode, cortex-a7 compatibility + * + * _boot is entry point for the loader. 
+ * + * Loader copies its embedded stage 2 to address 0x4000 + * and jumps to it. Registers r0 - r2 are arguments for the kernel + * and should be left intact. + */ + +.global _boot +_boot: + // Only let the first core execute + mrc p15, 0, r3, c0, c0, 5 + and r3, r3, #3 + cmp r3, #0 + beq proceed + // Park every other core for good. wfe returns on any event (or at + // once if the event register is already set), so it has to be + // re-issued in a loop - otherwise the core would fall through + // into the copy code below. + // This is still a kind of bluff - races can theoretically occur + // when the main core overwrites this part of memory +park: + wfe + b park + +proceed: + // copy stage2 of the loader to address 0x4000 + + // first, load address of stage2_start to r3 (a PIC way) + adr r3, stage2_start + + // load destination address for stage2 code to r4 + mov r4, #0x4000 + + // load blob size to r5 + mov r5, #(stage2_end - stage2_start) + + // r6 is the counter - counts the bytes copied + mov r6, #0 + + // each word of the blob is loaded to r7 and stored + // from r7 to its destination in a loop +loop: + ldr r7, [r3, r6] + str r7, [r4, r6] + add r6, r6, #4 + cmp r6, r5 + blo loop + + // Initialize the stack + // _stack_top is defined in loader_stage1_linker.ld + ldr sp, =_stack_top + + // Call stage2 of the loader (branch to 0x4000) + bx r4 + +.align 4 +stage2_start: + .incbin "loader_stage2.img" +stage2_end: diff --git a/src/arm/PL1/loader/loader_stage1_linker.ld b/src/arm/PL1/loader/loader_stage1_linker.ld new file mode 100644 index 0000000..711fcbf --- /dev/null +++ b/src/arm/PL1/loader/loader_stage1_linker.ld @@ -0,0 +1,16 @@ +ENTRY(_boot) + +SECTIONS +{ + /* see linker.ld for details */ + .
= 0x2000000; + + __start = .; + loader_stage1 : + { + KEEP(loader_stage1.o) + } + __end = .; + + _stack_top = 0x8000; +} diff --git a/src/arm/PL1/loader/loader_stage2.c b/src/arm/PL1/loader/loader_stage2.c new file mode 100644 index 0000000..fc3ae1c --- /dev/null +++ b/src/arm/PL1/loader/loader_stage2.c @@ -0,0 +1,33 @@ +#include <stddef.h> +#include <stdint.h> +#include "uart.h" +#include "io.h" +#include "global.h" + +void *const kernel_load_addr = ((void*) 0x8000); + +// Stage 2 of the loader: brings up the UART, reads the kernel size +// (4 bytes, little endian) followed by that many bytes of kernel image +// into kernel_load_addr, and finally calls the image, passing +// r0, r1 and atags through unchanged. +void _stage2_main(uint32_t r0, uint32_t r1, uint32_t atags) +{ + uart_init(); + + // get kernel size via uart (little endian); the uint8_t casts keep + // bytes >= 0x80 from being sign-extended should plain char be signed + uint32_t b0, b1, b2, b3; + + b0 = (uint8_t) getchar(); + b1 = (uint8_t) getchar(); + b2 = (uint8_t) getchar(); + b3 = (uint8_t) getchar(); + + uint32_t kernel_size = b0 | (b1 << 8) | (b2 << 16) | (b3 << 24); + + // load kernel at kernel_load_addr + char *dst = kernel_load_addr, *end = dst + kernel_size; + + while (dst < end) + *(dst++) = getchar(); + + // jump to kernel + ((void(*)(uint32_t, uint32_t, uint32_t)) kernel_load_addr) + (r0, r1, atags); +} + diff --git a/src/arm/PL1/loader/loader_stage2_linker.ld b/src/arm/PL1/loader/loader_stage2_linker.ld new file mode 100644 index 0000000..33e79e9 --- /dev/null +++ b/src/arm/PL1/loader/loader_stage2_linker.ld @@ -0,0 +1,16 @@ +ENTRY(_stage2_main) + +SECTIONS +{ + /* see loader_stage1.S for details */ + . = 0x4000; + + __start = .; + loader_stage2 : + { + KEEP(loader_stage2.o(.text)) + loader_stage2.o + uart.o + } + __end = .; +} |