diff options
-rw-r--r-- | Makefile | 29 | ||||
-rw-r--r-- | boot.S | 27 | ||||
-rw-r--r-- | demo_functionality.c | 26 | ||||
-rw-r--r-- | interrupt_vector.S | 53 | ||||
-rw-r--r-- | interrupts.c | 55 | ||||
-rw-r--r-- | kernel_stage1.S | 67 | ||||
-rw-r--r-- | kernel_stage1.ld | 27 | ||||
-rw-r--r-- | kernel_stage2.ld | 52 | ||||
-rw-r--r-- | linker.ld | 47 | ||||
-rw-r--r-- | memory.h | 43 | ||||
-rw-r--r-- | setup.c (renamed from kernel.c) | 11 |
11 files changed, 288 insertions, 149 deletions
@@ -1,8 +1,10 @@ -CFLAGS=-mcpu=cortex-a7 -ffreestanding -std=gnu11 -Wall -Wextra -O2 -fPIC -I. +CFLAGS=-mcpu=cortex-a7 -ffreestanding -std=gnu11 -Wall -Wextra -O2 -I. ELFFLAGS=-nostdlib -lgcc ARM_OBJECTS=kernel.o paging.o demo_functionality.o PL0_test.o uart.o loader_stage1.o loader_stage2.o +KERNEL_STAGE2_OBJECTS=setup.o interrupt_vector.o interrupts.o uart.o demo_functionality.o paging.o ramfs_embeddable.o ramfs.o + RAMFS_FILES=PL_0_test.img all : kernel.img @@ -10,20 +12,32 @@ all : kernel.img %.o : %.c arm-none-eabi-gcc $(CFLAGS) -c $^ -o $@ -%.o : %.S - arm-none-eabi-as -mcpu=cortex-a7 $^ -o $@ - %.img : %.elf arm-none-eabi-objcopy $^ -O binary $@ +%.o : %.S + arm-none-eabi-as -mcpu=cortex-a7 $^ -o $@ + %_embeddable.o : %.img arm-none-eabi-objcopy -I binary -O elf32-littlearm -B arm --rename-section .data=.rodata $^ $@ PL_0_test.elf : PL0_test.o uart.o arm-none-eabi-gcc -T PL0_test.ld -o $@ $(ELFFLAGS) $^ -kernel.elf : boot.o kernel.o uart.o demo_functionality.o paging.o interrupt_vector.o interrupts.o ramfs_embeddable.o ramfs.o - arm-none-eabi-gcc -T linker.ld -o $@ $(ELFFLAGS) $^ +kernel_stage1.o : kernel_stage1.S kernel_stage2.img + arm-none-eabi-as -mcpu=cortex-a7 $< -o $@ + +kernel.elf : kernel_stage1.ld kernel_stage1.o + arm-none-eabi-gcc -T $< -o $@ $(ELFFLAGS) kernel_stage1.o + +kernel.img : kernel.elf + arm-none-eabi-objcopy $^ -O binary $@ + +kernel_stage2.elf : kernel_stage2.ld $(KERNEL_STAGE2_OBJECTS) + arm-none-eabi-gcc -T $< -o $@ $(ELFFLAGS) $(KERNEL_STAGE2_OBJECTS) + +#kernel.elf : boot.o kernel.o uart.o demo_functionality.o paging.o interrupt_vector.o interrupts.o ramfs_embeddable.o ramfs.o +# arm-none-eabi-gcc -T linker.ld -o $@ $(ELFFLAGS) $^ loader_stage2.elf : loader_stage2.o uart.o arm-none-eabi-gcc -T loader_stage2_linker.ld -o $@ $(ELFFLAGS) $^ @@ -42,6 +56,9 @@ loader.img : loader.elf qemu-elf : kernel.elf qemu-system-arm -m 256 -M raspi2 -serial stdio -kernel $^ +qemu-img : kernel.img + qemu-system-arm -m 256 -M raspi2 -serial stdio -kernel $^ + qemu-bin : loader.img kernel.img pipe_image ./pipe_image --stdout | qemu-system-arm -m 256 -M raspi2 -serial stdio -kernel $< @@ -1,27 +0,0 @@ -// armv7 mode - -// Entry point for the kernel. -// r15 -> should begin execution at 0x8000. -// r0 -> 0x00000000 -// r1 -> 0x00000C42 -// r2 -> 0x00000100 - start of ATAGS -// preserve these registers as argument for kernel_main - -.global _boot // make entry point label global -_boot: - // Only let the first core execute - mrc p15, 0, r3, c0, c0, 5 - and r3, r3, #3 - cmp r3, #0 - beq proceed - // this is a kind of blef - races can theoretically still occur - // when the main core overwrites this part of memory - wfe - -proceed: - // Initialize the stack (_stack_top is defined in linker.ld) - ldr sp, =_stack_top - - // Call kernel_main - ldr r3, =kernel_main - bx r3 diff --git a/demo_functionality.c b/demo_functionality.c index 4b002d6..420639b 100644 --- a/demo_functionality.c +++ b/demo_functionality.c @@ -53,7 +53,7 @@ void demo_current_mode(void) uart_puts(mode_name); } -#define TRANSLATION_TABLE \ +#define TRANSLATION_TABLE \ ((short_section_descriptor_t volatile*) TRANSLATION_TABLE_BASE) extern char @@ -144,29 +144,7 @@ void demo_go_unprivileged(void) write_SPSR(new_SPSR); uart_puts("All ready, jumping to PL0 code\n\r"); - + asm volatile("ldm %0, {r0 - r15} ^" :: "r" (PL0_regs)); } - -extern char - __interrupts_start, - __interrupts_end, - __interrupts_size; - -extern void (*volatile system_reentry_point)(void); - -void system_reentry(void) -{ - uart_puts("re-entered system"); - while(1); -} - -void demo_setup_interrupts(void) -{ - system_reentry_point = system_reentry; - - for (size_t i = 0; i < (size_t) &__interrupts_size; i++) - ((volatile char*) 0)[i] = - (&__interrupts_start)[i]; -} diff --git a/interrupt_vector.S b/interrupt_vector.S index d20bf6d..6037b7c 100644 --- a/interrupt_vector.S +++ b/interrupt_vector.S @@ -1,22 +1,45 @@ -.section ".interrupts.vector" - -.global abort_handler -.local generic_handler -.global _interrupt_vectors _interrupt_vectors: - b generic_handler - b generic_handler - b generic_handler + b reset_handler_caller + b undef_handler_caller + b svc_handler_caller b abort_handler_caller b abort_handler_caller - b generic_handler - b generic_handler - -.section ".interrupts.text" + b generic_handler_caller + b irq_handler_caller + b fiq_handler_caller + +reset_handler_caller: + ldr sp, =_stack_top + ldr r5, =reset_handler + bx r5 + +undef_handler_caller: + ldr sp, =_stack_top + ldr r5, =undefined_instruction_vector + bx r5 -generic_handler: - b generic_handler +svc_handler_caller: + ldr sp, =_stack_top + ldr r5, =supervisor_call_handler + bx r5 + abort_handler_caller: - mov sp, #0x8000 + ldr sp, =_stack_top ldr r5, =abort_handler bx r5 + +generic_handler_caller: + ldr sp, =_stack_top + ldr r5, =generic_handler + bx r5 + +irq_handler_caller: + ldr sp, =_stack_top + ldr r5, =irq_handler + bx r5 + +fiq_handler_caller: + ldr sp, =_stack_top + ldr r5, =fiq_handler + bx r5 + diff --git a/interrupts.c b/interrupts.c index 6952f89..1b0590a 100644 --- a/interrupts.c +++ b/interrupts.c @@ -1,10 +1,20 @@ #include "uart.h" -/** - @brief The undefined instruction interrupt handler - If an undefined instruction is encountered, the CPU will start - executing this function. Just trap here as a debug solution. -*/ +void setup(void); + +void reset_handler(void) +{ + static _Bool setup_done; + + if (!setup_done) + setup(); + + setup_done = 1; + + // TODO do something here + while(1); +} + void __attribute__((interrupt("UNDEF"))) __attribute__((section(".interrupts.text"))) @@ -17,13 +27,40 @@ undefined_instruction_vector(void) } } -void __attribute__((section(".interrupts.data"))) -(*system_reentry_point) (void); +void supervisor_call_handler(void) +{ + uart_puts("something svc happened\n\r"); + + while(1); +} void __attribute__((interrupt("ABORT"))) -__attribute__((section(".interrupts.text"))) abort_handler(void) { - system_reentry_point(); + uart_puts("re-entered system\n\r"); + + while(1); +} + +void generic_handler(void) +{ + uart_puts("something weird happened\n\r"); + + while(1); +} + +void irq_handler(void) +{ + uart_puts("irq happened\n\r"); + + while(1); +} + +void fiq_handler(void) +{ + uart_puts("fiq happened\n\r"); + + while(1); } + diff --git a/kernel_stage1.S b/kernel_stage1.S new file mode 100644 index 0000000..1e0f614 --- /dev/null +++ b/kernel_stage1.S @@ -0,0 +1,67 @@ +/* arm mode, cortex-a7 compatibility + * + * _boot is entry point for the kernel. + * + * Kernel copies it's embedded stage 2 to address 0x0 and jumps to + * it (to the reset handler). Registers r0 - r2 are arguments for + * the kernel, but we're not using them for now. + * + * This file is based on (and almost identical with) loader_stage1.S + */ + +.global _boot +_boot: + // Only let the first core execute + mrc p15, 0, r3, c0, c0, 5 + and r3, r3, #3 + cmp r3, #0 + beq proceed + // this is a kind of blef - races can theoretically still + // occur when the main core overwrites this part of memory + wfe + +proceed: + // copy stage2 of the kernel to address 0x0 + + // first, load address of stage2_start to r3 (a PIC way) + adr r3, stage2_start + + // load destination address for stage2 code to r4 + mov r4, #0 + + // load blob size to r5 + // The size might get too big for an immediate value, so + // we load it from memory. + adr r5, blob_size + ldr r5, [r5] + + // r6 is the counter - counts the bytes copied + mov r6, #0 + + // This initial piece of code might get overwritten when we + // copy stage2, so the actual copying loop shall be after + // stage2 blob. We want this asm code to be PIC, so we're + // computing address of stage2_end into r7. + add r7, r3, r5 + bx r7 + +blob_size: + .word stage2_end - stage2_start + +.align 4 +stage2_start: + .incbin "kernel_stage2.img" +stage2_end: + + // each word of the blob is loaded to r7 and stored + // from r7 to it's destination in a loop +loop: + ldr r7, [r3, r6] + str r7, [r4, r6] + add r6, r6, #4 + cmp r6, r5 + blo loop + + // Call stage2 of the kernel (branch to 0x0, + // which is the reset handler). + bx r4 diff --git a/kernel_stage1.ld b/kernel_stage1.ld new file mode 100644 index 0000000..3130634 --- /dev/null +++ b/kernel_stage1.ld @@ -0,0 +1,27 @@ +ENTRY(_boot) /* defined in boot.S; qemu needs it to run elf file */ + +/* Code starts at 0x8000 - that's where RPis in 32-bit mode load + * kernel at. My experiments do, however, show, that qemu emulating + * RPi2 loads the kernel at 0x10000! (took some pain to find out). + * rpi-open-firmware, on the other hand, loads kernel at 0x2000000! + * This is not really a problem, since: + * 1. We can use our bootloader to load the kernel at 0x8000 + * 2. We've rewritten stage 1 of both bootloader and kernel in + * careful assembly, so that they should work regardless of + * where they are loaded. + * 3. In qemu, we can load kernel.elf instead of raw binary + * (qemu will do the right thing then) + */ + +SECTIONS +{ + + . = 0x8000; + + __start = .; + .kernel_stage1 : + { + KEEP(kernel_stage1.o) + } + __end = .; +} diff --git a/kernel_stage2.ld b/kernel_stage2.ld new file mode 100644 index 0000000..d3a23bf --- /dev/null +++ b/kernel_stage2.ld @@ -0,0 +1,52 @@ +/* This sesond stage of the kernel is run from address 0x0 */ + +TRANSLATION_TABLE_SIZE = 4096 * 4; +MMU_SECTION_SIZE = 1 << 20; + +SECTIONS +{ + + . = 0x0; + + __start = .; + .kernel_stage2 : + { + KEEP(interrupt_vector.o) + . = ALIGN(4); + ramfs_embeddable.o + (*) + } + __end = .; + + . = ALIGN(1 << 14); + + .translation_table (NOLOAD) : + { + _translation_table_start = .; + + . = . + TRANSLATION_TABLE_SIZE; + + _translation_table_end = .; + } + + . = ALIGN(1 << 20); + . = . + MMU_SECTION_SIZE; + + .stack (NOLOAD) : + { + _stack_start = .; + + . = . + MMU_SECTION_SIZE; + + _stack_top = .; + } + + .unprivileged_memory (NOLOAD) : + { + _unprivileged_memory_start = .; + + . = . + MMU_SECTION_SIZE; + + _unprivileged_memory_end = .; + } +} diff --git a/linker.ld b/linker.ld deleted file mode 100644 index 444bbf6..0000000 --- a/linker.ld +++ /dev/null @@ -1,47 +0,0 @@ -ENTRY(_boot) /* defined in boot.S; qemu needs it to run elf file */ - -SECTIONS -{ - /* Starts at 0x8000 - that's where RPis in 32-bit mode load */ - /* kernel at. My experiments do, however, show, that qemu */ - /* emulating RPi2 loads the kernel at 0x10000! (took some pain */ - /* to find out). rpi-open-firmware, on the other hand, loads */ - /* kernel at 0x2000000! */ - /* This is not really a problem, since: */ - /* 1. We can use our bootloader to load the kernel at 0x8000 */ - /* 2. Stage 1 of the bootloader is written in careful */ - /* assembly, so that the loader itself should work */ - /* regardless of where it is loaded. */ - /* 3. In qemu, we can load kernel.elf instead of raw binary */ - /* (qemu will do the right thing then) */ - - . = 0x8000; - - /* RPi in 64-bit mode uses address 0x80000 instead */ - - __start = .; - .kernel : - { - __kernel_start = .; - KEEP(boot.o) - . = ALIGN(4); - ramfs_embeddable.o - *(EXCLUDE_FILE (libkernel.o interrupt_vector.o interrupts.o) *) - __kernel_end = .; - } - __kernel_size = __kernel_end - __kernel_start; - - .interrupts : - { - __interrupts_start = .; - KEEP(*(.interrupts.vector)) - interrupt_vector.o - interrupts.o - __interrupts_end = .; - } - __interrupts_size = __interrupts_end - __interrupts_start; - - __end = .; - - _stack_top = __start; -} @@ -1,7 +1,10 @@ #ifndef MEMORY_H #define MEMORY_H -#define POWER_OF_2(EXP) (((uint32_t) 1) << EXP) +// These macros were heavily used b4 I moved all the address +// computation to the linker script. Now I'm just keeping them +// in case they're needed for something else :) +#define POWER_OF_2(EXP) (((size_t) 1) << EXP) #define ALIGN_POWER_OF_2(ADDR, EXP) \ (((ADDR - 1) & ~(POWER_OF_2(EXP) - 1)) + POWER_OF_2(EXP)) @@ -10,33 +13,45 @@ #define ALIGN_SECTION(ADDR) ALIGN_POWER_OF_2(ADDR, 20) -#define INTERRUPT_VECTOR_TABLE_START ((uint32_t) 0x0) -#define STACK_START ((uint32_t) 0x4000) -#define STACK_END ((uint32_t) 0x8000) +// memory layout + +#define INTERRUPT_VECTOR_TABLE_START ((uint32_t) 0x0) +// all those symbols are defined in the linker script extern const char __end; extern const char __start; +extern const char _translation_table_start; +extern const char _translation_table_end; +extern const char _stack_start; +extern const char _stack_top; +extern const char _unprivileged_memory_start; +extern const char _unprivileged_memory_end; -#define KERNEL_START ((uint32_t) &__start) // this is 0x8000 -#define KERNEL_END ((uint32_t) &__end) +#define KERNEL_START ((size_t) &__start) // this is 0x0 +#define KERNEL_END ((size_t) &__end) // first 2^14 aligned address after the kernel -#define TRANSLATION_TABLE_BASE ALIGN_POWER_OF_2(KERNEL_END, 14) - -#define TRANSLATION_TABLE_END \ - (TRANSLATION_TABLE_BASE + (uint32_t) (4096 * 4)) +#define TRANSLATION_TABLE_BASE ((size_t) &_translation_table_start) +#define TRANSLATION_TABLE_END ((size_t) &_translation_table_end) -#define PRIVILEGED_MEMORY_END ALIGN_SECTION(TRANSLATION_TABLE_END) +// first section after the translation table is left unused; +// the next section is used as the stack +#define STACK_START ((size_t) &_stack_start) +#define STACK_END ((size_t) &_stack_top) -#define UNPRIVILEGED_MEMORY_START PRIVILEGED_MEMORY_END +#define PRIVILEGED_MEMORY_END STACK_END +#define UNPRIVILEGED_MEMORY_START \ + ((size_t) &_unprivileged_memory_start) // equal to STACK_END #define UNPRIVILEGED_MEMORY_END \ - (UNPRIVILEGED_MEMORY_START + SECTION_SIZE) + ((size_t) &_unprivileged_memory_end) -#define PL0_SECTION_NUMBER ((uint32_t) 0b101010101010) +#define PL0_SECTION_NUMBER ((size_t) 0xaaa) #define VIRTUAL_PL0_MEMORY_START (PL0_SECTION_NUMBER << 20) +#define VIRTUAL_PL0_MEMORY_END \ + (VIRTUAL_PL0_MEMORY_START + SECTION_SIZE) #endif // MEMORY_H @@ -2,7 +2,7 @@ #include "demo_functionality.h" #include "paging.h" -void kernel_main(void) +void setup(void) { uart_init(); @@ -22,16 +22,13 @@ void kernel_main(void) // prints some info and sets upp translation table, turns on MMU setup_flat_map(); + // prints some info and sets up a section for PL0 code, + // loads a blob there demo_setup_PL0(); - demo_setup_interrupts(); - - // prints some info and sets up a section for PL0 code, loads a blob - // there and jumps to it... never, ever, ever returns + // jumps to unprivileged code... never, ever, ever returns demo_go_unprivileged(); - while(1); - while (1) { char c = uart_getc(); |