/* arm mode, cortex-a7 compatibility
 *
 * kernel_stage1.S
 *
 * _boot is the entry point for the kernel.
 *
 * The kernel copies its embedded stage 2 to address 0x0 and jumps to
 * it (to the reset handler).
 *
 * Register usage on entry:
 *   r0 - passed on unchanged, except that it is set to 3 when the
 *        structure pointed to by r2 is not ATAGS
 *   r1 - passed on unchanged
 *   r2 - pointer to ATAGS (or something else, e.g. fdt); when it is
 *        ATAGS, they are copied out of the way of stage 2 and r2 is
 *        updated to point at the copy
 *
 * This file is based on (and almost identical with) loader_stage1.S
 */

.global _boot
_boot:
	// Only let the first core execute
	mrc p15, 0, r3, c0, c0, 5
	and r3, r3, #3
	cmp r3, #0
	beq proceed

	// Every other core is parked here. wfe may return on any event,
	// so it has to sit in a loop - otherwise the core would go on to
	// execute the data word below and then the code at 'proceed'.
	// This is still only a best-effort measure - races can
	// theoretically still occur when the main core overwrites this
	// part of memory.
park_secondary_core:
	wfe
	b park_secondary_core

	// we'll use the size of stage1 to determine where we have free
	// space after it. We'll then copy our atags/fdt there, so
	// it doesn't get overwritten by stage2 we deploy at 0x0
atags_magic:
	.word 0x54410001

proceed:
	// load the second word of the structure passed to us through r2;
	// if it's atags, its second word should be the magic number
	// Btw, location of ATAGS is always 0x100.
	ldr r3, [r2, #4]
	adr r4, atags_magic
	ldr r4, [r4]

	// compare second word of assumed atags with magic number
	// to see, if it's really atags and not sth else (i.e. fdt)
	cmp r3, r4

	// normally at start r0 contains value 0;
	// value 3 in r0 would tell stage2 code, we found no atags :(
	movne r0, #3
	bne stage2_blob_copying

	// if atags was found, copying of it takes place here

	// the following loop finds, where atags ends
	// r3 shall point to currently looked-at tag
	mov r3, r2

find_end_of_atags_loop:
	// load first word of tag header to r4 (it contains tag size)
	ldr r4, [r3]
	// make r3 point at the next tag (by adding 4*tag_size to it)
	add r3, r4, lsl #2

	// load second word of tag header to r5 (it contains tag type)
	ldr r5, [r3, #4]

	// if tag value is 0, it is the last tag
	cmp r5, #0
	bne find_end_of_atags_loop

	add r3, #8      // make r3 point at the end of last tag
	sub r3, r2      // get atags size in r3

	// at this point r2 and r3 hold start and size of atags,
	// respectively; now we'll compute, where we're going to have
	// free space to put atags in; we want to put atags either
	// right after our blob or, if it doesn't fit between
	// blob end and the address stage1 is loaded at, after stage1

	// get blob size to r5
	adr r5, blob_size
	ldr r5, [r5]

	// we could only copy atags to a 4-aligned address
	mov r6, #4
	bl align_r5_to_r6

	// compute where atags copied right after blob would end
	add r6, r5, r3
	// we can only overwrite stuff before the copying loop
	adr r7, copy_atags_loop
	// both values are addresses, hence the unsigned comparison
	cmp r6, r7
	bls copy_atags

	// atags wouldn't fit - use memory after stage1 as destination
	adr r5, _boot
	adr r6, stage1_size
	ldr r6, [r6]
	add r5, r6
	mov r6, #4
	bl align_r5_to_r6

copy_atags:
	// now copy atags (r2 - atags start; r3 - atags size;
	// r5 - destination; r4 - iterator; r6 - buffer)
	mov r4, #0

copy_atags_loop:
	ldr r6, [r2, r4]
	str r6, [r5, r4]
	add r4, #4
	cmp r4, r3
	blo copy_atags_loop

	mov r2, r5              // place the new atags address in r2
	b stage2_blob_copying   // atags stuff done; proceed

// mini-function, that rounds r5 up to a multiple of r6 (r6 has to be
// a power of 2); clobbers r7
align_r5_to_r6:
	sub r5, #1
	sub r7, r6, #1
	bic r5, r7
	add r5, r6
	mov pc, lr


stage2_blob_copying:    // copy stage2 of the kernel to address 0x0

	// first, load address of stage2_start to r3 (a PIC way)
	adr r3, stage2_start

	// load destination address for stage2 code to r4
	mov r4, #0

	// load blob size to r5
	// The size might get too big for an immediate value, so
	// we load it from memory.
	adr r5, blob_size
	ldr r5, [r5]

	// r6 is the counter - counts the bytes copied
	mov r6, #0

	// This initial piece of code might get overwritten when we
	// copy stage2, so the actual copying loop shall be after
	// stage2 blob. We want this asm code to be PIC, so we're
	// computing address of stage2_end into r7.
	// NOTE(review): this assumes the blob size is a multiple of 4,
	// so that stage2_end is a valid ARM-state branch target -
	// confirm that kernel_stage2.img is padded accordingly.
	add r7, r3, r5
	bx r7

blob_size:
	.word stage2_end - stage2_start
stage1_size:
	.word stage1_end - _boot

.align 4
stage2_start:
	.incbin "kernel_stage2.img"
stage2_end:

	// each word of the blob is loaded to r7 and stored
	// from r7 to its destination in a loop
loop:
	ldr r7, [r3, r6]
	str r7, [r4, r6]
	add r6, r6, #4
	cmp r6, r5
	blo loop

	// Call stage2 of the kernel (branch to 0x0,
	// which is the reset handler).
	bx r4

stage1_end:
/* arm mode, cortex-a7 compatibility
 *
 * loader_stage1.S
 *
 * _boot is the entry point for the loader.
 *
 * The loader copies its embedded stage 2 to address 0x4000
 * and jumps to it. Registers r0 - r2 are arguments for the kernel
 * and should be left intact (only r3 - r7 and sp are used here).
 */

.global _boot
_boot:
	// Only let the first core execute
	mrc p15, 0, r3, c0, c0, 5
	and r3, r3, #3
	cmp r3, #0
	beq proceed

	// Every other core is parked here. wfe may return on any event,
	// so it has to sit in a loop - otherwise the core would fall
	// through to 'proceed' and run the copying code as well.
	// This is still only a best-effort measure - races can
	// theoretically still occur when the main core overwrites this
	// part of memory.
park_secondary_core:
	wfe
	b park_secondary_core

proceed:
	// copy stage2 of the loader to address 0x4000

	// first, load address of stage2_start to r3 (a PIC way)
	adr r3, stage2_start

	// load destination address for stage2 code to r4
	mov r4, #0x4000

	// load blob size to r5
	// The size might not be encodable as an immediate value, so
	// we load it from memory (same approach as in kernel_stage1.S).
	adr r5, blob_size
	ldr r5, [r5]

	// r6 is the counter - counts the bytes copied
	mov r6, #0

	// each word of the blob is loaded to r7 and stored
	// from r7 to its destination in a loop
loop:
	ldr r7, [r3, r6]
	str r7, [r4, r6]
	add r6, r6, #4
	cmp r6, r5
	blo loop

	// Initialize the stack
	// _stack_top is defined in loader_stage1_linker.ld
	ldr sp, =_stack_top

	// Call stage2 of the loader (branch to 0x4000)
	bx r4

blob_size:
	.word stage2_end - stage2_start

	// emit the literal pool (used by 'ldr sp, =...') here, so that it
	// stays within reach of the load no matter how big the blob is
	.ltorg

.align 4
stage2_start:
	.incbin "loader_stage2.img"
stage2_end:
/* see linker.ld for details */ + . = 0x2000000; + + __start = .; + loader_stage1 : + { + KEEP(loader_stage1.o) + } + __end = .; + + _stack_top = 0x8000; +} diff --git a/src/boot/loader_stage2.c b/src/boot/loader_stage2.c new file mode 100644 index 0000000..fc3ae1c --- /dev/null +++ b/src/boot/loader_stage2.c @@ -0,0 +1,33 @@ +#include +#include +#include "uart.h" +#include "io.h" +#include "global.h" + +void *const kernel_load_addr = ((void*) 0x8000); + +void _stage2_main(uint32_t r0, uint32_t r1, uint32_t atags) +{ + uart_init(); + + // get kernel size via uart (little endian) + uint32_t b0, b1, b2, b3; + + b0 = getchar(); + b1 = getchar(); + b2 = getchar(); + b3 = getchar(); + + uint32_t kernel_size = b0 | (b1 << 8) | (b2 << 16) | (b3 << 24); + + // load kernel at kernel_load_addr + char *dst = kernel_load_addr, *end = dst + kernel_size; + + while (dst < end) + *(dst++) = getchar(); + + // jump to kernel + ((void(*)(uint32_t, uint32_t, uint32_t)) kernel_load_addr) + (r0, r1, atags); +} + diff --git a/src/boot/loader_stage2_linker.ld b/src/boot/loader_stage2_linker.ld new file mode 100644 index 0000000..33e79e9 --- /dev/null +++ b/src/boot/loader_stage2_linker.ld @@ -0,0 +1,16 @@ +ENTRY(_stage2_main) + +SECTIONS +{ + /* see loader_stage1.S for details */ + . 
/*
 * psr.h
 *
 * Typed access to the ARM program status registers (CPSR and SPSR).
 */
#ifndef PSR_H
#define PSR_H

#include <stdint.h>

/* Values of the mode field (bits 4:0) of a program status register. */
enum execution_mode {
  MODE_USER       = 0x10, // 0b10000
  MODE_FIQ        = 0x11, // 0b10001
  MODE_IRQ        = 0x12, // 0b10010
  MODE_SUPERVISOR = 0x13, // 0b10011
  MODE_MONITOR    = 0x16, // 0b10110
  MODE_ABORT      = 0x17, // 0b10111
  MODE_HYPERVISOR = 0x1A, // 0b11010
  MODE_UNDEFINED  = 0x1B, // 0b11011
  MODE_SYSTEM     = 0x1F, // 0b11111
};

/*
 * A program status register, accessible either as a raw 32-bit word or
 * field by field. The bit positions in the comments rely on the compiler
 * allocating bit-fields starting from the least significant bit.
 */
typedef union
{
  uint32_t raw;
  struct
  {
    uint32_t M_4_0      : 5; // bits 4:0
    uint32_t T          : 1; // bit 5
    uint32_t F          : 1; // bit 6
    uint32_t I          : 1; // bit 7
    uint32_t A          : 1; // bit 8
    uint32_t E          : 1; // bit 9
    uint32_t IT_7_2     : 6; // bits 15:10
    uint32_t GE_3_0     : 4; // bits 19:16
    uint32_t Bits_23_20 : 4; // bits 23:20 (reserved)
    uint32_t J          : 1; // bit 24
    uint32_t IT_1_0     : 2; // bits 26:25
    uint32_t Q          : 1; // bit 27
    uint32_t V          : 1; // bit 28
    uint32_t C          : 1; // bit 29
    uint32_t Z          : 1; // bit 30
    uint32_t N          : 1; // bit 31
  } fields;
} PSR_t;

/* Descriptive aliases for the members of PSR_t.fields. */
#define PSR_MODE_4_0                    M_4_0
#define PSR_THUMB_BIT                   T
#define PSR_FIQ_MASK_BIT                F
// misspelled name kept, so that existing users keep compiling
#define PSR_FIQ_MASKK_BIT               PSR_FIQ_MASK_BIT
#define PSR_IRQ_MASK_BIT                I
#define PSR_ASYNC_ABORT_MASK_BIT        A
#define PSR_ENDIANNESS_BIT              E
#define PSR_IF_THEN_STATE_7_2           IT_7_2
#define PSR_GREATER_THAN_OR_EQUAL_FLAGS GE_3_0
// bits 23:20 are reserved
#define PSR_JAZELLE_BIT                 J
#define PSR_IF_THEN_STATE_1_0           IT_1_0
#define PSR_CUMULATIVE_SATURATION_BIT   Q
#define PSR_OVERFLOW_CONDITION_BIT      V
#define PSR_CARRY_CONDITION_BIT         C
#define PSR_ZERO_CONDITION_BIT          Z
#define PSR_NEGATIVE_CONDITION_BIT      N

/*
 * Return the content of the current program status register.
 *
 * The asm is volatile, so that every call performs a fresh read (the
 * register changes behind the compiler's back) and is never merged with
 * another read or dropped.
 */
static inline PSR_t read_CPSR(void)
{
  PSR_t CPSR;

  __asm__ volatile("mrs %0, cpsr" : "=r" (CPSR.raw) :: "memory");

  return CPSR;
}

/*
 * Set the content of the current program status register.
 *
 * "cc" is clobbered, because the condition flags live in this register.
 *
 * NOTE(review): the GNU assembler is believed to treat a bare `cpsr`
 * operand of msr as `cpsr_fc` (flags and control fields only) - confirm
 * before relying on this to change the A, E or GE bits.
 */
static inline void write_CPSR(PSR_t CPSR)
{
  __asm__ volatile("msr cpsr, %0" :: "r" (CPSR.raw) : "memory", "cc");
}

/* Return the content of the saved program status register. */
static inline PSR_t read_SPSR(void)
{
  PSR_t SPSR;

  __asm__ volatile("mrs %0, spsr" : "=r" (SPSR.raw) :: "memory");

  return SPSR;
}

/*
 * Set the content of the saved program status register.
 *
 * Carries the same "memory" clobber as the other accessors, so that it is
 * not reordered with respect to surrounding memory accesses.
 */
static inline void write_SPSR(PSR_t SPSR)
{
  __asm__ volatile("msr spsr, %0" :: "r" (SPSR.raw) : "memory");
}

#endif // PSR_H
("memory available: "); + printdect (size_in_unit); + puts (unit); + } + else + { + // Most Pis have more, but qemu might give us little + puts("Couldn't determine available memory - assuming 192MB"); + memory_size = 192 * POWER_OF_2(20); + } + + // assume we need at least one section for PL0 + if (memory_size < PRIVILEGED_MEMORY_END + SECTION_SIZE) + { + puts("Not enough memory to continue"); + while (1); + } + + // prints some info + demo_paging_support(); + + // prints some info + demo_current_mode(); + + setup_pager_structures(memory_size); + + // prints some info and sets upp translation table, turns on MMU + setup_flat_map(); + + puts("Initializing clock"); + // sets some general settings for arm timer + armclk_init(); + + puts("Setting up scheduler's internal structures"); + setup_scheduler_structures(); + + puts("Switching uart to use irqs"); + + // note, that kernel's puts() is still going to use blocking io + uart_irq_enable(); + + // prints some info and sets up a section for PL0 code, loads a blob + // there, then runs scheduler... never, ever, ever returns + demo_setup_PL0(); +} -- cgit v1.2.3