aboutsummaryrefslogtreecommitdiff
path: root/src/boot
diff options
context:
space:
mode:
Diffstat (limited to 'src/boot')
-rw-r--r--src/boot/kernel_stage1.S168
-rw-r--r--src/boot/kernel_stage1.ld27
-rw-r--r--src/boot/kernel_stage2.ld80
-rw-r--r--src/boot/loader_stage1.S55
-rw-r--r--src/boot/loader_stage1_linker.ld16
-rw-r--r--src/boot/loader_stage2.c33
-rw-r--r--src/boot/loader_stage2_linker.ld16
-rw-r--r--src/boot/psr.h88
-rw-r--r--src/boot/setup.c116
9 files changed, 599 insertions, 0 deletions
diff --git a/src/boot/kernel_stage1.S b/src/boot/kernel_stage1.S
new file mode 100644
index 0000000..e770513
--- /dev/null
+++ b/src/boot/kernel_stage1.S
@@ -0,0 +1,168 @@
+/* arm mode, cortex-a7 compatibility
+ *
+ * _boot is entry point for the kernel.
+ *
+ * Kernel copies its embedded stage 2 to address 0x0 and jumps to
+ * it (to the reset handler). Registers r0 - r2 are arguments for
+ * the kernel; of these, r2 is checked for ATAGS (which get copied
+ * out of stage 2's way) and r0 is set to 3 when none are found.
+ *
+ * This file is based on (and almost identical with) loader_stage1.S
+ */
+
+.global _boot
+_boot:
+ // Only let the first core execute: the two lowest bits of the
+ // value read here (MPIDR) are the core number, and only core 0
+ // branches to proceed
+ mrc p15, 0, r3, c0, c0, 5
+ and r3, r3, #3
+ cmp r3, #0
+ beq proceed
+
+ // Every other core is parked here. wfe returns on any event (or
+ // immediately, if an event is already pending), so it has to sit
+ // in a loop - otherwise a woken core would execute the data word
+ // below as an instruction and then run proceed alongside core 0.
+ // This is still a kind of bluff - races can theoretically still
+ // occur when the main core overwrites this part of memory
+1:
+ wfe
+ b 1b
+
+ // we'll use the size of stage1 to determine where we have free
+ // space after it. We'll then copy our atags/fdt there, so
+ // it doesn't get overwritten by stage2 we deploy at 0x0
+
+ // magic number expected in the second word of an atags list;
+ // this word is data and is never reached as code
+atags_magic:
+ .word 0x54410001
+
+proceed:
+ // Decide whether r2 points at an atags list.
+ // load the second word of structure passed to us through r2;
+ // if it's atags, its second word should be the magic number
+ // Btw, location of ATAGS is always 0x100.
+ ldr r3, [r2, #4]
+ // fetch the magic number in a position-independent way
+ adr r4, atags_magic
+ ldr r4, [r4]
+
+ // compare second word of assumed atags with magic number
+ // to see, if it's really atags and not sth else (i.e. fdt)
+ cmp r3, r4
+
+ // normally at start r0 contains value 0;
+ // value 3 in r0 would tell stage2 code, we found no atags :(
+ // (both instructions below only take effect on a mismatch;
+ // on a match execution falls through to the atags copying)
+ movne r0, #3
+ bne stage2_blob_copying
+
+ // if atags was found, copying of it takes place here
+
+ // the following loop finds, where atags ends
+ // r3 shall point to currently looked-at tag
+ // (r2 keeps pointing at the first tag throughout)
+ mov r3, r2
+
+find_end_of_atags_loop:
+ // load first word of tag header to r4 (it contains tag size)
+ ldr r4, [r3]
+ // make r3 point at the next tag (by adding 4*tag_size to it)
+ add r3, r4, lsl #2
+
+ // load second word of tag header to r5 (it contains tag type);
+ // note that this is already the header of the *next* tag
+ ldr r5, [r3, #4]
+
+ // if tag value is 0, it is the last tag
+ cmp r5, #0
+ bne find_end_of_atags_loop
+
+ // the terminating tag consists of its two header words only,
+ // hence the 8 bytes
+ add r3, #8 // make r3 point at the end of last tag
+ sub r3, r2 // get atags size in r3
+
+ // at this point r2 and r3 hold the start and size of atags,
+ // respectively; now we'll compute, where we're going to have
+ // free space to put atags in; we want to put atags either
+ // right after our blob or, if it doesn't fit between
+ // blob end and the address stage1 is loaded at, after stage1
+
+ // get blob size to r5; stage2 gets deployed at 0x0, so its
+ // size is also the address of the first byte after it
+ adr r5, blob_size
+ ldr r5, [r5]
+
+ // we could only copy atags to a 4-aligned address
+ mov r6, #4
+ bl aling_r5_to_r6
+
+ // compute where atags copied right after blob would end
+ add r6, r5, r3
+ // we can only overwrite stuff before the copying loop
+ adr r7, copy_atags_loop
+ // r6 and r7 are addresses, so the comparison has to be an
+ // unsigned one (bls), not a signed one (ble)
+ cmp r6, r7
+ bls copy_atags
+
+ // atags wouldn't fit - use memory after stage1 as destination
+ adr r5, _boot
+ adr r6, stage1_size
+ ldr r6, [r6]
+ add r5, r6
+ mov r6, #4
+ bl aling_r5_to_r6
+
+copy_atags:
+ // now copy atags (r2 - atags start; r3 - atags size;
+ // r5 - destination; r4 - iterator; r6 - buffer)
+ mov r4, #0
+
+ // word-by-word copy; r4 is the byte offset into both areas
+copy_atags_loop:
+ ldr r6, [r2, r4]
+ str r6, [r5, r4]
+ add r4, #4
+ cmp r4, r3
+ blo copy_atags_loop
+
+ mov r2, r5 // place the new atags address in r2
+ b stage2_blob_copying // atags stuff done; proceed
+
+// mini-function: rounds r5 up to the nearest multiple of r6 and
+// leaves the result in r5; r6 has to be a power of two (callers
+// pass 4). Clobbers r7; r6 and the flags are left untouched.
+aling_r5_to_r6:
+ sub r7, r6, #1 // r7 = r6 - 1, mask of the low bits
+ add r5, r5, r7 // step past the next boundary, unless on one
+ bic r5, r5, r7 // drop the low bits: r5 is now aligned
+ bx lr
+
+
+stage2_blob_copying: // copy stage2 of the kernel to address 0x0
+
+ // This part only sets up registers for the copying loop, which
+ // itself is placed after the embedded blob:
+ // r3 - source, r4 - destination, r5 - size, r6 - bytes copied
+
+ // first, load address of stage2_start to r3 (a PIC way)
+ adr r3, stage2_start
+
+ // load destination address for stage2 code to r4
+ mov r4, #0
+
+ // load blob size to r5
+ // The size might get too big for an immediate value, so
+ // we load it from memory.
+ adr r5, blob_size
+ ldr r5, [r5]
+
+ // r6 is the counter - counts the bytes copied
+ mov r6, #0
+
+ // This initial piece of code might get overwritten when we
+ // copy stage2, so the actual copying loop shall be after
+ // stage2 blob. We want this asm code to be PIC, so we're
+ // computing address of stage2_end into r7.
+ // (start of blob + size of blob = stage2_end; the branch
+ // below jumps over the size words and the blob itself)
+ add r7, r3, r5
+ bx r7
+
+// sizes computed by the assembler and read at run time through
+// pc-relative loads; they are data and are never executed
+blob_size:
+ .word stage2_end - stage2_start
+stage1_size:
+ .word stage1_end - _boot
+
+// the stage2 image embedded verbatim between the two labels
+.align 4
+stage2_start:
+ .incbin "kernel_stage2.img"
+stage2_end:
+ // NOTE(review): stage2_blob_copying branches to
+ // stage2_start + blob size, i.e. to stage2_end; this presumably
+ // relies on the image size being a multiple of 4, so that the
+ // loop below starts exactly there - confirm the build ensures it
+
+ // each word of the blob is loaded to r7 and stored
+ // from r7 to its destination in a loop
+ // (r3 - source, r4 - destination, r5 - size, r6 - byte offset;
+ // all set up in stage2_blob_copying)
+loop:
+ ldr r7, [r3, r6]
+ str r7, [r4, r6]
+ add r6, r6, #4
+ cmp r6, r5
+ blo loop
+
+ // Call stage2 of the kernel (branch to 0x0,
+ // which is the reset handler).
+ bx r4
+
+// end marker used for computing stage1_size
+stage1_end:
diff --git a/src/boot/kernel_stage1.ld b/src/boot/kernel_stage1.ld
new file mode 100644
index 0000000..3130634
--- /dev/null
+++ b/src/boot/kernel_stage1.ld
@@ -0,0 +1,27 @@
+ENTRY(_boot) /* defined in kernel_stage1.S; qemu needs it to run elf file */
+
+/* Code starts at 0x8000 - that's where RPis in 32-bit mode load
+ * kernel at. My experiments do, however, show, that qemu emulating
+ * RPi2 loads the kernel at 0x10000! (took some pain to find out).
+ * rpi-open-firmware, on the other hand, loads kernel at 0x2000000!
+ * This is not really a problem, since:
+ * 1. We can use our bootloader to load the kernel at 0x8000
+ * 2. We've rewritten stage 1 of both bootloader and kernel in
+ * careful assembly, so that they should work regardless of
+ * where they are loaded.
+ * 3. In qemu, we can load kernel.elf instead of raw binary
+ * (qemu will do the right thing then)
+ */
+
+SECTIONS
+{
+
+ . = 0x8000;
+
+ /* __start and __end mark the extent of the output image */
+ __start = .;
+ .kernel_stage1 :
+ {
+ /* everything from kernel_stage1.o, kept even if the
+ * linker considers it unreferenced */
+ KEEP(kernel_stage1.o)
+ }
+ __end = .;
+}
diff --git a/src/boot/kernel_stage2.ld b/src/boot/kernel_stage2.ld
new file mode 100644
index 0000000..9411ca2
--- /dev/null
+++ b/src/boot/kernel_stage2.ld
@@ -0,0 +1,80 @@
+/* This second stage of the kernel is run from address 0x0 */
+
+/* sizes (in bytes) of the regions reserved further down */
+TRANSLATION_TABLE_SIZE = 4096 * 4;
+SECTIONS_LIST_SIZE = 4096 * 8;
+MMU_SECTION_SIZE = 1 << 20;
+
+SECTIONS
+{
+
+ . = 0x0;
+
+ __start = .;
+ /* interrupt_vector.o comes first, so it lands at address 0x0 */
+ .interrupt_vector :
+ {
+ KEEP(interrupt_vector.o)
+ }
+ . = ALIGN(4);
+ .embedded_ramfs :
+ {
+ ramfs_embeddable.o
+ }
+ .rest_of_kernel :
+ {
+ *(.text)
+ *(.data)
+ *(.rodata)
+ *(.bss)
+ /* common symbols are matched through the pseudo-section
+ * named COMMON (no slashes - those only belong to the
+ * special /DISCARD/ output section) */
+ *(COMMON)
+ *(*)
+ }
+ __end = .;
+
+ /* Everything below is NOLOAD: address space is reserved and
+ * symbols are defined, but nothing is put into the image. */
+
+ /* translation table starts at a 16 KiB boundary */
+ . = ALIGN(1 << 14);
+
+ .translation_table (NOLOAD) :
+ {
+ _translation_table_start = .;
+
+ . = . + TRANSLATION_TABLE_SIZE;
+
+ _translation_table_end = .;
+ }
+
+ .sections_list (NOLOAD) :
+ {
+ _sections_list_start = .;
+
+ . = . + SECTIONS_LIST_SIZE;
+
+ _sections_list_end = .;
+ }
+
+ /* go to the next 1 MiB boundary, then leave one whole
+ * MMU section (MMU_SECTION_SIZE) unused before the stacks */
+ . = ALIGN(1 << 20);
+ . = . + MMU_SECTION_SIZE;
+
+ /* three stacks laid out back to back:
+ * 256 KiB for FIQ, 256 KiB for IRQ, 512 KiB for supervisor */
+ .stack (NOLOAD) :
+ {
+ _stack_start = .;
+
+ _fiq_stack_start = .;
+
+ . = . + (1 << 18);
+
+ _fiq_stack_top = .;
+
+ _irq_stack_start = .;
+
+ . = . + (1 << 18);
+
+ _irq_stack_top = .;
+
+ _supervisor_stack_start = .;
+
+ . = . + (1 << 19);
+
+ _supervisor_stack_top = .;
+
+ _stack_end = .;
+ }
+}
diff --git a/src/boot/loader_stage1.S b/src/boot/loader_stage1.S
new file mode 100644
index 0000000..69d78c5
--- /dev/null
+++ b/src/boot/loader_stage1.S
@@ -0,0 +1,55 @@
+/* arm mode, cortex-a7 compatibility
+ *
+ * _boot is entry point for the loader.
+ *
+ * Loader copies its embedded stage 2 to address 0x4000
+ * and jumps to it. Registers r0 - r2 are arguments for the kernel
+ * and should be left intact.
+ */
+
+.global _boot
+_boot:
+ // Only let the first core execute: the two lowest bits of the
+ // value read here (MPIDR) are the core number, and only core 0
+ // branches to proceed
+ mrc p15, 0, r3, c0, c0, 5
+ and r3, r3, #3
+ cmp r3, #0
+ beq proceed
+
+ // Every other core is parked here. wfe returns on any event (or
+ // immediately, if an event is already pending), so it has to sit
+ // in a loop - otherwise a woken core would fall through into
+ // proceed and run the loader alongside core 0.
+ // This is still a kind of bluff - races can theoretically still
+ // occur when the main core overwrites this part of memory
+1:
+ wfe
+ b 1b
+
+proceed:
+ // copy stage2 of the loader to address 0x4000
+ // (only r3 - r7 are used, so that r0 - r2 stay intact)
+
+ // first, load address of stage2_start to r3 (a PIC way)
+ adr r3, stage2_start
+
+ // load destination address for stage2 code to r4
+ mov r4, #0x4000
+
+ // load blob size to r5
+ // The size might not be encodable as an immediate value, so
+ // we load it from memory.
+ adr r5, blob_size
+ ldr r5, [r5]
+
+ // r6 is the counter - counts the bytes copied
+ mov r6, #0
+
+ // each word of the blob is loaded to r7 and stored
+ // from r7 to its destination in a loop
+loop:
+ ldr r7, [r3, r6]
+ str r7, [r4, r6]
+ add r6, r6, #4
+ cmp r6, r5
+ blo loop
+
+ // Initialize the stack
+ // _stack_top is defined in loader_stage1_linker.ld
+ ldr sp, =_stack_top
+
+ // Call stage2 of the loader (branch to 0x4000)
+ bx r4
+
+ // Emit the literal pool (it holds the value of _stack_top) right
+ // here. By default it would be placed at the end of the section,
+ // i.e. after the embedded blob, where the pc-relative ldr above
+ // can no longer reach it once the blob grows beyond 4 KiB.
+ .ltorg
+
+ // size of the embedded stage2 image; data, never executed
+blob_size:
+ .word stage2_end - stage2_start
+
+.align 4
+stage2_start:
+ .incbin "loader_stage2.img"
+stage2_end:
diff --git a/src/boot/loader_stage1_linker.ld b/src/boot/loader_stage1_linker.ld
new file mode 100644
index 0000000..711fcbf
--- /dev/null
+++ b/src/boot/loader_stage1_linker.ld
@@ -0,0 +1,16 @@
+/* Linker script for stage 1 of the loader (loader_stage1.S). */
+ENTRY(_boot)
+
+SECTIONS
+{
+ /* see linker.ld for details */
+ . = 0x2000000;
+
+ /* __start and __end mark the extent of the output image */
+ __start = .;
+ loader_stage1 :
+ {
+ KEEP(loader_stage1.o)
+ }
+ __end = .;
+
+ /* initial stack pointer value; loader_stage1.S loads it into
+ * sp right before branching to stage 2 */
+ _stack_top = 0x8000;
+}
diff --git a/src/boot/loader_stage2.c b/src/boot/loader_stage2.c
new file mode 100644
index 0000000..fc3ae1c
--- /dev/null
+++ b/src/boot/loader_stage2.c
@@ -0,0 +1,33 @@
+#include <stddef.h>
+#include <stdint.h>
+#include "uart.h"
+#include "io.h"
+#include "global.h"
+
+void *const kernel_load_addr = ((void*) 0x8000);
+
+/*
+ * Stage 2 of the loader: receives a kernel image over uart, stores it
+ * at kernel_load_addr and jumps to it.
+ *
+ * r0, r1 and atags are not interpreted here - they are handed over to
+ * the loaded kernel unchanged.
+ */
+void _stage2_main(uint32_t r0, uint32_t r1, uint32_t atags)
+{
+    uart_init();
+
+    // the size of the kernel arrives first: 4 bytes, least
+    // significant one first (little endian)
+    uint32_t kernel_size = 0;
+
+    for (unsigned int shift = 0; shift < 32; shift += 8)
+    {
+        uint32_t byte = getchar();
+
+        kernel_size |= byte << shift;
+    }
+
+    // then comes the image itself, stored byte by byte
+    // starting at kernel_load_addr
+    char *const end = (char*) kernel_load_addr + kernel_size;
+
+    for (char *dst = kernel_load_addr; dst < end; dst++)
+        *dst = getchar();
+
+    // jump to kernel
+    void (*kernel_entry)(uint32_t, uint32_t, uint32_t) =
+        (void(*)(uint32_t, uint32_t, uint32_t)) kernel_load_addr;
+
+    kernel_entry(r0, r1, atags);
+}
+
diff --git a/src/boot/loader_stage2_linker.ld b/src/boot/loader_stage2_linker.ld
new file mode 100644
index 0000000..33e79e9
--- /dev/null
+++ b/src/boot/loader_stage2_linker.ld
@@ -0,0 +1,16 @@
+/* Linker script for stage 2 of the loader (loader_stage2.c + uart). */
+ENTRY(_stage2_main)
+
+SECTIONS
+{
+ /* see loader_stage1.S for details */
+ /* (stage 1 copies this image to 0x4000 and branches there) */
+ . = 0x4000;
+
+ /* __start and __end mark the extent of the output image */
+ __start = .;
+ loader_stage2 :
+ {
+ /* .text of loader_stage2.o is placed first - presumably so
+ * that _stage2_main sits at the very start of the image,
+ * which is where stage 1 jumps to; verify when adding more
+ * functions to loader_stage2.c */
+ KEEP(loader_stage2.o(.text))
+ loader_stage2.o
+ uart.o
+ }
+ __end = .;
+}
diff --git a/src/boot/psr.h b/src/boot/psr.h
new file mode 100644
index 0000000..f300a7a
--- /dev/null
+++ b/src/boot/psr.h
@@ -0,0 +1,88 @@
+#ifndef PSR_H
+#define PSR_H
+
+#include <stdint.h>
+
+/*
+ * Processor execution modes. Each value is a 5-bit encoding, which is
+ * also the width of the M_4_0 field of PSR_t.
+ */
+enum execution_mode {
+    MODE_USER       = 0x10,
+    MODE_FIQ        = 0x11,
+    MODE_IRQ        = 0x12,
+    MODE_SUPERVISOR = 0x13,
+    MODE_MONITOR    = 0x16,
+    MODE_ABORT      = 0x17,
+    MODE_HYPERVISOR = 0x1a,
+    MODE_UNDEFINED  = 0x1b,
+    MODE_SYSTEM     = 0x1f,
+};
+
+/*
+ * Content of a program status register, accessible either as a whole
+ * 32-bit value (raw) or field by field (fields).
+ *
+ * Each field can be referred to by its short name or through the more
+ * descriptive PSR_* macro defined next to it, e.g.
+ *     psr.fields.I   or   psr.fields.PSR_IRQ_MASK_BIT
+ */
+typedef union
+{
+    uint32_t raw;
+    struct
+    {
+        uint32_t M_4_0      : 5; // bits 4:0
+        uint32_t T          : 1; // bit 5
+        uint32_t F          : 1; // bit 6
+        uint32_t I          : 1; // bit 7
+        uint32_t A          : 1; // bit 8
+        uint32_t E          : 1; // bit 9
+        uint32_t IT_7_2     : 6; // bits 15:10
+        uint32_t GE_3_0     : 4; // bits 19:16
+        uint32_t Bits_23_20 : 4; // bits 23:20
+        uint32_t J          : 1; // bit 24
+        uint32_t IT_1_0     : 2; // bits 26:25
+        uint32_t Q          : 1; // bit 27
+        uint32_t V          : 1; // bit 28
+        uint32_t C          : 1; // bit 29
+        uint32_t Z          : 1; // bit 30
+        uint32_t N          : 1; // bit 31
+#define PSR_MODE_4_0                    M_4_0
+#define PSR_THUMB_BIT                   T
+#define PSR_FIQ_MASK_BIT                F
+        // misspelled name, kept so that existing users still compile
+#define PSR_FIQ_MASKK_BIT               F
+#define PSR_IRQ_MASK_BIT                I
+#define PSR_ASYNC_ABORT_MASK_BIT        A
+#define PSR_ENDIANNESS_BIT              E
+#define PSR_IF_THEN_STATE_7_2           IT_7_2
+#define PSR_GREATER_THAN_OR_EQUAL_FLAGS GE_3_0
+        // bits 23:20 are reserved
+#define PSR_JAZELLE_BIT                 J
+#define PSR_IF_THEN_STATE_1_0           IT_1_0
+#define PSR_CUMULATIVE_SATURATION_BIT   Q
+#define PSR_OVERFLOW_CONDITION_BIT      V
+#define PSR_CARRY_CONDITION_BIT         C
+#define PSR_ZERO_CONDITION_BIT          Z
+#define PSR_NEGATIVE_CONDITION_BIT      N
+    } fields;
+} PSR_t;
+
+/* Returns the current content of the current program status register. */
+inline static PSR_t read_CPSR(void)
+{
+    PSR_t CPSR;
+
+    // get content of current program status register;
+    // volatile, because the register changes without the compiler
+    // knowing about it - each call has to perform a real read, which
+    // must be neither dropped nor merged with another one
+    asm volatile("mrs %0, cpsr" : "=r" (CPSR.raw) :: "memory");
+
+    return CPSR;
+}
+
+/* Replaces the content of the current program status register. */
+inline static void write_CPSR(PSR_t CPSR)
+{
+    uint32_t new_value = CPSR.raw;
+
+    // an asm statement without outputs is volatile anyway;
+    // spelled out here only to make that visible
+    asm volatile("msr cpsr, %0" : /* no outputs */
+                 : "r" (new_value) : "memory");
+}
+
+/* Returns the current content of the saved program status register. */
+inline static PSR_t read_SPSR(void)
+{
+    PSR_t SPSR;
+
+    // get content of saved program status register;
+    // volatile, because the register changes without the compiler
+    // knowing about it - each call has to perform a real read, which
+    // must be neither dropped nor merged with another one
+    asm volatile("mrs %0, spsr" : "=r" (SPSR.raw) :: "memory");
+
+    return SPSR;
+}
+
+/* Replaces the content of the saved program status register. */
+inline static void write_SPSR(PSR_t SPSR)
+{
+    // set content of saved program status register;
+    // "memory" clobber added for consistency with write_CPSR(), so
+    // that memory accesses don't get reordered across the write
+    asm("msr spsr, %0" :: "r" (SPSR.raw) : "memory");
+}
+
+#endif // PSR_H
diff --git a/src/boot/setup.c b/src/boot/setup.c
new file mode 100644
index 0000000..a96b19e
--- /dev/null
+++ b/src/boot/setup.c
@@ -0,0 +1,116 @@
+#include "uart.h"
+#include "utils/io.h"
+#include "demo_functionality.h"
+#include "paging.h"
+#include "atags.h"
+// for POWER_OF_2() macro... perhaps the macro should be moved
+#include "memory.h"
+#include "armclock.h"
+#include "scheduler.h"
+
+/*
+ * C entry point of the kernel's second stage. Brings up uart, reports
+ * what was passed by stage 1, works out how much memory there is, and
+ * then initializes paging, the clock and the scheduler before starting
+ * PL0 code. Does not return.
+ *
+ * r0           - value 3 means stage 1 found no ATAGS
+ * machine_type - ARM machine type, only printed
+ * atags        - address ATAGS was copied to (unused when r0 == 3)
+ */
+void setup(uint32_t r0, uint32_t machine_type,
+           struct atag_header *atags)
+{
+    uart_init();
+
+    // When we attach screen session after loading kernel with socat
+    // we miss kernel's greeting... So the kernel waits for one char
+    // we're going to send from within screen
+    getchar();
+
+    puts("Hello, kernel World!");
+
+    prints("ARM machine type: 0x");
+    printhext(machine_type);
+    puts("");
+
+    // stays 0 unless ATAGS tells us otherwise
+    uint32_t memory_size = 0;
+
+    // value 3 introduced by stage1 code means no atags was found
+    if (r0 != 3)
+    {
+        prints("ATAGS copied to 0x");
+        printhex((uint32_t) atags);
+        puts("");
+
+        puts("__ ATAGS contents __");
+        print_atags(atags);
+        puts("__ end of ATAGS contents __");
+
+        memory_size = find_memory_size(atags);
+    }
+    else
+    {
+        puts ("No ATAGS was found!");
+    }
+
+    if (!memory_size)
+    {
+        // Most Pis have more, but qemu might give us little
+        puts("Couldn't determine available memory - assuming 192MB");
+        memory_size = 192 * POWER_OF_2(20);
+    }
+    else
+    {
+        // report the size in the biggest unit that divides it
+        // without a remainder
+        char *unit = "B";
+        uint32_t size_in_unit = memory_size;
+
+        if (memory_size % POWER_OF_2(10) == 0)
+        {
+            unit = "KB";
+            size_in_unit = memory_size / POWER_OF_2(10);
+
+            if (memory_size % POWER_OF_2(20) == 0)
+            {
+                unit = "MB";
+                size_in_unit = memory_size / POWER_OF_2(20);
+
+                if (memory_size % POWER_OF_2(30) == 0)
+                {
+                    unit = "GB";
+                    size_in_unit = memory_size / POWER_OF_2(30);
+                }
+            }
+        }
+
+        prints ("memory available: ");
+        printdect (size_in_unit);
+        puts (unit);
+    }
+
+    // assume we need at least one section for PL0
+    if (memory_size < PRIVILEGED_MEMORY_END + SECTION_SIZE)
+    {
+        puts("Not enough memory to continue");
+
+        // nothing more can be done - hang here forever
+        for (;;)
+            ;
+    }
+
+    // both of these only print some info
+    demo_paging_support();
+    demo_current_mode();
+
+    setup_pager_structures(memory_size);
+
+    // prints some info and sets up translation table, turns on MMU
+    setup_flat_map();
+
+    puts("Initializing clock");
+    // sets some general settings for arm timer
+    armclk_init();
+
+    puts("Setting up scheduler's internal structures");
+    setup_scheduler_structures();
+
+    puts("Switching uart to use irqs");
+    // note, that kernel's puts() is still going to use blocking io
+    uart_irq_enable();
+
+    // prints some info and sets up a section for PL0 code, loads a blob
+    // there, then runs scheduler... never, ever, ever returns
+    demo_setup_PL0();
+}