aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorWojtek Kosior <kwojtus@protonmail.com>2019-12-28 21:54:42 +0100
committerWojtek Kosior <kwojtus@protonmail.com>2019-12-28 21:54:42 +0100
commit700f4c412d42c9b9811269045c0e363a0331bba9 (patch)
tree260feed1ca657843d993c1ae73e93f25a17cede1
parent80c9af17330ac442a4c3d6d55b4041cbe923e9b4 (diff)
downloadrpi-MMU-example-700f4c412d42c9b9811269045c0e363a0331bba9.tar.gz
rpi-MMU-example-700f4c412d42c9b9811269045c0e363a0331bba9.zip
split kernel into 2 stages; second stage gets copied to 0x0 and runs from there
-rw-r--r--Makefile29
-rw-r--r--boot.S27
-rw-r--r--demo_functionality.c26
-rw-r--r--interrupt_vector.S53
-rw-r--r--interrupts.c55
-rw-r--r--kernel_stage1.S67
-rw-r--r--kernel_stage1.ld27
-rw-r--r--kernel_stage2.ld52
-rw-r--r--linker.ld47
-rw-r--r--memory.h43
-rw-r--r--setup.c (renamed from kernel.c)11
11 files changed, 288 insertions, 149 deletions
diff --git a/Makefile b/Makefile
index 29efa86..68368a5 100644
--- a/Makefile
+++ b/Makefile
@@ -1,8 +1,10 @@
-CFLAGS=-mcpu=cortex-a7 -ffreestanding -std=gnu11 -Wall -Wextra -O2 -fPIC -I.
+CFLAGS=-mcpu=cortex-a7 -ffreestanding -std=gnu11 -Wall -Wextra -O2 -I.
ELFFLAGS=-nostdlib -lgcc
ARM_OBJECTS=kernel.o paging.o demo_functionality.o PL0_test.o uart.o loader_stage1.o loader_stage2.o
+KERNEL_STAGE2_OBJECTS=setup.o interrupt_vector.o interrupts.o uart.o demo_functionality.o paging.o ramfs_embeddable.o ramfs.o
+
RAMFS_FILES=PL_0_test.img
all : kernel.img
@@ -10,20 +12,32 @@ all : kernel.img
%.o : %.c
arm-none-eabi-gcc $(CFLAGS) -c $^ -o $@
-%.o : %.S
- arm-none-eabi-as -mcpu=cortex-a7 $^ -o $@
-
%.img : %.elf
arm-none-eabi-objcopy $^ -O binary $@
+%.o : %.S
+ arm-none-eabi-as -mcpu=cortex-a7 $^ -o $@
+
%_embeddable.o : %.img
arm-none-eabi-objcopy -I binary -O elf32-littlearm -B arm --rename-section .data=.rodata $^ $@
PL_0_test.elf : PL0_test.o uart.o
arm-none-eabi-gcc -T PL0_test.ld -o $@ $(ELFFLAGS) $^
-kernel.elf : boot.o kernel.o uart.o demo_functionality.o paging.o interrupt_vector.o interrupts.o ramfs_embeddable.o ramfs.o
- arm-none-eabi-gcc -T linker.ld -o $@ $(ELFFLAGS) $^
+kernel_stage1.o : kernel_stage1.S kernel_stage2.img
+ arm-none-eabi-as -mcpu=cortex-a7 $< -o $@
+
+kernel.elf : kernel_stage1.ld kernel_stage1.o
+ arm-none-eabi-gcc -T $< -o $@ $(ELFFLAGS) kernel_stage1.o
+
+kernel.img : kernel.elf
+ arm-none-eabi-objcopy $^ -O binary $@
+
+kernel_stage2.elf : kernel_stage2.ld $(KERNEL_STAGE2_OBJECTS)
+ arm-none-eabi-gcc -T $< -o $@ $(ELFFLAGS) $(KERNEL_STAGE2_OBJECTS)
+
+#kernel.elf : boot.o kernel.o uart.o demo_functionality.o paging.o interrupt_vector.o interrupts.o ramfs_embeddable.o ramfs.o
+# arm-none-eabi-gcc -T linker.ld -o $@ $(ELFFLAGS) $^
loader_stage2.elf : loader_stage2.o uart.o
arm-none-eabi-gcc -T loader_stage2_linker.ld -o $@ $(ELFFLAGS) $^
@@ -42,6 +56,9 @@ loader.img : loader.elf
qemu-elf : kernel.elf
qemu-system-arm -m 256 -M raspi2 -serial stdio -kernel $^
+qemu-img : kernel.img
+ qemu-system-arm -m 256 -M raspi2 -serial stdio -kernel $^
+
qemu-bin : loader.img kernel.img pipe_image
./pipe_image --stdout | qemu-system-arm -m 256 -M raspi2 -serial stdio -kernel $<
diff --git a/boot.S b/boot.S
deleted file mode 100644
index 593ed11..0000000
--- a/boot.S
+++ /dev/null
@@ -1,27 +0,0 @@
-// armv7 mode
-
-// Entry point for the kernel.
-// r15 -> should begin execution at 0x8000.
-// r0 -> 0x00000000
-// r1 -> 0x00000C42
-// r2 -> 0x00000100 - start of ATAGS
-// preserve these registers as argument for kernel_main
-
-.global _boot // make entry point label global
-_boot:
- // Only let the first core execute
- mrc p15, 0, r3, c0, c0, 5
- and r3, r3, #3
- cmp r3, #0
- beq proceed
- // this is a kind of blef - races can theoretically still occur
- // when the main core overwrites this part of memory
- wfe
-
-proceed:
- // Initialize the stack (_stack_top is defined in linker.ld)
- ldr sp, =_stack_top
-
- // Call kernel_main
- ldr r3, =kernel_main
- bx r3
diff --git a/demo_functionality.c b/demo_functionality.c
index 4b002d6..420639b 100644
--- a/demo_functionality.c
+++ b/demo_functionality.c
@@ -53,7 +53,7 @@ void demo_current_mode(void)
uart_puts(mode_name);
}
-#define TRANSLATION_TABLE \
+#define TRANSLATION_TABLE \
((short_section_descriptor_t volatile*) TRANSLATION_TABLE_BASE)
extern char
@@ -144,29 +144,7 @@ void demo_go_unprivileged(void)
write_SPSR(new_SPSR);
uart_puts("All ready, jumping to PL0 code\n\r");
-
+
asm volatile("ldm %0, {r0 - r15} ^" ::
"r" (PL0_regs));
}
-
-extern char
- __interrupts_start,
- __interrupts_end,
- __interrupts_size;
-
-extern void (*volatile system_reentry_point)(void);
-
-void system_reentry(void)
-{
- uart_puts("re-entered system");
- while(1);
-}
-
-void demo_setup_interrupts(void)
-{
- system_reentry_point = system_reentry;
-
- for (size_t i = 0; i < (size_t) &__interrupts_size; i++)
- ((volatile char*) 0)[i] =
- (&__interrupts_start)[i];
-}
diff --git a/interrupt_vector.S b/interrupt_vector.S
index d20bf6d..6037b7c 100644
--- a/interrupt_vector.S
+++ b/interrupt_vector.S
@@ -1,22 +1,45 @@
-.section ".interrupts.vector"
-
-.global abort_handler
-.local generic_handler
-.global _interrupt_vectors
_interrupt_vectors:
- b generic_handler
- b generic_handler
- b generic_handler
+ b reset_handler_caller
+ b undef_handler_caller
+ b svc_handler_caller
b abort_handler_caller
b abort_handler_caller
- b generic_handler
- b generic_handler
-
-.section ".interrupts.text"
+ b generic_handler_caller
+ b irq_handler_caller
+ b fiq_handler_caller
+
+reset_handler_caller:
+ ldr sp, =_stack_top
+ ldr r5, =reset_handler
+ bx r5
+
+undef_handler_caller:
+ ldr sp, =_stack_top
+ ldr r5, =undefined_instruction_vector
+ bx r5
-generic_handler:
- b generic_handler
+svc_handler_caller:
+ ldr sp, =_stack_top
+ ldr r5, =supervisor_call_handler
+ bx r5
+
abort_handler_caller:
- mov sp, #0x8000
+ ldr sp, =_stack_top
ldr r5, =abort_handler
bx r5
+
+generic_handler_caller:
+ ldr sp, =_stack_top
+ ldr r5, =generic_handler
+ bx r5
+
+irq_handler_caller:
+ ldr sp, =_stack_top
+ ldr r5, =irq_handler
+ bx r5
+
+fiq_handler_caller:
+ ldr sp, =_stack_top
+ ldr r5, =fiq_handler
+ bx r5
+
diff --git a/interrupts.c b/interrupts.c
index 6952f89..1b0590a 100644
--- a/interrupts.c
+++ b/interrupts.c
@@ -1,10 +1,20 @@
#include "uart.h"
-/**
- @brief The undefined instruction interrupt handler
- If an undefined instruction is encountered, the CPU will start
- executing this function. Just trap here as a debug solution.
-*/
+void setup(void);
+
+void reset_handler(void)
+{
+ static _Bool setup_done;
+
+ if (!setup_done)
+ setup();
+
+ setup_done = 1;
+
+ // TODO do something here
+ while(1);
+}
+
void
__attribute__((interrupt("UNDEF")))
__attribute__((section(".interrupts.text")))
@@ -17,13 +27,40 @@ undefined_instruction_vector(void)
}
}
-void __attribute__((section(".interrupts.data")))
-(*system_reentry_point) (void);
+void supervisor_call_handler(void)
+{
+ uart_puts("something svc happened\n\r");
+
+ while(1);
+}
void
__attribute__((interrupt("ABORT")))
-__attribute__((section(".interrupts.text")))
abort_handler(void)
{
- system_reentry_point();
+ uart_puts("re-entered system\n\r");
+
+ while(1);
+}
+
+void generic_handler(void)
+{
+ uart_puts("something weird happened\n\r");
+
+ while(1);
+}
+
+void irq_handler(void)
+{
+ uart_puts("irq happened\n\r");
+
+ while(1);
+}
+
+void fiq_handler(void)
+{
+ uart_puts("fiq happened\n\r");
+
+ while(1);
}
+
diff --git a/kernel_stage1.S b/kernel_stage1.S
new file mode 100644
index 0000000..1e0f614
--- /dev/null
+++ b/kernel_stage1.S
@@ -0,0 +1,67 @@
+/* arm mode, cortex-a7 compatibility
+ *
+ * _boot is entry point for the kernel.
+ *
+ * Kernel copies its embedded stage 2 to address 0x0 and jumps to
+ * it (to the reset handler). Registers r0 - r2 are arguments for
+ * the kernel, but we're not using them for now.
+ *
+ * This file is based on (and almost identical to) loader_stage1.S
+ */
+
+.global _boot
+_boot:
+ // Only let the first core execute
+ mrc p15, 0, r3, c0, c0, 5
+ and r3, r3, #3
+ cmp r3, #0
+ beq proceed
+ // this is a kind of bluff - races can theoretically still
+ // occur when the main core overwrites this part of memory
+ wfe
+
+proceed:
+ // copy stage2 of the kernel to address 0x0
+
+ // first, load address of stage2_start to r3 (a PIC way)
+ adr r3, stage2_start
+
+ // load destination address for stage2 code to r4
+ mov r4, #0
+
+ // load blob size to r5
+ // The size might get too big for an immediate value, so
+ // we load it from memory.
+ adr r5, blob_size
+ ldr r5, [r5]
+
+ // r6 is the counter - counts the bytes copied
+ mov r6, #0
+
+ // This initial piece of code might get overwritten when we
+ // copy stage2, so the actual copying loop shall be after
+ // stage2 blob. We want this asm code to be PIC, so we're
+ // computing address of stage2_end into r7.
+ add r7, r3, r5
+ bx r7
+
+blob_size:
+ .word stage2_end - stage2_start
+
+.align 4
+stage2_start:
+ .incbin "kernel_stage2.img"
+stage2_end:
+
+ // each word of the blob is loaded to r7 and stored
+ // from r7 to its destination in a loop
+loop:
+ ldr r7, [r3, r6]
+ str r7, [r4, r6]
+ add r6, r6, #4
+ cmp r6, r5
+ blo loop
+
+ // Call stage2 of the kernel (branch to 0x0,
+ // which is the reset handler).
+ bx r4
diff --git a/kernel_stage1.ld b/kernel_stage1.ld
new file mode 100644
index 0000000..3130634
--- /dev/null
+++ b/kernel_stage1.ld
@@ -0,0 +1,27 @@
+ENTRY(_boot) /* defined in kernel_stage1.S; qemu needs it to run elf file */
+
+/* Code starts at 0x8000 - that's where RPis in 32-bit mode load
+ * the kernel. My experiments show, however, that qemu emulating
+ * RPi2 loads the kernel at 0x10000! (took some pain to find out).
+ * rpi-open-firmware, on the other hand, loads kernel at 0x2000000!
+ * This is not really a problem, since:
+ * 1. We can use our bootloader to load the kernel at 0x8000
+ * 2. We've rewritten stage 1 of both bootloader and kernel in
+ * careful assembly, so that they should work regardless of
+ * where they are loaded.
+ * 3. In qemu, we can load kernel.elf instead of raw binary
+ * (qemu will do the right thing then)
+ */
+
+SECTIONS
+{
+
+ . = 0x8000;
+
+ __start = .;
+ .kernel_stage1 :
+ {
+ KEEP(kernel_stage1.o)
+ }
+ __end = .;
+}
diff --git a/kernel_stage2.ld b/kernel_stage2.ld
new file mode 100644
index 0000000..d3a23bf
--- /dev/null
+++ b/kernel_stage2.ld
@@ -0,0 +1,52 @@
+/* This second stage of the kernel is run from address 0x0 */
+
+TRANSLATION_TABLE_SIZE = 4096 * 4;
+MMU_SECTION_SIZE = 1 << 20;
+
+SECTIONS
+{
+
+ . = 0x0;
+
+ __start = .;
+ .kernel_stage2 :
+ {
+ KEEP(interrupt_vector.o)
+ . = ALIGN(4);
+ ramfs_embeddable.o
+ (*)
+ }
+ __end = .;
+
+ . = ALIGN(1 << 14);
+
+ .translation_table (NOLOAD) :
+ {
+ _translation_table_start = .;
+
+ . = . + TRANSLATION_TABLE_SIZE;
+
+ _translation_table_end = .;
+ }
+
+ . = ALIGN(1 << 20);
+ . = . + MMU_SECTION_SIZE;
+
+ .stack (NOLOAD) :
+ {
+ _stack_start = .;
+
+ . = . + MMU_SECTION_SIZE;
+
+ _stack_top = .;
+ }
+
+ .unprivileged_memory (NOLOAD) :
+ {
+ _unprivileged_memory_start = .;
+
+ . = . + MMU_SECTION_SIZE;
+
+ _unprivileged_memory_end = .;
+ }
+}
diff --git a/linker.ld b/linker.ld
deleted file mode 100644
index 444bbf6..0000000
--- a/linker.ld
+++ /dev/null
@@ -1,47 +0,0 @@
-ENTRY(_boot) /* defined in boot.S; qemu needs it to run elf file */
-
-SECTIONS
-{
- /* Starts at 0x8000 - that's where RPis in 32-bit mode load */
- /* kernel at. My experiments do, however, show, that qemu */
- /* emulating RPi2 loads the kernel at 0x10000! (took some pain */
- /* to find out). rpi-open-firmware, on the other hand, loads */
- /* kernel at 0x2000000! */
- /* This is not really a problem, since: */
- /* 1. We can use our bootloader to load the kernel at 0x8000 */
- /* 2. Stage 1 of the bootloader is written in careful */
- /* assembly, so that the loader itself should work */
- /* regardless of where it is loaded. */
- /* 3. In qemu, we can load kernel.elf instead of raw binary */
- /* (qemu will do the right thing then) */
-
- . = 0x8000;
-
- /* RPi in 64-bit mode uses address 0x80000 instead */
-
- __start = .;
- .kernel :
- {
- __kernel_start = .;
- KEEP(boot.o)
- . = ALIGN(4);
- ramfs_embeddable.o
- *(EXCLUDE_FILE (libkernel.o interrupt_vector.o interrupts.o) *)
- __kernel_end = .;
- }
- __kernel_size = __kernel_end - __kernel_start;
-
- .interrupts :
- {
- __interrupts_start = .;
- KEEP(*(.interrupts.vector))
- interrupt_vector.o
- interrupts.o
- __interrupts_end = .;
- }
- __interrupts_size = __interrupts_end - __interrupts_start;
-
- __end = .;
-
- _stack_top = __start;
-}
diff --git a/memory.h b/memory.h
index e4493e2..adc3bc0 100644
--- a/memory.h
+++ b/memory.h
@@ -1,7 +1,10 @@
#ifndef MEMORY_H
#define MEMORY_H
-#define POWER_OF_2(EXP) (((uint32_t) 1) << EXP)
+// These macros were heavily used before I moved all the address
+// computation to the linker script. Now I'm just keeping them
+// in case they're needed for something else :)
+#define POWER_OF_2(EXP) (((size_t) 1) << EXP)
#define ALIGN_POWER_OF_2(ADDR, EXP) \
(((ADDR - 1) & ~(POWER_OF_2(EXP) - 1)) + POWER_OF_2(EXP))
@@ -10,33 +13,45 @@
#define ALIGN_SECTION(ADDR) ALIGN_POWER_OF_2(ADDR, 20)
-#define INTERRUPT_VECTOR_TABLE_START ((uint32_t) 0x0)
-#define STACK_START ((uint32_t) 0x4000)
-#define STACK_END ((uint32_t) 0x8000)
+// memory layout
+
+#define INTERRUPT_VECTOR_TABLE_START ((uint32_t) 0x0)
+// all those symbols are defined in the linker script
extern const char __end;
extern const char __start;
+extern const char _translation_table_start;
+extern const char _translation_table_end;
+extern const char _stack_start;
+extern const char _stack_top;
+extern const char _unprivileged_memory_start;
+extern const char _unprivileged_memory_end;
-#define KERNEL_START ((uint32_t) &__start) // this is 0x8000
-#define KERNEL_END ((uint32_t) &__end)
+#define KERNEL_START ((size_t) &__start) // this is 0x0
+#define KERNEL_END ((size_t) &__end)
// first 2^14 aligned address after the kernel
-#define TRANSLATION_TABLE_BASE ALIGN_POWER_OF_2(KERNEL_END, 14)
-
-#define TRANSLATION_TABLE_END \
- (TRANSLATION_TABLE_BASE + (uint32_t) (4096 * 4))
+#define TRANSLATION_TABLE_BASE ((size_t) &_translation_table_start)
+#define TRANSLATION_TABLE_END ((size_t) &_translation_table_end)
-#define PRIVILEGED_MEMORY_END ALIGN_SECTION(TRANSLATION_TABLE_END)
+// first section after the translation table is left unused;
+// the next section is used as the stack
+#define STACK_START ((size_t) &_stack_start)
+#define STACK_END ((size_t) &_stack_top)
-#define UNPRIVILEGED_MEMORY_START PRIVILEGED_MEMORY_END
+#define PRIVILEGED_MEMORY_END STACK_END
+#define UNPRIVILEGED_MEMORY_START \
+ ((size_t) &_unprivileged_memory_start) // equal to STACK_END
#define UNPRIVILEGED_MEMORY_END \
- (UNPRIVILEGED_MEMORY_START + SECTION_SIZE)
+ ((size_t) &_unprivileged_memory_end)
-#define PL0_SECTION_NUMBER ((uint32_t) 0b101010101010)
+#define PL0_SECTION_NUMBER ((size_t) 0xaaa)
#define VIRTUAL_PL0_MEMORY_START (PL0_SECTION_NUMBER << 20)
+#define VIRTUAL_PL0_MEMORY_END \
+ (VIRTUAL_PL0_MEMORY_START + SECTION_SIZE)
#endif // MEMORY_H
diff --git a/kernel.c b/setup.c
index 719ceff..48df825 100644
--- a/kernel.c
+++ b/setup.c
@@ -2,7 +2,7 @@
#include "demo_functionality.h"
#include "paging.h"
-void kernel_main(void)
+void setup(void)
{
uart_init();
@@ -22,16 +22,13 @@ void kernel_main(void)
// prints some info and sets upp translation table, turns on MMU
setup_flat_map();
+ // prints some info and sets up a section for PL0 code,
+ // loads a blob there
demo_setup_PL0();
- demo_setup_interrupts();
-
- // prints some info and sets up a section for PL0 code, loads a blob
- // there and jumps to it... never, ever, ever returns
+ // jumps to unprivileged code... never, ever, ever returns
demo_go_unprivileged();
- while(1);
-
while (1)
{
char c = uart_getc();