diff options
-rw-r--r-- | .gitignore | 5 | ||||
-rw-r--r-- | Makefile | 65 | ||||
-rw-r--r-- | PL0_test.c | 41 | ||||
-rw-r--r-- | PL0_test.ld | 45 | ||||
-rw-r--r-- | PL0_utils.c | 19 | ||||
-rw-r--r-- | PL0_utils.h | 6 | ||||
-rw-r--r-- | armclock.h | 68 | ||||
-rw-r--r-- | atags.c | 103 | ||||
-rw-r--r-- | atags.h | 102 | ||||
-rw-r--r-- | bcmclock.h | 35 | ||||
-rw-r--r-- | boot.S | 58 | ||||
-rw-r--r-- | demo_functionality.c | 179 | ||||
-rw-r--r-- | global.h | 40 | ||||
-rw-r--r-- | interrupt_vector.S | 53 | ||||
-rw-r--r-- | interrupts.c | 80 | ||||
-rw-r--r-- | io.c | 64 | ||||
-rw-r--r-- | io.h | 26 | ||||
-rw-r--r-- | kernel.c | 107 | ||||
-rw-r--r-- | kernel_stage1.S | 168 | ||||
-rw-r--r-- | kernel_stage1.ld | 27 | ||||
-rw-r--r-- | kernel_stage2.ld | 79 | ||||
-rw-r--r-- | libkernel.c | 12 | ||||
-rw-r--r-- | libkernel.h | 6 | ||||
-rw-r--r-- | linker.ld | 107 | ||||
-rw-r--r-- | loader_stage1.S | 55 | ||||
-rw-r--r-- | loader_stage1.c | 25 | ||||
-rw-r--r-- | loader_stage1.ld | 56 | ||||
-rw-r--r-- | loader_stage1_linker.ld | 16 | ||||
-rw-r--r-- | loader_stage2.c | 25 | ||||
-rw-r--r-- | loader_stage2.ld | 44 | ||||
-rw-r--r-- | loader_stage2_linker.ld | 16 | ||||
-rw-r--r-- | makefs.c | 97 | ||||
-rw-r--r-- | memory.h | 71 | ||||
-rw-r--r-- | paging.c | 167 | ||||
-rw-r--r-- | paging.h | 8 | ||||
-rw-r--r-- | psr.h | 21 | ||||
-rw-r--r-- | ramfs.c | 65 | ||||
-rw-r--r-- | ramfs.h | 16 | ||||
-rw-r--r-- | setup.c | 119 | ||||
-rw-r--r-- | strings.c | 119 | ||||
-rw-r--r-- | strings.h | 31 | ||||
-rw-r--r-- | svc.S | 5 | ||||
-rw-r--r-- | svc_interface.h | 11 | ||||
-rw-r--r-- | uart.c | 10 | ||||
-rw-r--r-- | uart.h | 61 |
45 files changed, 1837 insertions, 696 deletions
@@ -1,2 +1,7 @@ .gitignore .idea +*.o +*.elf +*.img +pipe_image +makefs @@ -1,50 +1,63 @@ -CFLAGS=-mcpu=cortex-a7 -ffreestanding -std=gnu11 -Wall -Wextra -O2 -fPIC -I. +CFLAGS=-mcpu=cortex-a7 -ffreestanding -std=gnu11 -Wall -Wextra -O2 -I. ELFFLAGS=-nostdlib -lgcc ARM_OBJECTS=kernel.o paging.o demo_functionality.o PL0_test.o uart.o loader_stage1.o loader_stage2.o -EMBEDDABLE_OBJECTS=PL_0_test_embeddable.o loader_stage2_embeddable.o +KERNEL_STAGE2_OBJECTS=setup.o interrupt_vector.o interrupts.o uart.o demo_functionality.o paging.o ramfs_embeddable.o ramfs.o strings.o io.o atags.o -RENAME_FLAGS=--rename-section .data=.renamed_data --rename-section .rodata=.renamed_rodata --rename-section .text=.renamed_text --rename-section .bss=.renamed_bss +PL_0_TEST_OBJECTS=PL0_utils.o svc.o PL0_test.o strings.o io.o -all : kernel7.img +LOADER_STAGE2_OBJECTS=uart.o strings.o io.o loader_stage2.o + +RAMFS_FILES=PL_0_test.img + +all : kernel.img %.o : %.c arm-none-eabi-gcc $(CFLAGS) -c $^ -o $@ -%.o : %.S - arm-none-eabi-as -mcpu=cortex-a7 $^ -o $@ - %.img : %.elf arm-none-eabi-objcopy $^ -O binary $@ +%.o : %.S + arm-none-eabi-as -mcpu=cortex-a7 $^ -o $@ + %_embeddable.o : %.img - arm-none-eabi-objcopy -I binary -O elf32-littlearm -B arm --rename-section .data=.rodata $^ $@ + arm-none-eabi-objcopy -I binary -O elf32-littlearm -B arm $^ $@ -libkernel_renamed.o : libkernel.o - arm-none-eabi-objcopy $(RENAME_FLAGS) $^ $@ +PL_0_test.elf : PL0_test.ld $(PL_0_TEST_OBJECTS) + arm-none-eabi-gcc -T $< -o $@ $(ELFFLAGS) $(PL_0_TEST_OBJECTS) -PL_0_test.elf : PL0_test.o uart.o - arm-none-eabi-gcc -T PL0_test.ld -o $@ $(ELFFLAGS) $^ +kernel_stage1.o : kernel_stage1.S kernel_stage2.img + arm-none-eabi-as -mcpu=cortex-a7 $< -o $@ -kernel.elf : boot.o kernel.o uart.o PL_0_test_embeddable.o demo_functionality.o paging.o libkernel_renamed.o interrupt_vector.o interrupts.o - arm-none-eabi-gcc -T linker.ld -o $@ $(ELFFLAGS) $^ +kernel.elf : kernel_stage1.ld kernel_stage1.o + arm-none-eabi-gcc -T $< -o 
$@ $(ELFFLAGS) kernel_stage1.o -loader_stage2.elf : loader_stage2.o uart.o - arm-none-eabi-gcc -T loader_stage2.ld -o $@ $(ELFFLAGS) $^ +kernel_stage2.elf : kernel_stage2.ld $(KERNEL_STAGE2_OBJECTS) + arm-none-eabi-gcc -T $< -o $@ $(ELFFLAGS) $(KERNEL_STAGE2_OBJECTS) -loader_stage2.img : loader_stage2.elf - arm-none-eabi-objcopy $^ -O binary $@ - test -n "$$(find $@ -size -16384c)" || exit -1 +loader_stage2.elf : loader_stage2_linker.ld $(LOADER_STAGE2_OBJECTS) + arm-none-eabi-gcc -T $< -o $@ $(ELFFLAGS) $(LOADER_STAGE2_OBJECTS) -loader.elf : boot.o loader_stage1.o loader_stage2_embeddable.o - arm-none-eabi-gcc -T loader_stage1.ld -o $@ $(ELFFLAGS) $^ +loader_stage1.o : loader_stage1.S loader_stage2.img + arm-none-eabi-as -mcpu=cortex-a7 $< -o $@ +loader.elf : loader_stage1_linker.ld loader_stage1.o + arm-none-eabi-gcc -T $< -o $@ $(ELFFLAGS) loader_stage1.o + +loader.img : loader.elf + arm-none-eabi-objcopy $^ -O binary $@ +# check if the resulting image is not too big + test -n "$$(find $@ -size -16384c)" || exit -1 qemu-elf : kernel.elf qemu-system-arm -m 256 -M raspi2 -serial stdio -kernel $^ -qemu-bin : loader.elf kernel.img pipe_image +qemu-bin : kernel.img + qemu-system-arm -m 256 -M raspi2 -serial stdio -kernel $^ + +qemu-loader : loader.img kernel.img pipe_image ./pipe_image --stdout | qemu-system-arm -m 256 -M raspi2 -serial stdio -kernel $< run-on-rpi : kernel.img pipe_image @@ -54,7 +67,13 @@ run-on-rpi : kernel.img pipe_image pipe_image : pipe_image.c lib/rs232/rs232.c gcc -Wall -std=gnu99 -O3 $^ -o $@ +makefs : makefs.c + gcc -Wall -std=gnu99 -O3 $^ -o $@ + +ramfs.img : makefs $(RAMFS_FILES) + ./makefs $(RAMFS_FILES) > $@ + clean : - -rm *.img *.elf *.o pipe_image + -rm *.img *.elf *.o pipe_image makefs .PHONY: all qemu-elf qemu-bin clean @@ -1,27 +1,34 @@ -#include "uart.h" -#include "psr.h" +#include "PL0_utils.h" +// entry point - must remain the only function in the file! void PL0_main(void) { - // if all went correct, Success! 
gets printed - uart_puts("Success!\n\r"); - - // if we're indeed i PL0, we should crash now, when trying to access - // memory we're not allowed to - char first_kernel_byte[2]; - - first_kernel_byte[0] = *(char*) ((uint32_t) 0x8000); - first_kernel_byte[1] = '\0'; + // If loading program to userspace and handling of svc are + // implemented correctly, this shall get printed + puts("Hello userspace! Type 'f' if you want me to try accessing " + "kernel memory!"); - uart_puts(first_kernel_byte); - while (1) { - char c = uart_getc(); - - uart_putc(c); + char c = getchar(); if (c == '\r') - uart_putc('\n'); + putchar('\n'); + + putchar(c); + + if (c == 'f') + { + // if we're indeed in PL0, we should trigger the abort + // handler now, when trying to access memory we're not + // allowed to + puts("Attempting to read kernel memory from userspace :d"); + char first_kernel_byte[2]; + + first_kernel_byte[0] = *(char*) 0x0; + first_kernel_byte[1] = '\0'; + + puts(first_kernel_byte); + } } } diff --git a/PL0_test.ld b/PL0_test.ld index 430e098..b1d06f4 100644 --- a/PL0_test.ld +++ b/PL0_test.ld @@ -1,44 +1,19 @@ -ENTRY(_start) - +/* linker script for creating the example userspace program PL0_test + */ + +/* no ENTRY() statement - this executable is run by jumping to it */ + SECTIONS { - /* 0b10101010101000000000000000000000 */ + /* my thought up address userspace programs should run from */ . = 0xaaa00000; + __start = .; - __text_start = .; - .text : + .another_weird_section_name_that_doesnt_matter : { /* have entry point at the beginning */ - KEEP(*(.text.PL0main)) - *(.text) + KEEP(PL0_test.o) + *(*) } - . = ALIGN(4096); /* align to page size */ - __text_end = .; - - __rodata_start = .; - .rodata : - { - *(.rodata) - } - . = ALIGN(4096); /* align to page size */ - __rodata_end = .; - - __data_start = .; - .data : - { - *(.data) - } - . = ALIGN(4096); /* align to page size */ - __data_end = .; - - __bss_start = .; - .bss : - { - bss = .; - *(.bss) - } - . 
= ALIGN(4096); /* align to page size */ - __bss_end = .; - __bss_size = __bss_end - __bss_start; __end = .; } diff --git a/PL0_utils.c b/PL0_utils.c new file mode 100644 index 0000000..d83edb9 --- /dev/null +++ b/PL0_utils.c @@ -0,0 +1,19 @@ +#include <stddef.h> +#include <stdint.h> + +#include "svc_interface.h" +#include "PL0_utils.h" + +// most generic definition possible +// the actual function defined in svc.S +uint32_t svc(enum svc_type, ...); + +void putchar(char character) +{ + svc(UART_PUTCHAR, character); +} + +char getchar(void) +{ + return svc(UART_GETCHAR); +} diff --git a/PL0_utils.h b/PL0_utils.h new file mode 100644 index 0000000..c26a100 --- /dev/null +++ b/PL0_utils.h @@ -0,0 +1,6 @@ +#ifndef PL0_UTILS_H +#define PL0_UTILS_H + +#include "io.h" + +#endif // PL0_UTILS_H diff --git a/armclock.h b/armclock.h new file mode 100644 index 0000000..2b2aec9 --- /dev/null +++ b/armclock.h @@ -0,0 +1,68 @@ +#ifndef ARMCLOCK_H +#define ARMCLOCK_H + +#include <stdint.h> + +#include "global.h" + +#define ARMCLK_LOAD (ARM_BASE + 0x400) +#define ARMCLK_VALUE (ARM_BASE + 0x404) +#define ARMCLK_CONTROL (ARM_BASE + 0x408) +#define ARMCLK_IRQ_CLR_ACK (ARM_BASE + 0x40C) +#define ARMCLK_RAW_IRQ (ARM_BASE + 0x410) +#define ARMCLK_MASKED_IRQ (ARM_BASE + 0x414) +#define ARMCLK_RELOAD (ARM_BASE + 0x418) +#define ARMCLK_PRE_DRIVER (ARM_BASE + 0x41C) +#define ARMCLK_FREE_RUNNING_COUNTER (ARM_BASE + 0x420) + +typedef union armclk_control +{ + uint32_t raw; + struct + { + uint32_t one_shot_mode : 1; // bit 0; unused in RPi + uint32_t counter_23bit : 1; // bit 1 + uint32_t pre_scale : 2; // bits 3:2 + uint32_t bit_4 : 1; // bit 4 + uint32_t interrupt_enable : 1; // bit 5 + uint32_t periodic_mode : 1; // bit 6; unused in RPi + uint32_t timer_enable : 1; // bit 7 + uint32_t halt_in_debug : 1; // bit 8 + uint32_t free_running_enable : 1; // bit 9 + uint32_t bits_15_10 : 6; // bits 15:10 + uint32_t free_running_pre_scaler : 8; // bits 23:16 + uint32_t bits_31_24 : 8; // bits 31:24 + 
} fields; +} armclk_control_t; + +static inline void armclk_init(void) +{ + armclk_control_t ctrl = (armclk_control_t) (uint32_t) 0; + ctrl.fields.timer_enable = 1; + ctrl.fields.interrupt_enable = 1; + ctrl.fields.counter_23bit = 1; + *(uint32_t volatile*) ARMCLK_CONTROL = ctrl.raw; +} + +static inline void armclk_enable_timer_irq(void) +{ + *(uint32_t volatile*) ARM_ENABLE_BASIC_IRQS = 1; +} + +static inline void armclk_disable_timer_irq(void) +{ + *(uint32_t volatile*) ARM_DISABLE_BASIC_IRQS = 1; +} + +static inline void armclk_irq_settimeout(uint32_t timeout) +{ + *(uint32_t volatile*) ARMCLK_IRQ_CLR_ACK = 0; + *(uint32_t volatile*) ARMCLK_LOAD = timeout; +} + +static inline _Bool armclk_irq_pending(void) +{ + return *(uint32_t volatile*) ARMCLK_RAW_IRQ; +} + +#endif // ARMCLOCK_H @@ -0,0 +1,103 @@ +#include "atags.h" +#include "io.h" + +static inline struct atag_header *next_tag(struct atag_header *tag) +{ + return (struct atag_header*) (((uint32_t*) tag) + tag->size); +} + +#define TAG_CONTENTS_FUN(tagname) \ + static inline struct atag_##tagname *tagname##_tag_contents \ + (struct atag_header *tag) \ + { \ + return (struct atag_##tagname*) (tag + 1); \ + } + +TAG_CONTENTS_FUN(header) +TAG_CONTENTS_FUN(core) +TAG_CONTENTS_FUN(mem) +TAG_CONTENTS_FUN(videotext) +TAG_CONTENTS_FUN(ramdisk) +TAG_CONTENTS_FUN(initrd2) +TAG_CONTENTS_FUN(serialnr) +TAG_CONTENTS_FUN(revision) +TAG_CONTENTS_FUN(videolfb) +TAG_CONTENTS_FUN(cmdline) + +uint32_t find_memory_size(struct atag_header *atags) +{ + // we silently assume there will only be one mem atag + while (atags->tag != ATAG_MEM && atags->tag != ATAG_NONE) + atags = next_tag(atags); + + if (atags->tag == ATAG_NONE) + return 0; + + struct atag_mem *mem_tag = mem_tag_contents(atags); + + // our design assumes address 0x0 is available, so we reject mem + // atag saying otherwise + if (mem_tag->start != 0) + { + puts("ignoring information about memory, " + "that doesn't start at 0x0"); + return 0; + } + + return mem_tag->size; 
+} + +void print_tag(struct atag_header *tag) +{ +#define TAG_CASE(tagname_upcase, tagname_locase, instructions) \ + case ATAG_##tagname_upcase: \ + puts("ATAG_" #tagname_upcase ":"); \ + { \ + struct atag_##tagname_locase *contents = \ + tagname_locase##_tag_contents(tag); \ + instructions; \ + } \ + break + + switch (tag->tag) + { + TAG_CASE(CORE, core, + prints(" flags: 0x"); + printhex(contents->flags); puts(""); + prints(" page size: "); + printdec(contents->pagesize); puts(""); + prints(" root device: "); + printdec(contents->rootdev); puts("");); + TAG_CASE(MEM, mem, + prints(" memory size: 0x"); + printhex(contents->size); puts(""); + prints(" memory start: 0x"); + printhex(contents->start); puts("");); + // the rest are unimportant for now, + // as they're not passed by qemu + TAG_CASE(VIDEOTEXT, videotext, (void) contents;); + TAG_CASE(RAMDISK, ramdisk, (void) contents;); + TAG_CASE(INITRD2, initrd2, (void) contents;); + TAG_CASE(SERIAL, serialnr, (void) contents;); + TAG_CASE(REVISION, revision, (void) contents;); + TAG_CASE(VIDEOLFB, videolfb, (void) contents;); + TAG_CASE(CMDLINE, cmdline, (void) contents;); + + case ATAG_NONE: + puts("ATAG_NONE"); + break; + default: + prints("!! 
unknown tag: 0x"); printhex(tag->tag); puts(" !!"); + } +} + +void print_atags(struct atag_header *atags) +{ + while (atags->tag != ATAG_NONE) + { + print_tag(atags); + atags = next_tag(atags); + } + + print_tag(atags); // also print ATAG_NONE +} @@ -0,0 +1,102 @@ +#ifndef ATAGS_H +#define ATAGS_H + +#include <stdint.h> + +#define ATAG_NONE 0x00000000 +#define ATAG_CORE 0x54410001 +#define ATAG_MEM 0x54410002 +#define ATAG_VIDEOTEXT 0x54410003 +#define ATAG_RAMDISK 0x54410004 +#define ATAG_INITRD2 0x54420005 +#define ATAG_SERIAL 0x54410006 +#define ATAG_REVISION 0x54410007 +#define ATAG_VIDEOLFB 0x54410008 +#define ATAG_CMDLINE 0x54410009 + +struct atag_header +{ + uint32_t size; + uint32_t tag; +}; + +struct atag_core +{ + uint32_t flags; + uint32_t pagesize; + uint32_t rootdev; +}; + +struct atag_mem +{ + uint32_t size; + uint32_t start; +}; + +struct atag_videotext +{ + uint8_t x; + uint8_t y; + uint16_t video_page; + uint8_t video_mode; + uint8_t video_cols; + uint16_t video_ega_bx; + uint8_t video_lines; + uint8_t video_isvga; + uint16_t video_points; +}; + +struct atag_ramdisk +{ + uint32_t flags; + uint32_t size; + uint32_t start; +}; + +struct atag_initrd2 +{ + uint32_t start; + uint32_t size; +}; + +struct atag_serialnr +{ + uint32_t low; + uint32_t high; +}; + +struct atag_revision +{ + uint32_t rev; +}; + +struct atag_videolfb +{ + uint16_t lfb_width; + uint16_t lfb_height; + uint16_t lfb_depth; + uint16_t lfb_linelength; + uint32_t lfb_base; + uint32_t lfb_size; + uint8_t red_size; + uint8_t red_pos; + uint8_t green_size; + uint8_t green_pos; + uint8_t blue_size; + uint8_t blue_pos; + uint8_t rsvd_size; + uint8_t rsvd_pos; +}; + +struct atag_cmdline +{ + char cmdline[1]; +}; + +uint32_t find_memory_size(struct atag_header *atags); + +void print_tag(struct atag_header *tag); + +void print_atags(struct atag_header *atags); + +#endif // ATAGS_H diff --git a/bcmclock.h b/bcmclock.h new file mode 100644 index 0000000..dd7136b --- /dev/null +++ b/bcmclock.h 
@@ -0,0 +1,35 @@ +#ifndef BCMCLOCK_H +#define BCMCLOCK_H + +#include <stdint.h> + +#include "global.h" + +#define ST_BASE (PERIF_BASE + 0x3000) // System Timer + +#define ST_CS (ST_BASE + 0x0) // System Timer Control/Status +#define ST_CLO (ST_BASE + 0x4) // System Timer Counter Lower 32 bits +#define ST_CHI (ST_BASE + 0x8) // System Timer Counter Higher 32 bits +#define ST_C0 (ST_BASE + 0xC) // System Timer Compare 0 +#define ST_C1 (ST_BASE + 0x10) // System Timer Compare 1 +#define ST_C2 (ST_BASE + 0x14) // System Timer Compare 2 +#define ST_C3 (ST_BASE + 0x18) // System Timer Compare 3 + +static inline void bcmclk_enable_timer_irq(void) +{ + *(uint32_t volatile*) ARM_ENABLE_IRQS_1 = 1 << 3; +} + +static inline void bcmclk_disable_timer_irq(void) +{ + *(uint32_t volatile*) ARM_DISABLE_IRQS_1 = 1 << 3; +} + +static inline void bcmclk_irq_settimeout(uint32_t timeout) +{ + uint32_t clock_now = *(uint32_t volatile*) ST_CLO; + *(uint32_t volatile*) ST_C3 = clock_now + timeout; + *(uint32_t volatile*) ST_CS = 1 << 3; +} + +#endif // BCMCLOCK_H @@ -1,58 +0,0 @@ -// armv7 mode - -// To keep this in the first portion of the binary. -.section ".text.boot" - -//.org 0x8000 - -// Make _start global. -.globl _start - -// Entry point for the kernel. -// r15 -> should begin execution at 0x8000. -// r0 -> 0x00000000 -// r1 -> 0x00000C42 -// r2 -> 0x00000100 - start of ATAGS -// preserve these registers as argument for kernel_main -_start: - // Shut off extra cores - mrc p15, 0, r5, c0, c0, 5 - and r5, r5, #3 - cmp r5, #0 - bne halt - - // go to system mode - cps #0b11111 - isb - - // Setup the stack. - // It shall be directly below our kernel image - ldr r5, =__start - mov sp, r5 - - // Clear out bss. - ldr r4, =__bss_start - ldr r9, =__bss_end - mov r5, #0 - mov r6, #0 - mov r7, #0 - mov r8, #0 - b 2f - -1: - // store multiple at r4. - stmia r4!, {r5-r8} - - // If we are still below bss_end, loop. 
-2: - cmp r4, r9 - blo 1b - - // Call kernel_main - ldr r3, =kernel_main - bx r3 - - // halt -halt: - wfe - b halt diff --git a/demo_functionality.c b/demo_functionality.c index 691cdb1..1ef91a1 100644 --- a/demo_functionality.c +++ b/demo_functionality.c @@ -1,7 +1,11 @@ -#include "uart.h" +#include "io.h" #include "psr.h" #include "memory.h" #include "translation_table_descriptors.h" +#include "ramfs.h" +#include "strings.h" +#include "paging.h" +#include "armclock.h" void demo_paging_support(void) { @@ -13,16 +17,16 @@ void demo_paging_support(void) switch(ID_MMFR0 & 0xf) /* lowest 4 bits indicate VMSA support */ { - case 0 : paging = "no paging\n\r"; break; - case 1 : paging = "implementation defined paging\n\r"; break; - case 2 : paging = "VMSAv6, with cache and TLB type registers\n\r"; break; - case 3 : paging = "VMSAv7, with support for remapping and access flag\n\r"; break; - case 4 : paging = "VMSAv7 with PXN bit supported\n\r"; break; - case 5 : paging = "VMSAv7, PXN and long format descriptors. EPAE is supported.\n\r"; break; - default : paging = "?_? unknown paging ?_?\n\r"; + case 0 : paging = "no paging"; break; + case 1 : paging = "implementation defined paging"; break; + case 2 : paging = "VMSAv6, with cache and TLB type registers"; break; + case 3 : paging = "VMSAv7, with support for remapping and access flag"; break; + case 4 : paging = "VMSAv7 with PXN bit supported"; break; + case 5 : paging = "VMSAv7, PXN and long format descriptors. EPAE is supported."; break; + default : paging = "?_? 
unknown paging ?_?"; } - uart_puts(paging); + puts(paging); } void demo_current_mode(void) @@ -36,99 +40,102 @@ void demo_current_mode(void) switch(CPSR.fields.PSR_MODE_4_0) { - case MODE_USER : mode_name = "User (PL0)\r\n"; break; - case MODE_FIQ : mode_name = "FIQ (PL1)\r\n"; break; - case MODE_IRQ : mode_name = "IRQ (PL1)\r\n"; break; - case MODE_SUPERVISOR : mode_name = "Supervisor (PL1)\r\n"; break; - case MODE_MONITOR : mode_name = "Monitor (PL1)\r\n"; break; - case MODE_ABORT : mode_name = "Abort (PL1)\r\n"; break; - case MODE_HYPERVISOR : mode_name = "Hyp (PL2)\r\n"; break; - case MODE_UNDEFINED : mode_name = "Undefined (PL1)\r\n"; break; - case MODE_SYSTEM : mode_name = "System (PL1)\r\n"; break; - default : mode_name = "Unknown mode\r\n"; break; + case MODE_USER : mode_name = "User (PL0)"; break; + case MODE_FIQ : mode_name = "FIQ (PL1)"; break; + case MODE_IRQ : mode_name = "IRQ (PL1)"; break; + case MODE_SUPERVISOR : mode_name = "Supervisor (PL1)"; break; + case MODE_MONITOR : mode_name = "Monitor (PL1)"; break; + case MODE_ABORT : mode_name = "Abort (PL1)"; break; + case MODE_HYPERVISOR : mode_name = "Hyp (PL2)"; break; + case MODE_UNDEFINED : mode_name = "Undefined (PL1)"; break; + case MODE_SYSTEM : mode_name = "System (PL1)"; break; + default : mode_name = "Unknown mode"; break; } - uart_puts("current mode: "); - uart_puts(mode_name); + prints("current mode: "); + puts(mode_name); } +#define TRANSLATION_TABLE \ + ((short_section_descriptor_t volatile*) TRANSLATION_TABLE_BASE) + extern char - _binary_PL_0_test_img_start, - _binary_PL_0_test_img_end, - _binary_PL_0_test_img_size; + _binary_ramfs_img_start, + _binary_ramfs_img_end, + _binary_ramfs_img_size; -void demo_go_unprivileged(void) +void demo_setup_PL0(void) { - short_section_descriptor_t *translation_table = - (short_section_descriptor_t*) TRANSLATION_TABLE_BASE; - - volatile short_section_descriptor_t *PL0_section = - &translation_table[PL0_SECTION_NUMBER]; - volatile 
short_section_descriptor_t *UART_memory_section = - &translation_table[((uint32_t) GPIO_BASE) >> 20]; - - PL0_section->SECTION_BASE_ADDRESS_31_20 = - UNPRIVILEGED_MEMORY_START >> 20; - - // make the selected section and uart section available for PL0 - PL0_section->ACCESS_PERMISSIONS_2 = - AP_2_0_MODEL_RW_ALL >> 2; - PL0_section->ACCESS_PERMISSIONS_1_0 = - AP_2_0_MODEL_RW_ALL & 0b011; - - UART_memory_section->ACCESS_PERMISSIONS_2 = - AP_2_0_MODEL_RW_ALL >> 2; - UART_memory_section->ACCESS_PERMISSIONS_1_0 = - AP_2_0_MODEL_RW_ALL & 0b011; - + // find PL_0_test.img in ramfs + struct ramfile PL_0_test_img; + + if (find_file(&_binary_ramfs_img_start, "PL_0_test.img", + &PL_0_test_img)) + { + puts("PL_0_test.img not found :("); + asm volatile ("wfi"); + } - // invalidate main Translation Lookup Buffer (just in case) - asm("mcr p15, 0, %0, c8, c7, 0\n\r" - "isb" :: "r" (0) : "memory"); + // dummy value 5 for now, as we haven't implemented processes yet + uint16_t physical_section_number = claim_and_map_section + ((void*) 5, PL0_SECTION_NUMBER, AP_2_0_MODEL_RW_ALL); + if (physical_section_number == SECTION_NULL) + { + puts("Couldn't claim memory section for unprivileged code :("); + while(1); + } + + size_t physical_section_start = + (((size_t) physical_section_number) << 20); + // check that translation works...
by copying a string using one // mapping and reading it using other :D - char message[] = "mapped sections for PL0 code\n\r"; - - unsigned int i; - for (i = 0; i < sizeof(message); i++) - ((volatile char*) UNPRIVILEGED_MEMORY_START)[i] = message[i]; - - uart_puts((char*) VIRTUAL_PL0_MEMORY_START); - - // now paste a userspace program to that section, jump to it and - // switch to PL0 - for (size_t i = 0; i < (size_t) &_binary_PL_0_test_img_size; i++) - ((volatile char*) VIRTUAL_PL0_MEMORY_START)[i] = - (&_binary_PL_0_test_img_start)[i]; - - // jump to that copied code (switch to PL0 is done by that code) - asm volatile("mov r5, #0\n\r" - "movt r5, #"PL0_SECTION_NUMBER_STR"1111\n\r" - "mov sp, r5\n\r" // setting stack is important :D - "mov r5, #0\n\r" - "movt r5, #"PL0_SECTION_NUMBER_STR"0000\n\r" - "blx r5\n\r"); -} + char str_part1[] = "mapped section for PL0 code (0x"; + char str_part2[] = " -> 0x"; + char str_part3[] = ")"; -extern char - __interrupt_vectors_start, - __interrupt_vectors_end, - __interrupt_vectors_size; + char *string_end = (char*) physical_section_start; -extern void (*volatile system_reentry_point)(void); + memcpy(string_end, str_part1, sizeof(str_part1) - 1); + uint32_to_hex(VIRTUAL_PL0_MEMORY_START, + string_end += sizeof(str_part1) - 1); + memcpy(string_end += 8, str_part2, sizeof(str_part2) - 1); + uint32_to_hex(physical_section_start, + string_end += sizeof(str_part2) - 1); + memcpy(string_end += 8, str_part3, sizeof(str_part3)); -void system_reentry(void) -{ - uart_puts("re-entered system"); - while(1); + puts((char*) VIRTUAL_PL0_MEMORY_START); + + // now paste a userspace program to that section + memcpy((void*) VIRTUAL_PL0_MEMORY_START, + PL_0_test_img.file_contents, PL_0_test_img.file_size); + + puts("copied PL0 code to it's section"); } -void demo_setup_interrupts(void) +void demo_go_unprivileged(void) { - system_reentry_point = system_reentry; + uint32_t PL0_regs[14] = {0}; + PL0_regs[13] = VIRTUAL_PL0_MEMORY_START; // the new pc + 
+ PSR_t new_SPSR = read_CPSR(); + new_SPSR.fields.PSR_MODE_4_0 = MODE_USER; + new_SPSR.fields.PSR_IRQ_MASK_BIT = 0; + write_SPSR(new_SPSR); - for (size_t i = 0; i < (size_t) &__interrupt_vectors_size; i++) - ((volatile char*) 0)[i] = - (&__interrupt_vectors_start)[i]; + puts("All ready, jumping to PL0 code"); + + armclk_irq_settimeout(0x00100000); + + asm volatile("cps %[sysmode]\n\r" + "mov sp, %[stackaddr]\n\r" + "cps %[supmode]\n\r" + "ldm %[contextaddr], {r0 - r12, pc} ^" :: + [sysmode]"I" (MODE_SYSTEM), + [supmode]"I" (MODE_SUPERVISOR), + [stackaddr]"r" ((PL0_SECTION_NUMBER + 1) << 20), + [contextaddr]"r" (PL0_regs) : "memory"); + + __builtin_unreachable(); } @@ -4,14 +4,38 @@ // board type, raspi2 #define RASPI 2 +// conditionally #define PERIF_BASE #if RASPI == 4 -#define GPIO_BASE 0xFE200000 -#else -#if RASPI == 3 || RASPI == 2 -#define GPIO_BASE 0x3F200000 -#else -#define GPIO_BASE 0x20200000 -#endif // RASPI == 3 || RASPI == 2 -#endif // RASPI == 4 + +#define PERIF_BASE 0xFE000000 + +#elif RASPI == 3 || RASPI == 2 + +#define PERIF_BASE 0x3F000000 + +#else // if RASPI == 1 + +#define PERIF_BASE 0x20000000 + +#endif + +// GPIO_BASE is #define'd in terms of PERIF_BASE +// (as in sane kernels - like linux, not like in wiki.osdev codes...) 
+#define GPIO_BASE (PERIF_BASE + 0x200000) + +// ARM control block +// called "base address for the ARM interrupt register" elsewhere +#define ARM_BASE (PERIF_BASE + 0xB000) + +#define ARM_IRQ_BASIC_PENDING (ARM_BASE + 0x200) +#define ARM_IRQ_PENDING_1 (ARM_BASE + 0x204) +#define ARM_IRQ_PENDING_2 (ARM_BASE + 0x208) +#define ARM_FIQ_CONTROL (ARM_BASE + 0x20C) +#define ARM_ENABLE_IRQS_1 (ARM_BASE + 0x210) +#define ARM_ENABLE_IRQS_2 (ARM_BASE + 0x214) +#define ARM_ENABLE_BASIC_IRQS (ARM_BASE + 0x218) +#define ARM_DISABLE_IRQS_1 (ARM_BASE + 0x21C) +#define ARM_DISABLE_IRQS_2 (ARM_BASE + 0x220) +#define ARM_DISABLE_BASIC_IRQS (ARM_BASE + 0x224) #endif // GLOBAL_H diff --git a/interrupt_vector.S b/interrupt_vector.S index 3daef05..1f5bb3a 100644 --- a/interrupt_vector.S +++ b/interrupt_vector.S @@ -1,10 +1,8 @@ .section ".interrupt_vectors.text" .global abort_handler -.global irq_handler -.local generic_handler +.local generic_handler .global _interrupt_vectors - _interrupt_vectors: ldr pc,reset_handler ldr pc,undefined_handler @@ -37,13 +35,56 @@ enable_irq: bic r0,r0,#0x80 msr cpsr_c,r0 bx lr + b reset_handler_caller + b undef_handler_caller + b svc_handler_caller + b abort_handler_caller + b abort_handler_caller + b generic_handler_caller + b irq_handler_caller + b fiq_handler_caller + +reset_handler_caller: + ldr sp, =_supervisor_stack_top + ldr r5, =reset_handler + bx r5 + +undef_handler_caller: + ldr sp, =_supervisor_stack_top + ldr r5, =undefined_instruction_vector + bx r5 + +svc_handler_caller: + ldr sp, =_supervisor_stack_top + push {r5, lr} + ldr r5, =supervisor_call_handler + blx r5 + ldm sp!, {r5, pc} ^ -generic_handler: - b generic_handler abort_handler_caller: - mov sp, #0x8000 + ldr sp, =_supervisor_stack_top ldr r5, =abort_handler bx r5 + +generic_handler_caller: + ldr sp, =_supervisor_stack_top + ldr r5, =generic_handler + bx r5 + +irq_handler_caller: + ldr sp, =_irq_stack_top + sub lr, #4 + push {r0-r3, lr} + ldr r3, =irq_handler + blx r3 + ldm sp!, 
{r0-r3, pc} ^ + +fiq_handler_caller: + ldr sp, =_fiq_stack_top + ldr r5, =fiq_handler + bx r5 + + irq: mov sp, #0x8000 ldr r5, =abort_handler diff --git a/interrupts.c b/interrupts.c index 712cfb9..2dd7fbd 100644 --- a/interrupts.c +++ b/interrupts.c @@ -1,33 +1,91 @@ #include "uart.h" #include "interrupts.h" +#include "io.h" +#include "svc_interface.h" +#include "armclock.h" /** @brief The undefined instruction interrupt handler + */ +void __attribute__((noreturn)) setup(void); - If an undefined instruction is encountered, the CPU will start - executing this function. Just trap here as a debug solution. -*/ -void __attribute__((interrupt("UNDEF"))) undefined_instruction_vector(void) +// from what I've heard, reset is never used on the Pi; +// in our case it should run once - when stage1 of the kernel +// jumps to stage2 +void reset_handler(void) { - uart_puts("Undefined instruction occured"); + setup(); +} + +void undefined_instruction_vector(void) +{ + puts("Undefined instruction occured"); while( 1 ) { /* Do Nothing! */ } } -void __attribute__((section(".interrupt_vectors.data"))) -(*system_reentry_point) (void); +uint32_t supervisor_call_handler(enum svc_type request, uint32_t arg1, + uint32_t arg2, uint32_t arg3) +{ + (void) arg2; (void) arg3; // unused for now -void -__attribute__((interrupt("ABORT"))) -__attribute__((section(".interrupt_vectors.text"))) -abort_handler(void) + switch(request) { + case UART_PUTCHAR: + putchar(arg1); + break; + case UART_GETCHAR: + return getchar(); + case UART_WRITE: + puts("UART_WRITE not implemented!!!!!"); + break; + default: + // perhaps we should kill the process now? 
+ puts("unknown supervisor call type!!!!!"); + } + + return 0; // a dummy value +} + +void abort_handler(void) +{ + puts("re-entered system"); + + while(1); +} + +void generic_handler(void) +{ + puts("something weird happened"); + + while(1); +} + +void irq_handler(void) { uart_puts("nwm\r\n"); system_reentry_point(); + if (armclk_irq_pending()) + { + puts("<<irq from timer>>"); + armclk_irq_settimeout(0x00100000); + } + else + { + puts("unknown irq"); + while(1); + } +} + +void fiq_handler(void) +{ + puts("fiq happened"); + + while(1); } + /* Here is your interrupt function */ void __attribute__((interrupt("IRQ"))) @@ -0,0 +1,64 @@ +#include <stddef.h> + +#include "io.h" +#include "strings.h" + +void puts(char string[]) +{ + prints(string); + + putchar('\n'); + putchar('\r'); +} + +void prints(char string[]) +{ + for (size_t i = 0; string[i]; i++) + putchar(string[i]); +} + +void printdec(uint32_t number) +{ + char buf[11]; + + uint32_to_decstring(number, buf); + + prints(buf); +} + +void printhex(uint32_t number) +{ + char buf[9]; + + uint32_to_hexstring(number, buf); + + prints(buf); +} + +void printbin(uint32_t number) +{ + char buf[33]; + + uint32_to_binstring(number, buf); + + prints(buf); +} + +void printdect(uint32_t number) +{ + char buf[11]; + + uint32_to_decstringt(number, buf); + + prints(buf); +} + +void printhext(uint32_t number) +{ + char buf[9]; + + uint32_to_hexstringt(number, buf); + + prints(buf); +} + @@ -0,0 +1,26 @@ +#ifndef IO_H +#define IO_H + +#include <stdint.h> + +// putchar() and getchar() are not part of io.c, but it's useful to +// have those symbols declared here +void putchar(char c); + +char getchar(void); + +void puts(char string[]); + +void prints(char string[]); + +void printdec(uint32_t number); + +void printhex(uint32_t number); + +void printbin(uint32_t number); + +void printdect(uint32_t number); + +void printhext(uint32_t number); + +#endif // IO_H @@ -1,107 +0,0 @@ -#include "uart.h" -#include "demo_functionality.h" 
-#include "paging.h" -#include "interrupts.h" -#include "strings.h" -#include "psr.h" - -extern void enable_irq ( void ); -void kernel_main(uint32_t r0, uint32_t r1, uint32_t atags) -{ - // Declare as unused - (void) r0; - (void) r1; - (void) atags; - - uart_init(); - - // When we attach screen session after loading kernel with socat - // we miss kernel's greeting... So we'll make the kernel wait for - // one char we're going to send from within screen - uart_getc(); - - uart_puts("Hello, kernel World!\r\n"); - - // prints some info - demo_paging_support(); - - - - // prints some info and switches to system mode - demo_mode_to_system(); - // prints some info - demo_current_mode(); - - // prints some info and sets upp translation table, turns on MMU - setup_flat_map(); - - demo_setup_libkernel(); - - demo_setup_PL0(); - enable_timer(); - - // enable interrupts - demo_setup_interrupts(); - - *(int *)(0x3000B210) = 1; - *(int *)(0x3000B204) = 1; - - - asm("LDR r0, =0x3F00B200\n\r" - "MOV r1, #2\n\r" - "STR r1, [r0, #0x00]\n\r"); - - *(int *)(0x7E00B210) = 1; - *(int *)(0x7E00B214) = 1; - *(int *)(0x7E00B218) = 1; - uart_putc(*(int*)(0x40000034) ); - *(int *)(0x40000034) = 1; - - uint32_to_bits(*(int*)(0xE000ED24),buf); - uart_puts(buf); - uart_puts("\r\n"); - uint32_to_bits(*(int*)(0xE002ED24),buf); - uart_puts(buf); - uart_puts("\r\n"); - - int regVal; - asm("mrs %0, cpsr":"=r"(regVal):); - uint32_to_bits(regVal,buf); - uart_puts(buf); - uart_puts("\r\n"); - -// ; Read ICC_IGRPEN1 into Rt -// MCR p15,0,<Rt>,c12,c12,7 ; Write Rt to ICC_IGRPEN1 - - uint32_t ICC_IGRPEN1; - asm(" MRC p15,0, %0 ,c12,c12,7" : "=r"(ICC_IGRPEN1) :: "memory"); //READ FROM ICC_IGRPEN1 - - uint32_to_bits(ICC_IGRPEN1,buf); - uart_puts(buf); - uart_puts("\r\n"); - - - // uint32_to_bits(*(int*)(0x40000024),buf); -// uart_puts(buf); -// uart_puts("\r\n"); - -// *(int *)(0x7E00B210) = 1; -// *(int *)(0x7E00B214) = 1; -// *(int *)(0x7E00B218) = 1; - - // prints some info and sets up a section for PL0 
code, loads a blob - // there and jumps to it... never, ever, ever returns - demo_go_unprivileged(); - - while(1); - - while (1) - { - char c = uart_getc(); - - uart_putc(c); - - if (c == '\r') - uart_putc('\n'); - } -} diff --git a/kernel_stage1.S b/kernel_stage1.S new file mode 100644 index 0000000..dd3e6fd --- /dev/null +++ b/kernel_stage1.S @@ -0,0 +1,168 @@ +/* arm mode, cortex-a7 compatibility + * + * _boot is entry point for the kernel. + * + * Kernel copies it's embedded stage 2 to address 0x0 and jumps to + * it (to the reset handler). Registers r0 - r2 are arguments for + * the kernel, but we're not using them for now. + * + * This file is based on (and almost identical with) loader_stage1.S + */ + +.global _boot +_boot: + // Only let the first core execute + mrc p15, 0, r3, c0, c0, 5 + and r3, r3, #3 + cmp r3, #0 + beq proceed + // this is a kind of blef - races can theoretically still + // occur when the main core overwrites this part of memory + wfe + + // we'll use the size of stage1 to determine where we have free + // space after it. We'll then copy our atags/fdt there, so + // it doesn't get overwritten by stage2 we deploy at 0x0 +atags_magic: + .word 0x54410001 + +proceed: + // load the second word of structure passed to us through r2; + // if it's atags, it's second word should be the magic number + // Btw, location of ATAGS is always 0x100. + ldr r3, [r2, #4] + adr r4, atags_magic + ldr r4, [r4] + + // compare second word of assumed atags with magic number + // to see, if it's really atags and not sth else (i.e. 
fdt) + cmp r3, r4 + + // normally at start r0 contains value 0; + // value 3 in r0 would tell stage2 code, we found no atags :( + movne r0, #3 + bne stage2_blob_copying + + // if atags was found, copying of it takes place here + + // the following loop finds, where atags ends + // r3 shall point to currently looked-at tag + mov r3, r2 + +find_end_of_atags_loop: + // load first word of tag header to r4 (it contains tag size) + ldr r4, [r3] + // make r3 point at the next tag (by adding 4*tag_size to it) + add r3, r4, lsl #2 + + // load second word of tag header to r5 (it contains tag type) + ldr r5, [r3, #4] + + // if tag value is 0, it is the last tag + cmp r5, #0 + bne find_end_of_atags_loop + + add r3, #8 // make r3 point at the end of last tag + sub r3, r2 // get atags size in r3 + + // at this pont r2 and r3 point at start and size of atags, + // respectively; now we'll compute, where we're going to have + // free space to put atags in; we want to put atags either + // right after our blob or, if if it doesn't fit between + // blob end and the address stage1 is loaded at, after stage1 + + // get blob size to r5 + adr r5, blob_size + ldr r5, [r5] + + // we could only copy atags to a 4-aligned address + mov r6, #4 + bl aling_r5_to_r6 + + // compute where atags copied right after blob would end + add r6, r5, r3 + // we can only overwrite stuff before the copying loop + adr r7, copy_atags_loop + cmp r6, r7 + ble copy_atags + + // atags wouldn't fit - use memory after stage1 as destination + adr r5, _boot + adr r6, stage1_size + ldr r6, [r6] + add r5, r6 + mov r6, #4 + bl aling_r5_to_r6 + +copy_atags: + // now copy atags (r2 - atags start; r3 - atags size; + // r5 - destination; r4 - iterator; r6 - buffor) + mov r4, #0 + +copy_atags_loop: + ldr r6, [r2, r4] + str r6, [r5, r4] + add r4, #4 + cmp r4, r3 + blo copy_atags_loop + + mov r2, r5 // place the new atags address in r2 + b stage2_blob_copying // atags stuff done; proceed + +// mini-function, that does what the 
label says; clobbers r7 +aling_r5_to_r6: + sub r5, #1 + sub r7, r6, #1 + bic r5, r7 + add r5, r6 + mov pc, lr + + +stage2_blob_copying: // copy stage2 of the kernel to address 0x0 + + // first, load address of stage2_start to r3 (a PIC way) + adr r3, stage2_start + + // load destination address for stage2 code to r4 + mov r4, #0 + + // load blob size to r5 + // The size might get too big for an immediate value, so + // we load it from memory. + adr r5, blob_size + ldr r5, [r5] + + // r6 is the counter - counts the bytes copied + mov r6, #0 + + // This initial piece of code might get overwritten when we + // copy stage2, so the actual copying loop shall be after + // stage2 blob. We want this asm code to be PIC, so we're + // computing address of stage2_end into r7. + add r7, r3, r5 + bx r7 + +blob_size: + .word stage2_end - stage2_start +stage1_size: + .word stage1_end - _boot + +.align 4 +stage2_start: + .incbin "kernel_stage2.img" +stage2_end: + + // each word of the blob is loaded to r7 and stored + // from r7 to its destination in a loop +loop: + ldr r7, [r3, r6] + str r7, [r4, r6] + add r6, r6, #4 + cmp r6, r5 + blo loop + + // Call stage2 of the kernel (branch to 0x0, + // which is the reset handler). + bx r4 + +stage1_end: diff --git a/kernel_stage1.ld b/kernel_stage1.ld new file mode 100644 index 0000000..3130634 --- /dev/null +++ b/kernel_stage1.ld @@ -0,0 +1,27 @@ +ENTRY(_boot) /* defined in kernel_stage1.S; qemu needs it to run elf file */ + +/* Code starts at 0x8000 - that's where RPis in 32-bit mode load + * kernel at. My experiments do, however, show, that qemu emulating + * RPi2 loads the kernel at 0x10000! (took some pain to find out). + * rpi-open-firmware, on the other hand, loads kernel at 0x2000000! + * This is not really a problem, since: + * 1. We can use our bootloader to load the kernel at 0x8000 + * 2. We've rewritten stage 1 of both bootloader and kernel in + * careful assembly, so that they should work regardless of + * where they are loaded.
+ * 3. In qemu, we can load kernel.elf instead of raw binary + * (qemu will do the right thing then) + */ + +SECTIONS +{ + + . = 0x8000; + + __start = .; + .kernel_stage1 : + { + KEEP(kernel_stage1.o) + } + __end = .; +} diff --git a/kernel_stage2.ld b/kernel_stage2.ld new file mode 100644 index 0000000..15b61ec --- /dev/null +++ b/kernel_stage2.ld @@ -0,0 +1,79 @@ +/* This second stage of the kernel is run from address 0x0 */ + +TRANSLATION_TABLE_SIZE = 4096 * 4; +SECTIONS_LIST_SIZE = 4096 * 8; +MMU_SECTION_SIZE = 1 << 20; + +SECTIONS +{ + + . = 0x0; + + __start = .; + .interrupt_vector : + { + KEEP(interrupt_vector.o) + } + . = ALIGN(4); + .embedded_ramfs : + { + ramfs_embeddable.o + } + .rest_of_kernel : + { + *(.text) + *(.data) + *(.rodata) + *(.bss) + *(/COMMON/) + } + __end = .; + + . = ALIGN(1 << 14); + + .translation_table (NOLOAD) : + { + _translation_table_start = .; + + . = . + TRANSLATION_TABLE_SIZE; + + _translation_table_end = .; + } + + .sections_list (NOLOAD) : + { + _sections_list_start = .; + + . = . + SECTIONS_LIST_SIZE; + + _sections_list_end = .; + } + + . = ALIGN(1 << 20); + . = . + MMU_SECTION_SIZE; + + .stack (NOLOAD) : + { + _stack_start = .; + + _fiq_stack_start = .; + + . = . + (1 << 18); + + _fiq_stack_top = .; + + _irq_stack_start = .; + + . = . + (1 << 18); + + _irq_stack_top = .; + + _supervisor_stack_start = .; + + . = . + (1 << 19); + + _supervisor_stack_top = .; + + _stack_end = .; + } +} diff --git a/libkernel.c b/libkernel.c deleted file mode 100644 index 0955e0e..0000000 --- a/libkernel.c +++ /dev/null @@ -1,12 +0,0 @@ -// This is the privileged code, that gets placed somewhere in -// unprivileged process' address space. It's compiled as relocatable, -// so it can be loaded at different address for different processes.
- -void call_unprivileged(void (*address) (void)) { - // switch to PL0 - asm("cps #0b10000\n\r" - "isb" ::: "memory"); - - // jump to that unprivileged code - address(); -} diff --git a/libkernel.h b/libkernel.h deleted file mode 100644 index a8af348..0000000 --- a/libkernel.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef LIBKERNEL_H -#define LIBKERNEL_H - -void call_unprivileged(void (*address) (void)); - -#endif // LIBKERNEL_H diff --git a/linker.ld b/linker.ld deleted file mode 100644 index 01eda35..0000000 --- a/linker.ld +++ /dev/null @@ -1,107 +0,0 @@ -ENTRY(_start) - -SECTIONS -{ - /* Starts at LOADER_ADDR, which is 0x8000 - that's where RPis in */ - /* 32-bit mode load kernel at. */ - /* My experiments do, however, show, that qemu emulating RPi2 */ - /* loads the kernel at 0x10000! (took some pain to find out). */ - /* Since we're using a bootloader now, we can compile the kernel */ - /* for 0x8000 and bootloader will load it properly (although it */ - /* itself still has to be compiled for 0x10000) */ - /* rpi-open-firmware, ont he other hand, loads kernel at 0x2000000 */ - /* This issue is also to be avoided by the use of bootloader */ - . = 0x8000; - /* For AArch64, use . = 0x80000; Unless this too is wrong in qemu… */ - - __start = .; - __text_start = .; - .text : - { - KEEP(*(.text.boot)) - *(.text) - } - . = ALIGN(4096); /* align to page size */ - __text_end = .; - - __rodata_start = .; - .rodata : - { - *(.rodata) - } - . = ALIGN(4096); /* align to page size */ - __rodata_end = .; - - __data_start = .; - .data : - { - *(.data) - } - . = ALIGN(4096); /* align to page size */ - __data_end = .; - - __bss_start = .; - .bss : - { - bss = .; - *(.bss) - } - . = ALIGN(4096); /* align to page size */ - __bss_end = .; - __bss_size = __bss_end - __bss_start; - - - - /* Here come the definitions for renamed sections */ - __renamed_start = .; - __renamed_text_start = .; - .renamed_text : - { - *(.renamed_text) - } - . 
= ALIGN(4096); /* align to page size */ - __renamed_text_end = .; - - __renamed_rodata_start = .; - .renamed_rodata : - { - *(.renamed_rodata) - } - . = ALIGN(4096); /* align to page size */ - __renamed_rodata_end = .; - - __renamed_data_start = .; - .renamed_data : - { - *(.renamed_data) - } - . = ALIGN(4096); /* align to page size */ - __renamed_data_end = .; - - __renamed_bss_start = .; - .renamed_bss : - { - renamed_bss = .; - *(.renamed_bss) - } - . = ALIGN(4096); /* align to page size */ - __renamed_bss_end = .; - __renamed_bss_size = __renamed_bss_end - __renamed_bss_start; - __renamed_end = .; - __renamed_size = __renamed_end - __renamed_start; - - - . = ALIGN(4096); /* align to page size */ - __interrupt_vectors_start = .; - .interrupt_vectors : - { - interrupt_vectors = .; - *(.interrupt_vectors.text) - *(.interrupt_vectors.data) - } - . = ALIGN(4096); /* align to page size */ - __interrupt_vectors_end = .; - __interrupt_vectors_size = __interrupt_vectors_end - __interrupt_vectors_start; - - __end = .; -} diff --git a/loader_stage1.S b/loader_stage1.S new file mode 100644 index 0000000..9326360 --- /dev/null +++ b/loader_stage1.S @@ -0,0 +1,55 @@ +/* arm mode, cortex-a7 compatibility + * + * _boot is entry point for the loader. + * + * Loader copies it's embedded stage 2 to address 0x4000 + * and jumps to it. Registers r0 - r2 are arguments for the kernel + * and should be left intact. 
+ */ + +.global _boot +_boot: + // Only let the first core execute + mrc p15, 0, r3, c0, c0, 5 + and r3, r3, #3 + cmp r3, #0 + beq proceed + // this is a kind of blef - races can theoretically still occur + // when the main core overwrites this part of memory + wfe + +proceed: + // copy stage2 of the loader to address 0x4000 + + // first, load address of stage2_start to r3 (a PIC way) + adr r3, stage2_start + + // load destination address for stage2 code to r4 + mov r4, #0x4000 + + // load blob size to r5 + mov r5, #(stage2_end - stage2_start) + + // r6 is the counter - counts the bytes copied + mov r6, #0 + + // each word of the blob is loaded to r7 and stored + // from r7 to it's destination in a loop +loop: + ldr r7, [r3, r6] + str r7, [r4, r6] + add r6, r6, #4 + cmp r6, r5 + blo loop + + // Initialize the stack + // _stack_top is defined in loader_stage1_linker.ld + ldr sp, =_stack_top + + // Call stage2 of the loader (branch to 0x4000) + bx r4 + +.align 4 +stage2_start: + .incbin "loader_stage2.img" +stage2_end: diff --git a/loader_stage1.c b/loader_stage1.c deleted file mode 100644 index d209c15..0000000 --- a/loader_stage1.c +++ /dev/null @@ -1,25 +0,0 @@ -#include <stddef.h> -#include <stdint.h> -#include <global.h> - -char *const stage2_addr = ((void*) 0x4000); - -// there's one tricky thing about embedding file in executable; -// mainly, symbols are visible to c code as extern chars, but the actual -// values are their adresses... 
see the code below -extern char - _binary_loader_stage2_img_start, - _binary_loader_stage2_img_end, - _binary_loader_stage2_img_size; - -void kernel_main(uint32_t r0, uint32_t r1, uint32_t atags) -{ - // stage2 of the bootloader is a blob embedded in executable; - // copy it over to it's destination place - // TODO implement a memcpy() somewhere and use it instead of loops - for (size_t i = 0; i < (size_t) &_binary_loader_stage2_img_size; i++) - stage2_addr[i] = (&_binary_loader_stage2_img_start)[i]; - - // jump to stage2 - ((void(*)(uint32_t, uint32_t, uint32_t))stage2_addr)(r0, r1, atags); -} diff --git a/loader_stage1.ld b/loader_stage1.ld deleted file mode 100644 index 18fe477..0000000 --- a/loader_stage1.ld +++ /dev/null @@ -1,56 +0,0 @@ -ENTRY(_start) - -SECTIONS -{ - /* Starts at LOADER_ADDR. */ - /* Warning! Internet says RPis in 32-bit mode load binary at 0x8000! */ - /* My experiments do, however, show, that qemu emulating RPi2 */ - /* loads it at 0x10000! (took some pain to find out) */ - - /* . = 0x10000; */ - - /* rpi-open-firmware, on the other hand, loads it at 0x2000000 */ - /* (and this should be not-so-hard to change by modifying the */ - /* firmware) */ - - . = 0x2000000; - - /* For AArch64, use . = 0x80000; Unless this too is wrong */ - - __start = .; - __text_start = .; - .text : - { - KEEP(*(.text.boot)) - *(.text) - } - . = ALIGN(4096); /* align to page size */ - __text_end = .; - - __rodata_start = .; - .rodata : - { - *(.rodata) - } - . = ALIGN(4096); /* align to page size */ - __rodata_end = .; - - __data_start = .; - .data : - { - *(.data) - } - . = ALIGN(4096); /* align to page size */ - __data_end = .; - - __bss_start = .; - .bss : - { - bss = .; - *(.bss) - } - . 
= ALIGN(4096); /* align to page size */ - __bss_end = .; - __bss_size = __bss_end - __bss_start; - __end = .; -} diff --git a/loader_stage1_linker.ld b/loader_stage1_linker.ld new file mode 100644 index 0000000..711fcbf --- /dev/null +++ b/loader_stage1_linker.ld @@ -0,0 +1,16 @@ +ENTRY(_boot) + +SECTIONS +{ + /* see kernel_stage1.ld for details */ + . = 0x2000000; + + __start = .; + loader_stage1 : + { + KEEP(loader_stage1.o) + } + __end = .; + + _stack_top = 0x8000; +} diff --git a/loader_stage2.c b/loader_stage2.c index e221dda..15d2003 100644 --- a/loader_stage2.c +++ b/loader_stage2.c @@ -1,27 +1,22 @@ #include <stddef.h> #include <stdint.h> #include <uart.h> +#include <io.h> #include <global.h> void *const kernel_load_addr = ((void*) 0x8000); -void __attribute__((section(".text.stage2main"))) -stage2(uint32_t r0, uint32_t r1, uint32_t atags) +void _stage2_main(uint32_t r0, uint32_t r1, uint32_t atags) { - // Declare as unused - (void) r0; - (void) r1; - (void) atags; - uart_init(); // get kernel size via uart (little endian) uint32_t b0, b1, b2, b3; - b0 = uart_getc(); - b1 = uart_getc(); - b2 = uart_getc(); - b3 = uart_getc(); + b0 = getchar(); + b1 = getchar(); + b2 = getchar(); + b3 = getchar(); uint32_t kernel_size = b0 | (b1 << 8) | (b2 << 16) | (b3 << 24); @@ -29,12 +24,10 @@ stage2(uint32_t r0, uint32_t r1, uint32_t atags) char *dst = kernel_load_addr, *end = dst + kernel_size; while (dst < end) - *(dst++) = uart_getc(); + *(dst++) = getchar(); // jump to kernel - // TODO also forward arguments (r0, r1, atags) - asm volatile("bx %0" :: "r" (kernel_load_addr) : "memory"); + ((void(*)(uint32_t, uint32_t, uint32_t)) kernel_load_addr) + (r0, r1, atags); } -void *const _start = ((void*) stage2); // for linker script - diff --git a/loader_stage2.ld b/loader_stage2.ld deleted file mode 100644 index 8f215e9..0000000 --- a/loader_stage2.ld +++ /dev/null @@ -1,44 +0,0 @@ -ENTRY(_start) - -SECTIONS -{ - /* stage2 bootloader gets loaded at 0x4000 */ - .
= 0x4000; - __start = .; - __text_start = .; - .text : - { - /* have entry point at the beginning */ - KEEP(*(.text.stage2main)) - *(.text) - } - . = ALIGN(4096); /* align to page size */ - __text_end = .; - - __rodata_start = .; - .rodata : - { - *(.rodata) - } - . = ALIGN(4096); /* align to page size */ - __rodata_end = .; - - __data_start = .; - .data : - { - *(.data) - } - . = ALIGN(4096); /* align to page size */ - __data_end = .; - - __bss_start = .; - .bss : - { - bss = .; - *(.bss) - } - . = ALIGN(4096); /* align to page size */ - __bss_end = .; - __bss_size = __bss_end - __bss_start; - __end = .; -} diff --git a/loader_stage2_linker.ld b/loader_stage2_linker.ld new file mode 100644 index 0000000..33e79e9 --- /dev/null +++ b/loader_stage2_linker.ld @@ -0,0 +1,16 @@ +ENTRY(_stage2_main) + +SECTIONS +{ + /* see loader_stage1.S for details */ + . = 0x4000; + + __start = .; + loader_stage2 : + { + KEEP(loader_stage2.o(.text)) + loader_stage2.o + uart.o + } + __end = .; +} diff --git a/makefs.c b/makefs.c new file mode 100644 index 0000000..379e8c5 --- /dev/null +++ b/makefs.c @@ -0,0 +1,97 @@ +// Take files named on the command line and write to stdout a ramfs +// image of our own, (stupid) simple format. +// In the format: for each file comes the null-terminated string +// with filename, then null-padding until a 4-aligned offset, then +// 4-byte little-endian size of the file and then the contents +// of the file and then another null-padding until a 4-aligned offset. +// Files encoded this way go one after another (so it's easy to add +// something at the beginning). +// At the end comes one null-byte (as if a file with empty name +// was there).
+ +#include <stdint.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <unistd.h> +#include <stdlib.h> +#include <stdio.h> +#include <err.h> +#include <sys/stat.h> +#include <string.h> + +#define ANSI_FG_RED "\033[0;31m" +#define ANSI_FG_DEFAULT "\033[0;39m" + +int main(int argc, char **argv) +{ + // process files in the order they are provided on the command line + for (int i = 1; i < argc; i++) + { + struct stat fileinfo; + + if (stat(argv[i], &fileinfo)) + err(-1, "couldn't stat " ANSI_FG_RED "%s" ANSI_FG_DEFAULT, + argv[i]); + + if (!S_ISREG(fileinfo.st_mode)) + errx(-1, ANSI_FG_RED "%s" ANSI_FG_DEFAULT + " is not a regular file.", argv[i]); + + // don't allow files with size so big, that it can't be encoded + // in a 4-byte unsigned int... In practice even smaller files + // won't fit on the rpi. + if (fileinfo.st_size > UINT32_MAX) + errx(-1, ANSI_FG_RED "%s" ANSI_FG_DEFAULT + " is too big.", argv[i]); + + uint32_t file_size = fileinfo.st_size; + uint32_t name_size = strlen(argv[i]) + 1; // 1 for null-byte + + if (fwrite(argv[i], 1, name_size, stdout) != name_size) + errx(-1, "error writing to stdout"); + + // pad with null-bytes until a 4-aligned offset + for (uint32_t j = 0; (j + name_size) & 0b11; j++) + if (putchar('\0')) + errx(-1, "error writing to stdout"); + + // TODO convert file_size to little endian first (in case our + // host is be). 
+ if (fwrite(&file_size, 4, 1, stdout) != 1) + errx(-1, "error writing to stdout"); + + // flush b4 running cat, so that stuff we've written comes + // b4 the actual file contents in the output + if (fflush(stdout)) + err(-1, "couldn't flush stdout"); + + // we don't copy the actual file ourselves - we run cat for that + pid_t pid; + int wstatus; + switch (pid = fork()) + { + case -1: + err(-1, "couldn't fork"); + case 0: + if (execlp("cat", "cat", argv[i], NULL)) + err(-1, "couldn't execute cat"); + default: + if (wait(&wstatus) == -1) + err(-1, "error waiting for child"); + + if (!WIFEXITED(wstatus) || WEXITSTATUS(wstatus)) + exit(-1); + } + + // again, pad with null-bytes until a 4-aligned offset + for (uint32_t j = 0; (j + file_size) & 0b11; j++) + if (putchar('\0')) + errx(-1, "error writing to stdout"); + } + + if (putchar('\0')) + errx(-1, "error writing to stdout"); + + return 0; +} + @@ -1,35 +1,72 @@ #ifndef MEMORY_H #define MEMORY_H -#define INTERRUPT_VECTOR_TABLE_START ((uint32_t) 0x0) +#include <stddef.h> + +// These macros were heavily used b4 I moved all the address +// computation to the linker script. 
Now I'm just keeping them +// in case they're needed for something else :) +#define POWER_OF_2(EXP) (((size_t) 1) << EXP) + +#define ALIGN_POWER_OF_2(ADDR, EXP) \ + (((ADDR - 1) & ~(POWER_OF_2(EXP) - 1)) + POWER_OF_2(EXP)) + +#define SECTION_SIZE POWER_OF_2(20) + +#define ALIGN_SECTION(ADDR) ALIGN_POWER_OF_2(ADDR, 20) -#define STACK_START ((uint32_t) 0x4000) -#define STACK_END ((uint32_t) 0x8000) +// memory layout + +#define INTERRUPT_VECTOR_TABLE_START ((uint32_t) 0x0) + +// all those symbols are defined in the linker script extern char __end; extern char __start; +extern char _translation_table_start; +extern char _translation_table_end; +extern char _sections_list_start; +extern char _sections_list_end; +extern char _stack_start; +extern char _fiq_stack_start; +extern char _fiq_stack_top; +extern char _irq_stack_start; +extern char _irq_stack_top; +extern char _supervisor_stack_start; +extern char _supervisor_stack_top; +extern char _stack_end; -#define KERNEL_START ((uint32_t) &__start) -#define KERNEL_END ((uint32_t) &__end) +#define KERNEL_START ((size_t) &__start) // this is 0x0 +#define KERNEL_END ((size_t) &__end) // first 2^14 aligned address after the kernel -#define TRANSLATION_TABLE_BASE \ - (((KERNEL_END - (uint32_t) 1) & ~((uint32_t) 0x3fff)) \ - + (uint32_t) 0x4000) +#define TRANSLATION_TABLE_BASE ((size_t) &_translation_table_start) +#define TRANSLATION_TABLE_END ((size_t) &_translation_table_end) -#define TRANSLATION_TABLE_END \ - (TRANSLATION_TABLE_BASE + (uint32_t) (4096 * 4)) +// another 32KB after the translation table are used for sections list +#define SECTIONS_LIST_START ((size_t) &_sections_list_start) +#define SECTIONS_LIST_END ((size_t) &_sections_list_end) -#define PRIVILEGED_MEMORY_END TRANSLATION_TABLE_END +// first section after the translation table is left unused; +// the next section is used as the stack +#define STACK_START ((size_t) &_stack_start) +#define FIQ_STACK_START ((size_t) &_fiq_stack_start) +#define FIQ_STACK_END 
((size_t) &_fiq_stack_top) +#define IRQ_STACK_START ((size_t) &_irq_stack_start) +#define IRQ_STACK_END ((size_t) &_irq_stack_top) +#define SUPERVISOR_STACK_START ((size_t) &_supervisor_stack_start) +#define SUPERVISOR_STACK_END ((size_t) &_supervisor_stack_top) +#define STACK_END ((size_t) &_stack_end) -#define UNPRIVILEGED_MEMORY_START \ - (((PRIVILEGED_MEMORY_END - (uint32_t) 1) & ~((uint32_t) 0xfffff)) \ - + (uint32_t) 0x100000) +#define PRIVILEGED_MEMORY_END STACK_END -#define PL0_SECTION_NUMBER ((uint32_t) 0b101010101010) -#define PL0_SECTION_NUMBER_STR "0b101010101010" + +// the following describes the virtual section for our PL0 programs +#define PL0_SECTION_NUMBER ((size_t) 0xaaa) #define VIRTUAL_PL0_MEMORY_START (PL0_SECTION_NUMBER << 20) +#define VIRTUAL_PL0_MEMORY_END \ + (VIRTUAL_PL0_MEMORY_START + SECTION_SIZE) #endif // MEMORY_H - + @@ -1,24 +1,23 @@ #include "cp_regs.h" -#include "uart.h" #include "strings.h" #include "memory.h" #include "translation_table_descriptors.h" +#include "io.h" + +#include "paging.h" void setup_flat_map(void) { - char bits[33]; // for printing uint32_t bit values - // compute translation table base address // translation table shall start at first 2^14-bytes aligned // address after the kernel image - uint32_to_bits(TRANSLATION_TABLE_BASE, bits); - uart_puts("binary representation of chosen" - " lvl1 translation table address: "); - uart_puts(bits); uart_puts("\n\r"); + prints("chosen lvl1 translation table address: 0x"); + printhex(TRANSLATION_TABLE_BASE); + puts(""); // flat map all memory - uart_puts("preparing translation table\n\r"); + puts("preparing translation table"); short_descriptor_lvl1_t volatile *translation_table = (short_descriptor_lvl1_t*) TRANSLATION_TABLE_BASE; @@ -35,8 +34,7 @@ void setup_flat_map(void) }; // meddle with domain settings - uart_puts("setting domain0 to client access" - " and blocking other domains\n\r"); + puts("setting domain0 to client access and blocking other domains"); DACR_t DACR = 
0; DACR = set_domain_permissions(DACR, 0, DOMAIN_CLIENT_ACCESS); @@ -55,7 +53,7 @@ void setup_flat_map(void) // we also disable data and instruction caches and the MMU // some of this is redundant (i.e. MMU should already be disabled) - uart_puts("setting C, I, AFE and TRE to 0 in SCTLR\n\r"); + puts("setting C, I, AFE and TRE to 0 in SCTLR"); SCTLR_t SCTLR; asm("mrc p15, 0, %0, c1, c0, 0" : "=r" (SCTLR.raw)); @@ -70,8 +68,8 @@ void setup_flat_map(void) // TODO: move invalidation instructions to some header as inlines - uart_puts("invalidating instruction cache, branch prediction," - " and entire main TLB\n\r"); + puts("invalidating instruction cache, branch prediction," + " and entire main TLB"); // invalidate instruction cache asm("mcr p15, 0, r0, c7, c5, 0\n\r" // r0 gets ignored @@ -86,8 +84,7 @@ void setup_flat_map(void) "isb" :: "r" (0) : "memory"); // now set TTBCR to use TTBR0 exclusively - uart_puts("Setting TTBCR.N to 0, so that" - " TTBR0 is used everywhere\n\r"); + puts("Setting TTBCR.N to 0, so that TTBR0 is used everywhere"); uint32_t TTBCR = 0; asm("mcr p15, 0, %0, c2, c0, 2" :: "r" (TTBCR)); @@ -102,7 +99,7 @@ void setup_flat_map(void) asm("mcr p15, 0, %0, c2, c0, 0" :: "r" (TTBR0.raw)); // enable MMU - uart_puts("enabling the MMU\n\r"); + puts("enabling the MMU"); // redundant - we already have SCTLR contents in the variable // asm("mrc p15, 0, %0, c1, c0, 0" : "=r" (SCTLR.raw)); @@ -112,3 +109,141 @@ void setup_flat_map(void) asm("mcr p15, 0, %0, c1, c0, 0\n\r" "isb" :: "r" (SCTLR.raw) : "memory"); } + +#define OWNER_FREE ((void*) 0) +#define OWNER_KERNEL ((void*) 1) +#define OWNER_SPLIT ((void*) 2) + +// we want to maintain a list of free and used physical sections +struct section_node +{ + // we're going to add processes, process management and + // struct process. Then, owner will be struct process*. 
+ void *owner; // 0 if free, 1 if used by kernel, 2 if split to pages + + // it's actually a 2-directional lists; + // end of list is marked by reference to SECTION_NULL; + // we use offsets into sections_list array instead of pointers; + uint16_t prev, next; +}; + +static struct section_node volatile *sections_list; + +static uint16_t + all_sections_count, kernel_sections_count, + split_sections_count, free_sections_count; + +// those are SECTION_NULL when the corresponding count is 0; +static uint16_t + first_free_section, first_kernel_section, first_split_section; + +void setup_pager_structures(uint32_t available_mem) +{ + all_sections_count = available_mem / SECTION_SIZE; + kernel_sections_count = PRIVILEGED_MEMORY_END / SECTION_SIZE; + free_sections_count = all_sections_count - kernel_sections_count; + split_sections_count = 0; + + sections_list = (struct section_node*) SECTIONS_LIST_START; + + first_split_section = SECTION_NULL; + + for (uint16_t i = 0; i < kernel_sections_count; i++) + sections_list[i] = (struct section_node) { + .owner = OWNER_KERNEL, + .prev = i == 0 ? SECTION_NULL : i - 1, + .next = i == kernel_sections_count - 1 ? SECTION_NULL : i + 1 + }; + + first_kernel_section = 0; + + for (uint16_t i = kernel_sections_count; + i < all_sections_count; i++) + sections_list[i] = (struct section_node) { + .owner = OWNER_FREE, + .prev = i == kernel_sections_count ? SECTION_NULL : i - 1, + .next = i == all_sections_count - 1 ? 
SECTION_NULL : i + 1 + }; + + first_free_section = kernel_sections_count; + + puts("Initialized kernel's internal structures for paging"); + prints("We have "); printdect(free_sections_count); + puts(" free sections left for use"); +} + +// return section number or SECTION_NULL in case of failure +static uint16_t claim_section(void *owner) +{ + if (!free_sections_count) + return SECTION_NULL; // failure + + uint16_t section = first_free_section; + + if (--free_sections_count) + { + uint16_t next; + + next = sections_list[section].next; + sections_list[next].prev = SECTION_NULL; + + first_free_section = next; + } + else + first_free_section = SECTION_NULL; + + if (owner == OWNER_KERNEL) + { + sections_list[first_kernel_section].prev = section; + + sections_list[section] = (struct section_node) { + .owner = owner, + .prev = SECTION_NULL, + .next = first_kernel_section + }; + + kernel_sections_count++; + + first_kernel_section = section; + } + else + sections_list[section] = (struct section_node) { + .owner = owner, + .prev = SECTION_NULL, + .next = SECTION_NULL + }; + + return section; +} + +// return values like claim_section() +uint16_t claim_and_map_section +(void *owner, uint16_t where_to_map, uint8_t access_permissions) +{ + uint16_t section = claim_section(owner); + + if (section == SECTION_NULL) + return section; + + short_section_descriptor_t volatile *section_entry = + &((short_section_descriptor_t*) + TRANSLATION_TABLE_BASE)[where_to_map]; + + short_section_descriptor_t descriptor = *section_entry; + + // set up address of section + descriptor.SECTION_BASE_ADDRESS_31_20 = section; + + // set requested permissions on section + descriptor.ACCESS_PERMISSIONS_2 = access_permissions >> 2; + descriptor.ACCESS_PERMISSIONS_1_0 = access_permissions & 0b011; + + // write modified descriptor to the table + *section_entry = descriptor; + + // invalidate main Translation Lookup Buffer + asm("mcr p15, 0, r1, c8, c7, 0\n\r" + "isb" ::: "memory"); + + return section; +} @@ 
-3,4 +3,12 @@ void setup_flat_map(void); +void setup_pager_structures(uint32_t available_mem); + +#define SECTION_NULL 0xffff + +// returns section number or SECTION_NULL in case of failure +uint16_t claim_and_map_section +(void *owner, uint16_t where_to_map, uint8_t access_permissions); + #endif // PAGING_H @@ -64,4 +64,25 @@ inline static PSR_t read_CPSR(void) return CPSR; } +inline static void write_CPSR(PSR_t CPSR) +{ + // set content of current program status register + asm("msr cpsr, %0" :: "r" (CPSR.raw) : "memory"); +} + +inline static PSR_t read_SPSR(void) +{ + PSR_t SPSR; + // get content of saved program status register + asm("mrs %0, spsr" : "=r" (SPSR.raw) :: "memory"); + + return SPSR; +} + +inline static void write_SPSR(PSR_t SPSR) +{ + // set content of saved program status register + asm("msr spsr, %0" :: "r" (SPSR.raw)); +} + #endif // PSR_H @@ -0,0 +1,65 @@ +// driver for the read-only ramfs +// see makefs.c for details + +#include <stdint.h> +#include "ramfs.h" + +static int strcmp(char const *str1, char const *str2) +{ + while (1) + { + int c1 = (unsigned char) *str1, c2 = (unsigned char) *str2; + + if (!c1 && !c2) + return 0; + + if (c1 != c2) + return c1 - c2; + + str1++; str2++; + } +} + +static uint32_t strlen(char const *str1) +{ + uint32_t len = 0; + + while (str1[len]) + len++; + + return len; +} + +static inline char *align4(char *addr) +{ + return (char*) (((uint32_t) addr - 1) & ~0b11) + 4; +} + +int find_file(void *ramfs, char *filename, struct ramfile *buf) +{ + char *fs_file = ramfs; + + while (*fs_file) + { + uint32_t *fs_file_size = (uint32_t*) + align4(fs_file + strlen(fs_file) + 1); + + char *fs_file_contents = (char*) (fs_file_size + 1); + + if (!strcmp(fs_file, filename)) + { + buf->file_size = *fs_file_size; + + buf->file_name = fs_file; + + buf->file_contents = fs_file_contents; + + return 0; + } + + // move to the next file in ramfs + fs_file = align4(fs_file_contents + *fs_file_size); + } + + return -1; // reached end of 
ramfs; file not found +} @@ -0,0 +1,16 @@ +#ifndef RAMFS_H +#define RAMFS_H + +struct ramfile +{ + char *file_name; + uint32_t file_size; + char *file_contents; +}; + +// search for file named filename in ramfs; +// If found - return 0 and fill buf fields with file's info. +// Otherwise return a non-zero value. +int find_file(void *ramfs, char *filename, struct ramfile *buf); + +#endif // RAMFS_H @@ -0,0 +1,119 @@ +#include "uart.h" +#include "io.h" +#include "demo_functionality.h" +#include "paging.h" +#include "atags.h" +// for POWER_OF_2() macro... perhaps the macro should be moved +#include "memory.h" +#include "armclock.h" + +void setup(uint32_t r0, uint32_t machine_type, + struct atag_header *atags) +{ + uart_init(); + + // When we attach screen session after loading kernel with socat + // we miss kernel's greeting... So we'll make the kernel wait for + // one char we're going to send from within screen + getchar(); + + puts("Hello, kernel World!"); + + prints("ARM machine type: 0x"); printhext(machine_type); puts(""); + + uint32_t memory_size = 0; + + // value 3 introduced by stage1 code means no atags was found + if (r0 == 3) + puts("No ATAGS was found!"); + else + { + prints("ATAGS copied to 0x"); + printhex((uint32_t) atags); puts(""); + + puts("__ ATAGS contents __"); + + print_atags(atags); + + puts("__ end of ATAGS contents __"); + + memory_size = find_memory_size(atags); + } + + if (memory_size) + { + char *unit; + uint32_t size_in_unit; + + if (memory_size % POWER_OF_2(10)) + { + unit = "B"; + size_in_unit = memory_size; + } + else if (memory_size % POWER_OF_2(20)) + { + unit = "KB"; + size_in_unit = memory_size / POWER_OF_2(10); + } + else if (memory_size % POWER_OF_2(30)) + { + unit = "MB"; + size_in_unit = memory_size / POWER_OF_2(20); + } + else + { + unit = "GB"; + size_in_unit = memory_size / POWER_OF_2(30); + } + + prints("memory available: "); + printdect(size_in_unit); puts(unit); + } + else + { + // Most Pis have more, but qemu might give 
us little + puts("Couldn't determine available memory - assuming 192MB"); + memory_size = 192 * POWER_OF_2(20); + } + + // assume we need at least one section for PL0 + if (memory_size < PRIVILEGED_MEMORY_END + SECTION_SIZE) + { + puts("Not enough memory to continue"); + while (1); + } + + // prints some info + demo_paging_support(); + + // prints some info + demo_current_mode(); + + setup_pager_structures(memory_size); + + // prints some info and sets upp translation table, turns on MMU + setup_flat_map(); + + // prints some info and sets up a section for PL0 code, + // loads a blob there + demo_setup_PL0(); + + // sets some general settings for arm timer + armclk_init(); + + // turns on irq from arm timer + armclk_enable_timer_irq(); + + // jumps to unprivileged code... never, ever, ever returns + demo_go_unprivileged(); + + while (1) + { + char c = getchar(); + + if (c == '\r') + putchar('\n'); + + putchar(c); + } +} diff --git a/strings.c b/strings.c new file mode 100644 index 0000000..368d7dc --- /dev/null +++ b/strings.c @@ -0,0 +1,119 @@ +#include "strings.h" + +void uint32_to_dec(uint32_t number, char buf[10]) +{ + for (int i = 0; i < 10; i++) + { + buf[10 - 1 - i] = '0' + (number % 10); + number /= 10; + } +} + +void uint32_to_hex(uint32_t number, char buf[8]) +{ + for (int i = 0; i < 8; i++) + { + unsigned char quadbit = (number >> ((8 - i - 1) * 4)) & 0xf; + buf[i] = quadbit > 9 ? quadbit - 10 + 'a' : quadbit + '0'; + } +} + +void uint32_to_bin(uint32_t number, char buf[32]) +{ + for (int i = 0; i < 32; i++) + buf[i] = ((number >> (32 - i - 1)) & 1) ? 
'1' : '0'; +} + +void uint32_to_decstring(uint32_t number, char buf[11]) +{ + uint32_to_dec(number, buf); + buf[10] = '\0'; +} + +void uint32_to_hexstring(uint32_t number, char buf[9]) +{ + uint32_to_hex(number, buf); + buf[8] = '\0'; +} + +void uint32_to_binstring(uint32_t number, char buf[33]) +{ + uint32_to_bin(number, buf); + buf[32] = '\0'; +} + +void trim_0s(char string[]) +{ + size_t i; + for (i = 0; string[i] == '0'; i++); + + size_t j = 0; + + if (!string[i]) + string[j++] = string[--i]; + + do + string[j] = string[i + j]; + while (string[j++]); +} + +void uint32_to_decstringt(uint32_t number, char buf[11]) +{ + uint32_to_decstring(number, buf); + trim_0s(buf); +} + +void uint32_to_hexstringt(uint32_t number, char buf[9]) +{ + uint32_to_hexstring(number, buf); + trim_0s(buf); +} + +size_t strlen(char string[]) +{ + size_t len; + + for (len = 0; string[len]; len++); + + return len; +} + +void memcpy(void *dst, void *src, size_t nbytes) +{ + size_t iter; + + // copying by word is faster than by byte, + // but can easily cause alignment faults, so we resign from it... + for (iter = 0; iter < nbytes ; iter++) + ((volatile uint8_t*) dst)[iter] = ((uint8_t*) src)[iter]; +} + +// keep in mind memset is also needed for array initialization, like +// uint32_t buf[16] = {0}; +// gcc compiles this to memset call + +void *memset(void *s, int c, size_t n) +{ + volatile char *mem = s; + + for (size_t i = 0; i < n; i++) + mem[i] = c; + + return s; +} + +char *strcat(char *dst, const char *src) +{ + char *where_to_append; + + for (where_to_append = dst; *where_to_append; where_to_append++); + + size_t i; + + for (i = 0; src[i]; i++) + ((char volatile*) where_to_append)[i] = src[i]; + + ((char volatile*) where_to_append)[i] = '\0'; + + return dst; +} @@ -1,14 +1,33 @@ #ifndef STRINGS_H #define STRINGS_H +#include <stddef.h> #include <stdint.h> -void uint32_to_bits(uint32_t number, char *buf) -{ - for (int i = 0; i < 32; i++) - buf[i] = ((number >> (32 - i - 1)) & 1) ? 
'1' : '0'; +void uint32_to_dec(uint32_t number, char buf[10]); - buf[32] = '\0'; -} +void uint32_to_hex(uint32_t number, char buf[8]); +void uint32_to_bin(uint32_t number, char buf[32]); + +void uint32_to_decstring(uint32_t number, char buf[11]); + +void uint32_to_hexstring(uint32_t number, char buf[9]); + +void uint32_to_binstring(uint32_t number, char buf[33]); + +void trim_0s(char string[]); + +void uint32_to_decstringt(uint32_t number, char buf[11]); + +void uint32_to_hexstringt(uint32_t number, char buf[9]); + +size_t strlen(char string[]); + +void memcpy(void *dst, void *src, size_t nbytes); + +void *memset(void *s, int c, size_t n); + +char *strcat(char *dst, const char *src); + #endif // STRINGS_H @@ -0,0 +1,5 @@ +.global svc + +svc: + svc #0 + mov pc, lr diff --git a/svc_interface.h b/svc_interface.h new file mode 100644 index 0000000..aa478ce --- /dev/null +++ b/svc_interface.h @@ -0,0 +1,11 @@ +#ifndef SVC_INTERFACE_H +#define SVC_INTERFACE_H + +enum svc_type + { + UART_PUTCHAR, + UART_GETCHAR, + UART_WRITE + }; + +#endif // SVC_INTERFACE_H @@ -63,22 +63,16 @@ void uart_init() mmio_write(UART0_CR, (1 << 0) | (1 << 8) | (1 << 9)); } -void uart_putc(unsigned char c) +void putchar(char c) { // Wait for UART to become ready to transmit. while ( mmio_read(UART0_FR) & (1 << 5) ) { } mmio_write(UART0_DR, c); } -unsigned char uart_getc() +char getchar(void) { // Wait for UART to have received something. while ( mmio_read(UART0_FR) & (1 << 4) ) { } return mmio_read(UART0_DR); } - -void uart_puts(const char* str) -{ - for (size_t i = 0; str[i] != '\0'; i ++) - uart_putc((unsigned char)str[i]); -} @@ -4,43 +4,40 @@ #include <stddef.h> #include <stdint.h> #include <global.h> -enum - { - // The offsets for reach register. - // Controls actuation of pull up/down to ALL GPIO pins. - GPPUD = (GPIO_BASE + 0x94), +// The offsets for reach register. - // Controls actuation of pull up/down for specific GPIO pin. 
- GPPUDCLK0 = (GPIO_BASE + 0x98), +// Controls actuation of pull up/down to ALL GPIO pins. +#define GPPUD (GPIO_BASE + 0x94) - // The base address for UART. - UART0_BASE = 0x3F201000, // for raspi2 & 3, 0x20201000 for raspi1 +// Controls actuation of pull up/down for specific GPIO pin. +#define GPPUDCLK0 (GPIO_BASE + 0x98) - // The offsets for reach register for the UART. - UART0_DR = (UART0_BASE + 0x00), - UART0_RSRECR = (UART0_BASE + 0x04), - UART0_FR = (UART0_BASE + 0x18), - UART0_ILPR = (UART0_BASE + 0x20), - UART0_IBRD = (UART0_BASE + 0x24), - UART0_FBRD = (UART0_BASE + 0x28), - UART0_LCRH = (UART0_BASE + 0x2C), - UART0_CR = (UART0_BASE + 0x30), - UART0_IFLS = (UART0_BASE + 0x34), - UART0_IMSC = (UART0_BASE + 0x38), - UART0_RIS = (UART0_BASE + 0x3C), - UART0_MIS = (UART0_BASE + 0x40), - UART0_ICR = (UART0_BASE + 0x44), - UART0_DMACR = (UART0_BASE + 0x48), - UART0_ITCR = (UART0_BASE + 0x80), - UART0_ITIP = (UART0_BASE + 0x84), - UART0_ITOP = (UART0_BASE + 0x88), - UART0_TDR = (UART0_BASE + 0x8C), - }; +// The base address for UART. +#define UART0_BASE (GPIO_BASE + 0x1000) + +// The offsets for reach register for the UART. 
+#define UART0_DR (UART0_BASE + 0x00) +#define UART0_RSRECR (UART0_BASE + 0x04) +#define UART0_FR (UART0_BASE + 0x18) +#define UART0_ILPR (UART0_BASE + 0x20) +#define UART0_IBRD (UART0_BASE + 0x24) +#define UART0_FBRD (UART0_BASE + 0x28) +#define UART0_LCRH (UART0_BASE + 0x2C) +#define UART0_CR (UART0_BASE + 0x30) +#define UART0_IFLS (UART0_BASE + 0x34) +#define UART0_IMSC (UART0_BASE + 0x38) +#define UART0_RIS (UART0_BASE + 0x3C) +#define UART0_MIS (UART0_BASE + 0x40) +#define UART0_ICR (UART0_BASE + 0x44) +#define UART0_DMACR (UART0_BASE + 0x48) +#define UART0_ITCR (UART0_BASE + 0x80) +#define UART0_ITIP (UART0_BASE + 0x84) +#define UART0_ITOP (UART0_BASE + 0x88) +#define UART0_TDR (UART0_BASE + 0x8C) void uart_init(); -void uart_putc(unsigned char c); -unsigned char uart_getc(); -void uart_puts(const char* str); +void putchar(char c); +char getchar(void); #endif // UART_H |