aboutsummaryrefslogtreecommitdiff
path: root/src/arm/PL1
diff options
context:
space:
mode:
Diffstat (limited to 'src/arm/PL1')
-rw-r--r--src/arm/PL1/PL1_common/global.h38
-rw-r--r--src/arm/PL1/PL1_common/uart.c103
-rw-r--r--src/arm/PL1/PL1_common/uart.h106
-rw-r--r--src/arm/PL1/kernel/armclock.h76
-rw-r--r--src/arm/PL1/kernel/atags.c103
-rw-r--r--src/arm/PL1/kernel/atags.h102
-rw-r--r--src/arm/PL1/kernel/bcmclock.h35
-rw-r--r--src/arm/PL1/kernel/cp_regs.h114
-rw-r--r--src/arm/PL1/kernel/demo_functionality.c121
-rw-r--r--src/arm/PL1/kernel/demo_functionality.h16
-rw-r--r--src/arm/PL1/kernel/interrupt_vector.S56
-rw-r--r--src/arm/PL1/kernel/interrupts.c135
-rw-r--r--src/arm/PL1/kernel/interrupts.h47
-rw-r--r--src/arm/PL1/kernel/kernel_stage1.S168
-rw-r--r--src/arm/PL1/kernel/kernel_stage1.ld27
-rw-r--r--src/arm/PL1/kernel/kernel_stage2.ld80
-rw-r--r--src/arm/PL1/kernel/memory.h72
-rw-r--r--src/arm/PL1/kernel/paging.c249
-rw-r--r--src/arm/PL1/kernel/paging.h14
-rw-r--r--src/arm/PL1/kernel/psr.h88
-rw-r--r--src/arm/PL1/kernel/ramfs.c65
-rw-r--r--src/arm/PL1/kernel/ramfs.h16
-rw-r--r--src/arm/PL1/kernel/scheduler.c156
-rw-r--r--src/arm/PL1/kernel/scheduler.h32
-rw-r--r--src/arm/PL1/kernel/setup.c116
-rw-r--r--src/arm/PL1/kernel/translation_table_descriptors.h180
-rw-r--r--src/arm/PL1/loader/loader_stage1.S55
-rw-r--r--src/arm/PL1/loader/loader_stage1_linker.ld16
-rw-r--r--src/arm/PL1/loader/loader_stage2.c33
-rw-r--r--src/arm/PL1/loader/loader_stage2_linker.ld16
30 files changed, 2435 insertions, 0 deletions
diff --git a/src/arm/PL1/PL1_common/global.h b/src/arm/PL1/PL1_common/global.h
new file mode 100644
index 0000000..4e17b44
--- /dev/null
+++ b/src/arm/PL1/PL1_common/global.h
@@ -0,0 +1,38 @@
+#ifndef GLOBAL_H
+#define GLOBAL_H
+
+#include <stdint.h>
+
+// board type, raspi2
+#define RASPI 2
+
+// conditionally #define PERIF_BASE
+#if RASPI == 4
+
+#define PERIF_BASE 0xFE000000
+
+#elif RASPI == 3 || RASPI == 2
+
+#define PERIF_BASE 0x3F000000
+
+#else // if RASPI == 1
+
+#define PERIF_BASE 0x20000000
+
+#endif
+
+// GPIO_BASE is #define'd in terms of PERIF_BASE
+// (as in sane kernels - like linux, not like in wiki.osdev codes...)
+#define GPIO_BASE (PERIF_BASE + 0x200000)
+
+// Read one 32-bit word from the memory-mapped register at address addr.
+// The access goes through a volatile pointer, so it is always performed.
+inline static uint32_t rd32(uint32_t addr)
+{
+    uint32_t volatile *reg = (uint32_t volatile*) addr;
+
+    return *reg;
+}
+
+// Write the 32-bit word value to the memory-mapped register at address
+// addr. The access goes through a volatile pointer, so it is always
+// performed.
+inline static void wr32(uint32_t addr, uint32_t value)
+{
+    uint32_t volatile *reg = (uint32_t volatile*) addr;
+
+    *reg = value;
+}
+
+#endif // GLOBAL_H
diff --git a/src/arm/PL1/PL1_common/uart.c b/src/arm/PL1/PL1_common/uart.c
new file mode 100644
index 0000000..4dd1c2b
--- /dev/null
+++ b/src/arm/PL1/PL1_common/uart.c
@@ -0,0 +1,103 @@
+#include <stddef.h>
+#include <stdint.h>
+#include "uart.h"
+#include "global.h"
+
+// Busy-wait: count <count> down to zero in inline assembly, so that the
+// compiler won't optimize the loop away.
+// The counter is decremented *before* it is tested, so passing 0 does
+// not return immediately - the counter wraps around first.
+static inline void delay(int32_t count)
+{
+ // %= expands to a number unique to this asm statement, which keeps
+ // the label from clashing if the function gets inlined several times;
+ // "cc" is clobbered because subs updates the condition flags
+ asm volatile("__delay_%=: subs %[count], %[count], #1; bne __delay_%=\n"
+ : "=r"(count): [count]"0"(count) : "cc");
+}
+
+// Bring the PL011 UART into a known state: UART disabled while it is
+// being configured, pull up/down switched off on GPIO pins 14 and 15,
+// baud divisors programmed, 8-bit frames without FIFOs selected, UART
+// re-enabled for both directions and, finally, all UART interrupts
+// masked (both the UART irq line and the sources inside the UART).
+void uart_init()
+{
+ // Disable PL011_UART.
+ wr32(PL011_UART_CR, 0);
+
+ // Setup the GPIO pin 14 && 15.
+
+ // Write 0 ("no pull up/down") to GPPUD & delay for 150 loop
+ // iterations. This only selects the setting; it gets applied to the
+ // pins clocked through GPPUDCLK0 below.
+ wr32(GPPUD, 0);
+ delay(150);
+
+ // Clock the setting into pins 14 and 15 only & delay again for 150
+ // loop iterations.
+ wr32(GPPUDCLK0, (1 << 14) | (1 << 15));
+ delay(150);
+
+ // Write 0 to GPPUDCLK0 to make it take effect.
+ wr32(GPPUDCLK0, 0);
+
+ // Set integer & fractional part of baud rate.
+ // Divider = UART_CLOCK/(16 * Baud)
+ // Fraction part register = (Fractional part * 64) + 0.5
+ // UART_CLOCK = 3000000; Baud = 115200.
+
+ // Divider = 3000000 / (16 * 115200) = 1.627 = ~1.
+ wr32(PL011_UART_IBRD, 1);
+ // Fractional part register = (.627 * 64) + 0.5 = 40.6 = ~40.
+ wr32(PL011_UART_FBRD, 40);
+
+ // Set 8 bit data transmission (1 stop bit, no parity): bits 5 and 6
+ // select the 8-bit word length. Bit 4 (FIFO enable) is left clear,
+ // which disables the FIFOs, to be able to receive an interrupt on
+ // every received char, not every 2 chars
+ wr32(PL011_UART_LCRH, (1 << 5) | (1 << 6));
+
+ // set interrupt to come when transmit FIFO becomes ≤ 1/8 full
+ // or receive FIFO becomes ≥ 1/8 full
+ // (doesn't really matter, since we disabled FIFOs)
+ wr32(PL011_UART_IFLS, 0);
+
+ // Enable PL011_UART (bit 0) together with its transmit (bit 8) and
+ // receive (bit 9) parts.
+ wr32(PL011_UART_CR, (1 << 0) | (1 << 8) | (1 << 9));
+
+ // At first, it's probably safer to disable interrupts :)
+ uart_irq_disable();
+
+ // The above disables the entire uart irq;
+ // Also disable single sources within it
+ wr32(PL011_UART_IMSC, 0);
+}
+
+// Tell whether the UART can take another character right now, i.e.
+// whether bit 5 of the flag register is clear.
+inline static _Bool can_transmit(void)
+{
+    uint32_t flags = rd32(PL011_UART_FR);
+
+    return (flags & (1 << 5)) == 0;
+}
+
+// Tell whether a received character is waiting to be read, i.e.
+// whether bit 4 of the flag register is clear.
+inline static _Bool can_receive(void)
+{
+    uint32_t flags = rd32(PL011_UART_FR);
+
+    return (flags & (1 << 4)) == 0;
+}
+
+// Blocking write: spin until the UART can transmit, then hand it c.
+void putchar(char c)
+{
+    for (;;)
+    {
+        if (can_transmit())
+        {
+            wr32(PL011_UART_DR, c);
+            return;
+        }
+    }
+}
+
+// Blocking read: spin until the UART has received something, then
+// return the contents of the data register converted to char.
+char getchar(void)
+{
+    for (;;)
+    {
+        if (can_receive())
+            return rd32(PL011_UART_DR);
+    }
+}
+
+// Try to send c without waiting.
+// Returns 0 when the character was handed to the UART and 1 when the
+// UART could not take it (nothing is written in that case).
+_Bool putchar_non_blocking(char c)
+{
+    if (!can_transmit())
+        return 1;
+
+    wr32(PL011_UART_DR, c);
+
+    return 0;
+}
+
+// Try to read one character without waiting.
+// Returns the received character (0 - 255), or -1 when nothing has
+// been received yet.
+int getchar_non_blocking(void)
+{
+    if (!can_receive())
+        return -1;
+
+    // The PL011 data register carries receive error flags in bits 11:8
+    // next to the character in bits 7:0. Keep only the character, so
+    // that callers get the same value the blocking getchar() returns
+    // instead of a character with status bits mixed into it.
+    return rd32(PL011_UART_DR) & 0xFF;
+}
diff --git a/src/arm/PL1/PL1_common/uart.h b/src/arm/PL1/PL1_common/uart.h
new file mode 100644
index 0000000..96f3634
--- /dev/null
+++ b/src/arm/PL1/PL1_common/uart.h
@@ -0,0 +1,106 @@
+#ifndef UART_H
+#define UART_H
+
+#include <stdint.h>
+#include "global.h"
+#include "interrupts.h"
+
+// The offsets for each register.
+
+// Controls actuation of pull up/down to ALL GPIO pins.
+#define GPPUD (GPIO_BASE + 0x94)
+
+// Controls actuation of pull up/down for specific GPIO pin.
+#define GPPUDCLK0 (GPIO_BASE + 0x98)
+
+// The base address for UART.
+#define PL011_UART_BASE (GPIO_BASE + 0x1000)
+
+// The offsets for each register for the UART.
+#define PL011_UART_DR (PL011_UART_BASE + 0x00)
+#define PL011_UART_RSRECR (PL011_UART_BASE + 0x04)
+#define PL011_UART_FR (PL011_UART_BASE + 0x18)
+#define PL011_UART_ILPR (PL011_UART_BASE + 0x20)
+#define PL011_UART_IBRD (PL011_UART_BASE + 0x24)
+#define PL011_UART_FBRD (PL011_UART_BASE + 0x28)
+#define PL011_UART_LCRH (PL011_UART_BASE + 0x2C)
+#define PL011_UART_CR (PL011_UART_BASE + 0x30)
+#define PL011_UART_IFLS (PL011_UART_BASE + 0x34)
+#define PL011_UART_IMSC (PL011_UART_BASE + 0x38)
+#define PL011_UART_RIS (PL011_UART_BASE + 0x3C)
+#define PL011_UART_MIS (PL011_UART_BASE + 0x40)
+#define PL011_UART_ICR (PL011_UART_BASE + 0x44)
+#define PL011_UART_DMACR (PL011_UART_BASE + 0x48)
+#define PL011_UART_ITCR (PL011_UART_BASE + 0x80)
+#define PL011_UART_ITIP (PL011_UART_BASE + 0x84)
+#define PL011_UART_ITOP (PL011_UART_BASE + 0x88)
+#define PL011_UART_TDR (PL011_UART_BASE + 0x8C)
+
+void uart_init();
+void putchar(char c);
+char getchar(void);
+_Bool putchar_non_blocking(char c);
+int getchar_non_blocking(void);
+
+// TODO experiment to see if this gives us raw uart irq or the uart
+// irq bit or'd with its enable bit (not crucial for now, since in our
+// code this function only gets called when this irq is enabled)
+//
+// Tell whether bit 25 (the uart bit) is set in ARM_IRQ_PENDING_2.
+static inline _Bool uart_irq_pending(void)
+{
+    uint32_t pending = rd32(ARM_IRQ_PENDING_2);
+
+    return pending & ((uint32_t) 1 << 25);
+}
+
+// Mask uart in arm peripheral interrupts
+static inline void uart_irq_disable(void)
+{
+    uint32_t uart_bit = ((uint32_t) 1) << 25;
+
+    wr32(ARM_DISABLE_IRQS_2, uart_bit);
+}
+
+// Unmask uart in arm peripheral interrupts
+static inline void uart_irq_enable(void)
+{
+    uint32_t uart_bit = ((uint32_t) 1) << 25;
+
+    wr32(ARM_ENABLE_IRQS_2, uart_bit);
+}
+
+// Tell whether the receive interrupt (bit 4) is set in the UART's
+// masked interrupt status register.
+static inline _Bool uart_recv_irq_pending(void)
+{
+    uint32_t masked_status = rd32(PL011_UART_MIS);
+
+    return masked_status & (1 << 4);
+}
+
+// Clear bit 4 (receive) of the UART's interrupt mask register, leaving
+// the other bits as they were.
+static inline void uart_recv_irq_disable(void)
+{
+    uint32_t mask = rd32(PL011_UART_IMSC);
+
+    mask &= ~(1 << 4);
+    wr32(PL011_UART_IMSC, mask);
+}
+
+// Set bit 4 (receive) of the UART's interrupt mask register, leaving
+// the other bits as they were.
+static inline void uart_recv_irq_enable(void)
+{
+    uint32_t mask = rd32(PL011_UART_IMSC);
+
+    mask |= (1 << 4);
+    wr32(PL011_UART_IMSC, mask);
+}
+
+// Acknowledge the receive interrupt by writing its bit to the UART's
+// interrupt clear register.
+static inline void uart_clear_recv_irq(void)
+{
+    uint32_t recv_bit = (1 << 4);
+
+    wr32(PL011_UART_ICR, recv_bit);
+}
+
+// Tell whether the transmit interrupt (bit 5) is set in the UART's
+// masked interrupt status register.
+static inline _Bool uart_send_irq_pending(void)
+{
+    uint32_t masked_status = rd32(PL011_UART_MIS);
+
+    return masked_status & (1 << 5);
+}
+
+// Clear bit 5 (transmit) of the UART's interrupt mask register,
+// leaving the other bits as they were.
+static inline void uart_send_irq_disable(void)
+{
+    uint32_t mask = rd32(PL011_UART_IMSC);
+
+    mask &= ~(1 << 5);
+    wr32(PL011_UART_IMSC, mask);
+}
+
+// Set bit 5 (transmit) of the UART's interrupt mask register, leaving
+// the other bits as they were.
+static inline void uart_send_irq_enable(void)
+{
+    uint32_t mask = rd32(PL011_UART_IMSC);
+
+    mask |= (1 << 5);
+    wr32(PL011_UART_IMSC, mask);
+}
+
+// Acknowledge the transmit interrupt by writing its bit to the UART's
+// interrupt clear register.
+static inline void uart_clear_send_irq(void)
+{
+    uint32_t send_bit = (1 << 5);
+
+    wr32(PL011_UART_ICR, send_bit);
+}
+
+#endif // UART_H
diff --git a/src/arm/PL1/kernel/armclock.h b/src/arm/PL1/kernel/armclock.h
new file mode 100644
index 0000000..0792ad3
--- /dev/null
+++ b/src/arm/PL1/kernel/armclock.h
@@ -0,0 +1,76 @@
+#ifndef ARMCLOCK_H
+#define ARMCLOCK_H
+
+#include <stdint.h>
+
+#include "global.h"
+#include "interrupts.h"
+#define ARMCLK_LOAD (ARM_BASE + 0x400)
+#define ARMCLK_VALUE (ARM_BASE + 0x404)
+#define ARMCLK_CONTROL (ARM_BASE + 0x408)
+#define ARMCLK_IRQ_CLR_ACK (ARM_BASE + 0x40C)
+#define ARMCLK_RAW_IRQ (ARM_BASE + 0x410)
+#define ARMCLK_MASKED_IRQ (ARM_BASE + 0x414)
+#define ARMCLK_RELOAD (ARM_BASE + 0x418)
+#define ARMCLK_PRE_DRIVER (ARM_BASE + 0x41C)
+#define ARMCLK_FREE_RUNNING_COUNTER (ARM_BASE + 0x420)
+
+// Contents of the ARM timer control register (ARMCLK_CONTROL), usable
+// either as a whole word (raw) or field by field (fields).
+// The per-field comments give the bit positions; they rely on the
+// compiler allocating bit-fields starting from the least significant
+// bit.
+typedef union armclk_control
+{
+ uint32_t raw;
+ struct
+ {
+ uint32_t one_shot_mode : 1; // bit 0; unused in RPi
+ // NOTE(review): the name follows the BCM2835 manual's "23-bit"
+ // wording - confirm actual counter width against the datasheet errata
+ uint32_t counter_23bit : 1; // bit 1
+ uint32_t pre_scale : 2; // bits 3:2
+ uint32_t bit_4 : 1; // bit 4
+ uint32_t interrupt_enable : 1; // bit 5
+ uint32_t periodic_mode : 1; // bit 6; unused in RPi
+ uint32_t timer_enable : 1; // bit 7
+ uint32_t halt_in_debug : 1; // bit 8
+ uint32_t free_running_enable : 1; // bit 9
+ uint32_t bits_15_10 : 6; // bits 15:10
+ uint32_t free_running_pre_scaler : 8; // bits 23:16
+ uint32_t bits_31_24 : 8; // bits 31:24
+ } fields;
+} armclk_control_t;
+
+// Program the ARM timer control register from scratch: every bit is
+// zero except counter_23bit, interrupt_enable and timer_enable.
+static inline void armclk_init(void)
+{
+    armclk_control_t ctrl = { .raw = 0 };
+
+    ctrl.fields.counter_23bit = 1;
+    ctrl.fields.interrupt_enable = 1;
+    ctrl.fields.timer_enable = 1;
+
+    wr32(ARMCLK_CONTROL, ctrl.raw);
+}
+
+// Enable the timer interrupt in two places: set interrupt_enable in
+// the timer's own control register (other bits are preserved), then
+// write bit 0 to the "enable basic irqs" register.
+static inline void armclk_enable_timer_irq(void)
+{
+    armclk_control_t ctrl = { .raw = rd32(ARMCLK_CONTROL) };
+
+    ctrl.fields.interrupt_enable = 1;
+    wr32(ARMCLK_CONTROL, ctrl.raw);
+
+    wr32(ARM_ENABLE_BASIC_IRQS, 1);
+}
+
+// Disable the timer interrupt in two places: clear interrupt_enable in
+// the timer's own control register (other bits are preserved), then
+// write bit 0 to the "disable basic irqs" register.
+static inline void armclk_disable_timer_irq(void)
+{
+    armclk_control_t ctrl = { .raw = rd32(ARMCLK_CONTROL) };
+
+    ctrl.fields.interrupt_enable = 0;
+    wr32(ARMCLK_CONTROL, ctrl.raw);
+
+    wr32(ARM_DISABLE_BASIC_IRQS, 1);
+}
+
+// Arm the timer anew: first write to the irq clear/ack register, then
+// put timeout into the load register.
+static inline void armclk_irq_settimeout(uint32_t timeout)
+{
+    uint32_t const ack = 0;
+
+    wr32(ARMCLK_IRQ_CLR_ACK, ack);
+    wr32(ARMCLK_LOAD, timeout);
+}
+
+// Tell whether bit 0 (the ARM timer bit) is set in the basic pending
+// irqs register.
+static inline _Bool armclk_irq_pending(void)
+{
+    uint32_t basic_pending = rd32(ARM_IRQ_BASIC_PENDING);
+
+    return (basic_pending & 1) != 0;
+}
+
+#endif // ARMCLOCK_H
diff --git a/src/arm/PL1/kernel/atags.c b/src/arm/PL1/kernel/atags.c
new file mode 100644
index 0000000..e2e6a24
--- /dev/null
+++ b/src/arm/PL1/kernel/atags.c
@@ -0,0 +1,103 @@
+#include "atags.h"
+#include "io.h"
+
+// Return the tag that follows tag in the list. The header's size field
+// is counted in 32-bit words, hence the uint32_t pointer arithmetic.
+static inline struct atag_header *next_tag(struct atag_header *tag)
+{
+    uint32_t *tag_words = (uint32_t*) tag;
+
+    return (struct atag_header*) (tag_words + tag->size);
+}
+
+// TAG_CONTENTS_FUN(foo) defines the helper
+//   struct atag_foo *foo_tag_contents(struct atag_header *tag)
+// which returns a pointer to what lies directly behind the tag's
+// header (tag + 1), typed as the payload structure of that tag kind.
+// The helper does not check that the tag really is of kind foo.
+#define TAG_CONTENTS_FUN(tagname) \
+ static inline struct atag_##tagname *tagname##_tag_contents \
+ (struct atag_header *tag) \
+ { \
+ return (struct atag_##tagname*) (tag + 1); \
+ }
+
+// one helper per payload structure declared in atags.h
+TAG_CONTENTS_FUN(header)
+TAG_CONTENTS_FUN(core)
+TAG_CONTENTS_FUN(mem)
+TAG_CONTENTS_FUN(videotext)
+TAG_CONTENTS_FUN(ramdisk)
+TAG_CONTENTS_FUN(initrd2)
+TAG_CONTENTS_FUN(serialnr)
+TAG_CONTENTS_FUN(revision)
+TAG_CONTENTS_FUN(videolfb)
+TAG_CONTENTS_FUN(cmdline)
+
+// Walk the atags list and return the size stored in the first
+// ATAG_MEM tag. Returns 0 when the list ends (ATAG_NONE) before a mem
+// tag is found, or when the mem tag describes memory that doesn't
+// start at address 0x0.
+uint32_t find_memory_size(struct atag_header *atags)
+{
+    struct atag_mem *mem_tag;
+
+    // we silently assume there will only be one mem atag
+    for (;; atags = next_tag(atags))
+    {
+        if (atags->tag == ATAG_MEM)
+            break;
+
+        if (atags->tag == ATAG_NONE)
+            return 0;
+    }
+
+    mem_tag = mem_tag_contents(atags);
+
+    // our design assumes address 0x0 is available, so we reject mem
+    // atag saying otherwise
+    if (mem_tag->start == 0)
+        return mem_tag->size;
+
+    puts("ignoring information about memory, "
+         "that doesn't start at 0x0");
+
+    return 0;
+}
+
+// Print a human-readable description of a single tag: its name and,
+// for CORE and MEM tags, also its fields. Tags of unrecognized type
+// are reported as unknown together with their type value.
+void print_tag(struct atag_header *tag)
+{
+// TAG_CASE expands to one "case" of the switch below: it prints the
+// tag's name, makes the tag's payload available to <instructions> as
+// the local variable "contents" and ends with a break (the semicolon
+// is supplied at the place of use).
+#define TAG_CASE(tagname_upcase, tagname_locase, instructions) \
+ case ATAG_##tagname_upcase: \
+ puts("ATAG_" #tagname_upcase ":"); \
+ { \
+ struct atag_##tagname_locase *contents = \
+ tagname_locase##_tag_contents(tag); \
+ instructions; \
+ } \
+ break
+
+ switch (tag->tag)
+ {
+ TAG_CASE(CORE, core,
+ prints(" flags: 0x");
+ printhex(contents->flags); puts("");
+ prints(" page size: ");
+ printdec(contents->pagesize); puts("");
+ prints(" root device: ");
+ printdec(contents->rootdev); puts(""););
+ TAG_CASE(MEM, mem,
+ prints(" memory size: 0x");
+ printhex(contents->size); puts("");
+ prints(" memory start: 0x");
+ printhex(contents->start); puts(""););
+ // the rest are unimportant for now,
+ // as they're not passed by qemu
+ // ("(void) contents" only silences the unused variable warning)
+ TAG_CASE(VIDEOTEXT, videotext, (void) contents;);
+ TAG_CASE(RAMDISK, ramdisk, (void) contents;);
+ TAG_CASE(INITRD2, initrd2, (void) contents;);
+ TAG_CASE(SERIAL, serialnr, (void) contents;);
+ TAG_CASE(REVISION, revision, (void) contents;);
+ TAG_CASE(VIDEOLFB, videolfb, (void) contents;);
+ TAG_CASE(CMDLINE, cmdline, (void) contents;);
+
+ case ATAG_NONE:
+ puts("ATAG_NONE");
+ break;
+ default:
+ prints("!! unknown tag: 0x"); printhex(tag->tag); puts(" !!");
+ }
+}
+
+// Print every tag of the list in order, the terminating ATAG_NONE
+// included.
+void print_atags(struct atag_header *atags)
+{
+    for (;;)
+    {
+        print_tag(atags);
+
+        // ATAG_NONE has just been printed - that was the last one
+        if (atags->tag == ATAG_NONE)
+            break;
+
+        atags = next_tag(atags);
+    }
+}
diff --git a/src/arm/PL1/kernel/atags.h b/src/arm/PL1/kernel/atags.h
new file mode 100644
index 0000000..4b6879f
--- /dev/null
+++ b/src/arm/PL1/kernel/atags.h
@@ -0,0 +1,102 @@
+#ifndef ATAGS_H
+#define ATAGS_H
+
+#include <stdint.h>
+
+#define ATAG_NONE 0x00000000
+#define ATAG_CORE 0x54410001
+#define ATAG_MEM 0x54410002
+#define ATAG_VIDEOTEXT 0x54410003
+#define ATAG_RAMDISK 0x54410004
+#define ATAG_INITRD2 0x54420005
+#define ATAG_SERIAL 0x54410006
+#define ATAG_REVISION 0x54410007
+#define ATAG_VIDEOLFB 0x54410008
+#define ATAG_CMDLINE 0x54410009
+
+// Header that starts every tag; the tag's payload follows directly
+// behind it.
+struct atag_header
+{
+ // size of the tag; atags.c steps to the next tag by advancing this
+ // many 32-bit words from the start of the header
+ uint32_t size;
+ // tag type, one of the ATAG_* values above
+ uint32_t tag;
+};
+
+// Payload of an ATAG_CORE tag.
+struct atag_core
+{
+ uint32_t flags;
+ uint32_t pagesize; // printed as "page size" by print_tag()
+ uint32_t rootdev; // printed as "root device" by print_tag()
+};
+
+// Payload of an ATAG_MEM tag: describes a region of memory.
+struct atag_mem
+{
+ uint32_t size; // value returned by find_memory_size()
+ uint32_t start; // find_memory_size() only accepts regions with start == 0
+};
+
+// The structures below describe payloads of the remaining tag types.
+// The kernel code shown alongside this header only recognizes these
+// tags by name and never reads their fields.
+
+// Payload of an ATAG_VIDEOTEXT tag.
+struct atag_videotext
+{
+ uint8_t x;
+ uint8_t y;
+ uint16_t video_page;
+ uint8_t video_mode;
+ uint8_t video_cols;
+ uint16_t video_ega_bx;
+ uint8_t video_lines;
+ uint8_t video_isvga;
+ uint16_t video_points;
+};
+
+// Payload of an ATAG_RAMDISK tag.
+struct atag_ramdisk
+{
+ uint32_t flags;
+ uint32_t size;
+ uint32_t start;
+};
+
+// Payload of an ATAG_INITRD2 tag.
+struct atag_initrd2
+{
+ uint32_t start;
+ uint32_t size;
+};
+
+// Payload of an ATAG_SERIAL tag.
+struct atag_serialnr
+{
+ uint32_t low;
+ uint32_t high;
+};
+
+// Payload of an ATAG_REVISION tag.
+struct atag_revision
+{
+ uint32_t rev;
+};
+
+// Payload of an ATAG_VIDEOLFB tag.
+struct atag_videolfb
+{
+ uint16_t lfb_width;
+ uint16_t lfb_height;
+ uint16_t lfb_depth;
+ uint16_t lfb_linelength;
+ uint32_t lfb_base;
+ uint32_t lfb_size;
+ uint8_t red_size;
+ uint8_t red_pos;
+ uint8_t green_size;
+ uint8_t green_pos;
+ uint8_t blue_size;
+ uint8_t blue_pos;
+ uint8_t rsvd_size;
+ uint8_t rsvd_pos;
+};
+
+// Payload of an ATAG_CMDLINE tag.
+struct atag_cmdline
+{
+ // NOTE(review): presumably only the first byte of a longer string
+ // whose real length follows from the tag's size - confirm
+ char cmdline[1];
+};
+
+uint32_t find_memory_size(struct atag_header *atags);
+
+void print_tag(struct atag_header *tag);
+
+void print_atags(struct atag_header *atags);
+
+#endif // ATAGS_H
diff --git a/src/arm/PL1/kernel/bcmclock.h b/src/arm/PL1/kernel/bcmclock.h
new file mode 100644
index 0000000..462346d
--- /dev/null
+++ b/src/arm/PL1/kernel/bcmclock.h
@@ -0,0 +1,35 @@
+#ifndef BCMCLOCK_H
+#define BCMCLOCK_H
+
+#include <stdint.h>
+#include "interrupts.h"
+#include "global.h"
+
+#define ST_BASE (PERIF_BASE + 0x3000) // System Timer
+
+#define ST_CS (ST_BASE + 0x0) // System Timer Control/Status
+#define ST_CLO (ST_BASE + 0x4) // System Timer Counter Lower 32 bits
+#define ST_CHI (ST_BASE + 0x8) // System Timer Counter Higher 32 bits
+#define ST_C0 (ST_BASE + 0xC) // System Timer Compare 0
+#define ST_C1 (ST_BASE + 0x10) // System Timer Compare 1
+#define ST_C2 (ST_BASE + 0x14) // System Timer Compare 2
+#define ST_C3 (ST_BASE + 0x18) // System Timer Compare 3
+
+// Enable irq number 3 by writing its bit to ARM_ENABLE_IRQS_1.
+static inline void bcmclk_enable_timer_irq(void)
+{
+    uint32_t irq_bit = 1 << 3;
+
+    wr32(ARM_ENABLE_IRQS_1, irq_bit);
+}
+
+// Disable irq number 3 by writing its bit to ARM_DISABLE_IRQS_1.
+static inline void bcmclk_disable_timer_irq(void)
+{
+    uint32_t irq_bit = 1 << 3;
+
+    wr32(ARM_DISABLE_IRQS_1, irq_bit);
+}
+
+// Program compare register 3 with "current lower counter word +
+// timeout" (the sum wraps around on overflow), then write bit 3 to the
+// control/status register.
+static inline void bcmclk_irq_settimeout(uint32_t timeout)
+{
+    uint32_t deadline = rd32(ST_CLO) + timeout;
+
+    wr32(ST_C3, deadline);
+    wr32(ST_CS, 1 << 3);
+}
+
+#endif // BCMCLOCK_H
diff --git a/src/arm/PL1/kernel/cp_regs.h b/src/arm/PL1/kernel/cp_regs.h
new file mode 100644
index 0000000..e5e7063
--- /dev/null
+++ b/src/arm/PL1/kernel/cp_regs.h
@@ -0,0 +1,114 @@
+#ifndef CP_REGS_H
+#define CP_REGS_H
+
+#include <stdint.h>
+
+// SCTLR - System Control Register
+
+// Wondering why I didn't typedef this struct with fields?
+// That's because ... (TODO: this explanation was left unfinished)
+// Contents of SCTLR, usable either as a whole word (raw) or field by
+// field (fields). The per-field comments give the bit positions; they
+// rely on the compiler allocating bit-fields starting from the least
+// significant bit. Fields named Bit_N / Bits_N_M are bits this code
+// gives no name to.
+typedef union
+{
+ uint32_t raw;
+ struct
+ {
+ uint32_t M : 1; // bit 0
+ uint32_t A : 1; // bit 1
+ uint32_t C : 1; // bit 2
+ uint32_t Bits_4_3 : 2; // bits 4:3
+ uint32_t CP15BEN : 1; // bit 5
+ uint32_t Bit_6 : 1; // bit 6
+ uint32_t B : 1; // bit 7
+ uint32_t Bits_9_8 : 2; // bits 9:8
+ uint32_t SW : 1; // bit 10
+ uint32_t Z : 1; // bit 11
+ uint32_t I : 1; // bit 12
+ uint32_t V : 1; // bit 13
+ uint32_t RR : 1; // bit 14
+ uint32_t Bit_15 : 1; // bit 15
+ uint32_t Bit_16 : 1; // bit 16
+ uint32_t HA : 1; // bit 17
+ uint32_t Bit_18 : 1; // bit 18
+ uint32_t WXN : 1; // bit 19
+ uint32_t UWXN : 1; // bit 20
+ uint32_t FI : 1; // bit 21
+ uint32_t U : 1; // bit 22
+ uint32_t Bit_23 : 1; // bit 23
+ uint32_t VE : 1; // bit 24
+ uint32_t EE : 1; // bit 25
+ uint32_t Bit_26 : 1; // bit 26
+ uint32_t NMFI : 1; // bit 27
+ uint32_t TRE : 1; // bit 28
+ uint32_t AFE : 1; // bit 29
+ uint32_t TE : 1; // bit 30
+ uint32_t Bit_31 : 1; // bit 31
+ } fields;
+} SCTLR_t;
+
+// DACR - Domain Access Control Register
+// DACR holds 16 pairs of bits; each pair represents access
+// permissions to a respective memory domain. There's no point
+// declaring a union for this.
+typedef uint32_t DACR_t;
+
+// Extract the 2-bit permissions field of the given domain (0 - 15)
+// from DACR contents; the result is one of the DOMAIN_* values.
+inline static uint8_t domain_permissions(DACR_t DACR_contents,
+                                         int domain)
+{
+    int field_offset = 2 * domain;
+
+    return (DACR_contents >> field_offset) & 0b11;
+}
+
+// Return a copy of DACR_contents in which the 2-bit permissions field
+// of the given domain (0 - 15) is replaced with permissions (one of
+// the DOMAIN_* values). Fields of all other domains are left intact;
+// only the 2 lowest bits of permissions are used.
+inline static DACR_t set_domain_permissions(DACR_t DACR_contents,
+                                            int domain,
+                                            uint8_t permissions)
+{
+    int field_offset = 2 * domain;
+
+    // The masks are built in uint32_t: shifting the (signed int)
+    // constant 0b11 by 30 places, as needed for domain 15, would
+    // overflow int.
+    uint32_t domain_field_mask = (uint32_t) 0b11 << field_offset;
+
+    // Dropping the bits above the lowest two guarantees an
+    // out-of-range value can't spill over into the neighbouring
+    // domains' fields.
+    uint32_t new_domain_permissions =
+        ((uint32_t) permissions & 0b11) << field_offset;
+
+    return (DACR_contents & ~domain_field_mask)
+        | new_domain_permissions;
+}
+
+#define DOMAIN_NO_ACCESS 0b00
+#define DOMAIN_CLIENT_ACCESS 0b01
+#define DOMAIN_RESERVED 0b10
+#define DOMAIN_MANAGER_ACCESS 0b11
+
+// TTBR - Translation Table Base Register (there're 2 of them with
+// (almost) the same structure)
+
+// A field in TTBCR determines how long the address field is in TTBR0,
+// but here we'll ignore this and just assume the greatest possible
+// length of this field (18 bits). In TTBR1 it's always 18 bits.
+// Contents of a TTBR, usable either as a whole word (raw) or field by
+// field (fields). The per-field comments give the bit positions; they
+// rely on the compiler allocating bit-fields starting from the least
+// significant bit.
+// Note that the #defines placed inside the struct body are ordinary
+// preprocessor macros: they are not scoped to the struct and stay
+// visible for the rest of the translation unit.
+typedef union
+{
+ uint32_t raw;
+ struct
+ {
+
+ uint32_t C : 1; // bit 0
+ uint32_t S : 1; // bit 1
+ uint32_t IMP : 1; // bit 2
+ uint32_t RGN : 2; // bits 4:3
+ uint32_t NOS : 1; // bit 5
+ uint32_t IRGN_0 : 1; // bit 6
+ // despite its name, the field below is 7 bits wide and covers
+ // bits 13:7 (bit 6 is IRGN_0 above)
+ uint32_t Bits_13_6 : 7; // bits 13:7
+ uint32_t Bits_31_14 : 18; // bits 31:14
+ // with multiprocessing extensions the cacheable bit becomes
+ // upper IRGN bit
+#define IRGN_1 C
+
+ // i'm not sure 'interprocess region bits' is the right name,
+ // I'm just guessing (by analogy to RGN -> region bits)
+ // (longer, descriptive aliases for the field names above)
+#define TTBR_CACHEABLE_BIT C
+#define TTBR_INTERPROCESS_REGION_BITS_1 IRGN_1
+#define TTBR_SHAREABLE_BIT S
+#define TTBR_IMPLEMENTATION_DEFINED_BIT IMP
+#define TTBR_REGION_BITS_1_0 RGN
+#define TTBR_INTERPROCESS_REGION_BITS_0 IRGN_0
+#define TTBR_NON_OUTER_SHAREABLE_BIT NOS
+#define TTBR_TRANSLATION_TABLE_BASE_ADDRESS Bits_31_14
+ } fields;
+} TTBR_t;
+
+#endif // CP_REGS_H
diff --git a/src/arm/PL1/kernel/demo_functionality.c b/src/arm/PL1/kernel/demo_functionality.c
new file mode 100644
index 0000000..217a858
--- /dev/null
+++ b/src/arm/PL1/kernel/demo_functionality.c
@@ -0,0 +1,121 @@
+#include "io.h"
+#include "psr.h"
+#include "memory.h"
+#include "translation_table_descriptors.h"
+#include "ramfs.h"
+#include "strings.h"
+#include "paging.h"
+#include "armclock.h"
+#include "scheduler.h"
+
+// Print a line describing which kind of paging (VMSA) the processor
+// reports to support.
+void demo_paging_support(void)
+{
+    uint32_t ID_MMFR0;
+    uint32_t vmsa_support;
+    char *paging;
+
+    // get contents of coprocessor register to check for paging support
+    asm("mrc p15, 0, %0, c0, c1, 4" : "=r" (ID_MMFR0));
+
+    /* lowest 4 bits indicate VMSA support */
+    vmsa_support = ID_MMFR0 & 0xf;
+
+    if (vmsa_support == 0)
+        paging = "no paging";
+    else if (vmsa_support == 1)
+        paging = "implementation defined paging";
+    else if (vmsa_support == 2)
+        paging = "VMSAv6, with cache and TLB type registers";
+    else if (vmsa_support == 3)
+        paging = "VMSAv7, with support for remapping and access flag";
+    else if (vmsa_support == 4)
+        paging = "VMSAv7 with PXN bit supported";
+    else if (vmsa_support == 5)
+        paging = "VMSAv7, PXN and long format descriptors. EPAE is supported.";
+    else
+        paging = "?_? unknown paging ?_?";
+
+    puts(paging);
+}
+
+// Print a line naming the processor mode the code currently runs in,
+// together with that mode's privilege level.
+void demo_current_mode(void)
+{
+    // get content of current program status register to check the
+    // current processor mode (should be system, as we set it in boot.S)
+    PSR_t CPSR = read_CPSR();
+
+    prints("current mode: ");
+
+    switch (CPSR.fields.PSR_MODE_4_0)
+    {
+    case MODE_USER:
+        puts("User (PL0)");
+        return;
+    case MODE_FIQ:
+        puts("FIQ (PL1)");
+        return;
+    case MODE_IRQ:
+        puts("IRQ (PL1)");
+        return;
+    case MODE_SUPERVISOR:
+        puts("Supervisor (PL1)");
+        return;
+    case MODE_MONITOR:
+        puts("Monitor (PL1)");
+        return;
+    case MODE_ABORT:
+        puts("Abort (PL1)");
+        return;
+    case MODE_HYPERVISOR:
+        puts("Hyp (PL2)");
+        return;
+    case MODE_UNDEFINED:
+        puts("Undefined (PL1)");
+        return;
+    case MODE_SYSTEM:
+        puts("System (PL1)");
+        return;
+    default:
+        puts("Unknown mode");
+        return;
+    }
+}
+
+#define TRANSLATION_TABLE \
+ ((short_section_descriptor_t volatile*) TRANSLATION_TABLE_BASE)
+
+extern char
+ _binary_ramfs_img_start,
+ _binary_ramfs_img_end,
+ _binary_ramfs_img_size;
+
+// Prepare and start the unprivileged (PL0) demo program:
+//  1. find PL_0_test.img in the ramfs image embedded in the kernel,
+//  2. claim a physical memory section and map it for PL0 code,
+//  3. check the mapping by writing a message through the physical
+//     address and printing it through the virtual one,
+//  4. copy the program into the section and schedule it.
+// Never returns; when one of the first two steps fails, a message is
+// printed and the processor is halted for good.
+void __attribute__((noreturn)) demo_setup_PL0(void)
+{
+    // find PL_0_test.img in ramfs
+    struct ramfile PL_0_test_img;
+
+    if (find_file(&_binary_ramfs_img_start, "PL_0_test.img",
+                  &PL_0_test_img))
+    {
+        puts("PL_0_test.img not found :(");
+
+        // wfi returns as soon as the core gets woken up, so it has to
+        // be repeated forever - otherwise we'd carry on below with
+        // PL_0_test_img left uninitialized
+        while (1)
+            asm volatile ("wfi");
+    }
+
+    // dummy value 5 for now, as we haven't implemented processes yet
+    uint16_t physical_section_number = claim_and_map_section
+        ((void*) 5, PL0_SECTION_NUMBER, AP_2_0_MODEL_RW_ALL);
+
+    if (physical_section_number == SECTION_NULL)
+    {
+        puts("Couldn't claim memory section for unprivileged code :(");
+        while(1);
+    }
+
+    // physical address = section number * 1 MB
+    size_t physical_section_start =
+        (((size_t) physical_section_number) << 20);
+
+    // check that translation works... by copying a string using one
+    // mapping and reading it using other :D
+    char str_part1[] = "mapped section for PL0 code (0x";
+    char str_part2[] = " -> 0x";
+    char str_part3[] = ")";
+
+    // string_end walks through the message being assembled at the
+    // start of the section; each "string_end += n" skips over the
+    // piece written just before (n is 8 after a number, as
+    // uint32_to_hex() is taken to emit 8 hex digits)
+    char *string_end = (char*) physical_section_start;
+
+    memcpy(string_end, str_part1, sizeof(str_part1) - 1);
+    uint32_to_hex(VIRTUAL_PL0_MEMORY_START,
+                  string_end += sizeof(str_part1) - 1);
+    memcpy(string_end += 8, str_part2, sizeof(str_part2) - 1);
+    uint32_to_hex(physical_section_start,
+                  string_end += sizeof(str_part2) - 1);
+    // the last piece is copied together with its terminating null byte
+    memcpy(string_end += 8, str_part3, sizeof(str_part3));
+
+    puts((char*) VIRTUAL_PL0_MEMORY_START);
+
+    // now paste a userspace program to that section
+    memcpy((void*) VIRTUAL_PL0_MEMORY_START,
+           PL_0_test_img.file_contents, PL_0_test_img.file_size);
+
+    puts("copied PL0 code to it's section");
+
+    puts("All ready! scheduling!");
+
+    schedule_new(VIRTUAL_PL0_MEMORY_START, // the new pc
+                 VIRTUAL_PL0_MEMORY_END); // the new sp
+}
diff --git a/src/arm/PL1/kernel/demo_functionality.h b/src/arm/PL1/kernel/demo_functionality.h
new file mode 100644
index 0000000..a338c71
--- /dev/null
+++ b/src/arm/PL1/kernel/demo_functionality.h
@@ -0,0 +1,16 @@
+#ifndef DEMO_FUNCTIONALITY_H
+#define DEMO_FUNCTIONALITY_H
+
+void demo_paging_support(void);
+
+void demo_current_mode(void);
+
+//void demo_setup_libkernel(void);
+
+void demo_setup_PL0(void);
+
+//void demo_go_unprivileged(void);
+
+//void demo_setup_interrupts(void);
+
+#endif // DEMO_FUNCTIONALITY_H
diff --git a/src/arm/PL1/kernel/interrupt_vector.S b/src/arm/PL1/kernel/interrupt_vector.S
new file mode 100644
index 0000000..1ec80f7
--- /dev/null
+++ b/src/arm/PL1/kernel/interrupt_vector.S
@@ -0,0 +1,56 @@
+// Exception vector table: eight branch instructions, one per entry,
+// in the order required by the ARM architecture (reset, undefined
+// instruction, supervisor call, prefetch abort, data abort, an entry
+// not used as a regular exception here, irq, fiq).
+// Both kinds of abort share one handler.
+_interrupt_vectors:
+ b reset_handler_caller
+ b undef_handler_caller
+ b svc_handler_caller
+ b abort_handler_caller
+ b abort_handler_caller
+ b generic_handler_caller
+ b irq_handler_caller
+ b fiq_handler_caller
+
+// Set up the supervisor stack and jump (without link, so there's no
+// way back here) to the C function reset_handler, whose address is
+// loaded from the literal pool.
+reset_handler_caller:
+ ldr sp, =_supervisor_stack_top
+ ldr r5, =reset_handler
+ bx r5
+
+// Same as above, for the C function undefined_instruction_vector.
+// No registers of the interrupted code are saved.
+undef_handler_caller:
+ ldr sp, =_supervisor_stack_top
+ ldr r5, =undefined_instruction_vector
+ bx r5
+
+// Supervisor call entry: save the caller's r0-r12 and the return
+// address on the supervisor stack and call the C function
+// supervisor_call_handler with a pointer to the saved registers as
+// its argument.
+svc_handler_caller:
+ ldr sp, =_supervisor_stack_top
+ push {r0-r12, lr}
+ // r0 = address of the 14 words just saved
+ mov r0, sp
+ ldr r5, =supervisor_call_handler
+ blx r5
+ // Reload the registers from the (possibly modified) saved copies and
+ // return from the exception: loading pc with ^ present also restores
+ // CPSR from SPSR. The value the C function returned in r0 is lost
+ // here, as r0 gets reloaded too.
+ ldm sp!, {r0-r12, pc} ^
+
+// Set up the supervisor stack and jump (without link) to the C
+// function abort_handler. No registers of the interrupted code are
+// saved.
+abort_handler_caller:
+ ldr sp, =_supervisor_stack_top
+ ldr r5, =abort_handler
+ bx r5
+
+// Same as above, for the C function generic_handler.
+generic_handler_caller:
+ ldr sp, =_supervisor_stack_top
+ ldr r5, =generic_handler
+ bx r5
+
+// Irq entry: save the interrupted code's r0-r12 and its resume address
+// on the irq stack and call the C function irq_handler with a pointer
+// to the saved registers as its argument.
+irq_handler_caller:
+ ldr sp, =_irq_stack_top
+ // make lr the address execution should resume at after the irq
+ sub lr, #4
+ push {r0-r12, lr}
+ // r0 = address of the 14 words just saved
+ mov r0, sp
+ ldr r3, =irq_handler
+ blx r3
+ // reload the saved registers and return from the exception (loading
+ // pc with ^ present also restores CPSR from SPSR)
+ ldm sp!, {r0-r12, pc} ^
+
+// Set up the fiq stack and jump (without link) to the C function
+// fiq_handler. No registers of the interrupted code are saved.
+fiq_handler_caller:
+ ldr sp, =_fiq_stack_top
+ ldr r5, =fiq_handler
+ bx r5
+
+// NOTE(review): nothing in this file branches to this label (the
+// vector table uses irq_handler_caller) - it looks like a leftover;
+// confirm that no other file references it before removing.
+// It sets sp and r5 and then returns from the exception right away,
+// without calling any handler.
+irq:
+ mov sp, #0x8000
+ ldr r5, =abort_handler
+ subs pc,lr,#4
diff --git a/src/arm/PL1/kernel/interrupts.c b/src/arm/PL1/kernel/interrupts.c
new file mode 100644
index 0000000..121d79c
--- /dev/null
+++ b/src/arm/PL1/kernel/interrupts.c
@@ -0,0 +1,135 @@
+#include "io.h"
+#include "uart.h"
+#include "svc_interface.h"
+#include "armclock.h"
+#include "scheduler.h"
+// Entry point of the C part of the kernel; defined in another file.
+void __attribute__((noreturn)) setup(void);
+
+/**
+ @brief The reset handler
+
+ from what I've heard, reset is never used on the Pi;
+ in our case it should run once - when stage1 of the kernel
+ jumps to stage2
+**/
+void reset_handler(void)
+{
+ setup();
+}
+
+/**
+ @brief The undefined instruction interrupt handler
+
+ Reports the exception through error().
+**/
+void undefined_instruction_vector(void)
+{
+ error("Undefined instruction occured");
+}
+
+/**
+ @brief The supervisor call handler
+
+ regs points to the caller's saved registers: regs[0] selects the
+ requested service and regs[1] carries its argument. For UART_GETCHAR
+ the result is stored back into regs[0]. When the uart is not ready,
+ the request is handed over to the scheduler.
+
+ @return always 0 (a dummy value)
+**/
+uint32_t supervisor_call_handler(uint32_t regs[14])
+{
+    uint32_t request = regs[0];
+
+    if (request == UART_PUTCHAR)
+    {
+        if (putchar_non_blocking(regs[1]))
+            schedule_wait_for_output(regs, regs[1]);
+    }
+    else if (request == UART_GETCHAR)
+    {
+        int c = getchar_non_blocking();
+
+        if (c == -1)
+            schedule_wait_for_input(regs);
+
+        regs[0] = c;
+    }
+    else if (request == UART_WRITE)
+    {
+        error("UART_WRITE not implemented!!!!!");
+    }
+    else
+    {
+        // perhaps we should kill the process now?
+        error("unknown supervisor call type!!!!!");
+    }
+
+    return 0; // a dummy value
+}
+
+/**
+ @brief The handler of both data and prefetch aborts
+
+ Reports the exception through error().
+**/
+void abort_handler(void)
+{
+ // TODO maybe dump registers here?
+ error("re-entered system due to data/prefetch abort");
+}
+
+/**
+ @brief The handler for the vector table entry that has no handler
+ of its own
+
+ Reports the exception through error().
+**/
+void generic_handler(void)
+{
+ error("something weird happened");
+}
+
+/**
+ @brief The irq handler
+
+ regs points to the registers of the interrupted code, as saved by
+ the assembly entry code. Timer irqs are checked first; uart irqs
+ are only looked at when no timer irq is pending. Anything else is
+ reported through error().
+**/
+void irq_handler(uint32_t regs[14])
+{
+ if (armclk_irq_pending())
+ {
+ // Leave the handler for good: put PL1_PSR into SPSR and perform
+ // an exception return ("subs pc, lr, #0" copies SPSR into CPSR)
+ // that lands in schedule_save_context, with the pointer to the
+ // saved registers passed in r0.
+ write_SPSR(PL1_PSR);
+ asm volatile("mov r0, %[context]\n\r"
+ "mov lr, %[return_func]\n\r"
+ "subs pc, lr, #0" ::
+ [context]"r" (regs),
+ [return_func]"r" (schedule_save_context) :
+ "memory");
+ }
+ else if (uart_irq_pending())
+ {
+ // acknowledge each pending uart irq source and let the scheduler
+ // retry the corresponding operation
+ if (uart_recv_irq_pending())
+ {
+ uart_clear_recv_irq();
+ scheduler_try_input();
+ }
+ if (uart_send_irq_pending())
+ {
+ uart_clear_send_irq();
+ scheduler_try_output();
+ }
+
+ // The irq didn't come from user mode: rather than return to the
+ // interrupted code, leave through an exception return into
+ // schedule (same technique as in the timer case above).
+ if (read_SPSR().fields.PSR_MODE_4_0 != MODE_USER)
+ {
+ write_SPSR(PL1_PSR);
+ asm volatile("mov lr, %0\n\r"
+ "subs pc, lr, #0" ::
+ "r" (schedule) : "memory");
+ }
+ }
+ else
+ error("unknown irq");
+
+ // important - don't allow this handler to return if irq came from
+ // PL1 (likely supervisor, because we don't really use system) mode
+}
+
+/**
+ @brief The fiq handler
+
+ Reports the exception through error().
+**/
+void fiq_handler(void)
+{
+ error("fiq happened");
+}
+
+
+/* Here is your interrupt function */
+//void
+//__attribute__((interrupt("IRQ")))
+//__attribute__((section(".interrupt_vectors.text")))
+//irq_handler2(void) {
+// /* You code goes here */
+//// uart_puts("GOT INTERRUPT!\r\n");
+//
+// local_timer_clr_reload_reg_t temp = { .IntClear = 1, .Reload = 1 };
+// QA7->TimerClearReload = temp; // Clear interrupt & reload
+//}
+
+///* here is your main */
+//int enable_timer(void) {
+//
+// QA7->TimerRouting.Routing = LOCALTIMER_TO_CORE0_IRQ; // Route local timer IRQ to Core0
+// QA7->TimerControlStatus.ReloadValue = 100; // Timer period set
+// QA7->TimerControlStatus.TimerEnable = 1; // Timer enabled
+// QA7->TimerControlStatus.IntEnable = 1; // Timer IRQ enabled
+// QA7->TimerClearReload.IntClear = 1; // Clear interrupt
+// QA7->TimerClearReload.Reload = 1; // Reload now
+// QA7->Core0TimerIntControl.nCNTPNSIRQ_IRQ = 1; // We are in NS EL1 so enable IRQ to core0 that level
+// QA7->Core0TimerIntControl.nCNTPNSIRQ_FIQ = 0; // Make sure FIQ is zero
+//// uart_puts("Enabled Timer\r\n");
+// return(0);
+//} \ No newline at end of file
diff --git a/src/arm/PL1/kernel/interrupts.h b/src/arm/PL1/kernel/interrupts.h
new file mode 100644
index 0000000..c2818ee
--- /dev/null
+++ b/src/arm/PL1/kernel/interrupts.h
@@ -0,0 +1,47 @@
+#ifndef RPI_MMU_EXAMPLE_INTERRUPTS_H
+#define RPI_MMU_EXAMPLE_INTERRUPTS_H
+
+#include <stdint.h>
+
+// ARM control block
+// called "base address for the ARM interrupt register" elsewhere
+#define ARM_BASE (PERIF_BASE + 0xB000)
+#define ARM_IRQ_BASIC_PENDING (ARM_BASE + 0x200)
+#define ARM_IRQ_PENDING_1 (ARM_BASE + 0x204)
+#define ARM_IRQ_PENDING_2 (ARM_BASE + 0x208)
+#define ARM_FIQ_CONTROL (ARM_BASE + 0x20C)
+#define ARM_ENABLE_IRQS_1 (ARM_BASE + 0x210)
+#define ARM_ENABLE_IRQS_2 (ARM_BASE + 0x214)
+#define ARM_ENABLE_BASIC_IRQS (ARM_BASE + 0x218)
+#define ARM_DISABLE_IRQS_1 (ARM_BASE + 0x21C)
+#define ARM_DISABLE_IRQS_2 (ARM_BASE + 0x220)
+#define ARM_DISABLE_BASIC_IRQS (ARM_BASE + 0x224)
+// offset of peripherals + offset of the first addressable register of
+// the interrupt controller
+// NOTE(review): unlike ARM_BASE above, this hard-codes the peripheral
+// base 0x3F000000 instead of using PERIF_BASE, so it won't follow a
+// change of the board type; the value equals ARM_IRQ_BASIC_PENDING
+// only for boards whose PERIF_BASE is 0x3F000000.
+#define RPI_INTERRUPT_CONTROLLER_BASE ( 0x3F000000UL + 0xB200 )
+// Bits in the Enable_Basic_IRQs register to enable various interrupts.
+// According to the BCM2835 ARM Peripherals manual, section 7.5 */
+#define RPI_BASIC_ARM_TIMER_IRQ (1 << 0)
+#define RPI_BASIC_ARM_MAILBOX_IRQ (1 << 1)
+#define RPI_BASIC_ARM_DOORBELL_0_IRQ (1 << 2)
+#define RPI_BASIC_ARM_DOORBELL_1_IRQ (1 << 3)
+#define RPI_BASIC_GPU_0_HALTED_IRQ (1 << 4)
+#define RPI_BASIC_GPU_1_HALTED_IRQ (1 << 5)
+#define RPI_BASIC_ACCESS_ERROR_1_IRQ (1 << 6)
+#define RPI_BASIC_ACCESS_ERROR_0_IRQ (1 << 7)
+
+// @brief The interrupt controller memory mapped register set
+// The ten 32-bit members come in the same order as the register
+// addresses #define'd above (ARM_IRQ_BASIC_PENDING at ARM_BASE + 0x200
+// through ARM_DISABLE_BASIC_IRQS at ARM_BASE + 0x224), so a pointer to
+// this struct placed at ARM_BASE + 0x200 gives access to the same
+// registers by name.
+typedef struct {
+ volatile uint32_t IRQ_basic_pending;
+ volatile uint32_t IRQ_pending_1;
+ volatile uint32_t IRQ_pending_2;
+ volatile uint32_t FIQ_control;
+ volatile uint32_t Enable_IRQs_1;
+ volatile uint32_t Enable_IRQs_2;
+ volatile uint32_t Enable_Basic_IRQs;
+ volatile uint32_t Disable_IRQs_1;
+ volatile uint32_t Disable_IRQs_2;
+ volatile uint32_t Disable_Basic_IRQs;
+} rpi_irq_controller_t;
+
+extern rpi_irq_controller_t* RPI_GetIrqController(void);
+#endif //RPI_MMU_EXAMPLE_INTERRUPTS_H
diff --git a/src/arm/PL1/kernel/kernel_stage1.S b/src/arm/PL1/kernel/kernel_stage1.S
new file mode 100644
index 0000000..e770513
--- /dev/null
+++ b/src/arm/PL1/kernel/kernel_stage1.S
@@ -0,0 +1,168 @@
+/* arm mode, cortex-a7 compatibility
+ *
+ * _boot is entry point for the kernel.
+ *
+ * Kernel copies its embedded stage 2 to address 0x0 and jumps to
+ * it (to the reset handler). Registers r0 - r2 are arguments for
+ * the kernel, but we're not using them for now.
+ *
+ * This file is based on (and almost identical with) loader_stage1.S
+ */
+
+.global _boot
+_boot:
+    // Only let the first core execute: read MPIDR and look at
+    // the core number kept in its lowest 2 bits
+    mrc p15, 0, r3, c0, c0, 5
+    and r3, r3, #3
+    cmp r3, #0
+    beq proceed
+    // Park every other core. wfe may return at any time (e.g. on
+    // an event sent by another core), so we have to loop - otherwise
+    // a woken core would execute the data word below as an
+    // instruction and then fall through to "proceed".
+    // This is still a kind of bluff - races can theoretically
+    // occur when the main core overwrites this part of memory
+1:
+    wfe
+    b 1b
+
+    // magic number found in the second word of the first tag of
+    // a valid atags structure; "proceed" compares against it
+atags_magic:
+    .word 0x54410001
+
+proceed:
+    // load the second word of structure passed to us through r2;
+    // if it's atags, its second word should be the magic number
+    // Btw, location of ATAGS is always 0x100.
+    ldr r3, [r2, #4]
+    adr r4, atags_magic
+    ldr r4, [r4]
+
+    // compare second word of assumed atags with magic number
+    // to see, if it's really atags and not sth else (i.e. fdt)
+    cmp r3, r4
+
+    // normally at start r0 contains value 0;
+    // value 3 in r0 would tell stage2 code, we found no atags :(
+    movne r0, #3
+    bne stage2_blob_copying
+
+    // if atags was found, copying of it takes place here
+
+    // the following loop finds, where atags ends
+    // r3 shall point to currently looked-at tag
+    mov r3, r2
+
+find_end_of_atags_loop:
+    // load first word of tag header to r4 (it contains tag size)
+    ldr r4, [r3]
+    // make r3 point at the next tag (by adding 4*tag_size to it)
+    add r3, r4, lsl #2
+
+    // load second word of tag header to r5 (it contains tag type)
+    ldr r5, [r3, #4]
+
+    // if tag value is 0, it is the last tag
+    cmp r5, #0
+    bne find_end_of_atags_loop
+
+    add r3, #8 // make r3 point at the end of last tag
+    sub r3, r2 // get atags size in r3
+
+    // at this point r2 and r3 hold start and size of atags,
+    // respectively; now we'll compute, where we're going to have
+    // free space to put atags in; we want to put atags either
+    // right after our blob or, if it doesn't fit between
+    // blob end and the address stage1 is loaded at, after stage1
+
+    // get blob size to r5 (blob gets copied to 0x0, so its size
+    // is also the address, at which it is going to end)
+    adr r5, blob_size
+    ldr r5, [r5]
+
+    // we could only copy atags to a 4-aligned address
+    mov r6, #4
+    bl aling_r5_to_r6
+
+    // compute where atags copied right after blob would end
+    add r6, r5, r3
+    // we can only overwrite stuff before the copying loop
+    adr r7, copy_atags_loop
+    cmp r6, r7
+    // r6 and r7 are addresses, so the comparison has to be
+    // unsigned (bls), just like in the copying loops (blo)
+    bls copy_atags
+
+    // atags wouldn't fit - use memory after stage1 as destination
+    adr r5, _boot
+    adr r6, stage1_size
+    ldr r6, [r6]
+    add r5, r6
+    mov r6, #4
+    bl aling_r5_to_r6
+
+copy_atags:
+    // now copy atags (r2 - atags start; r3 - atags size;
+    // r5 - destination; r4 - iterator; r6 - buffer)
+    mov r4, #0
+
+copy_atags_loop:
+    ldr r6, [r2, r4]
+    str r6, [r5, r4]
+    add r4, #4
+    cmp r4, r3
+    blo copy_atags_loop
+
+    mov r2, r5 // place the new atags address in r2
+    b stage2_blob_copying // atags stuff done; proceed
+
+// mini-function, that does what the label says; clobbers r7
+// in: r5 - value to align, r6 - alignment
+// out: r5 = ((r5 - 1) & ~(r6 - 1)) + r6, i.e. r5 rounded up to
+// a multiple of r6; r6 is left intact
+// (the mask trick assumes r6 is a power of 2 - callers pass 4)
+// called with bl; returns by moving lr to pc
+aling_r5_to_r6:
+ sub r5, #1
+ sub r7, r6, #1
+ bic r5, r7
+ add r5, r6
+ mov pc, lr
+
+
+stage2_blob_copying: // copy stage2 of the kernel to address 0x0
+
+ // Registers set up here and used by the copying loop placed
+ // after the blob: r3 - source, r4 - destination (0x0),
+ // r5 - blob size in bytes, r6 - number of bytes copied so far
+
+ // first, load address of stage2_start to r3 (a PIC way)
+ adr r3, stage2_start
+
+ // load destination address for stage2 code to r4
+ mov r4, #0
+
+ // load blob size to r5
+ // The size might get too big for an immediate value, so
+ // we load it from memory.
+ adr r5, blob_size
+ ldr r5, [r5]
+
+ // r6 is the counter - counts the bytes copied
+ mov r6, #0
+
+ // This initial piece of code might get overwritten when we
+ // copy stage2, so the actual copying loop shall be after
+ // stage2 blob. We want this asm code to be PIC, so we're
+ // computing address of stage2_end into r7.
+ // NOTE(review): this assumes the size of kernel_stage2.img is
+ // a multiple of 4, so that r7 is a word-aligned address - confirm
+ add r7, r3, r5
+ bx r7
+
+// sizes computed by the assembler and read at runtime through adr/ldr
+blob_size:
+ .word stage2_end - stage2_start
+stage1_size:
+ .word stage1_end - _boot
+
+.align 4
+stage2_start:
+ .incbin "kernel_stage2.img"
+stage2_end:
+
+ // each word of the blob is loaded to r7 and stored
+ // from r7 to its destination in a loop
+loop:
+ ldr r7, [r3, r6]
+ str r7, [r4, r6]
+ add r6, r6, #4
+ cmp r6, r5
+ blo loop
+
+ // Call stage2 of the kernel (branch to 0x0,
+ // which is the reset handler).
+ bx r4
+
+stage1_end:
diff --git a/src/arm/PL1/kernel/kernel_stage1.ld b/src/arm/PL1/kernel/kernel_stage1.ld
new file mode 100644
index 0000000..3130634
--- /dev/null
+++ b/src/arm/PL1/kernel/kernel_stage1.ld
@@ -0,0 +1,27 @@
+ENTRY(_boot) /* defined in kernel_stage1.S; qemu needs it to run elf file */
+
+/* Code starts at 0x8000 - that's where RPis in 32-bit mode load
+ * kernel at. My experiments do, however, show, that qemu emulating
+ * RPi2 loads the kernel at 0x10000! (took some pain to find out).
+ * rpi-open-firmware, on the other hand, loads kernel at 0x2000000!
+ * This is not really a problem, since:
+ * 1. We can use our bootloader to load the kernel at 0x8000
+ * 2. We've rewritten stage 1 of both bootloader and kernel in
+ * careful assembly, so that they should work regardless of
+ * where they are loaded.
+ * 3. In qemu, we can load kernel.elf instead of raw binary
+ * (qemu will do the right thing then)
+ */
+
+SECTIONS
+{
+
+ . = 0x8000;
+
+ /* __start and __end mark the bounds of the stage 1 image */
+ __start = .;
+ .kernel_stage1 :
+ {
+ /* the whole image comes from kernel_stage1.o, which embeds
+ * stage 2 of the kernel through .incbin */
+ KEEP(kernel_stage1.o)
+ }
+ __end = .;
+}
diff --git a/src/arm/PL1/kernel/kernel_stage2.ld b/src/arm/PL1/kernel/kernel_stage2.ld
new file mode 100644
index 0000000..9411ca2
--- /dev/null
+++ b/src/arm/PL1/kernel/kernel_stage2.ld
@@ -0,0 +1,80 @@
+/* This second stage of the kernel is run from address 0x0 */
+
+/* 4096 entries of 4 bytes each (lvl1 translation table) */
+TRANSLATION_TABLE_SIZE = 4096 * 4;
+/* 4096 entries of 8 bytes each (list of physical sections) */
+SECTIONS_LIST_SIZE = 4096 * 8;
+/* 1MB */
+MMU_SECTION_SIZE = 1 << 20;
+
+SECTIONS
+{
+
+ . = 0x0;
+
+ /* __start and __end mark the bounds of the loaded kernel image */
+ __start = .;
+ /* interrupt vector has to be first, i.e. at address 0x0 */
+ .interrupt_vector :
+ {
+ KEEP(interrupt_vector.o)
+ }
+ . = ALIGN(4);
+ .embedded_ramfs :
+ {
+ ramfs_embeddable.o
+ }
+ .rest_of_kernel :
+ {
+ *(.text)
+ *(.data)
+ *(.rodata)
+ *(.bss)
+ /* NOTE(review): common symbols usually live in an input
+ * section named COMMON; presumably they get picked up
+ * by *(*) below anyway - confirm */
+ *(/COMMON/)
+ *(*)
+ }
+ __end = .;
+
+ /* Everything below is NOLOAD - it only reserves address space
+ * and defines symbols, which C code sees through memory.h */
+
+ /* translation table is placed at a 2^14-bytes aligned address */
+ . = ALIGN(1 << 14);
+
+ .translation_table (NOLOAD) :
+ {
+ _translation_table_start = .;
+
+ . = . + TRANSLATION_TABLE_SIZE;
+
+ _translation_table_end = .;
+ }
+
+ .sections_list (NOLOAD) :
+ {
+ _sections_list_start = .;
+
+ . = . + SECTIONS_LIST_SIZE;
+
+ _sections_list_end = .;
+ }
+
+ /* move to the next 1MB boundary and leave one whole
+ * MMU section unused before the stacks */
+ . = ALIGN(1 << 20);
+ . = . + MMU_SECTION_SIZE;
+
+ /* stacks: 256KB for FIQ mode, 256KB for IRQ mode and 512KB for
+ * supervisor mode - 1MB (one MMU section) in total */
+ .stack (NOLOAD) :
+ {
+ _stack_start = .;
+
+ _fiq_stack_start = .;
+
+ . = . + (1 << 18);
+
+ _fiq_stack_top = .;
+
+ _irq_stack_start = .;
+
+ . = . + (1 << 18);
+
+ _irq_stack_top = .;
+
+ _supervisor_stack_start = .;
+
+ . = . + (1 << 19);
+
+ _supervisor_stack_top = .;
+
+ _stack_end = .;
+ }
+}
diff --git a/src/arm/PL1/kernel/memory.h b/src/arm/PL1/kernel/memory.h
new file mode 100644
index 0000000..bdeba52
--- /dev/null
+++ b/src/arm/PL1/kernel/memory.h
@@ -0,0 +1,72 @@
+#ifndef MEMORY_H
+#define MEMORY_H
+
+#include <stddef.h>
+// for uint32_t used by INTERRUPT_VECTOR_TABLE_START
+#include <stdint.h>
+
+// These macros were heavily used b4 I moved all the address
+// computation to the linker script. Now I'm just keeping them
+// in case they're needed for something else :)
+
+// 2 to the power of EXP, as size_t. Arguments of both macros are
+// parenthesized, so that expressions like POWER_OF_2(a + b) or
+// ALIGN_POWER_OF_2(x | y, 20) expand the way one would expect.
+#define POWER_OF_2(EXP) (((size_t) 1) << (EXP))
+
+// ADDR rounded up to a multiple of 2^EXP (an already aligned
+// ADDR is returned unchanged); evaluates ADDR once and EXP twice
+#define ALIGN_POWER_OF_2(ADDR, EXP) \
+    ((((ADDR) - 1) & ~(POWER_OF_2(EXP) - 1)) + POWER_OF_2(EXP))
+
+// size of a memory section described by one lvl1 descriptor (1MB)
+#define SECTION_SIZE POWER_OF_2(20)
+
+#define ALIGN_SECTION(ADDR) ALIGN_POWER_OF_2(ADDR, 20)
+
+
+// memory layout
+
+#define INTERRUPT_VECTOR_TABLE_START ((uint32_t) 0x0)
+
+// all those symbols are defined in the linker script;
+// only their addresses are meaningful, never their values
+extern char __end;
+extern char __start;
+extern char _translation_table_start;
+extern char _translation_table_end;
+extern char _sections_list_start;
+extern char _sections_list_end;
+extern char _stack_start;
+extern char _fiq_stack_start;
+extern char _fiq_stack_top;
+extern char _irq_stack_start;
+extern char _irq_stack_top;
+extern char _supervisor_stack_start;
+extern char _supervisor_stack_top;
+extern char _stack_end;
+
+#define KERNEL_START ((size_t) &__start) // this is 0x0
+#define KERNEL_END ((size_t) &__end)
+
+// first 2^14 aligned address after the kernel
+#define TRANSLATION_TABLE_BASE ((size_t) &_translation_table_start)
+#define TRANSLATION_TABLE_END ((size_t) &_translation_table_end)
+
+// another 32KB after the translation table are used for sections list
+#define SECTIONS_LIST_START ((size_t) &_sections_list_start)
+#define SECTIONS_LIST_END ((size_t) &_sections_list_end)
+
+// first section after the translation table is left unused;
+// the next section is used as the stack
+#define STACK_START ((size_t) &_stack_start)
+#define FIQ_STACK_START ((size_t) &_fiq_stack_start)
+#define FIQ_STACK_END ((size_t) &_fiq_stack_top)
+#define IRQ_STACK_START ((size_t) &_irq_stack_start)
+#define IRQ_STACK_END ((size_t) &_irq_stack_top)
+#define SUPERVISOR_STACK_START ((size_t) &_supervisor_stack_start)
+#define SUPERVISOR_STACK_END ((size_t) &_supervisor_stack_top)
+#define STACK_END ((size_t) &_stack_end)
+
+// memory below this address is meant for privileged (PL1) code only
+#define PRIVILEGED_MEMORY_END STACK_END
+
+
+// the following describes the virtual section for our PL0 programs
+#define PL0_SECTION_NUMBER ((size_t) 0xaaa)
+
+#define VIRTUAL_PL0_MEMORY_START (PL0_SECTION_NUMBER << 20)
+#define VIRTUAL_PL0_MEMORY_END \
+    (VIRTUAL_PL0_MEMORY_START + SECTION_SIZE)
+
+#endif // MEMORY_H
+
diff --git a/src/arm/PL1/kernel/paging.c b/src/arm/PL1/kernel/paging.c
new file mode 100644
index 0000000..771c681
--- /dev/null
+++ b/src/arm/PL1/kernel/paging.c
@@ -0,0 +1,249 @@
+#include "cp_regs.h"
+#include "strings.h"
+#include "memory.h"
+#include "translation_table_descriptors.h"
+#include "io.h"
+
+#include "paging.h"
+
+// Fill the lvl1 translation table with 4096 section descriptors,
+// that map every 1MB virtual section to the physical section of the
+// same number (PL1 read-write, no PL0 access), then program DACR,
+// SCTLR, TTBCR and TTBR0 and turn the MMU on.
+// Progress is reported with puts()/prints().
+void setup_flat_map(void)
+{
+    // compute translation table base address
+    // translation table shall start at first 2^14-bytes aligned
+    // address after the kernel image
+
+    prints("chosen lvl1 translation table address: 0x");
+    printhex(TRANSLATION_TABLE_BASE);
+    puts("");
+
+    // flat map all memory
+    puts("preparing translation table");
+    short_descriptor_lvl1_t volatile *translation_table =
+        (short_descriptor_lvl1_t*) TRANSLATION_TABLE_BASE;
+
+    for (uint32_t i = 0; i < 4096; i++)
+        translation_table[i].section_fields =
+            (short_section_descriptor_t) {
+            .SECTION_BASE_ADDRESS_31_20 = i,
+            .SECTION_OR_SUPERSECTION_BIT = DESCRIBES_SECTION,
+            .ACCESS_PERMISSIONS_2 = AP_2_0_MODEL_RW_PL1 >> 2,
+            .ACCESS_PERMISSIONS_1_0 = AP_2_0_MODEL_RW_PL1 & 0b011,
+            .DESCRIPTOR_TYPE_1 =
+            SHORT_DESCRIPTOR_SECTION_OR_SUPERSECTION >> 1,
+            // rest of fields are 0s
+        };
+
+    // meddle with domain settings
+    puts("setting domain0 to client access and blocking other domains");
+
+    DACR_t DACR = 0;
+    DACR = set_domain_permissions(DACR, 0, DOMAIN_CLIENT_ACCESS);
+    for (int i = 1; i < 16; i++)
+        DACR = set_domain_permissions(DACR, i, DOMAIN_NO_ACCESS);
+
+    // the above should do the same as this:
+    // DACR = 1;
+
+    asm("mcr p15, 0, %0, c3, c0, 0" :: "r" (DACR));
+
+    // meddle with SCTLR, which determines how some bits in
+    // table descriptors work and also controls caches
+    // we don't want to use access flag, so we set AFE to 0
+    // we don't want TEX remap, so we set TRE to 0
+    // we also disable data and instruction caches and the MMU
+
+    // some of this is redundant (i.e. MMU should already be disabled)
+    puts("setting C, I, AFE and TRE to 0 in SCTLR");
+
+    SCTLR_t SCTLR;
+    // volatile, because a system register is read here and we
+    // don't want the compiler to move or merge this read
+    asm volatile("mrc p15, 0, %0, c1, c0, 0" : "=r" (SCTLR.raw));
+
+    SCTLR.fields.M = 0; // disable MMU
+    SCTLR.fields.C = 0; // disable data cache
+    SCTLR.fields.I = 0; // disable instruction cache
+    SCTLR.fields.TRE = 0; // disable TEX remap
+    SCTLR.fields.AFE = 0; // disable access flag usage
+    asm("mcr p15, 0, %0, c1, c0, 0\n\r"
+        "isb" :: "r" (SCTLR.raw) : "memory");
+
+    // TODO: move invalidation instructions to some header as inlines
+
+    puts("invalidating instruction cache, branch prediction,"
+         " and entire main TLB");
+
+    // invalidate instruction cache
+    asm("mcr p15, 0, r0, c7, c5, 0\n\r" // r0 gets ignored
+        "isb" ::: "memory");
+
+    // invalidate branch-prediction
+    asm("mcr p15, 0, r0, c7, c5, 6\n\r" // r0 - same as above
+        "isb" ::: "memory");
+
+    // invalidate main Translation Lookaside Buffer; the invalidation
+    // is only guaranteed to be complete after a dsb
+    asm("mcr p15, 0, %0, c8, c7, 0\n\r"
+        "dsb\n\r"
+        "isb" :: "r" (0) : "memory");
+
+    // now set TTBCR to use TTBR0 exclusively
+    puts("Setting TTBCR.N to 0, so that TTBR0 is used everywhere");
+
+    uint32_t TTBCR = 0;
+    asm("mcr p15, 0, %0, c2, c0, 2" :: "r" (TTBCR));
+
+    // Now do stuff with TTBR0
+    TTBR_t TTBR0;
+    TTBR0.raw = 0;
+    TTBR0.fields.TTBR_TRANSLATION_TABLE_BASE_ADDRESS =
+        TRANSLATION_TABLE_BASE >> 14;
+    // rest of TTBR0 remains 0s
+
+    asm("mcr p15, 0, %0, c2, c0, 0" :: "r" (TTBR0.raw));
+
+    // enable MMU
+    puts("enabling the MMU");
+
+    // redundant - we already have SCTLR contents in the variable
+    // asm("mrc p15, 0, %0, c1, c0, 0" : "=r" (SCTLR.raw));
+
+    SCTLR.fields.M = 1;
+
+    // dsb makes sure all the descriptors written above have reached
+    // memory before the first translation table walk can happen
+    asm("dsb\n\r"
+        "mcr p15, 0, %0, c1, c0, 0\n\r"
+        "isb" :: "r" (SCTLR.raw) : "memory");
+}
+
+// special values of section_node's owner field
+#define OWNER_FREE ((void*) 0)
+#define OWNER_KERNEL ((void*) 1)
+#define OWNER_SPLIT ((void*) 2)
+
+// we want to maintain a list of free and used physical sections
+struct section_node
+{
+ // we're going to add processes, process management and
+ // struct process. Then, owner will be struct process*.
+ void *owner; // 0 if free, 1 if used by kernel, 2 if split to pages
+
+ // it's actually a 2-directional list;
+ // end of list is marked by reference to SECTION_NULL;
+ // we use offsets into sections_list array instead of pointers;
+ uint16_t prev, next;
+};
+
+// array indexed by physical section number;
+// set to SECTIONS_LIST_START by setup_pager_structures()
+static struct section_node volatile *sections_list;
+
+// numbers of sections of each kind
+static uint16_t
+ all_sections_count, kernel_sections_count,
+ split_sections_count, free_sections_count;
+
+// heads of the respective lists;
+// those are SECTION_NULL when the corresponding count is 0;
+static uint16_t
+ first_free_section, first_kernel_section, first_split_section;
+
+// Initialize the list of physical sections: sections overlapping
+// memory below PRIVILEGED_MEMORY_END are put on the kernel list and
+// the rest of the sections fitting in available_mem - on the free list.
+// available_mem - amount of physical memory in bytes; the caller has
+// to make sure it is not smaller than PRIVILEGED_MEMORY_END.
+void setup_pager_structures(uint32_t available_mem)
+{
+    all_sections_count = available_mem / SECTION_SIZE;
+    // round up - a section only partially occupied by privileged
+    // memory must belong to the kernel as well
+    kernel_sections_count =
+        (PRIVILEGED_MEMORY_END + SECTION_SIZE - 1) / SECTION_SIZE;
+    free_sections_count = all_sections_count - kernel_sections_count;
+    split_sections_count = 0;
+
+    sections_list = (struct section_node*) SECTIONS_LIST_START;
+
+    first_split_section = SECTION_NULL;
+
+    for (uint16_t i = 0; i < kernel_sections_count; i++)
+        sections_list[i] = (struct section_node) {
+            .owner = OWNER_KERNEL,
+            .prev = i == 0 ? SECTION_NULL : i - 1,
+            .next = i == kernel_sections_count - 1 ? SECTION_NULL : i + 1
+        };
+
+    first_kernel_section = 0;
+
+    for (uint16_t i = kernel_sections_count;
+         i < all_sections_count; i++)
+        sections_list[i] = (struct section_node) {
+            .owner = OWNER_FREE,
+            .prev = i == kernel_sections_count ? SECTION_NULL : i - 1,
+            .next = i == all_sections_count - 1 ? SECTION_NULL : i + 1
+        };
+
+    // list heads are SECTION_NULL when the corresponding count is 0
+    first_free_section = free_sections_count ?
+        kernel_sections_count : SECTION_NULL;
+
+    puts("Initialized kernel's internal structures for paging");
+    prints("We have "); printdect(free_sections_count);
+    puts(" free sections left for use");
+}
+
+// return section number or SECTION_NULL in case of failure
+// Take the first section off the free list and hand it to owner.
+// Sections claimed for OWNER_KERNEL are pushed to the front of the
+// kernel list; for any other owner the node is left unlinked.
+// Returns the section number, SECTION_NULL if nothing is free.
+static uint16_t claim_section(void *owner)
+{
+    if (free_sections_count == 0)
+        return SECTION_NULL; // failure
+
+    uint16_t const claimed = first_free_section;
+
+    // unlink the claimed section from the free list
+    free_sections_count--;
+
+    if (free_sections_count == 0)
+        first_free_section = SECTION_NULL;
+    else
+    {
+        uint16_t const successor = sections_list[claimed].next;
+
+        sections_list[successor].prev = SECTION_NULL;
+
+        first_free_section = successor;
+    }
+
+    // new contents of the claimed section's node
+    struct section_node node = {
+        .owner = owner,
+        .prev = SECTION_NULL,
+        .next = SECTION_NULL
+    };
+
+    if (owner == OWNER_KERNEL)
+    {
+        // the claimed section becomes the new head of kernel list
+        sections_list[first_kernel_section].prev = claimed;
+
+        node.next = first_kernel_section;
+    }
+
+    sections_list[claimed] = node;
+
+    if (owner == OWNER_KERNEL)
+    {
+        kernel_sections_count++;
+
+        first_kernel_section = claimed;
+    }
+
+    return claimed;
+}
+
+// Claim a free physical section for owner and make the lvl1
+// descriptor number where_to_map point at it.
+// access_permissions is an AP[2:0] value (one of AP_2_0_MODEL_*).
+// return values like claim_section()
+uint16_t claim_and_map_section
+(void *owner, uint16_t where_to_map, uint8_t access_permissions)
+{
+    uint16_t section = claim_section(owner);
+
+    if (section == SECTION_NULL)
+        return section;
+
+    short_section_descriptor_t volatile *section_entry =
+        &((short_section_descriptor_t*)
+          TRANSLATION_TABLE_BASE)[where_to_map];
+
+    // other fields of the descriptor are kept the way they were
+    short_section_descriptor_t descriptor = *section_entry;
+
+    // set up address of section
+    descriptor.SECTION_BASE_ADDRESS_31_20 = section;
+
+    // set requested permissions on section
+    descriptor.ACCESS_PERMISSIONS_2 = access_permissions >> 2;
+    descriptor.ACCESS_PERMISSIONS_1_0 = access_permissions & 0b011;
+
+    // write modified descriptor to the table
+    *section_entry = descriptor;
+
+    // invalidate main Translation Lookaside Buffer;
+    // first dsb makes the descriptor write visible to table walks,
+    // second one waits for the invalidation to complete;
+    // the value in the register passed to mcr gets ignored
+    asm volatile("dsb\n\r"
+                 "mcr p15, 0, %0, c8, c7, 0\n\r"
+                 "dsb\n\r"
+                 "isb" :: "r" (0) : "memory");
+
+    return section;
+}
diff --git a/src/arm/PL1/kernel/paging.h b/src/arm/PL1/kernel/paging.h
new file mode 100644
index 0000000..4ac8efa
--- /dev/null
+++ b/src/arm/PL1/kernel/paging.h
@@ -0,0 +1,14 @@
+#ifndef PAGING_H
+#define PAGING_H
+
+// for the fixed-width types used in prototypes below; without it
+// this header only works when something else pulled stdint.h in
+#include <stdint.h>
+
+// fills the translation table with a 1:1 mapping and enables the MMU
+void setup_flat_map(void);
+
+// initializes the list of physical sections;
+// available_mem is the amount of physical memory in bytes
+void setup_pager_structures(uint32_t available_mem);
+
+// "no section" marker - ends sections lists and reports failure
+#define SECTION_NULL 0xffff
+
+// claims a free physical section for owner and maps it at virtual
+// section number where_to_map with AP[2:0] = access_permissions;
+// returns section number or SECTION_NULL in case of failure
+uint16_t claim_and_map_section
+(void *owner, uint16_t where_to_map, uint8_t access_permissions);
+
+#endif // PAGING_H
diff --git a/src/arm/PL1/kernel/psr.h b/src/arm/PL1/kernel/psr.h
new file mode 100644
index 0000000..f300a7a
--- /dev/null
+++ b/src/arm/PL1/kernel/psr.h
@@ -0,0 +1,88 @@
+#ifndef PSR_H
+#define PSR_H
+
+#include <stdint.h>
+
+// processor modes, as encoded in the 5-bit mode field
+// (PSR_MODE_4_0, bits 4:0) of a program status register
+enum execution_mode {
+ MODE_USER = 0b10000,
+ MODE_FIQ = 0b10001,
+ MODE_IRQ = 0b10010,
+ MODE_SUPERVISOR = 0b10011,
+ MODE_MONITOR = 0b10110,
+ MODE_ABORT = 0b10111,
+ MODE_HYPERVISOR = 0b11010,
+ MODE_UNDEFINED = 0b11011,
+ MODE_SYSTEM = 0b11111,
+};
+
+// Contents of a program status register (CPSR or SPSR), accessible
+// either as a raw 32-bit value or through named bit-fields.
+// The PSR_* macros are more descriptive aliases for field names.
+typedef union
+{
+    uint32_t raw;
+    struct
+    {
+        uint32_t M_4_0 : 5; // bits 4:0
+        uint32_t T : 1; // bit 5
+        uint32_t F : 1; // bit 6
+        uint32_t I : 1; // bit 7
+        uint32_t A : 1; // bit 8
+        uint32_t E : 1; // bit 9
+        uint32_t IT_7_2 : 6; // bits 15:10
+        uint32_t GE_3_0 : 4; // bits 19:16
+        uint32_t Bits_23_20 : 4; // bits 23:20
+        uint32_t J : 1; // bit 24
+        uint32_t IT_1_0 : 2; // bits 26:25
+        uint32_t Q : 1; // bit 27
+        uint32_t V : 1; // bit 28
+        uint32_t C : 1; // bit 29
+        uint32_t Z : 1; // bit 30
+        uint32_t N : 1; // bit 31
+#define PSR_MODE_4_0 M_4_0
+#define PSR_THUMB_BIT T
+#define PSR_FIQ_MASK_BIT F
+        // misspelled name kept, so that existing users still compile
+#define PSR_FIQ_MASKK_BIT F
+#define PSR_IRQ_MASK_BIT I
+#define PSR_ASYNC_ABORT_MASK_BIT A
+#define PSR_ENDIANNESS_BIT E
+#define PSR_IF_THEN_STATE_7_2 IT_7_2
+#define PSR_GREATER_THAN_OR_EQUAL_FLAGS GE_3_0
+        // bits 23:20 are reserved
+#define PSR_JAZELLE_BIT J
+#define PSR_IF_THEN_STATE_1_0 IT_1_0
+#define PSR_CUMULATIVE_SATURATION_BIT Q
+#define PSR_OVERFLOW_CONDITION_BIT V
+#define PSR_CARRY_CONDITION_BIT C
+#define PSR_ZERO_CONDITION_BIT Z
+#define PSR_NEGATIVE_CONDITION_BIT N
+    } fields;
+} PSR_t;
+
+// Accessors for program status registers. Reads are volatile asm,
+// because the registers change behind the compiler's back - without
+// it the compiler may merge two reads into one or move a read
+// across code that modifies the register. All four have a "memory"
+// clobber, so that memory accesses are not moved across them.
+
+// returns current contents of CPSR
+inline static PSR_t read_CPSR(void)
+{
+    PSR_t CPSR;
+    // get content of current program status register
+    asm volatile("mrs %0, cpsr" : "=r" (CPSR.raw) :: "memory");
+
+    return CPSR;
+}
+
+// writes CPSR (this may change processor mode and interrupt masks)
+inline static void write_CPSR(PSR_t CPSR)
+{
+    // set content of current program status register
+    asm volatile("msr cpsr, %0" :: "r" (CPSR.raw) : "memory");
+}
+
+// returns contents of SPSR of the current mode
+inline static PSR_t read_SPSR(void)
+{
+    PSR_t SPSR;
+    // get content of saved program status register
+    asm volatile("mrs %0, spsr" : "=r" (SPSR.raw) :: "memory");
+
+    return SPSR;
+}
+
+// writes SPSR of the current mode
+inline static void write_SPSR(PSR_t SPSR)
+{
+    // set content of saved program status register
+    asm volatile("msr spsr, %0" :: "r" (SPSR.raw) : "memory");
+}
+
+#endif // PSR_H
diff --git a/src/arm/PL1/kernel/ramfs.c b/src/arm/PL1/kernel/ramfs.c
new file mode 100644
index 0000000..cc66b4c
--- /dev/null
+++ b/src/arm/PL1/kernel/ramfs.c
@@ -0,0 +1,65 @@
+// driver for the read-only ramfs
+// see makefs.c for details
+
+#include <stdint.h>
+#include "ramfs.h"
+
+// Compare two NUL-terminated strings byte by byte (bytes treated as
+// unsigned char). Returns 0 for equal strings, otherwise the
+// difference between the first pair of differing bytes.
+static int strcmp(char const *str1, char const *str2)
+{
+    unsigned char const *left = (unsigned char const*) str1;
+    unsigned char const *right = (unsigned char const*) str2;
+
+    for (; *left == *right; left++, right++)
+        if (*left == 0)
+            return 0; // both strings ended at the same place
+
+    return (int) *left - (int) *right;
+}
+
+// Number of bytes in str1 before the terminating NUL.
+static uint32_t strlen(char const *str1)
+{
+    char const *end = str1;
+
+    // walk to the terminator
+    while (*end != '\0')
+        end++;
+
+    return (uint32_t) (end - str1);
+}
+
+// Round addr up to a multiple of 4 (an aligned addr is returned
+// unchanged). uintptr_t is used, so that the pointer is never
+// truncated, whatever the pointer size of the target is.
+static inline char *align4(char *addr)
+{
+    return (char*) (((uintptr_t) addr + 3) & ~(uintptr_t) 3);
+}
+
+// Look for a file called filename in the ramfs image. As read by
+// this function, each entry consists of: NUL-terminated name,
+// 4-aligned uint32_t with size of contents, the contents, padding
+// to a 4-aligned address. Entry with an empty name ends the image.
+// Returns 0 and fills *buf when the file is found, -1 otherwise.
+int find_file(void *ramfs, char *filename, struct ramfile *buf)
+{
+    for (char *entry_name = ramfs; *entry_name != '\0'; )
+    {
+        // size field is at the first 4-aligned address after the name
+        uint32_t *size_field = (uint32_t*)
+            align4(entry_name + strlen(entry_name) + 1);
+
+        // contents come right after the size field
+        char *contents = (char*) (size_field + 1);
+
+        if (strcmp(entry_name, filename) != 0)
+        {
+            // not this one - move to the next file in ramfs
+            entry_name = align4(contents + *size_field);
+            continue;
+        }
+
+        buf->file_size = *size_field;
+        buf->file_name = entry_name;
+        buf->file_contents = contents;
+
+        return 0;
+    }
+
+    return -1; // reached end of ramfs; file not found
+}
diff --git a/src/arm/PL1/kernel/ramfs.h b/src/arm/PL1/kernel/ramfs.h
new file mode 100644
index 0000000..cf45736
--- /dev/null
+++ b/src/arm/PL1/kernel/ramfs.h
@@ -0,0 +1,16 @@
+#ifndef RAMFS_H
+#define RAMFS_H
+
+// for uint32_t; without it this header only works when
+// something else included stdint.h first
+#include <stdint.h>
+
+// description of a file found in ramfs; the pointers point into
+// the ramfs image itself (nothing gets copied)
+struct ramfile
+{
+    char *file_name;
+    uint32_t file_size; // size of contents in bytes
+    char *file_contents;
+};
+
+// search for file named filename in ramfs;
+// If found - return 0 and fill buf fields with file's info.
+// Otherwise return a non-zero value.
+int find_file(void *ramfs, char *filename, struct ramfile *buf);
+
+#endif // RAMFS_H
diff --git a/src/arm/PL1/kernel/scheduler.c b/src/arm/PL1/kernel/scheduler.c
new file mode 100644
index 0000000..141ba1d
--- /dev/null
+++ b/src/arm/PL1/kernel/scheduler.c
@@ -0,0 +1,156 @@
+#include "scheduler.h"
+#include "uart.h"
+#include "strings.h"
+#include "armclock.h"
+#include "memory.h"
+#include "io.h"
+
+// for now we only have 1 process in "queue"
+// later there is going to be an actual queue
+
+// saved context of the process; PL0_regs is loaded with a single
+// ldm by schedule(), PL0_sp and PL0_lr are moved to sp and lr
+// while in system mode
+uint32_t PL0_regs[14] = {0}; // contains r0-r12, pc
+uint32_t PL0_sp;
+uint32_t PL0_lr;
+
+PSR_t PL0_PSR; // to be put into spsr when jumping to user mode
+
+// CPSR as read in setup_scheduler_structures(); schedule() uses it
+// (with irq unmasked) while waiting for io
+PSR_t PL1_PSR;
+
+// when set, it means process used GETCHAR system call and once we get
+// a char, we have to return it
+_Bool waiting_for_input = 0;
+
+// when set, it means process used PUTCHAR system call and once we
+// manage to put the char, we can return to process
+_Bool waiting_for_output = 0;
+char waiting_output; // the char to put
+
+// 0 if kernel code in system mode is being run
+// 1 if our process is being run
+// later, when we have many processes, this will hold process id
+uint32_t current_process;
+
+// Remember the CPSR the kernel is running with at the time of
+// the call; schedule() later uses it as a base for the PSR it
+// waits for interrupts with.
+void setup_scheduler_structures(void)
+{
+ PL1_PSR = read_CPSR();
+}
+
+// If the process is waiting to output a char, try to put it without
+// blocking; on success (putchar_non_blocking() returning 0) stop
+// waiting and disable the uart send irq.
+void scheduler_try_output(void)
+{
+    if (!waiting_for_output)
+        return;
+
+    if (putchar_non_blocking(waiting_output))
+        return; // couldn't put the char - keep waiting
+
+    waiting_for_output = 0;
+    uart_send_irq_disable();
+}
+
+// If the process is waiting for input, try to get a char without
+// blocking. The result is always stored in the process' saved r0;
+// unless it is (uint32_t) -1, we stop waiting and disable
+// the uart receive irq.
+void scheduler_try_input(void)
+{
+    if (!waiting_for_input)
+        return;
+
+    uint32_t const received = getchar_non_blocking();
+
+    PL0_regs[0] = received;
+
+    if (received == (uint32_t) (-1))
+        return; // nothing received - keep waiting
+
+    waiting_for_input = 0;
+    uart_recv_irq_disable();
+}
+
+// Set up context of a fresh process (entry point pc, stack pointer
+// sp, lr = 0, user mode with irqs unmasked) and run the scheduler.
+void __attribute__((noreturn))
+schedule_new(uint32_t pc, uint32_t sp)
+{
+    // PSR of the process is based on our current one
+    PSR_t process_PSR = read_CPSR();
+
+    process_PSR.fields.PSR_MODE_4_0 = MODE_USER;
+    process_PSR.fields.PSR_IRQ_MASK_BIT = 0;
+
+    PL0_PSR = process_PSR;
+
+    PL0_lr = 0;
+    PL0_sp = sp;
+    PL0_regs[13] = pc; // last slot of PL0_regs holds pc
+
+    schedule();
+}
+
+// Make the current process wait until char c can be put: record
+// the char, enable the uart send irq and save process' context
+// (which in turn runs the scheduler).
+// regs - r0-r12 and pc of the process, as for schedule_save_context()
+// Calls error(), if no process was being run.
+void __attribute__((noreturn))
+schedule_wait_for_output(uint32_t regs[14], char c)
+{
+ if (current_process == 0)
+ error("SYSTEM tried waiting for output!");
+
+ waiting_for_output = 1;
+ waiting_output = c;
+ uart_send_irq_enable();
+
+ schedule_save_context(regs);
+}
+
+// Make the current process wait for a char of input: enable
+// the uart receive irq and save process' context (which in turn
+// runs the scheduler).
+// regs - r0-r12 and pc of the process, as for schedule_save_context()
+// Calls error(), if no process was being run.
+void __attribute__((noreturn))
+schedule_wait_for_input(uint32_t regs[14])
+{
+ if (current_process == 0)
+ error("SYSTEM tried waiting for input!");
+
+ waiting_for_input = 1;
+ uart_recv_irq_enable();
+
+ schedule_save_context(regs);
+}
+
+// Save context of the interrupted process and run the scheduler.
+// regs - r0-r12 and pc of the process (copied to PL0_regs);
+// PSR of the process is taken from SPSR, its sp and lr are read
+// after a temporary switch to system mode.
+void __attribute__((noreturn))
+schedule_save_context(uint32_t regs[14])
+{
+    memcpy(PL0_regs, regs, sizeof(PL0_regs));
+
+    PL0_PSR = read_SPSR();
+
+    // lr is listed as clobbered only to keep the compiler from
+    // choosing it for one of the operands - lr is banked, so inside
+    // of the asm it names a different register than outside of it
+    asm volatile("cps %[sysmode]\n\r"
+                 "isb\n\r"
+                 "mov %[sp_transfer], sp\n\r"
+                 "mov %[lr_transfer], lr\n\r"
+                 "cps %[supmode]\n\r"
+                 "isb\n\r" :
+                 [sp_transfer]"=r" (PL0_sp),
+                 [lr_transfer]"=r" (PL0_lr):
+                 [sysmode]"I" (MODE_SYSTEM),
+                 [supmode]"I" (MODE_SUPERVISOR) : "memory", "lr");
+
+    schedule();
+}
+
+// Either wait for io the process is blocked on or resume the process.
+// When waiting, irqs get unmasked and the core sleeps; control is
+// not expected to come back here afterwards.
+// When resuming, the saved sp and lr are installed through system
+// mode, the timer irq is armed, SPSR is loaded with the saved PSR
+// of the process and r0-r12 and pc are restored with a single ldm.
+void __attribute__((noreturn)) schedule(void)
+{
+    current_process = 0;
+    armclk_disable_timer_irq();
+
+    if (waiting_for_input || waiting_for_output)
+    {
+        PSR_t new_CPSR = PL1_PSR;
+        new_CPSR.fields.PSR_IRQ_MASK_BIT = 0;
+
+        write_CPSR(new_CPSR);
+
+        // wfi is allowed to return without any interrupt having
+        // been taken, so we wait in a loop instead of running
+        // into unreachable code in such case
+        while (1)
+            asm volatile("wfi");
+    }
+
+    current_process = 1;
+
+    // lr is listed as clobbered only to keep the compiler from
+    // choosing it for one of the operands - lr is banked, so inside
+    // of the asm it names a different register than outside of it
+    asm volatile("cps %[sysmode]\n\r"
+                 "isb\n\r"
+                 "mov sp, %[stackaddr]\n\r"
+                 "mov lr, %[linkaddr]\n\r"
+                 "cps %[supmode]\n\r"
+                 "isb" ::
+                 [sysmode]"I" (MODE_SYSTEM),
+                 [supmode]"I" (MODE_SUPERVISOR),
+                 [stackaddr]"r" (PL0_sp),
+                 [linkaddr]"r" (PL0_lr) : "memory", "lr");
+
+    armclk_irq_settimeout(0x00100000);
+    armclk_enable_timer_irq();
+
+    write_SPSR(PL0_PSR);
+
+    // the ^ makes this ldm an exception return - CPSR gets loaded
+    // from SPSR together with the jump to process' pc
+    asm volatile("ldm %0, {r0 - r12, pc} ^" ::
+                 "r" (PL0_regs) : "memory");
+
+    __builtin_unreachable();
+}
diff --git a/src/arm/PL1/kernel/scheduler.h b/src/arm/PL1/kernel/scheduler.h
new file mode 100644
index 0000000..8c0f569
--- /dev/null
+++ b/src/arm/PL1/kernel/scheduler.h
@@ -0,0 +1,32 @@
+#ifndef SCHEDULER_H
+#define SCHEDULER_H
+
+#include <stdint.h>
+
+#include "psr.h"
+
+// CPSR contents recorded by setup_scheduler_structures()
+extern PSR_t PL1_PSR;
+
+// records current CPSR in PL1_PSR
+void setup_scheduler_structures(void);
+
+// to be called by irq handler when respective uart interrupt happens
+void scheduler_try_output(void);
+
+// to be called by irq handler when respective uart interrupt happens
+void scheduler_try_input(void);
+
+// In functions below regs holds r0-r12 and pc of the process
+
+// makes the process wait until char c gets put; saves its context
+void __attribute__((noreturn))
+schedule_wait_for_output(uint32_t regs[14], char c);
+
+// makes the process wait for a char of input; saves its context
+void __attribute__((noreturn))
+schedule_wait_for_input(uint32_t regs[14]);
+
+// saves context of the process and calls schedule()
+void __attribute__((noreturn))
+schedule_save_context(uint32_t regs[14]);
+
+// waits for io the process is blocked on or resumes the process
+void __attribute__((noreturn)) schedule(void);
+
+// sets up a new process with given entry point and stack pointer
+// and calls schedule()
+void __attribute__((noreturn))
+schedule_new(uint32_t pc, uint32_t sp);
+
+#endif
diff --git a/src/arm/PL1/kernel/setup.c b/src/arm/PL1/kernel/setup.c
new file mode 100644
index 0000000..bf7c9a1
--- /dev/null
+++ b/src/arm/PL1/kernel/setup.c
@@ -0,0 +1,116 @@
+#include "uart.h"
+#include "io.h"
+#include "demo_functionality.h"
+#include "paging.h"
+#include "atags.h"
+// for POWER_OF_2() macro... perhaps the macro should be moved
+#include "memory.h"
+#include "armclock.h"
+#include "scheduler.h"
+
+// Print the amount of memory using the biggest of the units
+// B, KB, MB and GB, in which it is a whole number.
+static void report_memory_size(uint32_t memory_size)
+{
+    char *unit = "GB";
+    uint32_t size_in_unit = memory_size / POWER_OF_2(30);
+
+    if (memory_size % POWER_OF_2(10))
+    {
+        unit = "B";
+        size_in_unit = memory_size;
+    }
+    else if (memory_size % POWER_OF_2(20))
+    {
+        unit = "KB";
+        size_in_unit = memory_size / POWER_OF_2(10);
+    }
+    else if (memory_size % POWER_OF_2(30))
+    {
+        unit = "MB";
+        size_in_unit = memory_size / POWER_OF_2(20);
+    }
+
+    prints ("memory available: ");
+    printdect (size_in_unit);
+    puts (unit);
+}
+
+// C entry point of kernel's stage 2. The arguments are what stage 1
+// left in r0 - r2: r0 equal to 3 means no atags was found, otherwise
+// atags points at the copy of atags made by stage 1.
+// Initializes uart, paging, clock and scheduler, then starts
+// the PL0 demo, which never returns.
+void setup(uint32_t r0, uint32_t machine_type,
+           struct atag_header *atags)
+{
+    uart_init();
+
+    // When we attach screen session after loading kernel with socat
+    // we miss kernel's greeting... So we'll make the kernel wait for
+    // one char we're going to send from within screen
+    getchar();
+
+    puts("Hello, kernel World!");
+
+    prints("ARM machine type: 0x"); printhext(machine_type); puts("");
+
+    // 0 stands for "unknown"
+    uint32_t memory_size = 0;
+
+    // value 3 introduced by stage1 code means no atags was found
+    if (r0 != 3)
+    {
+        prints("ATAGS copied to 0x");
+        printhex((uint32_t) atags); puts("");
+
+        puts("__ ATAGS contents __");
+
+        print_atags(atags);
+
+        puts("__ end of ATAGS contents __");
+
+        memory_size = find_memory_size(atags);
+    }
+    else
+        puts ("No ATAGS was found!");
+
+    if (memory_size)
+        report_memory_size(memory_size);
+    else
+    {
+        // Most Pis have more, but qemu might give us little
+        puts("Couldn't determine available memory - assuming 192MB");
+        memory_size = 192 * POWER_OF_2(20);
+    }
+
+    // assume we need at least one section for PL0
+    if (memory_size < PRIVILEGED_MEMORY_END + SECTION_SIZE)
+    {
+        puts("Not enough memory to continue");
+        for (;;);
+    }
+
+    // both print some info
+    demo_paging_support();
+    demo_current_mode();
+
+    setup_pager_structures(memory_size);
+
+    // prints some info and sets up translation table, turns on MMU
+    setup_flat_map();
+
+    puts("Initializing clock");
+    // sets some general settings for arm timer
+    armclk_init();
+
+    puts("Setting up scheduler's internal structures");
+    setup_scheduler_structures();
+
+    puts("Switching uart to use irqs");
+
+    // note, that kernel's puts() is still going to use blocking io
+    uart_irq_enable();
+
+    // prints some info and sets up a section for PL0 code, loads a blob
+    // there, then runs scheduler... never, ever, ever returns
+    demo_setup_PL0();
+}
diff --git a/src/arm/PL1/kernel/translation_table_descriptors.h b/src/arm/PL1/kernel/translation_table_descriptors.h
new file mode 100644
index 0000000..981c3c7
--- /dev/null
+++ b/src/arm/PL1/kernel/translation_table_descriptors.h
@@ -0,0 +1,180 @@
+#ifndef TRANSLATION_TABLE_DESCRIPTORS_H
+#define TRANSLATION_TABLE_DESCRIPTORS_H
+
+#include <stdint.h>
+
+// ARM lets you choose between 32-bit and 64-bit translation table
+// descriptors (called short and long descriptors respectively).
+// The format of the descriptor differs depending on what it describes
+// (section, supersection, a page table, etc...) and table of which
+// level of lookup it belongs to.
+
+// Even in case of descriptor of a specified type (e.g. short-format
+// section descriptor), a given field inside it may have different
+// meanings depending on settings in coprocessor registers... (yeah, ARM
+// looks a bit messy... all for backward compatibility, i guess)
+
+
+////// Here are the definitions for short-format descriptors
+
+//// short-format page table descriptor
+
+// lvl1 descriptor pointing at a lvl2 page table; the field widths
+// add up to 32 bits, so the struct overlays a raw descriptor word
+typedef struct
+{
+    // 2 bits wide - with a 1-bit field here every field below
+    // would land one bit lower than its comment says
+    uint32_t Bits_1_0 : 2; // bits 1:0
+    uint32_t PXN : 1; // bit 2
+    uint32_t NS : 1; // bit 3
+    uint32_t SBZ : 1; // bit 4
+    uint32_t Domain_3_0 : 4; // bits 8:5
+    uint32_t Bit_9 : 1; // bit 9
+    uint32_t Bits_31_10 : 22; // bits 31:10
+#define DESCRIPTOR_TYPE_1_0 Bits_1_0
+#define PRIVILEGED_EXECUTE_NEVER_BIT PXN
+#define NON_SECURE_BIT NS
+    // me thinks SBZ means "should be zero",
+    // but me sees no point #defining it
+#define DOMAIN_3_0 Domain_3_0
+#define IMPLEMENTATION_DEFINED_BIT Bit_9
+#define PAGE_TABLE_BASE_ADDRESS_31_10 Bits_31_10
+} short_page_table_descriptor_t;
+
+
+//// short-format section descriptor
+
+// lvl1 descriptor of a 1MB section (bit 18 tells it apart from
+// a supersection descriptor); field widths add up to 32 bits
+typedef struct
+{
+ uint32_t PXN : 1; // bit 0
+ uint32_t Bit_1 : 1; // bit 1
+ uint32_t B : 1; // bit 2
+ uint32_t C : 1; // bit 3
+ uint32_t XN : 1; // bit 4
+ uint32_t Domain_3_0 : 4; // bits 8:5
+ uint32_t Bit_9 : 1; // bit 9
+ uint32_t AP_1_0 : 2; // bits 11:10
+ uint32_t TEX_2_0 : 3; // bits 14:12
+ uint32_t AP_2 : 1; // bit 15
+ uint32_t S : 1; // bit 16
+ uint32_t nG : 1; // bit 17
+ uint32_t Bit_18 : 1; // bit 18
+ uint32_t NS : 1; // bit 19
+ uint32_t PA_31_20 : 12; // bits 31:20
+ // some of these are already defined the same for page table
+ //#define PRIVILEGED_EXECUTE_NEVER_BIT PXN
+#define DESCRIPTOR_TYPE_1 Bit_1
+#define BUFFERABLE_BIT B
+#define CACHEABLE_BIT C
+#define EXECUTE_NEVER_BIT XN
+ //#define DOMAIN_3_0 Domain_3_0
+ //#define IMPLEMENTATION_DEFINED_BIT Bit_9
+#define ACCESS_PERMISSIONS_1_0 AP_1_0
+#define TYPE_EXTENSION_2_0 TEX_2_0
+#define ACCESS_PERMISSIONS_2 AP_2
+#define SHAREABLE_BIT S
+#define NON_GLOBAL_BIT nG
+#define SECTION_OR_SUPERSECTION_BIT Bit_18
+ //#define NON_SECURE_BIT NS
+#define SECTION_BASE_ADDRESS_31_20 PA_31_20
+} short_section_descriptor_t;
+
+
+//// short-format supersection descriptor
+
+// lvl1 descriptor of a supersection; differs from the section
+// descriptor in bits 8:5 and 31:20, which here hold parts of
+// a 40-bit physical address; field widths add up to 32 bits
+typedef struct
+{
+ uint32_t PXN : 1; // bit 0
+ uint32_t Bit_1 : 1; // bit 1
+ uint32_t B : 1; // bit 2
+ uint32_t C : 1; // bit 3
+ uint32_t XN : 1; // bit 4
+ uint32_t PA_39_36 : 4; // bits 8:5
+ uint32_t Bit_9 : 1; // bit 9
+ uint32_t AP_1_0 : 2; // bits 11:10
+ uint32_t TEX_2_0 : 3; // bits 14:12
+ uint32_t AP_2 : 1; // bit 15
+ uint32_t S : 1; // bit 16
+ uint32_t nG : 1; // bit 17
+ uint32_t Bit_18 : 1; // bit 18
+ uint32_t NS : 1; // bit 19
+ uint32_t PA_35_32 : 4; // bits 23:20
+ uint32_t PA_31_24 : 8; // bits 31:24
+ // most of these are already defined the same for section
+ //#define PRIVILEGED_EXECUTE_NEVER_BIT PXN
+ //#define DESCRIPTOR_TYPE_1 Bit_1
+ //#define BUFFERABLE_BIT B
+ //#define CACHEABLE_BIT C
+ //#define EXECUTE_NEVER_BIT XN
+#define SUPERSECTION_BASE_ADDRESS_39_36 PA_39_36
+ //#define IMPLEMENTATION_DEFINED_BIT Bit_9
+ //#define ACCESS_PERMISSIONS_1_0 AP_1_0
+ //#define TYPE_EXTENSION_2_0 TEX_2_0
+ //#define ACCESS_PERMISSIONS_2 AP_2
+ //#define SHAREABLE_BIT S
+ //#define NON_GLOBAL_BIT nG
+ //#define SECTION_OR_SUPERSECTION_BIT Bit_18
+ //#define NON_SECURE_BIT NS
+#define SUPERSECTION_BASE_ADDRESS_35_32 PA_35_32
+#define SUPERSECTION_BASE_ADDRESS_31_24 PA_31_24
+} short_supersection_descriptor_t;
+
+
+//// possible access permission field values
+
+// How AP[2:0] is used depends on settings in SCTLR.AFE
+
+// Meaning of #define'd names below:
+// RW - read-write
+// RO - read-only
+// PL1 - a given permission applies to privilege level PL1
+// PL0 - a given permission applies to privilege level PL0
+// ALL - a given permission applies to both privilege levels
+// If only a permission for one privilege level is given in the name,
+// it means the other one has no access.
+
+// When SCTLR.AFE is 0 (access flag not used) and short-format
+// descriptor table is used, the following access permission control
+// schema for AP[2:0] is used:
+#define AP_2_0_MODEL_NO_ACCESS 0b000
+#define AP_2_0_MODEL_RW_PL1 0b001
+#define AP_2_0_MODEL_RW_PL1_RO_PL0 0b010
+#define AP_2_0_MODEL_RW_ALL 0b011
+#define AP_2_0_MODEL_RESERVED 0b100
+#define AP_2_0_MODEL_RO_PL1 0b101
+#define AP_2_0_MODEL_RO_ALL_DEPRECATED 0b110 // use 0b111 instead
+#define AP_2_0_MODEL_RO_ALL 0b111 // reserved in VMSAv6
+// TODO: the #define's of RO_ALL and reserved could be done
+// conditionally depending on the VMSA version available (either give
+// the programmer #including this the possibility to #define their
+// VMSA version or assume the VMSA version respective to the ARM
+// version we're compiling against)
+
+
+//// Values for bit18, that determines whether a descriptor describes
+// section or supersection:
+#define DESCRIBES_SECTION 0b0
+#define DESCRIBES_SUPERSECTION 0b1
+
+
+//// short-format descriptor generic type
+
+// One entry of the lvl1 translation table. All members overlay
+// the same 32-bit word: raw gives the whole value, descriptor_type
+// is meant to give its 2 lowest bits and the *_fields members
+// interpret it as a descriptor of the respective kind.
+typedef union
+{
+ uint32_t raw;
+ uint8_t descriptor_type : 2;
+
+ short_page_table_descriptor_t page_table_fields;
+ short_section_descriptor_t section_fields;
+ short_supersection_descriptor_t supersection_fields;
+} short_descriptor_lvl1_t;
+
+
+//// possible values of descriptor_type field:
+
+#define SHORT_DESCRIPTOR_INVALID 0b00
+#define SHORT_DESCRIPTOR_PAGE_TABLE 0b01
+#define SHORT_DESCRIPTOR_SECTION_OR_SUPERSECTION 0b10
+#define SHORT_DESCRIPTOR_SECTION_OR_SUPERSECTION_PXN 0b11
+// on an implementation that does not support the PXN attribute
+// 0b11 should not be used
+#define SHORT_DESCRIPTOR_RESERVED 0b11
+
+#endif // TRANSLATION_TABLE_DESCRIPTORS_H
diff --git a/src/arm/PL1/loader/loader_stage1.S b/src/arm/PL1/loader/loader_stage1.S
new file mode 100644
index 0000000..69d78c5
--- /dev/null
+++ b/src/arm/PL1/loader/loader_stage1.S
@@ -0,0 +1,55 @@
+/* arm mode, cortex-a7 compatibility
+ *
+ * _boot is entry point for the loader.
+ *
+ * Loader copies its embedded stage 2 to address 0x4000
+ * and jumps to it. Registers r0 - r2 are arguments for the kernel
+ * and should be left intact.
+ */
+
+.global _boot
+_boot:
+ // Only let the first core execute
+ // (c0, c0, 5 is the MPIDR; its two lowest bits hold the core number)
+ mrc p15, 0, r3, c0, c0, 5
+ and r3, r3, #3
+ cmp r3, #0
+ beq proceed
+ // Every other core is parked here. wfe returns as soon as any
+ // event is signalled, so it has to be executed in a loop -
+ // without the branch back a woken core would fall through into
+ // 'proceed', repeat the copy and enter stage 2 next to core 0.
+ // this is still a kind of bluff - races can theoretically still
+ // occur when the main core overwrites this part of memory
+secondary_park:
+ wfe
+ b secondary_park
+
+proceed:
+ // copy stage2 of the loader to address 0x4000
+
+ // first, load address of stage2_start to r3 (a PIC way)
+ adr r3, stage2_start
+
+ // load destination address for stage2 code to r4
+ mov r4, #0x4000
+
+ // load blob size to r5
+ mov r5, #(stage2_end - stage2_start)
+
+ // r6 is the counter - counts the bytes copied
+ mov r6, #0
+
+ // each word of the blob is loaded to r7 and stored
+ // from r7 to its destination in a loop
+ // (the size check comes after the store, so at least one word
+ // is always copied and the size is effectively rounded up to 4)
+loop:
+ ldr r7, [r3, r6]
+ str r7, [r4, r6]
+ add r6, r6, #4
+ cmp r6, r5
+ blo loop
+
+ // Initialize the stack
+ // _stack_top is defined in loader_stage1_linker.ld
+ ldr sp, =_stack_top
+
+ // Call stage2 of the loader (branch to 0x4000)
+ // r0 - r2 have not been touched and are passed on as they are
+ bx r4
+
+ // the embedded stage 2 image; stage2_start and stage2_end
+ // delimit it for the copy loop above
+.align 4
+stage2_start:
+ .incbin "loader_stage2.img"
+stage2_end:
diff --git a/src/arm/PL1/loader/loader_stage1_linker.ld b/src/arm/PL1/loader/loader_stage1_linker.ld
new file mode 100644
index 0000000..711fcbf
--- /dev/null
+++ b/src/arm/PL1/loader/loader_stage1_linker.ld
@@ -0,0 +1,16 @@
+ENTRY(_boot) /* _boot is the label exported by loader_stage1.S */
+
+SECTIONS
+{
+ /* see linker.ld for details */
+ . = 0x2000000; /* address the first stage is linked at */
+
+ __start = .; /* first address of the image */
+ loader_stage1 :
+ {
+ KEEP(loader_stage1.o) /* KEEP protects the input from section garbage collection */
+ }
+ __end = .; /* first address past the image */
+
+ _stack_top = 0x8000; /* loaded into sp by loader_stage1.S before it branches to stage 2 */
+}
diff --git a/src/arm/PL1/loader/loader_stage2.c b/src/arm/PL1/loader/loader_stage2.c
new file mode 100644
index 0000000..fc3ae1c
--- /dev/null
+++ b/src/arm/PL1/loader/loader_stage2.c
@@ -0,0 +1,33 @@
+#include <stddef.h>
+#include <stdint.h>
+#include "uart.h"
+#include "io.h"
+#include "global.h"
+
+void *const kernel_load_addr = ((void*) 0x8000); // where the received kernel image is stored and where execution continues afterwards
+
+// Second stage of the loader: receives a kernel image over the UART,
+// stores it at kernel_load_addr and transfers control to it.
+// r0, r1 and atags are not interpreted here; they are handed to the
+// kernel exactly as they were received.
+void _stage2_main(uint32_t r0, uint32_t r1, uint32_t atags)
+{
+  uart_init();
+
+  // the image is preceded by its size: 4 bytes, least significant
+  // byte first
+  uint32_t kernel_size = 0;
+
+  for (unsigned int shift = 0; shift < 32; shift += 8)
+    kernel_size |= ((uint32_t) getchar()) << shift;
+
+  // store the following kernel_size bytes starting at kernel_load_addr
+  char *cursor = kernel_load_addr;
+  char *const limit = cursor + kernel_size;
+
+  for (; cursor < limit; cursor++)
+    *cursor = getchar();
+
+  // treat the load address as a function taking the three boot
+  // arguments and call it
+  void (*kernel_entry)(uint32_t, uint32_t, uint32_t) =
+    (void(*)(uint32_t, uint32_t, uint32_t)) kernel_load_addr;
+
+  kernel_entry(r0, r1, atags);
+}
+
diff --git a/src/arm/PL1/loader/loader_stage2_linker.ld b/src/arm/PL1/loader/loader_stage2_linker.ld
new file mode 100644
index 0000000..33e79e9
--- /dev/null
+++ b/src/arm/PL1/loader/loader_stage2_linker.ld
@@ -0,0 +1,16 @@
+ENTRY(_stage2_main) /* _stage2_main is defined in loader_stage2.c */
+
+SECTIONS
+{
+ /* see loader_stage1.S for details */
+ . = 0x4000; /* stage 1 copies this image to 0x4000 and branches there */
+
+ __start = .; /* first address of the image */
+ loader_stage2 :
+ {
+ KEEP(loader_stage2.o(.text)) /* code of loader_stage2.o goes first, so it sits at 0x4000 */
+ loader_stage2.o /* remaining sections of loader_stage2.o */
+ uart.o /* presumably provides the uart_init()/getchar() used by loader_stage2.c */
+ }
+ __end = .; /* first address past the image */
+}