/* kernel_stage1.S */

/* arm mode, cortex-a7 compatibility
 *
 * _boot is the entry point for the kernel.
 *
 * The kernel copies its embedded stage 2 to address 0x0 and jumps to
 * it (to the reset handler). Registers r0 - r2 are arguments for
 * the kernel, but we're not using them for now.
 *
 * This file is based on (and almost identical to) loader_stage1.S
 */
	
.global _boot
_boot:
	// Kernel entry point (ARM state). r0 - r2 are left untouched.
	//
	// Only the core whose CPU ID field (MPIDR bits [1:0]) is 0 goes
	// on to copy stage 2; every other core is parked below.
	mrc p15, 0, r3, c0, c0, 5	// r3 = MPIDR
	ands r3, r3, #3			// keep the CPU ID bits, set Z if 0
	beq proceed

	// Park the secondary cores. WFE may return immediately (event
	// register already set) or on any later event, so it has to sit
	// in a loop - otherwise a woken core would fall through into
	// "proceed" and start copying as well.
	//
	// This is still a kind of bluff - races can theoretically occur
	// when the main core overwrites this part of memory.
1:
	wfe
	b 1b

proceed:
	// Prepare the registers for the copy loop that lives right
	// behind the stage2 blob:
	//   r3 = source address      (where stage2_start ended up)
	//   r4 = destination address (0x0)
	//   r5 = size of the blob in bytes
	//   r6 = number of bytes copied so far

	// PC-relative, so it works wherever this code was loaded.
	adr r3, stage2_start

	// The size may not fit in an immediate operand, hence it is
	// kept in memory and fetched with a PC-relative load.
	ldr r5, blob_size

	mov r4, #0
	mov r6, #0

	// Copying stage2 to 0x0 may overwrite the instructions above,
	// so the loop itself is placed after the blob. Its address is
	// stage2_start + blob size; computing it at run time keeps
	// this code position independent.
	add r7, r3, r5
	bx r7
	
blob_size:
	// Number of bytes between stage2_start and stage2_end. It is
	// used both as the amount of data to copy and as the offset
	// from stage2_start to the code that follows the blob.
	.word stage2_end - stage2_start

// 16-byte boundary (on ARM, gas treats ".align 4" the same way).
.p2align 4
stage2_start:
	.incbin "kernel_stage2.img"
	// Pad the blob to a multiple of 4 bytes. The code following
	// stage2_end is reached with "bx" to stage2_start + blob_size,
	// so that address has to be word aligned: bit 0 set would make
	// bx switch to Thumb state, and ARM instructions must start on
	// a 4-byte boundary anyway. The copy works in whole words, so
	// the padding does not change the number of words copied.
	.balign 4, 0
stage2_end:

	// Copy loop, entered via "bx r7" from the code at "proceed".
	// Register use (as set up there):
	//   r3 = source base, r4 = destination base (0x0),
	//   r5 = blob size in bytes, r6 = byte offset copied so far,
	//   r7 = scratch for the word being moved.
	//
	// Each word of the blob is loaded to r7 and stored from r7 to
	// its destination. The end test is at the bottom, so at least
	// one word is always copied, and a size that is not a multiple
	// of 4 is effectively rounded up to the next whole word.
loop:
	ldr r7, [r3, r6]
	str r7, [r4, r6]
	add r6, r6, #4
	cmp r6, r5
	blo loop
	
        // Call stage2 of the kernel (branch to 0x0,
	// which is the reset handler). r4 still holds 0 here because
	// the loop only uses it as a base register.
        bx r4