src/boot/kernel_stage1.S


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168

/* arm mode, cortex-a7 compatibility
 *
 * _boot is the entry point for the kernel.
 *
 * The kernel copies its embedded stage 2 to address 0x0 and jumps to
 * it (to the reset handler). Registers r0 - r2 are arguments for
 * the kernel, but we're not using them for now.
 *
 * This file is based on (and almost identical to) loader_stage1.S
 */

.global _boot
/*
 * _boot - entry point of kernel stage 1 (ARM mode).
 *
 * In:       r0 - r2 as handed over by the previous boot stage; they are
 *           not modified here.
 * Clobbers: r3, flags.
 *
 * Only the core whose number (two lowest bits of the value read from
 * CP15 c0, c0, 5) equals 0 continues to `proceed`. Every other core is
 * parked in a wfe loop.
 */
_boot:
	// Only let the first core execute
	mrc p15, 0, r3, c0, c0, 5
	ands r3, r3, #3
	beq proceed

	// Secondary cores park here. wfe returns as soon as an event or
	// an interrupt arrives, so it has to sit in a loop - otherwise a
	// woken core would fall through into the data word placed right
	// after this code and then run `proceed` alongside the first core.
	// This is still a kind of bluff - races can theoretically occur
	// when the main core overwrites this part of memory.
park_secondary_core:
	wfe
	b park_secondary_core

	// Strategy note: the size of stage1 tells us where free memory
	// begins behind it. The atags/fdt get copied there, so that
	// stage2 deployed at 0x0 does not overwrite them.

// Magic number expected in the second word of an atags structure.
atags_magic:
	.word 0x54410001

// proceed - check whether r2 points at atags.
//   In:       r2 - pointer to the structure passed to us
//   Out:      no atags: r0 = 3 and a jump to stage2_blob_copying;
//             atags:    falls through with r0 untouched
//   Clobbers: r3, r4, flags
proceed:
	// r4 = expected magic, fetched with a pc-relative literal load
	ldr r4, atags_magic
	// r3 = second word of the structure r2 points at
	// (btw, location of ATAGS is always 0x100)
	ldr r3, [r2, #4]

	// equal means it really is atags and not sth else (i.e. fdt)
	cmp r3, r4

	// r0 normally contains 0 at start; the value 3 tells stage2
	// code that no atags were found
	movne r0, #3
	bne stage2_blob_copying

	// atags were found - measure the list before copying it.
	//   In:       r2 - start of atags
	//   Out:      r3 - size of atags in bytes, terminating tag included
	//   Clobbers: r4, r5, flags

	// r3 walks over the tag headers, starting at the first one
	mov r3, r2

find_end_of_atags_loop:
	// r4 = first header word of the current tag (tag size in words)
	ldr r4, [r3]
	// advance r3 by 4 * tag_size bytes, i.e. to the next tag
	add r3, r3, r4, lsl #2

	// r5 = second header word of that next tag (tag type)
	ldr r5, [r3, #4]

	// tag type 0 marks the last tag; keep walking otherwise
	cmp r5, #0
	bne find_end_of_atags_loop

	// r3 points at the header of the last tag: turn it into an offset
	// from the list start and add the 8 bytes of that last header
	sub r3, r3, r2
	add r3, r3, #8
	
	// At this point r2 and r3 hold the start and the size of atags,
	// respectively. Now compute where there is free space to put
	// atags in: either right after our blob or, if it does not fit
	// between the blob end and the copying loop, after stage1.
	//   Out:      r5 - 4-aligned destination address for atags
	//   Clobbers: r6, r7, lr, flags

	// get blob size to r5; the blob is deployed at 0x0, so its size
	// is also the first address behind it
	adr r5, blob_size
	ldr r5, [r5]

	// we can only copy atags to a 4-aligned address
	mov r6, #4
	bl aling_r5_to_r6

	// compute where atags copied right after blob would end
	add r6, r5, r3
	// we can only overwrite stuff before the copying loop
	adr r7, copy_atags_loop
	// both operands are addresses, so the comparison must be
	// unsigned (bls) rather than signed (ble)
	cmp r6, r7
	bls copy_atags

	// atags would not fit - use memory after stage1 as destination
	adr r5, _boot
	adr r6, stage1_size
	ldr r6, [r6]
	add r5, r5, r6
	mov r6, #4
	bl aling_r5_to_r6
	
// copy_atags - copy atags word by word to the chosen destination.
//   In:       r2 - atags start, r3 - atags size in bytes,
//             r5 - destination
//   Out:      r2 - new atags address (equal to r5)
//   Clobbers: r4 (byte offset), r6 (transfer buffer), flags
copy_atags:
	// start at offset 0
	mov r4, #0

copy_atags_loop:
	// move one word from source + offset to destination + offset
	ldr r6, [r2, r4]
	str r6, [r5, r4]
	add r4, r4, #4
	// keep going while the offset is (unsigned) below the size
	cmp r4, r3
	blo copy_atags_loop

	// from now on r2 refers to the relocated atags
	mov r2, r5
	// atags stuff done; proceed
	b stage2_blob_copying

// aling_r5_to_r6 - mini-function aligning r5 up to r6.
//   In:       r5 - value to align
//             r6 - alignment (callers in this file pass 4)
//             lr - return address (call it with bl)
//   Out:      r5 = ((r5 - 1) & ~(r6 - 1)) + r6
//   Clobbers: r7
aling_r5_to_r6:
	// r7 = r6 - 1, used as the mask of bits to clear
	sub r7, r6, #1
	sub r5, r5, #1
	bic r5, r5, r7
	add r5, r5, r6
	bx lr

	
// stage2_blob_copying - prepare copying of kernel stage2 to address 0x0
// and jump to the copying loop located right behind the embedded blob.
//   Registers set up for that loop:
//     r3 - source: address of stage2_start
//     r4 - destination: 0x0
//     r5 - blob size in bytes
//     r6 - counter of bytes copied so far, starts at 0
//   Clobbers: r7
stage2_blob_copying:

	// destination address for stage2 code and the byte counter
	mov r4, #0
	mov r6, #0

	// address of stage2_start, obtained pc-relatively (a PIC way)
	adr r3, stage2_start

	// The blob size might get too big for an immediate value, so
	// it is loaded from memory.
	ldr r5, blob_size

	// This initial piece of code might get overwritten when stage2
	// is copied, so the actual copying loop sits after the stage2
	// blob. To stay PIC, the address of stage2_end is computed into
	// r7 as stage2_start + blob size, and we branch there.
	add r7, r3, r5
	bx r7

// Size of the embedded stage2 image in bytes (stage2_end - stage2_start),
// computed at assembly time.
blob_size:
	.word stage2_end - stage2_start
// Size of the whole stage1, measured from _boot up to stage1_end; this
// includes the embedded image and the code that follows it.
stage1_size:
	.word stage1_end - _boot

// The embedded stage2 image. Code elsewhere in this file reaches
// stage2_end by adding blob_size to the address of stage2_start.
// NOTE(review): with GNU as for ARM, .align takes a power of two, which
// would make this a 16-byte boundary - confirm that is intended.
// NOTE(review): nothing re-aligns the location counter after the image,
// so the instructions following stage2_end look word-aligned only if the
// image size is a multiple of 4 - confirm the image is padded accordingly.
.align 4
stage2_start:
	.incbin "kernel_stage2.img"
stage2_end:

	// Copying loop for the stage2 blob. `loop` is at the same address
	// as stage2_end, and it is entered by a computed branch to that
	// address; it is placed behind the blob so that deploying stage2
	// at 0x0 does not overwrite it.
	//   In: r3 - source address of the blob
	//       r4 - destination address
	//       r5 - blob size in bytes
	//       r6 - number of bytes copied so far
	// Each word of the blob is loaded to r7 and stored from r7 to
	// its destination; r6 advances by 4 per iteration.
loop:
	ldr r7, [r3, r6]
	str r7, [r4, r6]
	add r6, r6, #4
	cmp r6, r5
	// unsigned comparison: repeat while r6 is still below r5
	blo loop

        // Call stage2 of the kernel (branch to 0x0,
	// which is the reset handler).
        bx r4
	
// End marker of stage1; used to compute stage1_size.
stage1_end: