/* arm mode, cortex-a7 compatibility
 *
 * _boot is the entry point of the loader.
 *
 * The loader copies its embedded stage 2 to address 0x4000
 * and jumps to it. Registers r0 - r2 are arguments for the kernel
 * and must be left intact, so only r3 - r7 are used as scratch here.
 */

// Address stage 2 is copied to and then executed from
.equ STAGE2_ADDR, 0x4000

.global _boot
_boot:
	// Only let the first core execute: read MPIDR and keep its two
	// lowest bits, which are zero only on the first core
	mrc p15, 0, r3, c0, c0, 5
	and r3, r3, #3
	cmp r3, #0
	beq proceed

	// Every other core is parked here so that it never reaches the
	// copy code below.
	// This is a kind of bluff - races can theoretically still occur
	// when the main core overwrites this part of memory
park:
	wfe
	b park

proceed:
	// copy stage2 of the loader to address STAGE2_ADDR

	// first, load address of stage2_start to r3 (a PIC way)
	adr r3, stage2_start

	// load destination address for stage2 code to r4
	mov r4, #STAGE2_ADDR

	// load blob size to r5; it is read from memory because an
	// arbitrary size cannot always be encoded as a mov immediate
	ldr r5, stage2_size

	// r6 is the counter - counts the bytes copied
	mov r6, #0

	// each word of the blob is loaded to r7 and stored
	// from r7 to its destination in a loop; the copy proceeds in
	// whole words until at least r5 bytes have been transferred
loop:
	ldr r7, [r3, r6]
	str r7, [r4, r6]
	add r6, r6, #4
	cmp r6, r5
	blo loop

	// Initialize the stack
	// _stack_top is defined in loader_stage1_linker.ld
	ldr sp, =_stack_top

	// Call stage2 of the loader (branch to STAGE2_ADDR)
	bx r4

// Size of the embedded stage 2 blob in bytes, read by the code above
stage2_size:
	.word stage2_end - stage2_start

	// Emit the literal pool (holds _stack_top) right here, so that it
	// stays within ldr range no matter how big the blob below is
	.ltorg

// On ARM .align takes a power of two: the blob starts on a 16-byte
// boundary, which keeps the word loads in the copy loop aligned
.align 4
stage2_start:
	.incbin "loader_stage2.img"
stage2_end: