.text
.code32
.proc	bzero
.global bzero
; ----------------------------------------------------------------------
; bzero -- clear a run of bytes.
;
; In:    r0 = start address (any alignment)
;        r1 = byte count, compared as a signed value; a count <= 0
;             clears nothing
; Out:   no result; r0 and r1 are consumed
; Uses:  r21 (constant zero), r22, r23 (scratch), c0
;
; Memory is only ever accessed as whole, aligned words.  Partial words
; at the head and tail of the range are handled by read-modify-write
; with an AND mask.  The masks treat bits 31..24 of a word as the
; lowest-addressed byte (big-endian layout).
;
; The code relies on branch delay slots: the instruction following
; every branch/jump is executed whether or not the branch is taken.
; ----------------------------------------------------------------------
bzero:	ldri	r21,0			; r21 = zero word stored by the bulk loop
	cmpi	c0,r1,0
	belt	c0,bzero_done		; count <= 0: touch nothing at all
	nop
	andi	r22,r0,0x3		; r22 = byte offset of r0 inside its word
	cmpi	c0,r22,0
	beq	c0,bzero_word_aligned
	nop

bzero_align_word:
	ldri	r23,0xfffffffc
	and	r0,r23,r0		; round r0 down to the containing word
	cmpi	c0,r22,1
	beq	c0,bzero_second_byte
	cmpi	c0,r22,2		; delay slot: sets up the next test
	beq	c0,bzero_third_byte
	nop
	; Offset 3: only the last byte of this word belongs to the range.
	; The count is known to be >= 1 here.
	ld	r22,r0,0
	ldri	r23,0xffffff00
	and	r22,r22,r23
	st	r22,r0,0
	addi	r0,r0,4
	jmp	bzero_word_aligned
	addi	r1,r1,-1		; delay slot: one byte consumed

bzero_second_byte:
	; Offset 1: up to three bytes of this word belong to the range.
	; Short counts end inside this word and finish via the final store.
	ldri	r22,0xff00ffff		; count == 1: clear byte 1 only
	cmpi	c0,r1,1
	beq	c0,bzero_final_store
	nop
	ldri	r22,0xff0000ff		; count == 2: clear bytes 1..2
	cmpi	c0,r1,2
	beq	c0,bzero_final_store
	nop
	ldri	r22,0xff000000		; count >= 3: clear bytes 1..3
	cmpi	c0,r1,3
	beq	c0,bzero_final_store
	nop
	ld	r23,r0,0
	and	r23,r22,r23
	st	r23,r0,0
	addi	r1,r1,-3
	jmp	bzero_word_aligned
	addi	r0,r0,4			; delay slot: step to the next word

bzero_third_byte:
	; Offset 2: up to two bytes of this word belong to the range.
	ldri	r22,0xffff00ff		; count == 1: clear byte 2 only
	cmpi	c0,r1,1
	beq	c0,bzero_final_store
	nop
	ldri	r22,0xffff0000		; count >= 2: clear bytes 2..3
	ld	r23,r0,0
	and	r23,r22,r23
	st	r23,r0,0
	addi	r1,r1,-2
	jmp	bzero_word_aligned
	addi	r0,r0,4			; delay slot: r0 is word aligned here, so
					; the next word is 4 bytes on (not 2)

bzero_word_aligned:
	; Bulk loop: r0 is word aligned, r1 = bytes still to clear.
	cmpi	c0,r1, 4
	blt	c0,bzero_less_than_word
	nop
	st	r21,r0,0
	addi	r1,r1,-4
	jmp	bzero_word_aligned
	addi	r0,r0,4			; delay slot

bzero_less_than_word:
	; Tail: 0..3 bytes remain at the start of the word at r0.
	cmpi	c0,r1,0
	beq	c0,bzero_done		; nothing left: do not touch the next word
	nop
	ldri	r22,0x00ffffff		; 1 byte left
	cmpi	c0,r1,1
	beq	c0,bzero_final_store
	nop
	ldri	r22,0x0000ffff		; 2 bytes left
	cmpi	c0,r1,2
	beq	c0,bzero_final_store
	nop
	ldri	r22,0x000000ff		; 3 bytes left
bzero_final_store:
	; r22 = AND mask with zero bits over the bytes to clear.
	ld	r23,r0,0
	and	r22,r22,r23
	st	r22,r0,0
bzero_done:
	jmpr	r31
	nop
.endproc bzero
		
.proc bcopy
.global bcopy
; ----------------------------------------------------------------------
; bcopy -- copy bytes from a source to a destination.
;
; In:    r0 = source address
;        r1 = destination address
;        r2 = byte count, compared as a signed value; a count <= 0
;             copies nothing
; Out:   r0 = the destination address as passed in r1
; Uses:  r23, r24, r25 (scratch), c0; r22 is saved and restored
;
; All memory accesses are whole-word loads/stores at r0 and r1, so both
; pointers are assumed to be word aligned -- TODO confirm with callers.
; Overlapping ranges are not handled specially (simple forward copy).
; The final 1..3 bytes are merged into the destination word with a
; mask that treats bits 31..24 as the lowest-addressed byte.
;
; NOTE(review): r22 is spilled to the word at r27-4 without adjusting
; r27; this presumes r27 is the stack pointer and that the slot below
; it is safe to use -- verify against the ABI.
; ----------------------------------------------------------------------
bcopy:
bcopy_start:
	st	r22,r27,-4		; preserve the caller's r22 (done once)
	mov	r22,r1			; remember the original destination
bcopy_word_loop:
	; Bulk loop.  It must not branch back to bcopy_start: re-running the
	; two instructions above would overwrite the saved r22 and lose the
	; original destination.
	cmpi	c0,r2, 4
	blt	c0,bcopy_less_than_4
	nop
	ld	r23,r0,0
	st	r23,r1,0
	addi	r0,r0,4
	addi	r1,r1,4
	addi	r2,r2,-4
	jmp	bcopy_word_loop
	nop
bcopy_less_than_4:
	cmpi	c0,r2,0
	belt	c0,bcopy_all_done	; nothing (or a negative count) left
	nop
	ld	r23,r0,0		; r23 = last source word
	ld	r24,r1,0		; r24 = current destination word
	ldri	r25,0xff000000		; r25 = mask covering one leading byte
bcopy_looppi:
	; Widen the mask by one byte for every byte beyond the first, so
	; that r25 ends up covering exactly the leading r2 bytes (r2 = 1..3).
	addi	r2,r2,-1
	cmpi	c0,r2,0
	beq	c0,bcopy_final_store
	nop
	jmp	bcopy_looppi
	srai	r25,r25,8		; delay slot: arithmetic shift extends the mask
bcopy_final_store:
	and	r23,r23,r25		; leading bytes taken from the source
	not	r25,r25
	and	r24,r24,r25		; trailing bytes kept from the destination
	or	r24,r23,r24
	st	r24,r1,0
bcopy_all_done:
	mov	r0,r22			; return the original destination
	ld	r22,r27,-4		; restore the caller's r22
	jmpr	r31
	nop
.endproc bcopy


.proc memcpy
.global memcpy
; ----------------------------------------------------------------------
; memcpy -- copy bytes from a source to a destination.
;
; In:    r0 = destination address
;        r1 = source address
;        r2 = byte count, compared as a signed value; a count <= 0
;             copies nothing
; Out:   r0 and r1 are left advanced past the words copied by the bulk
;        loop.
;        NOTE(review): C memcpy is specified to return the original
;        destination; this routine does not restore r0 -- confirm what
;        callers expect before relying on a return value.
; Uses:  r23, r24, r25 (scratch), c0
;
; All memory accesses are whole-word loads/stores at r0 and r1, so both
; pointers are assumed to be word aligned -- TODO confirm with callers.
; The final 1..3 bytes are merged into the destination word with a
; mask that treats bits 31..24 as the lowest-addressed byte.
; ----------------------------------------------------------------------
memcpy:
memcpy_start:
	; Bulk loop: copy one word per iteration while >= 4 bytes remain.
	cmpi	c0,r2,4
	blt	c0,memcpy_less_than_4
	nop
	ld	r23,r1,0
	st	r23,r0,0
	addi	r0,r0,4
	addi	r1,r1,4
	addi	r2,r2,-4
	jmp	memcpy_start
	nop
memcpy_less_than_4:
	cmpi	c0,r2,0
	belt	c0,memcpy_all_done	; nothing (or a negative count) left
	nop
	ld	r23,r1,0		; r23 = last source word
	ld	r24,r0,0		; r24 = current destination word
	ldri	r25,0xff000000		; r25 = mask covering one leading byte
memcpy_looppi:
	; Widen the mask by one byte for every byte beyond the first, so
	; that r25 ends up covering exactly the leading r2 bytes (r2 = 1..3).
	; Shifting once per remaining byte would make it one byte too wide.
	addi	r2,r2,-1
	cmpi	c0,r2,0
	beq	c0,memcpy_final_store
	nop
	jmp	memcpy_looppi
	srai	r25,r25,8		; delay slot: arithmetic shift extends the mask
memcpy_final_store:
	and	r23,r23,r25		; leading bytes taken from the source
	not	r25,r25
	and	r24,r24,r25		; trailing bytes kept from the destination
	or	r24,r23,r24
	st	r24,r0,0
memcpy_all_done:
	jmpr	r31
	nop
.endproc memcpy
