aboutsummaryrefslogtreecommitdiffstats
path: root/Tremolo/bitwiseARM.s
diff options
context:
space:
mode:
Diffstat (limited to 'Tremolo/bitwiseARM.s')
-rw-r--r--Tremolo/bitwiseARM.s368
1 files changed, 368 insertions, 0 deletions
diff --git a/Tremolo/bitwiseARM.s b/Tremolo/bitwiseARM.s
new file mode 100644
index 0000000..f79b26e
--- /dev/null
+++ b/Tremolo/bitwiseARM.s
@@ -0,0 +1,368 @@
+@ Tremolo library
+@ Copyright (C) 2009 Robin Watts for Pinknoise Productions Ltd
+
+ .text
+
+ .global oggpack_look
+ .global oggpack_adv
+ .global oggpack_readinit
+ .global oggpack_read
+
+oggpack_look:
+ @ Return (in r0) the next 'bits' bits of the stream, masked to
+ @ (1<<bits)-1. The code below is the fast path, used when the current
+ @ segment still holds at least 'bits' bits; it performs no stores to *b.
+ @ r0 = oggpack_buffer *b
+ @ r1 = int bits
+ STMFD r13!,{r10,r11,r14}
+ LDMIA r0,{r2,r3,r12}
+ @ r2 = bitsLeftInSegment
+ @ r3 = ptr
+ @ r12= bitsLeftInWord
+ SUBS r2,r2,r1 @ bitsLeftinSegment -= bits
+ BLT look_slow @ Not enough bits in this segment for
+ @ this request. Do it slowly.
+ LDR r10,[r3] @ r10= ptr[0]
+ RSB r14,r12,#32 @ r14= 32-bitsLeftInWord
+ SUBS r12,r12,r1 @ r12= bitsLeftInWord -= bits
+ @ The flags from this SUBS drive every conditional instruction below;
+ @ none of the intervening instructions sets flags.
+ LDRLT r11,[r3,#4]! @ r11= ptr[1], needed only when the
+ @ request runs past the end of ptr[0]
+ MOV r10,r10,LSR r14 @ r10= ptr[0]>>(32-bitsLeftInWord)
+ ADDLE r12,r12,#32 @ r12= bitsLeftInWord += 32
+ RSB r14,r14,#32 @ r14= bitsLeftInWord as loaded from *b
+ ORRLT r10,r10,r11,LSL r14 @ r10= Next 32 bits.
+ MOV r14,#1
+ RSB r14,r14,r14,LSL r1 @ r14= (1<<bits)-1
+ AND r0,r10,r14 @ r0 = low 'bits' bits of r10
+ LDMFD r13!,{r10,r11,PC}
+
+look_slow:
+ @ Slow path of oggpack_look: the current segment cannot satisfy the
+ @ request on its own. On entry:
+ @ r0 = b, r1 = bits, r2 = bitsLeftInSegment - bits (negative),
+ @ r3 = ptr, r12 = bitsLeftInWord, and {r10,r11,r14} are already stacked.
+ @ Nothing in this path stores to *b.
+ STMFD r13!,{r5,r6}
+ ADDS r10,r2,r1 @ r10= bitsLeftInSegment + bits (i.e.
+ @ the initial value of bitsLeftInSeg)
+ @ r10 = bitsLeftInSegment (initial)
+ @ r12 = bitsLeftInWord
+ RSB r14,r12,#32 @ r14= 32-bitsLeftInWord
+ MOV r5,r10 @ r5 = bitsLeftInSegment (initial)
+ @ The two branches below still test the flags of the ADDS above
+ @ (RSB and MOV without the S suffix leave them alone).
+ BLT look_overrun
+ BEQ look_next_segment @ r10= r12 = 0, if we branch
+ CMP r12,r10 @ If bitsLeftInWord < bitsLeftInSeg
+ @ there must be more in the next word
+ LDR r10,[r3],#4 @ r10= ptr[0]
+ LDRLT r6,[r3] @ r6 = ptr[1]
+ MOV r11,#1
+ MOV r10,r10,LSR r14 @ r10= first bitsLeftInWord bits
+ ORRLT r10,r10,r6,LSL r12 @ r10= first bitsLeftInSeg bits+crap
+ RSB r11,r11,r11,LSL r5 @ r11= mask
+ AND r10,r10,r11 @ r10= first r5 bits
+ @ Load the next segments data
+look_next_segment:
+ @ At this point, r10 contains the first r5 bits of the result
+ LDR r11,[r0,#12] @ r11= head = b->head
+ @ Stall
+ @ Stall
+look_next_segment_2:
+ LDR r11,[r11,#12] @ r11= head = head->next
+ @ Stall
+ @ Stall
+ CMP r11,#0
+ BEQ look_out_of_data
+ LDMIA r11,{r6,r12,r14} @ r6 = buffer
+ @ r12= begin
+ @ r14= length
+ LDR r6,[r6] @ r6 = buffer->data
+ CMP r14,#0
+ BEQ look_next_segment_2 @ Empty segment: try the one after it
+ ADD r6,r6,r12 @ r6 = buffer->data+begin
+look_slow_loop:
+ @ Append one byte per iteration above the r5 bits gathered so far.
+ @ r6 = next byte to read, r14 = bytes left in this segment.
+ LDRB r12,[r6],#1 @ r12= *buffer
+ SUBS r14,r14,#1 @ r14= length
+ @ Stall
+ ORR r10,r10,r12,LSL r5 @ r10= first r5+8 bits
+ ADD r5,r5,#8
+ BLE look_really_slow @ Flags from the SUBS: segment used up
+ CMP r5,r1
+ BLT look_slow_loop @ Still fewer than 'bits' bits gathered
+ MOV r14,#1
+ RSB r14,r14,r14,LSL r1 @ r14= (1<<bits)-1
+ AND r0,r10,r14
+ LDMFD r13!,{r5,r6,r10,r11,PC}
+
+
+look_really_slow:
+ @ The segment ran out. Either enough bits were gathered anyway, or
+ @ the walk continues with the segment after r11.
+ CMP r5,r1
+ BLT look_next_segment_2
+ MOV r14,#1
+ RSB r14,r14,r14,LSL r1 @ r14= (1<<bits)-1
+ AND r0,r10,r14
+ LDMFD r13!,{r5,r6,r10,r11,PC}
+
+look_out_of_data:
+ @ No further segment in the chain: the result is 0 (the -1 return
+ @ is left commented out below).
+ @MVN r0,#0 ; return -1
+ MOV r0,#0
+ LDMFD r13!,{r5,r6,r10,r11,PC}
+
+look_overrun:
+ @ We had overrun when we started, so we need to skip -r10 bits.
+ @ Entered from look_slow with:
+ @ r0 = b, r1 = bits, r5 = r10 = bitsLeftInSegment (negative)
+ @ {r5,r6} and {r10,r11,r14} are stacked. r12 (bitsLeftInWord) is
+ @ not needed on this path, so it serves as scratch; only stacked or
+ @ caller-saved registers are written.
+ @ Nothing here stores to *b.
+ LDR r11,[r0,#12] @ r11 = head = b->head
+ @ stall
+ @ stall
+look_overrun_next_segment:
+ LDR r11,[r11,#12] @ r11 = head->next
+ @ stall
+ @ stall
+ CMP r11,#0
+ BEQ look_out_of_data
+ LDMIA r11,{r6,r12,r14} @ r6 = buffer
+ @ r12= begin
+ @ r14= length
+ LDR r6,[r6] @ r6 = buffer->data
+ @ stall
+ @ stall
+ ADD r6,r6,r12 @ r6 = buffer->data+begin
+ MOV r14,r14,LSL #3 @ r14= length in bits
+ ADDS r14,r14,r10 @ r14= length in bits-bits to skip
+ MOVLE r10,r14 @ Whole segment skipped: r10 = minus
+ @ the bits that remain to be skipped
+ BLE look_overrun_next_segment
+ @ This segment holds r14 (>0) bits beyond the skipped ones.
+ RSB r10,r10,#0 @ r10= bits to skip
+ ADD r6,r6,r10,LSR #3 @ r6 = first byte holding wanted bits
+ AND r5,r10,#7 @ r5 = bits to drop from that byte
+ ADD r14,r14,#7
+ MOV r14,r14,LSR #3 @ r14= bytes from r6 to segment end
+ @ (look_slow_loop counts bytes)
+ @ Take the first, possibly partial, byte here; look_slow_loop can
+ @ only append whole bytes at bit position r5.
+ LDRB r12,[r6],#1 @ r12= *buffer
+ SUBS r14,r14,#1 @ r14= bytes left after this one
+ MOV r10,r12,LSR r5 @ r10= first 8-r5 bits of the result
+ RSB r5,r5,#8 @ r5 = bits gathered so far
+ BLE look_really_slow @ Flags from the SUBS: segment used up
+ CMP r5,r1
+ BLT look_slow_loop
+ B look_really_slow @ r5 >= bits, so look_really_slow
+ @ just masks the result and returns
+
+oggpack_adv:
+ @ Advance the read position of *b by 'bits' bits.
+ @ r0 = oggpack_buffer *b
+ @ r1 = bits
+ LDMIA r0,{r2,r3,r12}
+ @ r2 = bitsLeftInSegment
+ @ r3 = ptr
+ @ r12= bitsLeftInWord
+ SUBS r2,r2,r1 @ Does this run us out of bits in the
+ BLE adv_slow @ segment? If so, do it slowly
+ SUBS r12,r12,r1 @ r12= bitsLeftInWord -= bits
+ ADDLE r12,r12,#32 @ Word used up: move on to the next
+ ADDLE r3,r3,#4 @ word, which has 32 more bits
+ STMIA r0,{r2,r3,r12}
+ MOV PC,R14
+adv_slow:
+ @ The advance reaches or passes the end of the current segment.
+ @ r2 (<= 0) is minus the number of bits still to be skipped once the
+ @ current segment has been left.
+ STMFD r13!,{r10,r14}
+
+ LDR r14,[r0,#12] @ r14= head
+ @ stall
+adv_slow_loop:
+ LDR r1,[r0,#20] @ r1 = count
+ LDR r10,[r14,#8] @ r10= head->length
+ LDR r14,[r14,#12] @ r14= head->next
+ @ stall
+ ADD r1,r1,r10 @ r1 = count += head->length
+ CMP r14,#0
+ BEQ adv_end @ No next segment: count and head are
+ @ left as they are in *b
+ STR r1,[r0,#20] @ b->count = count
+ STR r14,[r0,#12] @ b->head = head
+ LDMIA r14,{r3,r10,r12} @ r3 = buffer
+ @ r10= begin
+ @ r12= length
+ LDR r3,[r3] @ r3 = buffer->data
+ ADD r3,r3,r10 @ r3 = Pointer to start (byte)
+ AND r10,r3,#3 @ r10= bytes to backtrk to word align
+ MOV r10,r10,LSL #3 @ r10= bits to backtrk to word align
+ RSB r10,r10,#32 @ r10= bits left in word
+ ADDS r10,r10,r2 @ r10= bits left in word after skip
+ ADDLE r10,r10,#32 @ Skip uses up the first word: step
+ ADDLE r3,r3,#4 @ to the following word
+ BIC r3,r3,#3 @ r3 = Pointer to start (word)
+ ADDS r2,r2,r12,LSL #3 @ r2 = length in bits after advance
+ BLE adv_slow_loop @ This segment is used up as well
+ STMIA r0,{r2,r3,r10}
+
+ LDMFD r13!,{r10,PC}
+adv_end:
+ @ Ran off the end of the chain: store 0 for both bitsLeftInSegment
+ @ and bitsLeftInWord. r3 is stored as it stands (the ptr loaded on
+ @ entry, or the word pointer of the last segment walked).
+ MOV r2, #0
+ MOV r12,#0
+ STMIA r0,{r2,r3,r12}
+
+ LDMFD r13!,{r10,PC}
+
+oggpack_readinit:
+ @ Point *b at the start of the reference chain r and zero its count.
+ @ r0 = oggpack_buffer *b
+ @ r1 = oggreference *r
+ @ Fields of *b written: +0 bitsLeftInSegment, +4 ptr (word aligned),
+ @ +8 bitsLeftInWord, +12 head, +16 tail, +20 count.
+ STR r1,[r0,#12] @ b->head = r
+ STR r1,[r0,#16] @ b->tail = r
+ LDMIA r1,{r2,r3,r12} @ r2 = r->buffer
+ @ r3 = r->begin
+ @ r12= r->length
+ LDR r2,[r2] @ r2 = r->buffer->data
+ MOV r1,r12,LSL #3 @ r1 = bits in segment (length*8)
+ ADD r12,r2,r3 @ r12= data+begin (byte pointer)
+ AND r3,r12,#3 @ r3 = byte offset within its word
+ BIC r2,r12,#3 @ r2 = byte pointer rounded down to
+ @ a word boundary
+ RSB r3,r3,#4 @ r3 = bytes left in that word
+ MOV r3,r3,LSL #3 @ r3 = bits left in that word
+ STMIA r0,{r1,r2,r3} @ Store segment bits, ptr, word bits
+ MOV r12,#0
+ STR r12,[r0,#20] @ b->count = 0
+ MOV PC,R14
+
+oggpack_read:
+ @ Return (in r0) the next 'bits' bits of the stream, masked to
+ @ (1<<bits)-1, and advance the read position past them. The code
+ @ below is the fast path, used when the current segment still holds
+ @ at least 'bits' bits.
+ @ r0 = oggpack_buffer *b
+ @ r1 = int bits
+ STMFD r13!,{r10,r11,r14}
+ LDMIA r0,{r2,r3,r12}
+ @ r2 = bitsLeftInSegment
+ @ r3 = ptr
+ @ r12= bitsLeftInWord
+ SUBS r2,r2,r1 @ bitsLeftinSegment -= bits
+ BLT read_slow @ Not enough bits in this segment for
+ @ this request. Do it slowly.
+ LDR r10,[r3] @ r10= ptr[0]
+ RSB r14,r12,#32 @ r14= 32-bitsLeftInWord
+ SUBS r12,r12,r1 @ r12= bitsLeftInWord -= bits
+ @ The flags from this SUBS drive every conditional instruction below;
+ @ none of the intervening instructions sets flags.
+ ADDLE r3,r3,#4 @ Word used up: ptr moves to next word
+ LDRLT r11,[r3] @ r11= ptr[1]
+ MOV r10,r10,LSR r14 @ r10= ptr[0]>>(32-bitsLeftInWord)
+ ADDLE r12,r12,#32 @ r12= bitsLeftInWord += 32
+ RSB r14,r14,#32 @ r14= bitsLeftInWord as loaded from *b
+ ORRLT r10,r10,r11,LSL r14 @ r10= Next 32 bits.
+ STMIA r0,{r2,r3,r12} @ Write the new position back to *b
+ MOV r14,#1
+ RSB r14,r14,r14,LSL r1 @ r14= (1<<bits)-1
+ AND r0,r10,r14 @ r0 = low 'bits' bits of r10
+ LDMFD r13!,{r10,r11,PC}
+
+read_slow:
+ @ Slow path of oggpack_read: the current segment cannot satisfy the
+ @ request on its own. On entry:
+ @ r0 = b, r1 = bits, r2 = bitsLeftInSegment - bits (negative),
+ @ r3 = ptr, r12 = bitsLeftInWord, and {r10,r11,r14} are already stacked.
+ STMFD r13!,{r5,r6}
+ ADDS r10,r2,r1 @ r10= bitsLeftInSegment + bits (i.e.
+ @ the initial value of bitsLeftInSeg)
+ @ r10 = bitsLeftInSegment (initial)
+ @ r12 = bitsLeftInWord
+ RSB r14,r12,#32 @ r14= 32-bitsLeftInWord
+ MOV r5,r10 @ r5 = bitsLeftInSegment (initial)
+ @ The two branches below still test the flags of the ADDS above
+ @ (RSB and MOV without the S suffix leave them alone).
+ BLT read_overrun
+ BEQ read_next_segment @ r10= r12 = 0, if we branch
+ CMP r12,r10 @ If bitsLeftInWord < bitsLeftInSeg
+ @ there must be more in the next word
+ LDR r10,[r3],#4 @ r10= ptr[0]
+ LDRLT r6,[r3] @ r6 = ptr[1]
+ MOV r11,#1
+ MOV r10,r10,LSR r14 @ r10= first bitsLeftInWord bits
+ ORRLT r10,r10,r6,LSL r12 @ r10= first bitsLeftInSeg bits+crap
+ RSB r11,r11,r11,LSL r5 @ r11= mask
+ AND r10,r10,r11 @ r10= first r5 bits
+ @ Load the next segments data
+read_next_segment:
+ @ At this point, r10 contains the first r5 bits of the result
+ LDR r11,[r0,#12] @ r11= head = b->head
+ @ Stall
+read_next_segment_2:
+ @ r11 = head
+ @ Leave segment r11: add its length to the count and make its
+ @ successor the head. Neither store happens if there is no successor.
+ LDR r6,[r0,#20] @ r6 = count
+ LDR r12,[r11,#8] @ r12= length
+ LDR r11,[r11,#12] @ r11= head = head->next
+ @ Stall
+ ADD r6,r6,r12 @ count += length
+ CMP r11,#0
+ BEQ read_out_of_data
+ STR r11,[r0,#12] @ b->head = head
+ STR r6,[r0,#20] @ b->count = count
+ LDMIA r11,{r6,r12,r14} @ r6 = buffer
+ @ r12= begin
+ @ r14= length
+ LDR r6,[r6] @ r6 = buffer->data
+ CMP r14,#0
+ BEQ read_next_segment_2 @ Empty segment: try the one after it
+ ADD r6,r6,r12 @ r6 = buffer->data+begin
+read_slow_loop:
+ @ Append one byte per iteration above the r5 bits gathered so far.
+ @ r6 = next byte to read, r14 = bytes left in this segment.
+ LDRB r12,[r6],#1 @ r12= *buffer
+ SUBS r14,r14,#1 @ r14= length
+ @ Stall
+ ORR r10,r10,r12,LSL r5 @ r10= first r5+8 bits
+ ADD r5,r5,#8
+ BLE read_really_slow @ Flags from the SUBS: segment used up
+ CMP r5,r1
+ BLT read_slow_loop @ Still fewer than 'bits' bits gathered
+read_end:
+ MOV r12,#1
+ RSB r12,r12,r12,LSL r1 @ r12= (1<<bits)-1
+
+ @ Store back the new position
+ @ r2 = -number of bits to go from this segment
+ @ r11= New head value
+ @ The position is rebuilt from the fields of the head segment: r6
+ @ and r14 are reloaded here and their loop values are not used.
+ LDMIA r11,{r3,r6,r14} @ r3 = buffer
+ @ r6 = begin
+ @ r14= length
+ LDR r3,[r3] @ r3 = buffer->data
+ ADD r1,r2,r14,LSL #3 @ r1 = bitsLeftInSegment
+ @ stall
+ ADD r6,r3,r6 @ r6 = pointer
+ AND r3,r6,#3 @ r3 = bytes used in first word
+ RSB r3,r2,r3,LSL #3 @ r3 = bits used in first word
+ BIC r2,r6,#3 @ r2 = word ptr
+ RSBS r3,r3,#32 @ r3 = bitsLeftInWord
+ ADDLE r3,r3,#32 @ First word used up: step on by one
+ ADDLE r2,r2,#4 @ word (at most one word is stepped)
+ STMIA r0,{r1,r2,r3}
+
+ AND r0,r10,r12
+ LDMFD r13!,{r5,r6,r10,r11,PC}
+
+
+read_really_slow:
+ @ The segment ran out. Either enough bits were gathered anyway, or
+ @ the walk continues with the segment after r11.
+ CMP r5,r1
+ BGE read_end
+ LDR r14,[r11,#8] @ r14= length of segment just done
+ @ stall
+ @ stall
+ ADD r2,r2,r14,LSL #3 @ r2 = -bits to use from next seg
+ B read_next_segment_2
+
+read_out_of_data:
+ @ The chain ran out before 'bits' bits were gathered.
+ @ Store back the new position
+ @ The position written is the end of the segment b->head points at,
+ @ with bitsLeftInSegment = -1. Everything needed is reloaded from *b
+ @ below, so no register values from the read loop are relied on.
+ @ RJW: This may be overkill - we leave the buffer empty, with -1
+ @ bits left in it. We might get away with just storing the
+ @ bitsLeftInSegment as -1.
+ LDR r11,[r0,#12] @ r11=head
+
+ LDMIA r11,{r3,r6,r14} @ r3 = buffer
+ @ r6 = begin
+ @ r14= length
+ LDR r3,[r3] @ r3 = buffer->data
+ ADD r6,r3,r6 @ r6 = pointer
+ ADD r6,r6,r14 @ r6 = pointer to end of segment
+ AND r3,r6,#3 @ r3 = bytes used in first word
+ MOV r3,r3,LSL #3 @ r3 = bits used in first word
+ BIC r2,r6,#3 @ r2 = word ptr
+ RSBS r3,r3,#32 @ r3 = bitsLeftInWord
+ MVN r1,#0 @ r1 = -1 = bitsLeftInSegment
+ STMIA r0,{r1,r2,r3}
+ @MVN r0,#0 ; return -1
+ MOV r0,#0 @ The result is 0 (the -1 return is
+ @ left commented out above)
+ LDMFD r13!,{r5,r6,r10,r11,PC}
+
+read_overrun:
+ @ We had overrun when we started, so we need to skip -r10 bits.
+ @ Entered from read_slow with:
+ @ r0 = b, r1 = bits, r5 = r10 = bitsLeftInSegment (negative)
+ @ r2 = bitsLeftInSegment - bits
+ @ {r5,r6} and {r10,r11,r14} are stacked. r12 (bitsLeftInWord) is
+ @ not needed on this path, so it serves as scratch; only stacked or
+ @ caller-saved registers are written.
+ @ Segments are left the same way read_next_segment_2 leaves them
+ @ (count += length, b->head = next), because read_end and
+ @ read_out_of_data rebuild the stored position from b->head.
+ @ NOTE(review): read_end steps ptr on by at most one word. That is
+ @ enough for an overrun of 1 bit, which is the only negative
+ @ bitsLeftInSegment this file stores (-1, in read_out_of_data);
+ @ confirm no other code stores a larger overrun.
+ LDR r11,[r0,#12] @ r11 = head = b->head
+ @ stall
+read_overrun_next_segment:
+ LDR r6,[r0,#20] @ r6 = count
+ LDR r12,[r11,#8] @ r12= length of the segment we leave
+ LDR r11,[r11,#12] @ r11= head = head->next
+ @ stall
+ ADD r6,r6,r12 @ count += length
+ CMP r11,#0
+ BEQ read_out_of_data
+ STR r11,[r0,#12] @ b->head = head
+ STR r6,[r0,#20] @ b->count = count
+ LDMIA r11,{r6,r12,r14} @ r6 = buffer
+ @ r12= begin
+ @ r14= length
+ LDR r6,[r6] @ r6 = buffer->data
+ @ stall
+ @ stall
+ ADD r6,r6,r12 @ r6 = buffer->data+begin
+ ADDS r12,r10,r14,LSL #3 @ r12= length in bits-bits to skip
+ MOVLE r10,r12 @ Whole segment skipped: r10 = minus
+ @ the bits that remain to be skipped
+ ADDLE r2,r2,r14,LSL #3 @ ...and r2 no longer counts them
+ BLE read_overrun_next_segment
+ @ This segment holds r12 (>0) bits beyond the skipped ones, and
+ @ r2 = -(bits skipped in this segment + bits), as read_end expects.
+ RSB r10,r10,#0 @ r10= bits to skip
+ ADD r6,r6,r10,LSR #3 @ r6 = first byte holding wanted bits
+ AND r5,r10,#7 @ r5 = bits to drop from that byte
+ ADD r14,r12,#7
+ MOV r14,r14,LSR #3 @ r14= bytes from r6 to segment end
+ @ (read_slow_loop counts bytes)
+ @ Take the first, possibly partial, byte here; read_slow_loop can
+ @ only append whole bytes at bit position r5.
+ LDRB r12,[r6],#1 @ r12= *buffer
+ SUBS r14,r14,#1 @ r14= bytes left after this one
+ MOV r10,r12,LSR r5 @ r10= first 8-r5 bits of the result
+ RSB r5,r5,#8 @ r5 = bits gathered so far
+ BLE read_really_slow @ Flags from the SUBS: segment used up
+ CMP r5,r1
+ BLT read_slow_loop
+ B read_really_slow @ r5 >= bits, so read_really_slow
+ @ goes straight on to read_end
+
+ @ END