diff options
Diffstat (limited to 'Tremolo/dpen.s')
-rw-r--r-- | Tremolo/dpen.s | 459 |
1 files changed, 459 insertions, 0 deletions
@ Tremolo library
@ Copyright (C) 2009 Robin Watts for Pinknoise Productions Ltd
@
@ File:   Tremolo/dpen.s
@ Target: 32-bit ARM, GNU as, pre-UAL (divided) mnemonics.
@ Calling convention used throughout this file: arguments in r0-r3 (further
@ arguments on the stack), result in r0, r4-r11 preserved by every routine
@ that touches them, r12/r14 used as scratch between calls.
@
@ Routines exported:
@   decode_packed_entry_number  - walk a codebook decode tree
@   decode_map                  - decode one entry into a vector of values
@   vorbis_book_decodevv_add    - decode and accumulate into interleaved channels
@   _checksum                   - table-driven CRC over a chain of references

        .text

        .global decode_packed_entry_number
        .global decode_packed_entry_number_REALSTART
        .global decode_map
        .global vorbis_book_decodevv_add
        .global _checksum

        .extern oggpack_adv
        .extern oggpack_look
        .extern oggpack_eop
        .extern crc_lookup

@ Byte offsets of the codebook fields read by this file.  The first three
@ words (offsets 0, 4, 8) are max_length, dec_table and dec_method and are
@ fetched with a single LDMIA in decode_packed_entry_number.
        .equ    CB_DEC_TYPE,     12
        .equ    CB_Q_BITS,       16
        .equ    CB_DIM,          20
        .equ    CB_Q_DELP,       24
        .equ    CB_Q_MINP,       28
        .equ    CB_Q_DEL,        32
        .equ    CB_Q_MIN,        36
        .equ    CB_Q_SEQ,        40
        .equ    CB_Q_PACK,       44
        .equ    CB_Q_VAL,        48
        .equ    CB_USED_ENTRIES, 52
        .equ    CB_DEC_BUF,      56

@-----------------------------------------------------------------------
@ decode_packed_entry_number(codebook *book, oggpack_buffer *b)
@ In:    r0 = book, r1 = b
@ Out:   r0 = decoded entry, or -1 on failure
@ Saves: r4-r8, r10 (restored on every exit path)
@
@ Looks ahead up to book->max_length bits (retrying with fewer bits when
@ oggpack_look returns a negative value), walks the decode table selected
@ by book->dec_method, then advances the bit reader by the number of bits
@ consumed.
@-----------------------------------------------------------------------
decode_packed_entry_number_REALSTART:
dpen_nobits:
        MOV     r0, r5                  @ r0 = b
        MOV     r1, #1                  @ r1 = 1
        BL      oggpack_adv             @ oggpack_adv(b,1)  /* Force eop */
duff:
        MVN     r0, #0                  @ return -1
        LDMFD   r13!,{r4-r8,r10,PC}

dpen_readfailed:
        SUBS    r4, r4, #1              @ r4 = --read
        BEQ     dpen_nobits             @ nothing left to try
        MOV     r0, r5                  @ r0 = b
        MOV     r1, r4                  @ r1 = read
        ADR     r14, dpen_read_return   @ make oggpack_look return to the check
        B       oggpack_look

decode_packed_entry_number:
        @ r0 = codebook *book
        @ r1 = oggpack_buffer *b
        STMFD   r13!,{r4-r8,r10,r14}

        LDMIA   r0,{r4,r6,r7}           @ r4 = read = book->max_length
                                        @ r6 = book->dec_table
                                        @ r7 = book->dec_method
        MOV     r5, r1                  @ r5 = b

        MOV     r0, r5                  @ r0 = b
        MOV     r1, r4                  @ r1 = read
        BL      oggpack_look
dpen_read_return:
        CMP     r0, #0
        BLT     dpen_readfailed

        @ r0 = lok
        @ r4 = read
        @ r5 = b
        @ r6 = dec_table
        @ r7 = dec_method

        CMP     r7, #3
        BGT     meth4
        BEQ     meth3
        CMP     r7, #1
        BGT     meth2
        BEQ     meth1

        @ ---- method 0: byte nodes, bit 7 marks a leaf -------------------
meth0:
        RSB     r1, r4, #0              @ r1 = -read, counts up towards 0
        MOV     r7, #0                  @ r7 = chase
m0_loop:
        MOVS    r0, r0, LSR #1          @ r0 = lok>>1   C = bottom bit
        ADC     r2, r6, r7, LSL #1      @ r2 = &t[chase*2+C]
        LDRB    r7, [r2]                @ r7 = chase = t[chase*2+C]
        ADDS    r1, r1, #1              @ one more bit used; LT while bits remain
        @ stall Xscale
        CMPLT   r7, #0x80               @ bits remain: is this still a node?
        BLT     m0_loop
        @ r1 is <= 0 here whenever read >= 1, so the bit counter alone cannot
        @ tell "leaf found on the last bit" from "ran out of bits".  Use the
        @ leaf flag itself to decide, as meth1/meth3 do.
        CMP     r7, #0x80               @ LT => loop ended without a leaf
        AND     r7, r7, #0x7F           @ r7 = chase (flags untouched)
        MVNLT   r7, #0                  @ no leaf: value to return = -1
        CMP     r1, #0                  @ r1 > 0 only if read was 0
        MVNGT   r7, #0                  @ in that case also return -1
        ADD     r1, r1, r4              @ r1 = number of bits consumed
        MOV     r0, r5                  @ r0 = b
        BL      oggpack_adv             @ oggpack_adv(b, bits consumed)
        MOV     r0, r7                  @ return chase
        LDMFD   r13!,{r4-r8,r10,PC}

        @ ---- method 1: byte table, two-byte leaves ----------------------
meth1:
        @ r0 = lok
        @ r4 = read
        @ r5 = b
        @ r6 = dec_table
        RSB     r1, r4, #0              @ r1 = i = -read
        MOV     r10, #0                 @ r10= next = 0
m1_loop:
        MOV     r7, r10                 @ r7 = chase=next
        MOVS    r0, r0, LSR #1          @ r0 = lok>>1   C = bottom bit
        ADC     r8, r6, r7              @ r8 = t+chase+bit
        LDRB    r10,[r8], -r6           @ r10= next=t[chase+bit] r8=chase+bit
        ADDS    r1, r1, #1              @ r1 = i++
        @ stall Xscale
        CMPLT   r10, #0x80              @ if (next & 0x80) == 0
        BLT     m1_loop

        ADD     r1, r1, r4              @ r1 = i+read
        MOV     r0, r5                  @ r0 = b
        BL      oggpack_adv             @ oggpack_adv(b, i)

        CMP     r10, #0x80
        BLT     duff                    @ no leaf reached: return -1

        CMP     r8, r7                  @ if bit==0 (chase+bit==chase) (sets C)
        LDRNEB  r14,[r6, r7]            @ r14= t[chase]
        MOVEQ   r14, #128
        ADC     r12, r8, r6             @ r12= chase+bit+1+t
        LDRB    r14,[r12,r14,LSR #7]    @ r14= t[chase+bit+1+(!bit || t[chase]&0x80)]
        BIC     r10, r10, #0x80         @ r10= next &= ~0x80
        @ stall Xscale
        ORR     r0, r14, r10, LSL #8    @ r0 = chase = (next<<8) | r14

        LDMFD   r13!,{r4-r8,r10,PC}

        @ ---- method 2: halfword nodes, bit 15 marks a leaf --------------
meth2:
        RSB     r1, r4, #0              @ r1 = -read, counts up towards 0
        MOV     r7, #0                  @ r7 = chase
        MOV     r6, r6, LSR #1          @ r6 = t/2 so that r2+r2 below is a byte address
m2_loop:
        MOVS    r0, r0, LSR #1          @ r0 = lok>>1   C = bottom bit
        ADC     r2, r6, r7, LSL #1      @ r2 = t/2 + chase*2 + C
        LDRH    r7, [r2, r2]            @ r7 = chase = halfword at 2*r2
        ADDS    r1, r1, #1              @ one more bit used; LT while bits remain
        @ stall Xscale
        CMPLT   r7, #0x8000             @ bits remain: is this still a node?
        BLT     m2_loop
        @ See meth0: the bit counter cannot signal failure, test the flag.
        CMP     r7, #0x8000             @ LT => loop ended without a leaf
        BIC     r7, r7, #0x8000         @ r7 = chase (flags untouched)
        MVNLT   r7, #0                  @ no leaf: value to return = -1
        CMP     r1, #0                  @ r1 > 0 only if read was 0
        MVNGT   r7, #0                  @ in that case also return -1
        ADD     r1, r1, r4              @ r1 = number of bits consumed
        MOV     r0, r5                  @ r0 = b
        BL      oggpack_adv             @ oggpack_adv(b, bits consumed)
        MOV     r0, r7                  @ return chase
        LDMFD   r13!,{r4-r8,r10,PC}

        @ ---- method 3: halfword table, two-halfword leaves --------------
meth3:
        @ r0 = lok
        @ r4 = read
        @ r5 = b
        @ r6 = dec_table
        RSB     r1, r4, #0              @ r1 = i = -read
        MOV     r10, #0                 @ r10= next = 0
m3_loop:
        MOV     r7, r10                 @ r7 = chase=next
        MOVS    r0, r0, LSR #1          @ r0 = lok>>1   C = bottom bit
        ADC     r8, r7, #0              @ r8 = chase+bit
        MOV     r8, r8, LSL #1          @ r8 = (chase+bit)<<1
        LDRH    r10,[r6, r8]            @ r10= next=t[chase+bit]
        ADDS    r1, r1, #1              @ r1 = i++
        @ stall Xscale
        CMPLT   r10, #0x8000            @ if (next & 0x8000) == 0
        BLT     m3_loop

        ADD     r1, r1, r4              @ r1 = i+read
        MOV     r0, r5                  @ r0 = b
        BL      oggpack_adv             @ oggpack_adv(b, i)

        CMP     r10, #0x8000
        BLT     duff                    @ no leaf reached: return -1

        MOV     r7, r7, LSL #1          @ r7 = chase<<1
        CMP     r8, r7                  @ if bit==0 (chase+bit==chase) sets C
        LDRNEH  r14,[r6, r7]            @ r14= t[chase]
        MOVEQ   r14, #0x8000
        ADC     r12, r8, r14, LSR #15   @ r12= 1+((chase+bit)<<1)+(!bit || t[chase]&0x8000)
        ADC     r12, r12, r14, LSR #15  @ r12= (1+chase+bit+(!bit || t[chase]&0x8000))<<1
        LDRH    r14,[r6, r12]           @ r14= t[chase+bit+1+(!bit || t[chase]&0x8000)]
        BIC     r10, r10, #0x8000       @ r10= next &= ~0x8000
        @ stall Xscale
        ORR     r0, r14, r10, LSL #16   @ r0 = chase = (next<<16) | r14

        LDMFD   r13!,{r4-r8,r10,PC}

        @ ---- method 4: word nodes, bit 31 marks a leaf ------------------
meth4:
        RSB     r1, r4, #0              @ r1 = -read, counts up towards 0
        MOV     r7, #0                  @ r7 = chase
m4_loop:
        MOVS    r0, r0, LSR #1          @ r0 = lok>>1   C = bottom bit
        ADC     r2, r7, r7              @ r2 = chase*2+C
        LDR     r7, [r6, r2, LSL #2]    @ r7 = chase = t[chase*2+C]
        ADDS    r1, r1, #1              @ one more bit used; LT while bits remain
        BGE     m4_done                 @ all 'read' bits consumed
        @ A signed compare against 0x80000000 can never give LT, so the leaf
        @ flag (bit 31) is tested as the sign of r7 instead.
        CMP     r7, #0
        BGE     m4_loop                 @ bit 31 clear: still a node
m4_done:
        TST     r7, #0x80000000         @ EQ => loop ended without a leaf
        BIC     r7, r7, #0x80000000     @ r7 = chase (flags untouched)
        MVNEQ   r7, #0                  @ no leaf: value to return = -1
        CMP     r1, #0                  @ r1 > 0 only if read was 0
        MVNGT   r7, #0                  @ in that case also return -1
        ADD     r1, r1, r4              @ r1 = number of bits consumed
        MOV     r0, r5                  @ r0 = b
        BL      oggpack_adv             @ oggpack_adv(b, bits consumed)
        MOV     r0, r7                  @ return chase
        LDMFD   r13!,{r4-r8,r10,PC}

@-----------------------------------------------------------------------
@ decode_map(codebook *s, oggpack_buffer *b, int *v, int point)
@ In:    r0 = s, r1 = b, r2 = v, r3 = point
@ Out:   r0 = 0 on success, -1 on failure
@        v[0 .. s->dim-1] written on success
@ Saves: r4-r11
@
@ Decodes one entry, then expands it into s->dim values according to
@ s->dec_type (1, 2 or 3), applying  prev + ((add + mul*val) >> shiftM)
@ where prev is the previous output when s->q_seq is non-zero, else 0.
@-----------------------------------------------------------------------
decode_map:
        STMFD   r13!,{r4-r11,r14}

        MOV     r4, r0                  @ r4 = s
        MOV     r5, r1                  @ r5 = b
        MOV     r6, r2                  @ r6 = v
        MOV     r7, r3                  @ r7 = point
        BL      decode_packed_entry_number
        MOV     r8, r0                  @ r8 = entry

        MOV     r0, r5
        BL      oggpack_eop
        CMP     r0, #0
        BNE     dm_duff
        CMN     r8, #1                  @ decoder reported failure (-1)?
        BEQ     dm_duff                 @ never use -1 as an index/offset

        @ r4 = s
        @ r5 = b
        @ r6 = v
        @ r7 = point
        @ r8 = entry

        LDR     r1, [r4,#CB_DEC_TYPE]   @ r1 = s->dec_type
        LDR     r2, [r4,#CB_Q_BITS]     @ r2 = s->q_bits
        LDR     r3, [r4,#CB_DIM]        @ r3 = s->dim
        LDR     r5, [r4,#CB_Q_DELP]     @ r5 = s->q_delp
        LDR     r11,[r4,#CB_Q_MINP]     @ r11= s->q_minp
        LDR     r12,[r4,#CB_Q_DEL]      @ r12= s->q_del = mul
        LDR     r14,[r4,#CB_Q_MIN]      @ r14= s->q_min
        SUBS    r11, r7, r11            @ r11= add = point - s->q_minp

        MOVGT   r14, r14, ASR r11       @ r14= add = s->q_min >> add  (if add > 0)
        RSBLT   r11, r11, #0
        MOVLT   r14, r14, LSL r11       @ r14= add = s->q_min << -add (if add < 0)

        SUBS    r5, r7, r5              @ r5 = shiftM = point - s->q_delp
        LDR     r7, [r4,#CB_Q_SEQ]      @ r7 = s->q_seq
        RSBLT   r5, r5, #0              @ if (shiftM<0)  r5 =-shiftM
        MOVLT   r12, r12, LSL r5        @                r12=mul<<-shiftM
        MOVLT   r5, #0                  @                r5 =shiftM = 0
        MOVGT   r14, r14, LSL r5        @ add <<= shiftM

        CMP     r7, #0                  @ seqMask = (s->q_seq?-1:0)
        MVNNE   r7, #0

        CMP     r1, #2
        BEQ     dm2
        BGT     dm3
        CMP     r1, #0                  @ probably never happens
        BLE     dm_duff

        @ ---- dec_type 1: values packed directly in the entry ------------
dm1:
        @ r1 = s->dec_type
        @ r2 = s->q_bits
        @ r3 = s->dim
        @ r5 = shiftM
        @ r6 = v
        @ r7 = seqMask
        @ r8 = entry
        @ r12= mul
        @ r14= add
        MOV     r0, #1
        RSB     r0, r0, r0, LSL r2      @ r0 = mask = (1<<s->q_bits)-1
        MOV     r11, #0                 @ r11= prev = 0
dm1_loop:
        AND     r1, r8, r0              @ r1 = v = entry & mask
        MLA     r1, r12, r1, r14        @ r1 = (add + mul*v)
        MOV     r8, r8, LSR r2          @ r8 = entry>>s->q_bits
        SUBS    r3, r3, #1
        ADD     r1, r11, r1, ASR r5     @ r1 = v = prev+((add+mul*v)>>shiftM)
        AND     r11, r1, r7             @ r11= prev = seqMask & v
        STR     r1, [r6], #4            @ *v++ = v
        BGT     dm1_loop

        MOV     r0, #0
        LDMFD   r13!,{r4-r11,PC}

        @ ---- dec_type 2: entry holds packed indices into q_val ----------
dm2:
        @ r1 = s->dec_type
        @ r2 = s->q_bits
        @ r3 = s->dim
        @ r4 = s
        @ r5 = shiftM
        @ r6 = v
        @ r7 = seqMask
        @ r8 = entry
        @ r12= mul
        @ r14= add
        LDR     r1, [r4,#CB_Q_PACK]     @ r1 = s->q_pack
        LDR     r4, [r4,#CB_Q_VAL]      @ r4 = s->q_val
        MOV     r11, #0                 @ r11= prev
        MOV     r0, #1
        RSB     r0, r0, r0, LSL r1      @ r0 = mask = (1<<s->q_pack)-1
        CMP     r2, #8
        BGT     dm2_hword               @ q_bits > 8: q_val holds halfwords
dm2_loop:
        AND     r2, r8, r0              @ r2 = entry & mask
        LDRB    r2, [r4, r2]            @ r2 = v = q_val[entry & mask]
        MOV     r8, r8, LSR r1          @ r8 = entry>>q_pack
        MLA     r2, r12, r2, r14        @ r2 = (add+mul*v)
        SUBS    r3, r3, #1
        ADD     r2, r11, r2, ASR r5     @ r2 = v = prev+((add+mul*v)>>shiftM)
        AND     r11, r2, r7             @ r11= prev = seqMask & v
        STR     r2, [r6], #4            @ *v++ = v
        BGT     dm2_loop
        MOV     r0, #0
        LDMFD   r13!,{r4-r11,PC}

dm2_hword:
        AND     r2, r8, r0              @ r2 = entry & mask
        MOV     r2, r2, LSL #1          @ r2 = 2*r2
        LDRH    r2, [r4, r2]            @ r2 = v = q_val[entry & mask]
        MOV     r8, r8, LSR r1          @ r8 = entry>>q_pack
        MLA     r2, r12, r2, r14        @ r2 = (add+mul*v)
        SUBS    r3, r3, #1
        ADD     r2, r11, r2, ASR r5     @ r2 = v = prev+((add+mul*v)>>shiftM)
        AND     r11, r2, r7             @ r11= prev = seqMask & v
        STR     r2, [r6], #4            @ *v++ = v
        BGT     dm2_hword
        MOV     r0, #0
        LDMFD   r13!,{r4-r11,PC}

        @ ---- dec_type 3: entry is a scaled offset into q_val ------------
dm3:
        @ r1 = s->dec_type
        @ r2 = s->q_bits
        @ r3 = s->dim
        @ r4 = s
        @ r5 = shiftM
        @ r6 = v
        @ r7 = seqMask
        @ r8 = entry
        @ r12= mul
        @ r14= add
        LDR     r1, [r4,#CB_Q_PACK]     @ r1 = s->q_pack
        LDR     r4, [r4,#CB_Q_VAL]      @ r4 = s->q_val (same field dm2 reads)
        CMP     r2, #8
        MOV     r11, #0                 @ r11= prev
        MLA     r4, r1, r8, r4          @ r4 = ptr = s->q_val+entry*s->q_pack

        BGT     dm3_hword               @ q_bits > 8: q_val holds halfwords
dm3_loop:
        LDRB    r2, [r4], #1            @ r2 = v = *ptr++
        SUBS    r3, r3, #1
        MLA     r2, r12, r2, r14        @ r2 = (add+mul*v)
        ADD     r2, r11, r2, ASR r5     @ r2 = v = prev+((add+mul*v)>>shiftM)
        AND     r11, r2, r7             @ r11= prev = seqMask & v
        STR     r2, [r6], #4            @ *v++ = v
        BGT     dm3_loop
        MOV     r0, #0
        LDMFD   r13!,{r4-r11,PC}

dm3_hword:
        LDRH    r2, [r4], #2            @ r2 = *ptr++
        SUBS    r3, r3, #1
        MLA     r2, r12, r2, r14        @ r2 = (add+mul*v)
        ADD     r2, r11, r2, ASR r5     @ r2 = v = prev+((add+mul*v)>>shiftM)
        AND     r11, r2, r7             @ r11= prev = seqMask & v
        STR     r2, [r6], #4            @ *v++ = v
        BGT     dm3_hword
        MOV     r0, #0
        LDMFD   r13!,{r4-r11,PC}

dm_duff:
        MVN     r0, #0                  @ return -1
        LDMFD   r13!,{r4-r11,PC}

@-----------------------------------------------------------------------
@ vorbis_book_decodevv_add(codebook *book, ogg_int32_t **a, long offset,
@                          int ch, oggpack_buffer *b, int n, int point)
@ In:    r0 = book, r1 = a, r2 = offset, r3 = ch
@        stack: b, n, point (read at [sp,#9*4], [sp,#10*4], [sp,#11*4]
@        after the nine-register push below)
@ Out:   r0 = 0 on success (or when used_entries <= 0), -1 if decode_map fails
@ Saves: r4-r11
@
@ Repeatedly decodes a vector into book->dec_buf and adds its elements to
@ a[chptr][i], cycling chptr over the channels and stepping i (and
@ counting down n) each time chptr wraps.
@-----------------------------------------------------------------------
vorbis_book_decodevv_add:
        STMFD   r13!,{r4-r11,r14}
        LDR     r7, [r0, #CB_USED_ENTRIES] @ r7 = used_entries
        MOV     r9, r0                  @ r9 = book
        MOV     r10, r1                 @ r10= &a[chptr]      chptr=0
        MOV     r6, r3                  @ r6 = ch
        ADD     r8, r10, r3, LSL #2     @ r8 = &a[ch]
        MOV     r11, r2                 @ r11= offset
        CMP     r7, #0                  @ if (used_entries <= 0)
        BLE     vbdvva_exit             @     exit
        LDR     r5, [r13,#10*4]         @ r5 = n
vbdvva_loop1:
        @ r5 = n
        @ r6 = ch
        @ r8 = &a[ch]
        @ r9 = book
        @ r10= &a[chptr]
        @ r11= offset
        MOV     r0, r9                  @ r0 = book
        LDR     r1, [r13,# 9*4]         @ r1 = b
        LDR     r2, [r9, #CB_DEC_BUF]   @ r2 = v = dec_buf
        LDR     r3, [r13,#11*4]         @ r3 = point
        BL      decode_map
        CMP     r0, #0
        BNE     vbdvva_fail

        LDR     r0, [r9, #CB_DIM]       @ r0 = book->dim
        LDR     r1, [r9, #CB_DEC_BUF]   @ r1 = v = dec_buf
vbdvva_loop2:
        LDR     r2, [r10], #4           @ r2 = a[chptr++]
        LDR     r12,[r1], #4            @ r12= v[j++]
        CMP     r10, r8                 @ if (chptr == ch)
        SUBEQ   r10, r10, r6, LSL #2    @    chptr = 0
        LDR     r14,[r2, r11, LSL #2]!  @ r2 = &a[chptr++][i]  r14= *r2
        ADDEQ   r11, r11, #1            @    i++
        SUBEQ   r5, r5, #1              @    n--
        SUBS    r0, r0, #1              @ r0--
        ADD     r12, r12, r14           @ r12= a[chptr++][i] + v[j]
        STR     r12,[r2]                @ a[chptr++][i] += v[j]
        BGT     vbdvva_loop2
        CMP     r5, #0
        BGT     vbdvva_loop1
vbdvva_exit:
        MOV     r0, #0                  @ return 0
        LDMFD   r13!,{r4-r11,PC}
vbdvva_fail:
        MVN     r0, #0                  @ return -1
        LDMFD   r13!,{r4-r11,PC}

@-----------------------------------------------------------------------
@ _checksum(ogg_reference *or, int bytes)
@ In:    r0 = or, r1 = bytes
@ Out:   r0 = crc_reg
@ Saves: r5, r6
@
@ Walks the reference chain starting at 'or', folding up to 'bytes' bytes
@ through crc_lookup:  crc_reg = (crc_reg<<8) ^ crc_lookup[(crc_reg>>24)^byte].
@ Stops when 'bytes' reaches zero or the chain ends.
@-----------------------------------------------------------------------
_checksum:
        STMFD   r13!,{r5-r6,r14}

        LDR     r5, =crc_lookup
        MOV     r14, #0                 @ r14= crc_reg = 0
        MOVS    r12, r0
        BEQ     _cs_end
_cs_loop1:
        LDMIA   r12,{r0,r2,r3,r12}      @ r0 = or->buffer
                                        @ r2 = or->begin
                                        @ r3 = or->length
                                        @ r12= or->next
        LDR     r0, [r0]                @ r0 = or->buffer->data
        CMP     r1, r3                  @ r3 = post = (bytes < or->length ?
        MOVLT   r3, r1                  @              bytes : or->length)
        MOVS    r6, r3                  @ r6 = j = post
        BEQ     _cs_no_bytes
        ADD     r0, r0, r2              @ r0 = or->buffer->data + or->begin
_cs_loop2:
        LDRB    r2, [r0], #1            @ r2 = data[j]
        @ stall
        @ stall Xscale
        EOR     r2, r2, r14, LSR #24    @ r2 = (crc_reg>>24)^data[j]
        LDR     r2, [r5, r2, LSL #2]    @ r2 = crc_lkp[(crc_reg>>24)^data[j]]
        SUBS    r6, r6, #1              @ j--
        @ stall Xscale
        EOR     r14, r2, r14, LSL #8    @ r14= crc_reg = (crc_reg<<8)^r2
        BGT     _cs_loop2
_cs_no_bytes:
        SUBS    r1, r1, r3              @ bytes -= post
        CMPNE   r12, #0                 @ more bytes wanted: is there a next ref?
        BNE     _cs_loop1
_cs_end:
        MOV     r0, r14
        LDMFD   r13!,{r5-r6,PC}

        @ END