summaryrefslogtreecommitdiffstats
path: root/libFLAC/ia32/stream_encoder_asm.nasm
diff options
context:
space:
mode:
Diffstat (limited to 'libFLAC/ia32/stream_encoder_asm.nasm')
-rw-r--r--libFLAC/ia32/stream_encoder_asm.nasm159
1 files changed, 159 insertions, 0 deletions
diff --git a/libFLAC/ia32/stream_encoder_asm.nasm b/libFLAC/ia32/stream_encoder_asm.nasm
new file mode 100644
index 0000000..b7ecef8
--- /dev/null
+++ b/libFLAC/ia32/stream_encoder_asm.nasm
@@ -0,0 +1,159 @@
+; vim:filetype=nasm ts=8
+
+; libFLAC - Free Lossless Audio Codec library
+; Copyright (C) 2001,2002,2003,2004,2005,2006,2007 Josh Coalson
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+;
+; - Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+;
+; - Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in the
+; documentation and/or other materials provided with the distribution.
+;
+; - Neither the name of the Xiph.org Foundation nor the names of its
+; contributors may be used to endorse or promote products derived from
+; this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
+; CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+; EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+; PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+; PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+%include "nasm.h"
+
+ data_section
+
+cglobal precompute_partition_info_sums_32bit_asm_ia32_
+
+ code_section
+
+
+; **********************************************************************
+;
+; void FLAC__bool FLAC__bitreader_read_rice_signed_block(FLAC__BitReader *br, int vals[], unsigned nvals, unsigned parameter)
+; void precompute_partition_info_sums_32bit_(
+; const FLAC__int32 residual[],
+; FLAC__uint64 abs_residual_partition_sums[],
+; unsigned blocksize,
+; unsigned predictor_order,
+; unsigned min_partition_order,
+; unsigned max_partition_order
+; )
+;
+ ALIGN 16
+cident precompute_partition_info_sums_32bit_asm_ia32_
+
+ ;; peppered throughout the code at major checkpoints are keys like this as to where things are at that point in time
+ ;; [esp + 4] const FLAC__int32 residual[]
+ ;; [esp + 8] FLAC__uint64 abs_residual_partition_sums[]
+ ;; [esp + 12] unsigned blocksize
+ ;; [esp + 16] unsigned predictor_order
+ ;; [esp + 20] unsigned min_partition_order
+ ;; [esp + 24] unsigned max_partition_order
+ push ebp
+ push ebx
+ push esi
+ push edi
+ sub esp, 8
+ ;; [esp + 28] const FLAC__int32 residual[]
+ ;; [esp + 32] FLAC__uint64 abs_residual_partition_sums[]
+ ;; [esp + 36] unsigned blocksize
+ ;; [esp + 40] unsigned predictor_order
+ ;; [esp + 44] unsigned min_partition_order
+ ;; [esp + 48] unsigned max_partition_order
+ ;; [esp] partitions
+ ;; [esp + 4] default_partition_samples
+
+ mov ecx, [esp + 48]
+ mov eax, 1
+ shl eax, cl
+ mov [esp], eax ; [esp] <- partitions = 1u << max_partition_order;
+ mov eax, [esp + 36]
+ shr eax, cl
+ mov [esp + 4], eax ; [esp + 4] <- default_partition_samples = blocksize >> max_partition_order;
+
+ ;
+ ; first do max_partition_order
+ ;
+ mov edi, [esp + 4]
+ sub edi, [esp + 40] ; edi <- end = (unsigned)(-(int)predictor_order) + default_partition_samples
+ xor esi, esi ; esi <- residual_sample = 0
+ xor ecx, ecx ; ecx <- partition = 0
+ mov ebp, [esp + 28] ; ebp <- residual[]
+ xor ebx, ebx ; ebx <- abs_residual_partition_sum = 0;
+ ; note we put the updates to 'end' and 'abs_residual_partition_sum' at the end of loop0 and in the initialization above so we could align loop0 and loop1
+ ALIGN 16
+.loop0: ; for(partition = residual_sample = 0; partition < partitions; partition++) {
+.loop1: ; for( ; residual_sample < end; residual_sample++)
+ mov eax, [ebp + esi * 4]
+ cdq
+ xor eax, edx
+ sub eax, edx
+ add ebx, eax ; abs_residual_partition_sum += abs(residual[residual_sample]);
+ ;@@@@@@ check overflow flag and abort here?
+ add esi, byte 1
+ cmp esi, edi ; /* since the loop will always run at least once, we can put the loop check down here */
+ jb .loop1
+.next1:
+ add edi, [esp + 4] ; end += default_partition_samples;
+ mov eax, [esp + 32]
+ mov [eax + ecx * 8], ebx ; abs_residual_partition_sums[partition] = abs_residual_partition_sum;
+ mov [eax + ecx * 8 + 4], dword 0
+ xor ebx, ebx ; abs_residual_partition_sum = 0;
+ add ecx, byte 1
+ cmp ecx, [esp] ; /* since the loop will always run at least once, we can put the loop check down here */
+ jb .loop0
+.next0: ; }
+ ;
+ ; now merge partitions for lower orders
+ ;
+ mov esi, [esp + 32] ; esi <- abs_residual_partition_sums[from_partition==0];
+ mov eax, [esp]
+ lea edi, [esi + eax * 8] ; edi <- abs_residual_partition_sums[to_partition==partitions];
+ mov ecx, [esp + 48]
+ sub ecx, byte 1 ; ecx <- partition_order = (int)max_partition_order - 1;
+ ALIGN 16
+.loop2: ; for(; partition_order >= (int)min_partition_order; partition_order--) {
+ cmp ecx, [esp + 44]
+ jl .next2
+ mov edx, 1
+ shl edx, cl ; const unsigned partitions = 1u << partition_order;
+ ALIGN 16
+.loop3: ; for(i = 0; i < partitions; i++) {
+ mov eax, [esi]
+ mov ebx, [esi + 4]
+ add eax, [esi + 8]
+ adc ebx, [esi + 12]
+ mov [edi], eax
+ mov [edi + 4], ebx ; a_r_p_s[to_partition] = a_r_p_s[from_partition] + a_r_p_s[from_partition+1];
+ add esi, byte 16
+ add edi, byte 8
+ sub edx, byte 1
+ jnz .loop3 ; }
+ sub ecx, byte 1
+ jmp .loop2 ; }
+.next2:
+
+ add esp, 8
+ pop edi
+ pop esi
+ pop ebx
+ pop ebp
+ ret
+
+end
+
+%ifdef OBJ_FORMAT_elf
+ section .note.GNU-stack noalloc
+%endif