diff options
Diffstat (limited to 'libFLAC/ia32/stream_encoder_asm.nasm')
-rw-r--r-- | libFLAC/ia32/stream_encoder_asm.nasm | 159 |
1 files changed, 159 insertions, 0 deletions
diff --git a/libFLAC/ia32/stream_encoder_asm.nasm b/libFLAC/ia32/stream_encoder_asm.nasm new file mode 100644 index 0000000..b7ecef8 --- /dev/null +++ b/libFLAC/ia32/stream_encoder_asm.nasm @@ -0,0 +1,159 @@ +; vim:filetype=nasm ts=8 + +; libFLAC - Free Lossless Audio Codec library +; Copyright (C) 2001,2002,2003,2004,2005,2006,2007 Josh Coalson +; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; +; - Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. +; +; - Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in the +; documentation and/or other materials provided with the distribution. +; +; - Neither the name of the Xiph.org Foundation nor the names of its +; contributors may be used to endorse or promote products derived from +; this software without specific prior written permission. +; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR +; CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +; EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +; PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +; PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+
+%include "nasm.h"
+
+	data_section
+
+cglobal precompute_partition_info_sums_32bit_asm_ia32_
+
+	code_section
+
+
+; **********************************************************************
+;
+; precompute_partition_info_sums_32bit_asm_ia32_
+;
+; C-style view of the routine (parameter list as read from the stack below):
+;
+; void precompute_partition_info_sums_32bit_(
+;	const FLAC__int32 residual[],
+;	FLAC__uint64 abs_residual_partition_sums[],
+;	unsigned blocksize,
+;	unsigned predictor_order,
+;	unsigned min_partition_order,
+;	unsigned max_partition_order
+; )
+;
+; Fills abs_residual_partition_sums[] with sums of abs(residual[i]) per partition:
+;
+;   1. For max_partition_order it writes (1 << max_partition_order) sums starting
+;      at index 0.  Partition 0 covers residual[0 .. default_partition_samples -
+;      predictor_order), every following partition covers the next
+;      default_partition_samples entries, where
+;      default_partition_samples = blocksize >> max_partition_order.
+;   2. For every lower order, from max_partition_order - 1 down to
+;      min_partition_order, it appends (1 << order) sums right after the
+;      previously written ones; each is the sum of two adjacent entries of the
+;      next higher order.
+;
+; Step 1 accumulates in a 32-bit register and stores the result zero-extended
+; to 64 bits; a wrap of that 32-bit accumulator is not detected.  Step 2 adds
+; full 64-bit values (add/adc).
+;
+; Register usage: all arguments are read from the stack; ebp, ebx, esi and edi
+; are saved on entry and restored before ret; eax, ecx and edx are overwritten.
+;
+; Preconditions implied by the code: both loops of step 1 test their condition
+; at the bottom, so every partition is assumed to contain at least one sample
+; (i.e. end > residual_sample on entry to .loop1).
+; NOTE(review): nothing in this routine checks that; presumably the caller
+; guarantees predictor_order < (blocksize >> max_partition_order) -- confirm.
+;
+	ALIGN 16
+cident precompute_partition_info_sums_32bit_asm_ia32_
+
+	;; peppered throughout the code at major checkpoints are keys like this as to where things are at that point in time
+	;; [esp + 4]	const FLAC__int32 residual[]
+	;; [esp + 8]	FLAC__uint64 abs_residual_partition_sums[]
+	;; [esp + 12]	unsigned blocksize
+	;; [esp + 16]	unsigned predictor_order
+	;; [esp + 20]	unsigned min_partition_order
+	;; [esp + 24]	unsigned max_partition_order
+	push	ebp
+	push	ebx
+	push	esi
+	push	edi
+	sub	esp, 8
+	;; four pushes (16 bytes) plus 8 bytes of locals move every argument up by 24
+	;; [esp + 28]	const FLAC__int32 residual[]
+	;; [esp + 32]	FLAC__uint64 abs_residual_partition_sums[]
+	;; [esp + 36]	unsigned blocksize
+	;; [esp + 40]	unsigned predictor_order
+	;; [esp + 44]	unsigned min_partition_order
+	;; [esp + 48]	unsigned max_partition_order
+	;; [esp]	partitions
+	;; [esp + 4]	default_partition_samples
+
+	mov	ecx, [esp + 48]
+	mov	eax, 1
+	shl	eax, cl
+	mov	[esp], eax			; [esp] <- partitions = 1u << max_partition_order;
+	mov	eax, [esp + 36]
+	shr	eax, cl
+	mov	[esp + 4], eax			; [esp + 4] <- default_partition_samples = blocksize >> max_partition_order;
+
+	;
+	; first do max_partition_order
+	;
+	mov	edi, [esp + 4]
+	sub	edi, [esp + 40]			; edi <- end = (unsigned)(-(int)predictor_order) + default_partition_samples
+	xor	esi, esi			; esi <- residual_sample = 0
+	xor	ecx, ecx			; ecx <- partition = 0
+	mov	ebp, [esp + 28]			; ebp <- residual[]
+	xor	ebx, ebx			; ebx <- abs_residual_partition_sum = 0;
+	; note we put the updates to 'end' and 'abs_residual_partition_sum' at the end of loop0 and in the initialization above so we could align loop0 and loop1
+	ALIGN 16
+.loop0:						; for(partition = residual_sample = 0; partition < partitions; partition++) {
+.loop1:						;   for( ; residual_sample < end; residual_sample++)
+	mov	eax, [ebp + esi * 4]
+	cdq					; edx <- 0 if eax >= 0, -1 if eax < 0
+	xor	eax, edx
+	sub	eax, edx			; eax <- abs(eax), computed without a branch
+	add	ebx, eax			;     abs_residual_partition_sum += abs(residual[residual_sample]);
+	;@@@@@@ check overflow flag and abort here?
+	; (open question left by the original author: the 32-bit add above can wrap without being noticed)
+	add	esi, byte 1
+	cmp	esi, edi			;   /* since the loop will always run at least once, we can put the loop check down here */
+	jb	.loop1
+.next1:
+	add	edi, [esp + 4]			;   end += default_partition_samples;
+	mov	eax, [esp + 32]
+	mov	[eax + ecx * 8], ebx		;   abs_residual_partition_sums[partition] = abs_residual_partition_sum;
+	mov	[eax + ecx * 8 + 4], dword 0	;   high dword of the 64-bit entry is always stored as 0 here
+	xor	ebx, ebx			;   abs_residual_partition_sum = 0;
+	add	ecx, byte 1
+	cmp	ecx, [esp]			; /* since the loop will always run at least once, we can put the loop check down here */
+	jb	.loop0
+.next0:						; }
+	;
+	; now merge partitions for lower orders
+	;
+	mov	esi, [esp + 32]			; esi <- abs_residual_partition_sums[from_partition==0];
+	mov	eax, [esp]
+	lea	edi, [esi + eax * 8]		; edi <- abs_residual_partition_sums[to_partition==partitions];
+	mov	ecx, [esp + 48]
+	sub	ecx, byte 1			; ecx <- partition_order = (int)max_partition_order - 1;
+	ALIGN 16
+.loop2:						; for(; partition_order >= (int)min_partition_order; partition_order--) {
+	cmp	ecx, [esp + 44]
+	jl	.next2				;   signed compare: partition_order is -1 when max_partition_order == 0
+	mov	edx, 1
+	shl	edx, cl				;   const unsigned partitions = 1u << partition_order;
+	ALIGN 16
+.loop3:						;   for(i = 0; i < partitions; i++) {
+	mov	eax, [esi]
+	mov	ebx, [esi + 4]
+	add	eax, [esi + 8]
+	adc	ebx, [esi + 12]			;     64-bit add: the carry from the low dwords goes into the high dwords
+	mov	[edi], eax
+	mov	[edi + 4], ebx			;     a_r_p_s[to_partition] = a_r_p_s[from_partition] + a_r_p_s[from_partition+1];
+	add	esi, byte 16			;     from_partition += 2 (two 8-byte entries consumed)
+	add	edi, byte 8			;     to_partition++ (one 8-byte entry produced)
+	sub	edx, byte 1			;     edx counts down the remaining partitions of this order
+	jnz	.loop3				;   }
+	sub	ecx, byte 1
+	jmp	.loop2				; }
+.next2:
+
+	; release the two local dwords and restore the saved registers in reverse push order
+	add	esp, 8
+	pop	edi
+	pop	esi
+	pop	ebx
+	pop	ebp
+	ret
+
+end
+
+%ifdef OBJ_FORMAT_elf
+	section .note.GNU-stack noalloc
+%endif |