From 687730892c7bed143df27df76cfb673516a5b6a4 Mon Sep 17 00:00:00 2001 From: Chih-Hung Hsieh Date: Wed, 19 Aug 2015 11:44:57 -0700 Subject: Use unified syntax to compile with both llvm and gas. * Remove useless stab debug info directives .func and .endfunc, which are not recognized by clang/llvm integrated assembler. * Replace require8 with REQUIRE8, preserve8 with PRESERVE8. * Replace LDRNESB with LDRSBNE, LDREQSB with LDRSBEQ, etc. * Replace VST1 {*.U8} with VST1.8 ..., VLD1 {*.U8} with VLD1.8, and VMOV with VMOV.I8. * New assembler output code is identical or equivalent to old gas output. For example, the 3 references of LDR r7, =0x80808080 in omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_Average_4x_Align_unsafe_s.S produced 3 instances of .word 0x80808080 by llvm integrated assembler but only one by gas. Change-Id: Ifdcd5bee915a7534198d7f219bceab72045f6a22 --- media/libstagefright/codecs/on2/h264dec/Android.mk | 2 - .../src_gcc/armVCM4P10_Average_4x_Align_unsafe_s.S | 6 - .../src_gcc/armVCM4P10_DeblockingChroma_unsafe_s.S | 4 - .../src_gcc/armVCM4P10_DeblockingLuma_unsafe_s.S | 4 - .../src_gcc/armVCM4P10_DecodeCoeffsToPair_s.S | 2 - .../armVCM4P10_InterpolateLuma_Align_unsafe_s.S | 4 - .../armVCM4P10_InterpolateLuma_Copy_unsafe_s.S | 2 - .../armVCM4P10_InterpolateLuma_DiagCopy_unsafe_s.S | 4 - ...10_InterpolateLuma_HalfDiagHorVer4x4_unsafe_s.S | 2 - ...10_InterpolateLuma_HalfDiagVerHor4x4_unsafe_s.S | 2 - ...rmVCM4P10_InterpolateLuma_HalfHor4x4_unsafe_s.S | 2 - ...rmVCM4P10_InterpolateLuma_HalfVer4x4_unsafe_s.S | 2 - .../src_gcc/armVCM4P10_Interpolate_Chroma_s.S | 2 - .../src_gcc/armVCM4P10_TransformResidual4x4_s.S | 2 - .../vc/m4p10/src_gcc/armVCM4P10_UnpackBlock4x4_s.S | 11 +- .../vc/m4p10/src_gcc/omxVCM4P10_DeblockLuma_I.S | 2 - ...4P10_DequantTransformResidualFromPairAndAdd_s.S | 2 - ...omxVCM4P10_FilterDeblockingChroma_HorEdge_I_s.S | 2 - ...omxVCM4P10_FilterDeblockingChroma_VerEdge_I_s.S | 2 - .../omxVCM4P10_FilterDeblockingLuma_HorEdge_I_s.S | 2 - .../omxVCM4P10_FilterDeblockingLuma_VerEdge_I_s.S | 2 - .../m4p10/src_gcc/omxVCM4P10_InterpolateLuma_s.S | 2 - .../src_gcc/omxVCM4P10_PredictIntraChroma_8x8_s.S | 2 - .../src_gcc/omxVCM4P10_PredictIntra_16x16_s.S | 2 - .../m4p10/src_gcc/omxVCM4P10_PredictIntra_4x4_s.S | 2 - ...omxVCM4P10_TransformDequantChromaDCFromPair_s.S | 11 +- .../omxVCM4P10_TransformDequantLumaDCFromPair_s.S | 4 - .../h264dec/source/arm_neon_asm_gcc/asm_common.S | 2 - .../source/arm_neon_asm_gcc/h264bsdClearMbLayer.S | 14 +- .../arm_neon_asm_gcc/h264bsdCountLeadingZeros.S | 2 +- .../source/arm_neon_asm_gcc/h264bsdFillRow7.S | 40 ++--- .../source/arm_neon_asm_gcc/h264bsdFlushBits.S | 2 +- .../arm_neon_asm_gcc/h264bsdWriteMacroblock.S | 161 ++++++++++----------- 33 files changed, 119 insertions(+), 188 deletions(-) diff --git a/media/libstagefright/codecs/on2/h264dec/Android.mk b/media/libstagefright/codecs/on2/h264dec/Android.mk index e63b6b1..bf03ad9 100644 --- a/media/libstagefright/codecs/on2/h264dec/Android.mk +++ b/media/libstagefright/codecs/on2/h264dec/Android.mk @@ -94,8 +94,6 @@ ifeq ($(TARGET_ARCH),arm) LOCAL_C_INCLUDES += $(LOCAL_PATH)/./omxdl/arm_neon/api \ $(LOCAL_PATH)/./omxdl/arm_neon/vc/api \ $(LOCAL_PATH)/./omxdl/arm_neon/vc/m4p10/api - # h264bsdWriteMacroblock.S does not compile with Clang. - LOCAL_CLANG_ASFLAGS_arm += -no-integrated-as endif endif diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_Average_4x_Align_unsafe_s.S b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_Average_4x_Align_unsafe_s.S index 46e0018..e1ffb09 100644 --- a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_Average_4x_Align_unsafe_s.S +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_Average_4x_Align_unsafe_s.S @@ -26,7 +26,6 @@ .text .global armVCM4P10_Average_4x4_Align0_unsafe - .func armVCM4P10_Average_4x4_Align0_unsafe armVCM4P10_Average_4x4_Align0_unsafe: PUSH {r4-r6,lr} LDR r7, =0x80808080 @@ -55,10 +54,8 @@ armVCM4P10_Average_4x4_Align0_unsafe: EOR r4,r4,r7 STR r4,[r2],r3 POP {r4-r6,pc} - .endfunc .global armVCM4P10_Average_4x4_Align2_unsafe - .func armVCM4P10_Average_4x4_Align2_unsafe armVCM4P10_Average_4x4_Align2_unsafe: PUSH {r4-r6,lr} LDR r7, =0x80808080 @@ -99,10 +96,8 @@ armVCM4P10_Average_4x4_Align2_unsafe: EOR r4,r4,r7 STR r4,[r2],r3 POP {r4-r6,pc} - .endfunc .global armVCM4P10_Average_4x4_Align3_unsafe - .func armVCM4P10_Average_4x4_Align3_unsafe armVCM4P10_Average_4x4_Align3_unsafe: PUSH {r4-r6,lr} LDR r7, =0x80808080 @@ -143,7 +138,6 @@ armVCM4P10_Average_4x4_Align3_unsafe: EOR r4,r4,r7 STR r4,[r2],r3 POP {r4-r6,pc} - .endfunc .end diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_DeblockingChroma_unsafe_s.S b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_DeblockingChroma_unsafe_s.S index ca64a02..40ea4a9 100644 --- a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_DeblockingChroma_unsafe_s.S +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_DeblockingChroma_unsafe_s.S @@ -26,7 +26,6 @@ .text .global armVCM4P10_DeblockingChromabSLT4_unsafe - .func armVCM4P10_DeblockingChromabSLT4_unsafe armVCM4P10_DeblockingChromabSLT4_unsafe: VLD1.32 {d18[0]},[r5]! VSUBL.U8 q11,d5,d9 @@ -50,10 +49,8 @@ armVCM4P10_DeblockingChromabSLT4_unsafe: VQMOVUN.S16 d29,q14 VQMOVUN.S16 d24,q12 BX lr - .endfunc .global armVCM4P10_DeblockingChromabSGE4_unsafe - .func armVCM4P10_DeblockingChromabSGE4_unsafe armVCM4P10_DeblockingChromabSGE4_unsafe: VHADD.U8 d13,d4,d9 VHADD.U8 d31,d8,d5 @@ -63,7 +60,6 @@ armVCM4P10_DeblockingChromabSGE4_unsafe: VRHADD.U8 d13,d13,d5 VRHADD.U8 d31,d31,d9 BX lr - .endfunc .end diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_DeblockingLuma_unsafe_s.S b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_DeblockingLuma_unsafe_s.S index 193bc5e..05fb2c5 100644 --- a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_DeblockingLuma_unsafe_s.S +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_DeblockingLuma_unsafe_s.S @@ -26,7 +26,6 @@ .text .global armVCM4P10_DeblockingLumabSLT4_unsafe - .func armVCM4P10_DeblockingLumabSLT4_unsafe armVCM4P10_DeblockingLumabSLT4_unsafe: VSUBL.U8 q11,d5,d9 VLD1.8 {d18[]},[r5]! @@ -66,10 +65,8 @@ armVCM4P10_DeblockingLumabSLT4_unsafe: VBIF d24,d8,d16 VBIF d25,d9,d12 BX lr - .endfunc .global armVCM4P10_DeblockingLumabSGE4_unsafe - .func armVCM4P10_DeblockingLumabSGE4_unsafe armVCM4P10_DeblockingLumabSGE4_unsafe: VSHR.U8 d19,d0,#2 VADD.I8 d19,d19,d15 @@ -111,7 +108,6 @@ armVCM4P10_DeblockingLumabSGE4_unsafe: VBIF d24,d8,d16 VBIF d28,d10,d12 BX lr - .endfunc .end diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_DecodeCoeffsToPair_s.S b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_DecodeCoeffsToPair_s.S index 8e0db37..27c0452 100644 --- a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_DecodeCoeffsToPair_s.S +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_DecodeCoeffsToPair_s.S @@ -42,7 +42,6 @@ .hidden armVCM4P10_ZigZag_4x4 .global armVCM4P10_DecodeCoeffsToPair - .func armVCM4P10_DecodeCoeffsToPair armVCM4P10_DecodeCoeffsToPair: PUSH {r4-r12,lr} SUB sp,sp,#0x40 @@ -302,7 +301,6 @@ L0x344: L0x35c: ADD sp,sp,#0x40 POP {r4-r12,pc} - .endfunc .LarmVCM4P10_CAVLCCoeffTokenTables: .word armVCM4P10_CAVLCCoeffTokenTables-(P0+8) diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_InterpolateLuma_Align_unsafe_s.S b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_InterpolateLuma_Align_unsafe_s.S index 7206d76..1de9004 100644 --- a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_InterpolateLuma_Align_unsafe_s.S +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_InterpolateLuma_Align_unsafe_s.S @@ -26,7 +26,6 @@ .text .global armVCM4P10_InterpolateLuma_HorAlign9x_unsafe - .func armVCM4P10_InterpolateLuma_HorAlign9x_unsafe armVCM4P10_InterpolateLuma_HorAlign9x_unsafe: MOV r12,r8 AND r7,r0,#3 @@ -83,10 +82,8 @@ CopyEnd: MOV r0,r12 MOV r1,#0xc BX lr - .endfunc .global armVCM4P10_InterpolateLuma_VerAlign4x_unsafe - .func armVCM4P10_InterpolateLuma_VerAlign4x_unsafe armVCM4P10_InterpolateLuma_VerAlign4x_unsafe: AND r7,r0,#3 BIC r0,r0,#3 @@ -132,7 +129,6 @@ CopyVEnd: SUB r0,r8,#0x1c MOV r1,#4 BX lr - .endfunc .end diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_InterpolateLuma_Copy_unsafe_s.S b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_InterpolateLuma_Copy_unsafe_s.S index e41d662..7ba2890 100644 --- a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_InterpolateLuma_Copy_unsafe_s.S +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_InterpolateLuma_Copy_unsafe_s.S @@ -26,7 +26,6 @@ .text .global armVCM4P10_InterpolateLuma_Copy4x4_unsafe - .func armVCM4P10_InterpolateLuma_Copy4x4_unsafe armVCM4P10_InterpolateLuma_Copy4x4_unsafe: PUSH {r4-r6,lr} AND r12,r0,#3 @@ -114,7 +113,6 @@ Copy4x4Align3: STR r8,[r2],r3 Copy4x4End: POP {r4-r6,pc} - .endfunc .end diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_InterpolateLuma_DiagCopy_unsafe_s.S b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_InterpolateLuma_DiagCopy_unsafe_s.S index c8f5cda..8b2c678 100644 --- a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_InterpolateLuma_DiagCopy_unsafe_s.S +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_InterpolateLuma_DiagCopy_unsafe_s.S @@ -26,7 +26,6 @@ .text .global armVCM4P10_InterpolateLuma_HorDiagCopy_unsafe - .func armVCM4P10_InterpolateLuma_HorDiagCopy_unsafe armVCM4P10_InterpolateLuma_HorDiagCopy_unsafe: PUSH {r4-r6,lr} MOV lr,#4 @@ -57,10 +56,8 @@ LoopStart1: SUB r0,r7,#0x20 MOV r1,#8 POP {r4-r6,pc} - .endfunc .global armVCM4P10_InterpolateLuma_VerDiagCopy_unsafe - .func armVCM4P10_InterpolateLuma_VerDiagCopy_unsafe armVCM4P10_InterpolateLuma_VerDiagCopy_unsafe: PUSH {r4-r6,lr} LDR r6, =0xfe00fe0 @@ -116,7 +113,6 @@ LoopStart: SUB r0,r7,#0x18 MOV r1,#4 POP {r4-r6,pc} - .endfunc .end diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe_s.S b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe_s.S index f5868c0..77aa927 100644 --- a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe_s.S +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe_s.S @@ -26,7 +26,6 @@ .text .global armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe - .func armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe: PUSH {r4-r12,lr} VLD1.8 {d0,d1},[r0],r1 @@ -173,7 +172,6 @@ armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe: VQMOVN.U16 d4,q2 VQMOVN.U16 d6,q3 POP {r4-r12,pc} - .endfunc .end diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe_s.S b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe_s.S index 065995d..e5f7f1c 100644 --- a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe_s.S +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe_s.S @@ -26,7 +26,6 @@ .text .global armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe - .func armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe: PUSH {r4-r12,lr} VLD1.8 {d0,d1},[r0],r1 @@ -128,7 +127,6 @@ armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe: VQMOVN.U16 d4,q2 VQMOVN.U16 d6,q3 POP {r4-r12,pc} - .endfunc .end diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe_s.S b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe_s.S index 1e2d16b..393d385 100644 --- a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe_s.S +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe_s.S @@ -26,7 +26,6 @@ .text .global armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe - .func armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe: PUSH {r4-r12,lr} VLD1.8 {d22,d23},[r0],r1 @@ -81,7 +80,6 @@ armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe: VQRSHRUN.S16 d26,q13,#5 VQRSHRUN.S16 d28,q14,#5 POP {r4-r12,pc} - .endfunc .end diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe_s.S b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe_s.S index c7def2a..698e7b5 100644 --- a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe_s.S +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe_s.S @@ -26,7 +26,6 @@ .text .global armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe - .func armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe: PUSH {r4-r12,lr} VLD1.8 {d7},[r0],r1 @@ -67,7 +66,6 @@ armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe: VQRSHRUN.S16 d4,q2,#5 VQRSHRUN.S16 d6,q3,#5 POP {r4-r12,pc} - .endfunc .end diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_Interpolate_Chroma_s.S b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_Interpolate_Chroma_s.S index 2f4293f..e469516 100644 --- a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_Interpolate_Chroma_s.S +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_Interpolate_Chroma_s.S @@ -38,7 +38,6 @@ armVCM4P10_WidthBranchTableMVIsZero: .word WidthIs8MVIsZero-(P0+8) .global armVCM4P10_Interpolate_Chroma - .func armVCM4P10_Interpolate_Chroma armVCM4P10_Interpolate_Chroma: PUSH {r4-r12,lr} VPUSH {d8-d15} @@ -183,7 +182,6 @@ WidthIs2MVIsZero: MOV r0,#0 VPOP {d8-d15} POP {r4-r12,pc} - .endfunc .end diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_TransformResidual4x4_s.S b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_TransformResidual4x4_s.S index d4cedb5..e18bec7 100644 --- a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_TransformResidual4x4_s.S +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_TransformResidual4x4_s.S @@ -26,7 +26,6 @@ .text .global armVCM4P10_TransformResidual4x4 - .func armVCM4P10_TransformResidual4x4 armVCM4P10_TransformResidual4x4: VPUSH {d8} VLD4.16 {d0,d1,d2,d3},[r1] @@ -61,7 +60,6 @@ armVCM4P10_TransformResidual4x4: VST1.16 {d0,d1,d2,d3},[r0] VPOP {d8} BX lr - .endfunc .end diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_UnpackBlock4x4_s.S b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_UnpackBlock4x4_s.S index 1652dc6..b97efcb 100644 --- a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_UnpackBlock4x4_s.S +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_UnpackBlock4x4_s.S @@ -24,9 +24,9 @@ .arm .fpu neon .text + .syntax unified .global armVCM4P10_UnpackBlock4x4 - .func armVCM4P10_UnpackBlock4x4 armVCM4P10_UnpackBlock4x4: PUSH {r4-r8,lr} LDR r2,[r0,#0] @@ -40,16 +40,15 @@ armVCM4P10_UnpackBlock4x4: STRD r4,r5,[r1,#0x18] unpackLoop: TST r3,#0x10 - LDRNESB r5,[r2,#1] - LDRNEB r4,[r2],#2 + LDRSBNE r5,[r2,#1] + LDRBNE r4,[r2],#2 AND r6,r7,r3,LSL #1 - LDREQSB r4,[r2],#1 + LDRSBEQ r4,[r2],#1 ORRNE r4,r4,r5,LSL #8 TST r3,#0x20 - LDREQB r3,[r2],#1 + LDRBEQ r3,[r2],#1 STRH r4,[r1,r6] BEQ unpackLoop STR r2,[r0,#0] POP {r4-r8,pc} - .endfunc .end diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_DeblockLuma_I.S b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_DeblockLuma_I.S index 90b0947..6a99bde 100644 --- a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_DeblockLuma_I.S +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_DeblockLuma_I.S @@ -26,7 +26,6 @@ .text .global omxVCM4P10_DeblockLuma_I - .func omxVCM4P10_DeblockLuma_I omxVCM4P10_DeblockLuma_I: PUSH {r4-r9,lr} MOVS r6,r0 @@ -76,7 +75,6 @@ L0x64: BL omxVCM4P10_FilterDeblockingLuma_HorEdge_I ADD sp,sp,#0xc POP {r4-r9,pc} - .endfunc .end diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_DequantTransformResidualFromPairAndAdd_s.S b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_DequantTransformResidualFromPairAndAdd_s.S index 4a74594..17c5d8b 100644 --- a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_DequantTransformResidualFromPairAndAdd_s.S +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_DequantTransformResidualFromPairAndAdd_s.S @@ -26,7 +26,6 @@ .text .global omxVCM4P10_DequantTransformResidualFromPairAndAdd - .func omxVCM4P10_DequantTransformResidualFromPairAndAdd omxVCM4P10_DequantTransformResidualFromPairAndAdd: PUSH {r4-r12,lr} VPUSH {d8-d9} @@ -131,7 +130,6 @@ L0x130: ADD sp,sp,#0x20 VPOP {d8-d9} POP {r4-r12,pc} - .endfunc .LarmVCM4P10_QPModuloTable: .word armVCM4P10_QPModuloTable-(P0+8) diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_FilterDeblockingChroma_HorEdge_I_s.S b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_FilterDeblockingChroma_HorEdge_I_s.S index f20fb78..4a83516 100644 --- a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_FilterDeblockingChroma_HorEdge_I_s.S +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_FilterDeblockingChroma_HorEdge_I_s.S @@ -26,7 +26,6 @@ .text .global omxVCM4P10_FilterDeblockingChroma_HorEdge_I - .func omxVCM4P10_FilterDeblockingChroma_HorEdge_I omxVCM4P10_FilterDeblockingChroma_HorEdge_I: PUSH {r4-r10,lr} VPUSH {d8-d15} @@ -96,7 +95,6 @@ L0xe4: MOV r0,#0 VPOP {d8-d15} POP {r4-r10,pc} - .endfunc .end diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_FilterDeblockingChroma_VerEdge_I_s.S b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_FilterDeblockingChroma_VerEdge_I_s.S index 003526e..fe10931 100644 --- a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_FilterDeblockingChroma_VerEdge_I_s.S +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_FilterDeblockingChroma_VerEdge_I_s.S @@ -26,7 +26,6 @@ .text .global omxVCM4P10_FilterDeblockingChroma_VerEdge_I - .func omxVCM4P10_FilterDeblockingChroma_VerEdge_I omxVCM4P10_FilterDeblockingChroma_VerEdge_I: PUSH {r4-r12,lr} VPUSH {d8-d15} @@ -132,7 +131,6 @@ L0x170: MOV r0,#0 VPOP {d8-d15} POP {r4-r12,pc} - .endfunc .end diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_FilterDeblockingLuma_HorEdge_I_s.S b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_FilterDeblockingLuma_HorEdge_I_s.S index 7ddc42e..84ffad2 100644 --- a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_FilterDeblockingLuma_HorEdge_I_s.S +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_FilterDeblockingLuma_HorEdge_I_s.S @@ -26,7 +26,6 @@ .text .global omxVCM4P10_FilterDeblockingLuma_HorEdge_I - .func omxVCM4P10_FilterDeblockingLuma_HorEdge_I omxVCM4P10_FilterDeblockingLuma_HorEdge_I: PUSH {r4-r12,lr} VPUSH {d8-d15} @@ -116,7 +115,6 @@ L0x130: MOV r0,#0 VPOP {d8-d15} POP {r4-r12,pc} - .endfunc .end diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_FilterDeblockingLuma_VerEdge_I_s.S b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_FilterDeblockingLuma_VerEdge_I_s.S index f71aceb..f2a3682 100644 --- a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_FilterDeblockingLuma_VerEdge_I_s.S +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_FilterDeblockingLuma_VerEdge_I_s.S @@ -26,7 +26,6 @@ .text .global omxVCM4P10_FilterDeblockingLuma_VerEdge_I - .func omxVCM4P10_FilterDeblockingLuma_VerEdge_I omxVCM4P10_FilterDeblockingLuma_VerEdge_I: PUSH {r4-r12,lr} VPUSH {d8-d15} @@ -166,7 +165,6 @@ L0x1f0: MOV r0,#0 VPOP {d8-d15} POP {r4-r12,pc} - .endfunc .end diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_InterpolateLuma_s.S b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_InterpolateLuma_s.S index 000fbeb..314eabd 100644 --- a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_InterpolateLuma_s.S +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_InterpolateLuma_s.S @@ -26,7 +26,6 @@ .text .global omxVCM4P10_InterpolateLuma - .func omxVCM4P10_InterpolateLuma omxVCM4P10_InterpolateLuma: PUSH {r4-r12,lr} VPUSH {d8-d15} @@ -332,7 +331,6 @@ L0x434: ADD sp,sp,#0x10 VPOP {d8-d15} POP {r4-r12,pc} - .endfunc .end diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_PredictIntraChroma_8x8_s.S b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_PredictIntraChroma_8x8_s.S index 4e2cff6..50d1350 100644 --- a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_PredictIntraChroma_8x8_s.S +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_PredictIntraChroma_8x8_s.S @@ -36,7 +36,6 @@ armVCM4P10_MultiplierTableChroma8x8: .hword 1, 2, 3,4 .global omxVCM4P10_PredictIntraChroma_8x8 - .func omxVCM4P10_PredictIntraChroma_8x8 omxVCM4P10_PredictIntraChroma_8x8: PUSH {r4-r10,lr} VPUSH {d8-d15} @@ -226,7 +225,6 @@ L0x28c: MOV r0,#0 VPOP {d8-d15} POP {r4-r10,pc} - .endfunc .end diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_PredictIntra_16x16_s.S b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_PredictIntra_16x16_s.S index c71c93b..0044636 100644 --- a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_PredictIntra_16x16_s.S +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_PredictIntra_16x16_s.S @@ -42,7 +42,6 @@ armVCM4P10_MultiplierTable16x16: .global omxVCM4P10_PredictIntra_16x16 - .func omxVCM4P10_PredictIntra_16x16 omxVCM4P10_PredictIntra_16x16: PUSH {r4-r12,lr} VPUSH {d8-d15} @@ -246,7 +245,6 @@ L0x2d4: MOV r0,#0 VPOP {d8-d15} POP {r4-r12,pc} - .endfunc .end diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_PredictIntra_4x4_s.S b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_PredictIntra_4x4_s.S index cd5d356..d4c8485 100644 --- a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_PredictIntra_4x4_s.S +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_PredictIntra_4x4_s.S @@ -35,7 +35,6 @@ armVCM4P10_pSwitchTable4x4: .word OMX_VC_4x4_HU-(P0+8) .global omxVCM4P10_PredictIntra_4x4 - .func omxVCM4P10_PredictIntra_4x4 omxVCM4P10_PredictIntra_4x4: PUSH {r4-r12,lr} VPUSH {d8-d12} @@ -270,6 +269,5 @@ L0x348: MOV r0,#0 VPOP {d8-d12} POP {r4-r12,pc} - .endfunc .end diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_TransformDequantChromaDCFromPair_s.S b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_TransformDequantChromaDCFromPair_s.S index 5570892..74f5103 100644 --- a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_TransformDequantChromaDCFromPair_s.S +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_TransformDequantChromaDCFromPair_s.S @@ -24,9 +24,9 @@ .arm .fpu neon .text + .syntax unified .global omxVCM4P10_TransformDequantChromaDCFromPair - .func omxVCM4P10_TransformDequantChromaDCFromPair omxVCM4P10_TransformDequantChromaDCFromPair: push {r4-r10, lr} ldr r9, [r0,#0] @@ -36,13 +36,13 @@ omxVCM4P10_TransformDequantChromaDCFromPair: ldrb r6, [r9], #1 unpackLoop: tst r6, #0x10 - ldrnesb r5, [r9, #1] - ldrneb r4, [r9], #2 + ldrsbne r5, [r9, #1] + ldrbne r4, [r9], #2 and r7, r8, r6, lsl #1 - ldreqsb r4, [r9], #1 + ldrsbeq r4, [r9], #1 orrne r4, r4, r5, lsl #8 tst r6, #0x20 - ldreqb r6, [r9], #1 + ldrbeq r6, [r9], #1 strh r4, [r1, r7] beq unpackLoop ldmia r1, {r3, r4} @@ -66,7 +66,6 @@ P1: add r6, pc vst1.16 {d2}, [r1] mov r0, #0 pop {r4-r10, pc} - .endfunc .LarmVCM4P10_QPDivTable: .word armVCM4P10_QPDivTable-(P0+8) diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_TransformDequantLumaDCFromPair_s.S b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_TransformDequantLumaDCFromPair_s.S index 5b6eee0..a01030a 100644 --- a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_TransformDequantLumaDCFromPair_s.S +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_TransformDequantLumaDCFromPair_s.S @@ -26,7 +26,6 @@ .text .global armVCM4P10_InvTransformDequantLumaDC4x4 - .func armVCM4P10_InvTransformDequantLumaDC4x4 armVCM4P10_InvTransformDequantLumaDC4x4: PUSH {r4-r6,lr} VPUSH {d8-d13} @@ -73,7 +72,6 @@ P1: ADD r3, pc VST1.16 {d0,d1,d2,d3},[r0] VPOP {d8-d13} POP {r4-r6,pc} - .endfunc .LarmVCM4P10_QPDivTable: .word armVCM4P10_QPDivTable-(P0+8) @@ -81,7 +79,6 @@ P1: ADD r3, pc .word armVCM4P10_VMatrixQPModTable-(P1+8) .global omxVCM4P10_TransformDequantLumaDCFromPair -.func omxVCM4P10_TransformDequantLumaDCFromPair omxVCM4P10_TransformDequantLumaDCFromPair: PUSH {r4-r6,lr} MOV r4,r1 @@ -92,7 +89,6 @@ omxVCM4P10_TransformDequantLumaDCFromPair: BL armVCM4P10_InvTransformDequantLumaDC4x4 MOV r0,#0 POP {r4-r6,pc} - .endfunc .end diff --git a/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm_gcc/asm_common.S b/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm_gcc/asm_common.S index f39f5c4..969a75c 100644 --- a/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm_gcc/asm_common.S +++ b/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm_gcc/asm_common.S @@ -31,11 +31,9 @@ .global \name .endif .type \name, %function - .func \name \name: .endm .macro endfunction - .endfunc .endm diff --git a/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm_gcc/h264bsdClearMbLayer.S b/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm_gcc/h264bsdClearMbLayer.S index c8a940e..3c2752f 100644 --- a/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm_gcc/h264bsdClearMbLayer.S +++ b/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm_gcc/h264bsdClearMbLayer.S @@ -16,7 +16,7 @@ #include "asm_common.S" - preserve8 + PRESERVE8 .fpu neon .text @@ -29,7 +29,7 @@ /* -- NEON registers -- */ -#define qZero Q0.U8 +#define qZero Q0 /*------------------------------------------------------------------------------ @@ -47,17 +47,17 @@ function h264bsdClearMbLayer, export=1 - VMOV qZero, #0 + VMOV.I8 qZero, #0 ADD pTmp, pMbLayer, #16 MOV step, #32 SUBS size, size, #64 loop: - VST1 {qZero}, [pMbLayer], step + VST1.8 {qZero}, [pMbLayer], step SUBS size, size, #64 - VST1 {qZero}, [pTmp], step - VST1 {qZero}, [pMbLayer], step - VST1 {qZero}, [pTmp], step + VST1.8 {qZero}, [pTmp], step + VST1.8 {qZero}, [pMbLayer], step + VST1.8 {qZero}, [pTmp], step BCS loop BX lr diff --git a/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm_gcc/h264bsdCountLeadingZeros.S b/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm_gcc/h264bsdCountLeadingZeros.S index 05253d0..b1c9f60 100644 --- a/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm_gcc/h264bsdCountLeadingZeros.S +++ b/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm_gcc/h264bsdCountLeadingZeros.S @@ -15,7 +15,7 @@ @ #include "asm_common.S" - preserve8 + PRESERVE8 .arm .text diff --git a/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm_gcc/h264bsdFillRow7.S b/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm_gcc/h264bsdFillRow7.S index 6955b9a..6ed6227 100644 --- a/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm_gcc/h264bsdFillRow7.S +++ b/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm_gcc/h264bsdFillRow7.S @@ -16,7 +16,7 @@ #include "asm_common.S" - preserve8 + PRESERVE8 .fpu neon .text @@ -33,12 +33,12 @@ /* -- NEON registers -- */ -#define qTmp0 Q0.U8 -#define qTmp1 Q1.U8 -#define dTmp0 D0.U8 -#define dTmp1 D1.U8 -#define dTmp2 D2.U8 -#define dTmp3 D3.U8 +#define qTmp0 Q0 +#define qTmp1 Q1 +#define dTmp0 D0 +#define dTmp1 D1 +#define dTmp2 D2 +#define dTmp3 D3 /* void h264bsdFillRow7(const u8 * ref, u8 * fill, i32 left, i32 center, @@ -74,40 +74,40 @@ switch_center: B case_8 case_8: - VLD1 {qTmp0, qTmp1}, [ref]! + VLD1.8 {qTmp0, qTmp1}, [ref]! SUB center, center, #32 - VST1 {qTmp0}, [fill]! - VST1 {qTmp1}, [fill]! + VST1.8 {qTmp0}, [fill]! + VST1.8 {qTmp1}, [fill]! B loop_center case_7: - VLD1 {dTmp0,dTmp1,dTmp2}, [ref]! + VLD1.8 {dTmp0,dTmp1,dTmp2}, [ref]! SUB center, center, #28 LDR tmp2, [ref], #4 - VST1 {dTmp0,dTmp1,dTmp2}, [fill]! + VST1.8 {dTmp0,dTmp1,dTmp2}, [fill]! STR tmp2, [fill],#4 B loop_center case_6: - VLD1 {dTmp0,dTmp1,dTmp2}, [ref]! + VLD1.8 {dTmp0,dTmp1,dTmp2}, [ref]! SUB center, center, #24 - VST1 {dTmp0,dTmp1,dTmp2}, [fill]! + VST1.8 {dTmp0,dTmp1,dTmp2}, [fill]! B loop_center case_5: - VLD1 {qTmp0}, [ref]! + VLD1.8 {qTmp0}, [ref]! SUB center, center, #20 LDR tmp2, [ref], #4 - VST1 {qTmp0}, [fill]! + VST1.8 {qTmp0}, [fill]! STR tmp2, [fill],#4 B loop_center case_4: - VLD1 {qTmp0}, [ref]! + VLD1.8 {qTmp0}, [ref]! SUB center, center, #16 - VST1 {qTmp0}, [fill]! + VST1.8 {qTmp0}, [fill]! B loop_center case_3: - VLD1 {dTmp0}, [ref]! + VLD1.8 {dTmp0}, [ref]! SUB center, center, #12 LDR tmp2, [ref], #4 - VST1 dTmp0, [fill]! + VST1.8 dTmp0, [fill]! STR tmp2, [fill],#4 B loop_center case_2: diff --git a/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm_gcc/h264bsdFlushBits.S b/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm_gcc/h264bsdFlushBits.S index b3f3191..aa88471 100644 --- a/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm_gcc/h264bsdFlushBits.S +++ b/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm_gcc/h264bsdFlushBits.S @@ -16,7 +16,7 @@ #include "asm_common.S" - preserve8 + PRESERVE8 .arm .text diff --git a/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm_gcc/h264bsdWriteMacroblock.S b/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm_gcc/h264bsdWriteMacroblock.S index 495d560..4093b92 100644 --- a/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm_gcc/h264bsdWriteMacroblock.S +++ b/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm_gcc/h264bsdWriteMacroblock.S @@ -16,8 +16,8 @@ #include "asm_common.S" - require8 - preserve8 + REQUIRE8 + PRESERVE8 .arm .fpu neon @@ -34,39 +34,39 @@ /* -- NEON registers -- */ -#define qRow0 Q0.U8 -#define qRow1 Q1.U8 -#define qRow2 Q2.U8 -#define qRow3 Q3.U8 -#define qRow4 Q4.U8 -#define qRow5 Q5.U8 -#define qRow6 Q6.U8 -#define qRow7 Q7.U8 -#define qRow8 Q8.U8 -#define qRow9 Q9.U8 -#define qRow10 Q10.U8 -#define qRow11 Q11.U8 -#define qRow12 Q12.U8 -#define qRow13 Q13.U8 -#define qRow14 Q14.U8 -#define qRow15 Q15.U8 - -#define dRow0 D0.U8 -#define dRow1 D1.U8 -#define dRow2 D2.U8 -#define dRow3 D3.U8 -#define dRow4 D4.U8 -#define dRow5 D5.U8 -#define dRow6 D6.U8 -#define dRow7 D7.U8 -#define dRow8 D8.U8 -#define dRow9 D9.U8 -#define dRow10 D10.U8 -#define dRow11 D11.U8 -#define dRow12 D12.U8 -#define dRow13 D13.U8 -#define dRow14 D14.U8 -#define dRow15 D15.U8 +#define qRow0 Q0 +#define qRow1 Q1 +#define qRow2 Q2 +#define qRow3 Q3 +#define qRow4 Q4 +#define qRow5 Q5 +#define qRow6 Q6 +#define qRow7 Q7 +#define qRow8 Q8 +#define qRow9 Q9 +#define qRow10 Q10 +#define qRow11 Q11 +#define qRow12 Q12 +#define qRow13 Q13 +#define qRow14 Q14 +#define qRow15 Q15 + +#define dRow0 D0 +#define dRow1 D1 +#define dRow2 D2 +#define dRow3 D3 +#define dRow4 D4 +#define dRow5 D5 +#define dRow6 D6 +#define dRow7 D7 +#define dRow8 D8 +#define dRow9 D9 +#define dRow10 D10 +#define dRow11 D11 +#define dRow12 D12 +#define dRow13 D13 +#define dRow14 D14 +#define dRow15 D15 /*------------------------------------------------------------------------------ @@ -99,59 +99,58 @@ function h264bsdWriteMacroblock, export=1 @ Write luma - VLD1 {qRow0, qRow1}, [data]! + VLD1.8 {qRow0, qRow1}, [data]! LSL width, width, #4 - VLD1 {qRow2, qRow3}, [data]! + VLD1.8 {qRow2, qRow3}, [data]! LSR cwidth, width, #1 - VST1 {qRow0}, [luma,:128], width - VLD1 {qRow4, qRow5}, [data]! - VST1 {qRow1}, [luma,:128], width - VLD1 {qRow6, qRow7}, [data]! - VST1 {qRow2}, [luma,:128], width - VLD1 {qRow8, qRow9}, [data]! - VST1 {qRow3}, [luma,:128], width - VLD1 {qRow10, qRow11}, [data]! - VST1 {qRow4}, [luma,:128], width - VLD1 {qRow12, qRow13}, [data]! - VST1 {qRow5}, [luma,:128], width - VLD1 {qRow14, qRow15}, [data]! - VST1 {qRow6}, [luma,:128], width - - VLD1 {qRow0, qRow1}, [data]! ;//cb rows 0,1,2,3 - VST1 {qRow7}, [luma,:128], width - VLD1 {qRow2, qRow3}, [data]! ;//cb rows 4,5,6,7 - VST1 {qRow8}, [luma,:128], width - VLD1 {qRow4, qRow5}, [data]! ;//cr rows 0,1,2,3 - VST1 {qRow9}, [luma,:128], width - VLD1 {qRow6, qRow7}, [data]! ;//cr rows 4,5,6,7 - VST1 {qRow10}, [luma,:128], width - VST1 {dRow0}, [cb,:64], cwidth - VST1 {dRow8}, [cr,:64], cwidth - VST1 {qRow11}, [luma,:128], width - VST1 {dRow1}, [cb,:64], cwidth - VST1 {dRow9}, [cr,:64], cwidth - VST1 {qRow12}, [luma,:128], width - VST1 {dRow2}, [cb,:64], cwidth - VST1 {dRow10}, [cr,:64], cwidth - VST1 {qRow13}, [luma,:128], width - VST1 {dRow3}, [cb,:64], cwidth - VST1 {dRow11}, [cr,:64], cwidth - VST1 {qRow14}, [luma,:128], width - VST1 {dRow4}, [cb,:64], cwidth - VST1 {dRow12}, [cr,:64], cwidth - VST1 {qRow15}, [luma] - VST1 {dRow5}, [cb,:64], cwidth - VST1 {dRow13}, [cr,:64], cwidth - VST1 {dRow6}, [cb,:64], cwidth - VST1 {dRow14}, [cr,:64], cwidth - VST1 {dRow7}, [cb,:64] - VST1 {dRow15}, [cr,:64] + VST1.8 {qRow0}, [luma,:128], width + VLD1.8 {qRow4, qRow5}, [data]! + VST1.8 {qRow1}, [luma,:128], width + VLD1.8 {qRow6, qRow7}, [data]! + VST1.8 {qRow2}, [luma,:128], width + VLD1.8 {qRow8, qRow9}, [data]! + VST1.8 {qRow3}, [luma,:128], width + VLD1.8 {qRow10, qRow11}, [data]! + VST1.8 {qRow4}, [luma,:128], width + VLD1.8 {qRow12, qRow13}, [data]! + VST1.8 {qRow5}, [luma,:128], width + VLD1.8 {qRow14, qRow15}, [data]! + VST1.8 {qRow6}, [luma,:128], width + + VLD1.8 {qRow0, qRow1}, [data]! ;//cb rows 0,1,2,3 + VST1.8 {qRow7}, [luma,:128], width + VLD1.8 {qRow2, qRow3}, [data]! ;//cb rows 4,5,6,7 + VST1.8 {qRow8}, [luma,:128], width + VLD1.8 {qRow4, qRow5}, [data]! ;//cr rows 0,1,2,3 + VST1.8 {qRow9}, [luma,:128], width + VLD1.8 {qRow6, qRow7}, [data]! ;//cr rows 4,5,6,7 + VST1.8 {qRow10}, [luma,:128], width + VST1.8 {dRow0}, [cb,:64], cwidth + VST1.8 {dRow8}, [cr,:64], cwidth + VST1.8 {qRow11}, [luma,:128], width + VST1.8 {dRow1}, [cb,:64], cwidth + VST1.8 {dRow9}, [cr,:64], cwidth + VST1.8 {qRow12}, [luma,:128], width + VST1.8 {dRow2}, [cb,:64], cwidth + VST1.8 {dRow10}, [cr,:64], cwidth + VST1.8 {qRow13}, [luma,:128], width + VST1.8 {dRow3}, [cb,:64], cwidth + VST1.8 {dRow11}, [cr,:64], cwidth + VST1.8 {qRow14}, [luma,:128], width + VST1.8 {dRow4}, [cb,:64], cwidth + VST1.8 {dRow12}, [cr,:64], cwidth + VST1.8 {qRow15}, [luma] + VST1.8 {dRow5}, [cb,:64], cwidth + VST1.8 {dRow13}, [cr,:64], cwidth + VST1.8 {dRow6}, [cb,:64], cwidth + VST1.8 {dRow14}, [cr,:64], cwidth + VST1.8 {dRow7}, [cb,:64] + VST1.8 {dRow15}, [cr,:64] VPOP {q4-q7} POP {r4-r6,pc} @ BX lr - .endfunc -- cgit v1.1