diff options
Diffstat (limited to 'media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm_gcc')
6 files changed, 109 insertions, 112 deletions
diff --git a/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm_gcc/asm_common.S b/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm_gcc/asm_common.S index f39f5c4..969a75c 100644 --- a/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm_gcc/asm_common.S +++ b/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm_gcc/asm_common.S @@ -31,11 +31,9 @@ .global \name .endif .type \name, %function - .func \name \name: .endm .macro endfunction - .endfunc .endm diff --git a/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm_gcc/h264bsdClearMbLayer.S b/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm_gcc/h264bsdClearMbLayer.S index c8a940e..3c2752f 100644 --- a/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm_gcc/h264bsdClearMbLayer.S +++ b/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm_gcc/h264bsdClearMbLayer.S @@ -16,7 +16,7 @@ #include "asm_common.S" - preserve8 + PRESERVE8 .fpu neon .text @@ -29,7 +29,7 @@ /* -- NEON registers -- */ -#define qZero Q0.U8 +#define qZero Q0 /*------------------------------------------------------------------------------ @@ -47,17 +47,17 @@ function h264bsdClearMbLayer, export=1 - VMOV qZero, #0 + VMOV.I8 qZero, #0 ADD pTmp, pMbLayer, #16 MOV step, #32 SUBS size, size, #64 loop: - VST1 {qZero}, [pMbLayer], step + VST1.8 {qZero}, [pMbLayer], step SUBS size, size, #64 - VST1 {qZero}, [pTmp], step - VST1 {qZero}, [pMbLayer], step - VST1 {qZero}, [pTmp], step + VST1.8 {qZero}, [pTmp], step + VST1.8 {qZero}, [pMbLayer], step + VST1.8 {qZero}, [pTmp], step BCS loop BX lr diff --git a/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm_gcc/h264bsdCountLeadingZeros.S b/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm_gcc/h264bsdCountLeadingZeros.S index 05253d0..b1c9f60 100644 --- a/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm_gcc/h264bsdCountLeadingZeros.S +++ b/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm_gcc/h264bsdCountLeadingZeros.S @@ -15,7 +15,7 @@ @ #include "asm_common.S" - preserve8 + PRESERVE8 .arm .text diff --git a/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm_gcc/h264bsdFillRow7.S b/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm_gcc/h264bsdFillRow7.S index 6955b9a..6ed6227 100644 --- a/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm_gcc/h264bsdFillRow7.S +++ b/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm_gcc/h264bsdFillRow7.S @@ -16,7 +16,7 @@ #include "asm_common.S" - preserve8 + PRESERVE8 .fpu neon .text @@ -33,12 +33,12 @@ /* -- NEON registers -- */ -#define qTmp0 Q0.U8 -#define qTmp1 Q1.U8 -#define dTmp0 D0.U8 -#define dTmp1 D1.U8 -#define dTmp2 D2.U8 -#define dTmp3 D3.U8 +#define qTmp0 Q0 +#define qTmp1 Q1 +#define dTmp0 D0 +#define dTmp1 D1 +#define dTmp2 D2 +#define dTmp3 D3 /* void h264bsdFillRow7(const u8 * ref, u8 * fill, i32 left, i32 center, @@ -74,40 +74,40 @@ switch_center: B case_8 case_8: - VLD1 {qTmp0, qTmp1}, [ref]! + VLD1.8 {qTmp0, qTmp1}, [ref]! SUB center, center, #32 - VST1 {qTmp0}, [fill]! - VST1 {qTmp1}, [fill]! + VST1.8 {qTmp0}, [fill]! + VST1.8 {qTmp1}, [fill]! B loop_center case_7: - VLD1 {dTmp0,dTmp1,dTmp2}, [ref]! + VLD1.8 {dTmp0,dTmp1,dTmp2}, [ref]! SUB center, center, #28 LDR tmp2, [ref], #4 - VST1 {dTmp0,dTmp1,dTmp2}, [fill]! + VST1.8 {dTmp0,dTmp1,dTmp2}, [fill]! STR tmp2, [fill],#4 B loop_center case_6: - VLD1 {dTmp0,dTmp1,dTmp2}, [ref]! + VLD1.8 {dTmp0,dTmp1,dTmp2}, [ref]! SUB center, center, #24 - VST1 {dTmp0,dTmp1,dTmp2}, [fill]! + VST1.8 {dTmp0,dTmp1,dTmp2}, [fill]! B loop_center case_5: - VLD1 {qTmp0}, [ref]! + VLD1.8 {qTmp0}, [ref]! SUB center, center, #20 LDR tmp2, [ref], #4 - VST1 {qTmp0}, [fill]! + VST1.8 {qTmp0}, [fill]! STR tmp2, [fill],#4 B loop_center case_4: - VLD1 {qTmp0}, [ref]! + VLD1.8 {qTmp0}, [ref]! SUB center, center, #16 - VST1 {qTmp0}, [fill]! + VST1.8 {qTmp0}, [fill]! B loop_center case_3: - VLD1 {dTmp0}, [ref]! + VLD1.8 {dTmp0}, [ref]! SUB center, center, #12 LDR tmp2, [ref], #4 - VST1 dTmp0, [fill]! + VST1.8 dTmp0, [fill]! STR tmp2, [fill],#4 B loop_center case_2: diff --git a/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm_gcc/h264bsdFlushBits.S b/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm_gcc/h264bsdFlushBits.S index b3f3191..aa88471 100644 --- a/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm_gcc/h264bsdFlushBits.S +++ b/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm_gcc/h264bsdFlushBits.S @@ -16,7 +16,7 @@ #include "asm_common.S" - preserve8 + PRESERVE8 .arm .text diff --git a/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm_gcc/h264bsdWriteMacroblock.S b/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm_gcc/h264bsdWriteMacroblock.S index 495d560..4093b92 100644 --- a/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm_gcc/h264bsdWriteMacroblock.S +++ b/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm_gcc/h264bsdWriteMacroblock.S @@ -16,8 +16,8 @@ #include "asm_common.S" - require8 - preserve8 + REQUIRE8 + PRESERVE8 .arm .fpu neon @@ -34,39 +34,39 @@ /* -- NEON registers -- */ -#define qRow0 Q0.U8 -#define qRow1 Q1.U8 -#define qRow2 Q2.U8 -#define qRow3 Q3.U8 -#define qRow4 Q4.U8 -#define qRow5 Q5.U8 -#define qRow6 Q6.U8 -#define qRow7 Q7.U8 -#define qRow8 Q8.U8 -#define qRow9 Q9.U8 -#define qRow10 Q10.U8 -#define qRow11 Q11.U8 -#define qRow12 Q12.U8 -#define qRow13 Q13.U8 -#define qRow14 Q14.U8 -#define qRow15 Q15.U8 - -#define dRow0 D0.U8 -#define dRow1 D1.U8 -#define dRow2 D2.U8 -#define dRow3 D3.U8 -#define dRow4 D4.U8 -#define dRow5 D5.U8 -#define dRow6 D6.U8 -#define dRow7 D7.U8 -#define dRow8 D8.U8 -#define dRow9 D9.U8 -#define dRow10 D10.U8 -#define dRow11 D11.U8 -#define dRow12 D12.U8 -#define dRow13 D13.U8 -#define dRow14 D14.U8 -#define dRow15 D15.U8 +#define qRow0 Q0 +#define qRow1 Q1 +#define qRow2 Q2 +#define qRow3 Q3 +#define qRow4 Q4 +#define qRow5 Q5 +#define qRow6 Q6 +#define qRow7 Q7 +#define qRow8 Q8 +#define qRow9 Q9 +#define qRow10 Q10 +#define qRow11 Q11 +#define qRow12 Q12 +#define qRow13 Q13 +#define qRow14 Q14 +#define qRow15 Q15 + +#define dRow0 D0 +#define dRow1 D1 +#define dRow2 D2 +#define dRow3 D3 +#define dRow4 D4 +#define dRow5 D5 +#define dRow6 D6 +#define dRow7 D7 +#define dRow8 D8 +#define dRow9 D9 +#define dRow10 D10 +#define dRow11 D11 +#define dRow12 D12 +#define dRow13 D13 +#define dRow14 D14 +#define dRow15 D15 /*------------------------------------------------------------------------------ @@ -99,59 +99,58 @@ function h264bsdWriteMacroblock, export=1 @ Write luma - VLD1 {qRow0, qRow1}, [data]! + VLD1.8 {qRow0, qRow1}, [data]! LSL width, width, #4 - VLD1 {qRow2, qRow3}, [data]! + VLD1.8 {qRow2, qRow3}, [data]! LSR cwidth, width, #1 - VST1 {qRow0}, [luma,:128], width - VLD1 {qRow4, qRow5}, [data]! - VST1 {qRow1}, [luma,:128], width - VLD1 {qRow6, qRow7}, [data]! - VST1 {qRow2}, [luma,:128], width - VLD1 {qRow8, qRow9}, [data]! - VST1 {qRow3}, [luma,:128], width - VLD1 {qRow10, qRow11}, [data]! - VST1 {qRow4}, [luma,:128], width - VLD1 {qRow12, qRow13}, [data]! - VST1 {qRow5}, [luma,:128], width - VLD1 {qRow14, qRow15}, [data]! - VST1 {qRow6}, [luma,:128], width - - VLD1 {qRow0, qRow1}, [data]! ;//cb rows 0,1,2,3 - VST1 {qRow7}, [luma,:128], width - VLD1 {qRow2, qRow3}, [data]! ;//cb rows 4,5,6,7 - VST1 {qRow8}, [luma,:128], width - VLD1 {qRow4, qRow5}, [data]! ;//cr rows 0,1,2,3 - VST1 {qRow9}, [luma,:128], width - VLD1 {qRow6, qRow7}, [data]! ;//cr rows 4,5,6,7 - VST1 {qRow10}, [luma,:128], width - VST1 {dRow0}, [cb,:64], cwidth - VST1 {dRow8}, [cr,:64], cwidth - VST1 {qRow11}, [luma,:128], width - VST1 {dRow1}, [cb,:64], cwidth - VST1 {dRow9}, [cr,:64], cwidth - VST1 {qRow12}, [luma,:128], width - VST1 {dRow2}, [cb,:64], cwidth - VST1 {dRow10}, [cr,:64], cwidth - VST1 {qRow13}, [luma,:128], width - VST1 {dRow3}, [cb,:64], cwidth - VST1 {dRow11}, [cr,:64], cwidth - VST1 {qRow14}, [luma,:128], width - VST1 {dRow4}, [cb,:64], cwidth - VST1 {dRow12}, [cr,:64], cwidth - VST1 {qRow15}, [luma] - VST1 {dRow5}, [cb,:64], cwidth - VST1 {dRow13}, [cr,:64], cwidth - VST1 {dRow6}, [cb,:64], cwidth - VST1 {dRow14}, [cr,:64], cwidth - VST1 {dRow7}, [cb,:64] - VST1 {dRow15}, [cr,:64] + VST1.8 {qRow0}, [luma,:128], width + VLD1.8 {qRow4, qRow5}, [data]! + VST1.8 {qRow1}, [luma,:128], width + VLD1.8 {qRow6, qRow7}, [data]! + VST1.8 {qRow2}, [luma,:128], width + VLD1.8 {qRow8, qRow9}, [data]! + VST1.8 {qRow3}, [luma,:128], width + VLD1.8 {qRow10, qRow11}, [data]! + VST1.8 {qRow4}, [luma,:128], width + VLD1.8 {qRow12, qRow13}, [data]! + VST1.8 {qRow5}, [luma,:128], width + VLD1.8 {qRow14, qRow15}, [data]! + VST1.8 {qRow6}, [luma,:128], width + + VLD1.8 {qRow0, qRow1}, [data]! ;//cb rows 0,1,2,3 + VST1.8 {qRow7}, [luma,:128], width + VLD1.8 {qRow2, qRow3}, [data]! ;//cb rows 4,5,6,7 + VST1.8 {qRow8}, [luma,:128], width + VLD1.8 {qRow4, qRow5}, [data]! ;//cr rows 0,1,2,3 + VST1.8 {qRow9}, [luma,:128], width + VLD1.8 {qRow6, qRow7}, [data]! ;//cr rows 4,5,6,7 + VST1.8 {qRow10}, [luma,:128], width + VST1.8 {dRow0}, [cb,:64], cwidth + VST1.8 {dRow8}, [cr,:64], cwidth + VST1.8 {qRow11}, [luma,:128], width + VST1.8 {dRow1}, [cb,:64], cwidth + VST1.8 {dRow9}, [cr,:64], cwidth + VST1.8 {qRow12}, [luma,:128], width + VST1.8 {dRow2}, [cb,:64], cwidth + VST1.8 {dRow10}, [cr,:64], cwidth + VST1.8 {qRow13}, [luma,:128], width + VST1.8 {dRow3}, [cb,:64], cwidth + VST1.8 {dRow11}, [cr,:64], cwidth + VST1.8 {qRow14}, [luma,:128], width + VST1.8 {dRow4}, [cb,:64], cwidth + VST1.8 {dRow12}, [cr,:64], cwidth + VST1.8 {qRow15}, [luma] + VST1.8 {dRow5}, [cb,:64], cwidth + VST1.8 {dRow13}, [cr,:64], cwidth + VST1.8 {dRow6}, [cb,:64], cwidth + VST1.8 {dRow14}, [cr,:64], cwidth + VST1.8 {dRow7}, [cb,:64] + VST1.8 {dRow15}, [cr,:64] VPOP {q4-q7} POP {r4-r6,pc} @ BX lr - .endfunc |