/*
* Copyright (C) 2007-2008 ARM Limited
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
/*
 * Description:
 * Performs in-place deblock filtering on the vertical edges of an
 * 8x8 chroma block, as defined by H.264 (MPEG-4 Part 10 / AVC).
 *
 * OpenMAX DL prototype:
 * omxVCM4P10_FilterDeblockingChroma_VerEdge_I(OMX_U8 *pSrcDst,
 *     OMX_S32 srcdstStep, const OMX_U8 *pAlpha, const OMX_U8 *pBeta,
 *     const OMX_U8 *pThresholds, const OMX_U8 *pBS)
 */
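/*
 * For reference, a sketch of the per-pixel chroma filters from
 * H.264 clause 8.7 that the two helper kernels called below are
 * expected to implement (inferred from the call sites here, not
 * verified against the helper sources):
 *
 *   bS in {1,2,3}:  tC    = tC0 + 1
 *                   delta = Clip3(-tC, tC,
 *                           (((q0 - p0) << 2) + (p1 - q1) + 4) >> 3)
 *                   p0'   = Clip1(p0 + delta)
 *                   q0'   = Clip1(q0 - delta)
 *   bS == 4:        p0'   = (2*p1 + p0 + q1 + 2) >> 2
 *                   q0'   = (2*q1 + q0 + p1 + 2) >> 2
 */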
.eabi_attribute 24, 1   @ Tag_ABI_align_needed: code requires 8-byte data alignment
.eabi_attribute 25, 1   @ Tag_ABI_align_preserved: code preserves 8-byte stack alignment
.arm
.fpu neon
.text
.global omxVCM4P10_FilterDeblockingChroma_VerEdge_I
.func omxVCM4P10_FilterDeblockingChroma_VerEdge_I
omxVCM4P10_FilterDeblockingChroma_VerEdge_I:
PUSH {r4-r12,lr}
VPUSH {d8-d15}
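@ Arguments (per the prototype above):
@   r0 = pSrcDst, r1 = srcdstStep, r2 = pAlpha, r3 = pBeta
@ The pushes above occupy 10*4 + 8*8 = 0x68 bytes, so the stack
@ arguments land at [sp,#0x68] = pThresholds and [sp,#0x6c] = pBS.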
VLD1.8 {d0[]},[r2]!
SUB r0,r0,#4
VLD1.8 {d2[]},[r3]!
LDR r4,[sp,#0x6c]
LDR r5,[sp,#0x68]
LDR r8, =0x4040404
LDR r9, =0x3030303
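@ Byte masks for testing the four packed boundary-strength (bS)
@ bytes at once: bS & 3 is non-zero for bS in {1,2,3}, and bS & 4
@ is non-zero only for bS == 4.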
VMOV.I8 d14,#0
VMOV.I8 d15,#0x1
VMOV.I16 d1,#0x4
MOV r7,#0x40000000
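@ r7 is the edge-loop counter: ADDS r7,r7,r7 reaches zero on the
@ second doubling, so the loop below runs exactly twice, once for
@ each vertical edge of the 8x8 chroma block.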
L0x34:
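@ Load four packed bS bytes for this edge (one per pair of rows),
@ then fetch the 8x8 pixel block straddling the edge: eight 8-byte
@ rows through the alternating pointers r0 and r10, each stepping
@ by 2*srcdstStep.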
LDR r6,[r4],#8
ADD r10,r0,r1
ADD lr,r1,r1
VLD1.8 {d7},[r0],lr
VLD1.8 {d8},[r10],lr
VLD1.8 {d5},[r0],lr
VLD1.8 {d10},[r10],lr
VLD1.8 {d6},[r0],lr
VLD1.8 {d9},[r10],lr
VLD1.8 {d4},[r0],lr
VLD1.8 {d11},[r10],lr
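@ 8x8 byte transpose (VZIP.8 / VZIP.16 / VTRN.32) so that each d
@ register ends up holding one pixel column. Following the zip/trn
@ pattern, the columns around the edge come out as:
@   d6 = p2, d5 = p1, d4 = p0 | d8 = q0, d9 = q1, d10 = q2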
VZIP.8 d7,d8
VZIP.8 d5,d10
VZIP.8 d6,d9
VZIP.8 d4,d11
VZIP.16 d7,d5
VZIP.16 d8,d10
VZIP.16 d6,d4
VZIP.16 d9,d11
VTRN.32 d7,d6
VTRN.32 d5,d4
VTRN.32 d10,d11
VTRN.32 d8,d9
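@ Build the filtering-decision masks (alpha in d0, beta in d2):
@   d16 = |p0-q0| < alpha, ANDed below with bS > 0 and with
@         max(|p1-p0|, |q1-q0|) < beta
@   d17 = d16 & (|p2-p0| < beta), d12 = d16 & (|q2-q0| < beta),
@         both handed to the helper kernels.
@ If all four bS bytes are zero, nothing is filtered on this edge.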
CMP r6,#0
VABD.U8 d19,d6,d4
VABD.U8 d13,d4,d8
BEQ L0x170
VABD.U8 d12,d5,d4
VABD.U8 d18,d9,d8
VMOV.32 d26[0],r6
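@ Widen the four bS bytes to 16-bit lanes (one lane per two rows);
@ VCGT.S16 then yields an all-ones lane wherever bS > 0.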
VCGT.U8 d16,d0,d13
VMAX.U8 d12,d18,d12
VMOVL.U8 q13,d26
VABD.U8 d17,d10,d8
VCGT.S16 d27,d26,#0
VCGT.U8 d12,d2,d12
VCGT.U8 d19,d2,d19
VAND d16,d16,d27
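@ Dispatch to the two filter kernels: TST r6,r9 (any bS in {1,2,3})
@ and TST r6,r8 (any bS == 4) set the flags for the conditional
@ calls; the NEON ops interleaved between them leave the flags
@ untouched.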
TST r6,r9
VCGT.U8 d17,d2,d17
VAND d16,d16,d12
VAND d12,d16,d17
VAND d17,d16,d19
BLNE armVCM4P10_DeblockingChromabSLT4_unsafe
TST r6,r8
SUB r0,r0,r1,LSL #3
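@ Rewind r0 to the top row of the block, and turn d26 into a
@ per-lane "bS == 4" mask (d1 holds #4) for merging the two kernel
@ results below.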
VTST.16 d26,d26,d1
BLNE armVCM4P10_DeblockingChromabSGE4_unsafe
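@ Merge and select the final pixels (kernel output registers
@ inferred from the operands: bSLT4 in d29/d24, bSGE4 in d13/d31):
@   VBIT takes the bSGE4 results where bS == 4 (d26);
@   VBIF restores the original p0/q0 (d4/d8) wherever the overall
@   filter condition d16 is false.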
VBIT d29,d13,d26
VBIT d24,d31,d26
ADD r10,r0,#3
VBIF d29,d4,d16
ADD r12,r10,r1
ADD lr,r1,r1
VBIF d24,d8,d16
ADDS r7,r7,r7
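@ Store the filtered columns back one lane (row) at a time:
@ p0' (d29) to column 3 and q0' (d24) to column 4 of the block,
@ alternating between the two row pointers. ADDS above advances
@ the loop counter; the flags survive the stores for BNE below.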
VST1.8 {d29[0]},[r10],lr
VST1.8 {d29[1]},[r12],lr
VST1.8 {d29[2]},[r10],lr
VST1.8 {d29[3]},[r12],lr
VST1.8 {d29[4]},[r10],lr
VST1.8 {d29[5]},[r12],lr
VST1.8 {d29[6]},[r10],lr
VST1.8 {d29[7]},[r12],lr
ADD r12,r0,#4
ADD r10,r12,r1
VST1.8 {d24[0]},[r12],lr
VST1.8 {d24[1]},[r10],lr
VST1.8 {d24[2]},[r12],lr
VST1.8 {d24[3]},[r10],lr
VST1.8 {d24[4]},[r12],lr
VST1.8 {d24[5]},[r10],lr
VST1.8 {d24[6]},[r12],lr
VST1.8 {d24[7]},[r10],lr
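@ Step r0 to the next vertical edge and loop for the second pass.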
ADD r0,r0,#4
BNE L0x34
MOV r0,#0
VPOP {d8-d15}
POP {r4-r12,pc}
L0x170:
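@ bS == 0 for the whole edge: nothing to filter. Reload alpha and
@ beta (r2/r3 already point at the internal-edge values thanks to
@ the post-incremented loads above), step past this edge's
@ thresholds, and fall back into the loop for the remaining edge.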
VLD1.8 {d0[]},[r2]
ADD r0,r0,#4
SUB r0,r0,r1,LSL #3
ADDS r7,r7,r7
VLD1.8 {d2[]},[r3]
ADD r5,r5,#4
BNE L0x34
MOV r0,#0
VPOP {d8-d15}
POP {r4-r12,pc}
.endfunc
.end