/*
 *
 *  Bluetooth low-complexity, subband codec (SBC) library
 *
 *  Copyright (C) 2008-2010  Nokia Corporation
 *  Copyright (C) 2004-2010  Marcel Holtmann <marcel@holtmann.org>
 *  Copyright (C) 2004-2005  Henryk Ploetz <henryk@ploetzli.ch>
 *  Copyright (C) 2005-2006  Brad Midgley <bmidgley@xmission.com>
 *
 *
 *  This library is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU Lesser General Public
 *  License as published by the Free Software Foundation; either
 *  version 2.1 of the License, or (at your option) any later version.
 *
 *  This library is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 *  Lesser General Public License for more details.
 *
 *  You should have received a copy of the GNU Lesser General Public
 *  License along with this library; if not, write to the Free Software
 *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 *
 */
31 #include "sbc_tables.h"
33 #include "sbc_primitives_armv6.h"
/*
 * ARMv6 optimizations. The instructions are scheduled for the ARM11 pipeline.
 */
39 #ifdef SBC_BUILD_WITH_ARMV6_SUPPORT
/*
 * 4-subband analysis kernel for a single block.
 *
 * This is a naked function: it has no C-level parameters and is meant to be
 * invoked through a cast that supplies the prototype, so that on entry
 * r0 = in, r1 = out, r2 = consts.
 *
 * Stage 1: each t1[k] (k = 0..3) is built from five dual 16-bit
 * multiply-accumulates (smlad) of words read from r0 against words read from
 * the same offsets of r2, seeded with the 0x8000 rounding constant. The sums
 * are narrowed by 16 bits and packed pairwise with pkhtb.
 *
 * Stage 2: the packed pairs are multiplied with the table stored at
 * r2 + 80 .. r2 + 111 and the four 32-bit results are stored at r1.
 *
 * All callee-saved registers that the body touches (r4-r11) are saved on
 * entry and restored before returning. lr is saved by the first push, which
 * is what allows r14 to be reused as a scratch/accumulator register.
 */
static void __attribute__((naked)) sbc_analyze_four_armv6()
{
	/* r0 = in, r1 = out, r2 = consts */
	__asm__ volatile (
		"push   {r1, r4-r7, lr}\n"
		"push   {r8-r11}\n"		/* r8-r11 are used as scratch below */
		"ldrd   r4,  r5,  [r0, #0]\n"
		"ldrd   r6,  r7,  [r2, #0]\n"
		"ldrd   r8,  r9,  [r0, #16]\n"
		"ldrd   r10, r11, [r2, #16]\n"
		"mov    r14, #0x8000\n"		/* rounding for the later asr #16 */
		"smlad  r3,  r4,  r6,  r14\n"
		"smlad  r12, r5,  r7,  r14\n"
		"ldrd   r4,  r5,  [r0, #32]\n"
		"ldrd   r6,  r7,  [r2, #32]\n"
		"smlad  r3,  r8,  r10, r3\n"
		"smlad  r12, r9,  r11, r12\n"
		"ldrd   r8,  r9,  [r0, #48]\n"
		"ldrd   r10, r11, [r2, #48]\n"
		"smlad  r3,  r4,  r6,  r3\n"
		"smlad  r12, r5,  r7,  r12\n"
		"ldrd   r4,  r5,  [r0, #64]\n"
		"ldrd   r6,  r7,  [r2, #64]\n"
		"smlad  r3,  r8,  r10, r3\n"
		"smlad  r12, r9,  r11, r12\n"
		"ldrd   r8,  r9,  [r0, #8]\n"
		"ldrd   r10, r11, [r2, #8]\n"
		"smlad  r3,  r4,  r6,  r3\n"  /* t1[0] is done */
		"smlad  r12, r5,  r7,  r12\n" /* t1[1] is done */
		"ldrd   r4,  r5,  [r0, #24]\n"
		"ldrd   r6,  r7,  [r2, #24]\n"
		"pkhtb  r3,  r12, r3, asr #16\n" /* combine t1[0] and t1[1] */
		"smlad  r12, r8,  r10, r14\n"
		"smlad  r14, r9,  r11, r14\n"	/* last use of the rounding seed */
		"ldrd   r8,  r9,  [r0, #40]\n"
		"ldrd   r10, r11, [r2, #40]\n"
		"smlad  r12, r4,  r6,  r12\n"
		"smlad  r14, r5,  r7,  r14\n"
		"ldrd   r4,  r5,  [r0, #56]\n"
		"ldrd   r6,  r7,  [r2, #56]\n"
		"smlad  r12, r8,  r10, r12\n"
		"smlad  r14, r9,  r11, r14\n"
		"ldrd   r8,  r9,  [r0, #72]\n"
		"ldrd   r10, r11, [r2, #72]\n"
		"smlad  r12, r4,  r6,  r12\n"
		"smlad  r14, r5,  r7,  r14\n"
		"ldrd   r4,  r5,  [r2, #80]\n" /* start loading cos table */
		"smlad  r12, r8,  r10, r12\n" /* t1[2] is done */
		"smlad  r14, r9,  r11, r14\n" /* t1[3] is done */
		"ldrd   r6,  r7,  [r2, #88]\n"
		"ldrd   r8,  r9,  [r2, #96]\n"
		"ldrd   r10, r11, [r2, #104]\n" /* cos table fully loaded */
		"pkhtb  r12, r14, r12, asr #16\n" /* combine t1[2] and t1[3] */
		/*
		 * r4-r7 hold raw table words at this point, so the first
		 * product for each output must be a plain multiply (smuad),
		 * not an accumulate onto the table word itself.
		 */
		"smuad  r4,  r3,  r4\n"
		"smuad  r5,  r3,  r5\n"
		"smlad  r4,  r12, r8,  r4\n"
		"smlad  r5,  r12, r9,  r5\n"
		"smuad  r6,  r3,  r6\n"
		"smuad  r7,  r3,  r7\n"
		"smlad  r6,  r12, r10, r6\n"
		"smlad  r7,  r12, r11, r7\n"
		"pop    {r8-r11}\n"
		"stmia  r1, {r4, r5, r6, r7}\n"
		"pop    {r1, r4-r7, pc}\n"
	);
}
/*
 * sbc_analyze_four_armv6 is declared without parameters; calling it through
 * this cast gives the call site a real prototype (in, out, consts), matching
 * the r0/r1/r2 register assignment documented on the routine.
 */
#define sbc_analyze_four(src, dst, table) \
	((void (*)(int16_t *, int32_t *, const FIXED_T*)) \
		sbc_analyze_four_armv6)((src), (dst), (table))
/*
 * 8-subband analysis kernel for a single block.
 *
 * This is a naked function: it has no C-level parameters and is meant to be
 * invoked through a cast that supplies the prototype, so that on entry
 * r0 = in, r1 = out, r2 = consts.
 *
 * Stage 1: each t1[k] (k = 0..7) is built from five dual 16-bit
 * multiply-accumulates (smlad) of words read from r0 against words read from
 * the same offsets of r2, seeded with the 0x8000 rounding constant. The sums
 * are narrowed by 16 bits and packed pairwise with pkhtb. The pairs for
 * t1[6:7] and t1[4:5] are parked on the stack because there are not enough
 * free registers; they are popped back into r14 and r0 before stage 2.
 *
 * Stage 2: the four packed pairs are multiplied with the table that starts
 * 160 bytes into consts (r2 has been advanced by 136 by then, hence the
 * "160 - 136" in the offsets) and eight 32-bit results are stored at r1.
 *
 * All callee-saved registers that the body touches (r4-r11) are saved on
 * entry and restored before returning. lr is saved by the first push, which
 * is what allows r14 to be reused as a scratch/accumulator register.
 */
static void __attribute__((naked)) sbc_analyze_eight_armv6()
{
	/* r0 = in, r1 = out, r2 = consts */
	__asm__ volatile (
		"push   {r1, r4-r7, lr}\n"
		"push   {r8-r11}\n"		/* r8-r11 are used as scratch below */
		"ldrd   r4,  r5,  [r0, #24]\n"
		"ldrd   r6,  r7,  [r2, #24]\n"
		"ldrd   r8,  r9,  [r0, #56]\n"
		"ldrd   r10, r11, [r2, #56]\n"
		"mov    r14, #0x8000\n"		/* rounding for the later asr #16 */
		"smlad  r3,  r4,  r6,  r14\n"
		"smlad  r12, r5,  r7,  r14\n"
		"ldrd   r4,  r5,  [r0, #88]\n"
		"ldrd   r6,  r7,  [r2, #88]\n"
		"smlad  r3,  r8,  r10, r3\n"
		"smlad  r12, r9,  r11, r12\n"
		"ldrd   r8,  r9,  [r0, #120]\n"
		"ldrd   r10, r11, [r2, #120]\n"
		"smlad  r3,  r4,  r6,  r3\n"
		"smlad  r12, r5,  r7,  r12\n"
		"ldrd   r4,  r5,  [r0, #152]\n"
		"ldrd   r6,  r7,  [r2, #152]\n"
		"smlad  r3,  r8,  r10, r3\n"
		"smlad  r12, r9,  r11, r12\n"
		"ldrd   r8,  r9,  [r0, #16]\n"
		"ldrd   r10, r11, [r2, #16]\n"
		"smlad  r3,  r4,  r6,  r3\n"  /* t1[6] is done */
		"smlad  r12, r5,  r7,  r12\n" /* t1[7] is done */
		"ldrd   r4,  r5,  [r0, #48]\n"
		"ldrd   r6,  r7,  [r2, #48]\n"
		"pkhtb  r3,  r12, r3, asr #16\n" /* combine t1[6] and t1[7] */
		"str    r3,  [sp, #-4]!\n" /* save to stack */
		"smlad  r3,  r8,  r10, r14\n"
		"smlad  r12, r9,  r11, r14\n"
		"ldrd   r8,  r9,  [r0, #80]\n"
		"ldrd   r10, r11, [r2, #80]\n"
		"smlad  r3,  r4,  r6,  r3\n"
		"smlad  r12, r5,  r7,  r12\n"
		"ldrd   r4,  r5,  [r0, #112]\n"
		"ldrd   r6,  r7,  [r2, #112]\n"
		"smlad  r3,  r8,  r10, r3\n"
		"smlad  r12, r9,  r11, r12\n"
		"ldrd   r8,  r9,  [r0, #144]\n"
		"ldrd   r10, r11, [r2, #144]\n"
		"smlad  r3,  r4,  r6,  r3\n"
		"smlad  r12, r5,  r7,  r12\n"
		"ldrd   r4,  r5,  [r0, #0]\n"
		"ldrd   r6,  r7,  [r2, #0]\n"
		"smlad  r3,  r8,  r10, r3\n"  /* t1[4] is done */
		"smlad  r12, r9,  r11, r12\n" /* t1[5] is done */
		"ldrd   r8,  r9,  [r0, #32]\n"
		"ldrd   r10, r11, [r2, #32]\n"
		"pkhtb  r3,  r12, r3, asr #16\n" /* combine t1[4] and t1[5] */
		"str    r3,  [sp, #-4]!\n" /* save to stack */
		"smlad  r3,  r4,  r6,  r14\n"
		"smlad  r12, r5,  r7,  r14\n"
		"ldrd   r4,  r5,  [r0, #64]\n"
		"ldrd   r6,  r7,  [r2, #64]\n"
		"smlad  r3,  r8,  r10, r3\n"
		"smlad  r12, r9,  r11, r12\n"
		"ldrd   r8,  r9,  [r0, #96]\n"
		"ldrd   r10, r11, [r2, #96]\n"
		"smlad  r3,  r4,  r6,  r3\n"
		"smlad  r12, r5,  r7,  r12\n"
		"ldrd   r4,  r5,  [r0, #128]\n"
		"ldrd   r6,  r7,  [r2, #128]\n"
		"smlad  r3,  r8,  r10, r3\n"
		"smlad  r12, r9,  r11, r12\n"
		"ldrd   r8,  r9,  [r0, #8]\n"
		"ldrd   r10, r11, [r2, #8]\n"
		"smlad  r3,  r4,  r6,  r3\n"  /* t1[0] is done */
		"smlad  r12, r5,  r7,  r12\n" /* t1[1] is done */
		"ldrd   r4,  r5,  [r0, #40]\n"
		"ldrd   r6,  r7,  [r2, #40]\n"
		"pkhtb  r3,  r12, r3, asr #16\n" /* combine t1[0] and t1[1] */
		"smlad  r12, r8,  r10, r14\n"
		"smlad  r14, r9,  r11, r14\n"	/* last use of the rounding seed */
		"ldrd   r8,  r9,  [r0, #72]\n"
		"ldrd   r10, r11, [r2, #72]\n"
		"smlad  r12, r4,  r6,  r12\n"
		"smlad  r14, r5,  r7,  r14\n"
		"ldrd   r4,  r5,  [r0, #104]\n"
		"ldrd   r6,  r7,  [r2, #104]\n"
		"smlad  r12, r8,  r10, r12\n"
		"smlad  r14, r9,  r11, r14\n"
		"ldrd   r8,  r9,  [r0, #136]\n"
		"ldrd   r10, r11, [r2, #136]!\n" /* r2 += 136 (pre-index writeback) */
		"smlad  r12, r4,  r6,  r12\n"
		"smlad  r14, r5,  r7,  r14\n"
		"ldrd   r4,  r5,  [r2, #(160 - 136 + 0)]\n"
		"smlad  r12, r8,  r10, r12\n" /* t1[2] is done */
		"smlad  r14, r9,  r11, r14\n" /* t1[3] is done */
		"ldrd   r6,  r7,  [r2, #(160 - 136 + 8)]\n"
		/*
		 * r4-r7 hold raw table words here; start each output with a
		 * plain multiply (smuad) instead of accumulating onto them.
		 */
		"smuad  r4,  r3,  r4\n"
		"smuad  r5,  r3,  r5\n"
		"pkhtb  r12, r14, r12, asr #16\n" /* combine t1[2] and t1[3] */
		/* r3  = t2[0:1] */
		/* r12 = t2[2:3] */
		"pop    {r0, r14}\n" /* t2[4:5], t2[6:7] */
		"ldrd   r8,  r9,  [r2, #(160 - 136 + 32)]\n"
		"smuad  r6,  r3,  r6\n"
		"smuad  r7,  r3,  r7\n"
		"ldrd   r10, r11, [r2, #(160 - 136 + 40)]\n"
		"smlad  r4,  r12, r8,  r4\n"
		"smlad  r5,  r12, r9,  r5\n"
		"ldrd   r8,  r9,  [r2, #(160 - 136 + 64)]\n"
		"smlad  r6,  r12, r10, r6\n"
		"smlad  r7,  r12, r11, r7\n"
		"ldrd   r10, r11, [r2, #(160 - 136 + 72)]\n"
		"smlad  r4,  r0,  r8,  r4\n"
		"smlad  r5,  r0,  r9,  r5\n"
		"ldrd   r8,  r9,  [r2, #(160 - 136 + 96)]\n"
		"smlad  r6,  r0,  r10, r6\n"
		"smlad  r7,  r0,  r11, r7\n"
		"ldrd   r10, r11, [r2, #(160 - 136 + 104)]\n"
		"smlad  r4,  r14, r8,  r4\n"
		"smlad  r5,  r14, r9,  r5\n"
		"ldrd   r8,  r9,  [r2, #(160 - 136 + 16 + 0)]\n"
		"smlad  r6,  r14, r10, r6\n"
		"smlad  r7,  r14, r11, r7\n"
		"ldrd   r10, r11, [r2, #(160 - 136 + 16 + 8)]\n"
		"stmia  r1!, {r4, r5}\n"
		/*
		 * r4/r5 were just stored; restart them from a plain multiply
		 * so the next two outputs do not inherit the previous values
		 * (r6/r7 are restarted the same way a few lines below).
		 */
		"smuad  r4,  r3,  r8\n"
		"smuad  r5,  r3,  r9\n"
		"ldrd   r8,  r9,  [r2, #(160 - 136 + 16 + 32)]\n"
		"stmia  r1!, {r6, r7}\n"
		"smuad  r6,  r3,  r10\n"
		"smuad  r7,  r3,  r11\n"
		"ldrd   r10, r11, [r2, #(160 - 136 + 16 + 40)]\n"
		"smlad  r4,  r12, r8,  r4\n"
		"smlad  r5,  r12, r9,  r5\n"
		"ldrd   r8,  r9,  [r2, #(160 - 136 + 16 + 64)]\n"
		"smlad  r6,  r12, r10, r6\n"
		"smlad  r7,  r12, r11, r7\n"
		"ldrd   r10, r11, [r2, #(160 - 136 + 16 + 72)]\n"
		"smlad  r4,  r0,  r8,  r4\n"
		"smlad  r5,  r0,  r9,  r5\n"
		"ldrd   r8,  r9,  [r2, #(160 - 136 + 16 + 96)]\n"
		"smlad  r6,  r0,  r10, r6\n"
		"smlad  r7,  r0,  r11, r7\n"
		"ldrd   r10, r11, [r2, #(160 - 136 + 16 + 104)]\n"
		"smlad  r4,  r14, r8,  r4\n"
		"smlad  r5,  r14, r9,  r5\n"
		"smlad  r6,  r14, r10, r6\n"
		"smlad  r7,  r14, r11, r7\n"
		"pop    {r8-r11}\n"
		"stmia  r1!, {r4, r5, r6, r7}\n"
		"pop    {r1, r4-r7, pc}\n"
	);
}
/*
 * sbc_analyze_eight_armv6 is declared without parameters; calling it through
 * this cast gives the call site a real prototype (in, out, consts), matching
 * the r0/r1/r2 register assignment documented on the routine.
 */
#define sbc_analyze_eight(src, dst, table) \
	((void (*)(int16_t *, int32_t *, const FIXED_T*)) \
		sbc_analyze_eight_armv6)((src), (dst), (table))
268 static void sbc_analyze_4b_4s_armv6(int16_t *x
, int32_t *out
, int out_stride
)
271 sbc_analyze_four(x
+ 12, out
, analysis_consts_fixed4_simd_odd
);
273 sbc_analyze_four(x
+ 8, out
, analysis_consts_fixed4_simd_even
);
275 sbc_analyze_four(x
+ 4, out
, analysis_consts_fixed4_simd_odd
);
277 sbc_analyze_four(x
+ 0, out
, analysis_consts_fixed4_simd_even
);
280 static void sbc_analyze_4b_8s_armv6(int16_t *x
, int32_t *out
, int out_stride
)
283 sbc_analyze_eight(x
+ 24, out
, analysis_consts_fixed8_simd_odd
);
285 sbc_analyze_eight(x
+ 16, out
, analysis_consts_fixed8_simd_even
);
287 sbc_analyze_eight(x
+ 8, out
, analysis_consts_fixed8_simd_odd
);
289 sbc_analyze_eight(x
+ 0, out
, analysis_consts_fixed8_simd_even
);
292 void sbc_init_primitives_armv6(struct sbc_encoder_state
*state
)
294 state
->sbc_analyze_4b_4s
= sbc_analyze_4b_4s_armv6
;
295 state
->sbc_analyze_4b_8s
= sbc_analyze_4b_8s_armv6
;
296 state
->implementation_info
= "ARMv6 SIMD";