| 881 |
theseven |
1 |
/***************************************************************************
|
|
|
2 |
* __________ __ ___.
|
|
|
3 |
* Open \______ \ ____ ____ | | _\_ |__ _______ ___
|
|
|
4 |
* Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
|
|
|
5 |
* Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
|
|
|
6 |
* Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
|
|
|
7 |
* \/ \/ \/ \/ \/
|
|
|
8 |
* $Id$
|
|
|
9 |
*
|
|
|
10 |
* Copyright (C) 2008 by Jens Arnold
|
|
|
11 |
* Copyright (C) 2009 by Andrew Mahone
|
|
|
12 |
*
|
|
|
13 |
* Optimised replacements for libgcc functions
|
|
|
14 |
*
|
|
|
15 |
* Based on: libgcc routines for ARM cpu, additional algorithms from ARM System
|
|
|
16 |
* Developer's Guide
|
|
|
17 |
* Division routines, written by Richard Earnshaw, (rearnsha@armltd.co.uk)
|
|
|
18 |
* Copyright 1995, 1996, 1998, 1999, 2000, 2003, 2004, 2005
|
|
|
19 |
* Free Software Foundation, Inc.
|
|
|
20 |
*
|
|
|
21 |
* This program is free software; you can redistribute it and/or
|
|
|
22 |
* modify it under the terms of the GNU General Public License
|
|
|
23 |
* as published by the Free Software Foundation; either version 2
|
|
|
24 |
* of the License, or (at your option) any later version.
|
|
|
25 |
*
|
|
|
26 |
* This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
|
|
|
27 |
* KIND, either express or implied.
|
|
|
28 |
*
|
|
|
29 |
****************************************************************************/
|
|
|
30 |
|
|
|
31 |
#define ASM_FILE
|
|
|
32 |
#include "global.h"
|
|
|
33 |
|
|
|
34 |
.syntax unified
|
|
|
35 |
|
|
|
36 |
/* ARM_SDIV32_PRE numerator, divisor, sign
 *
 * Signed-division prologue used by ARMV4_SDIV32_BODY.
 *   numerator  in/out: replaced by its absolute value.
 *   divisor    in/out: replaced by MINUS its absolute value (a non-negative
 *              divisor is negated, a negative one is left alone).
 *   sign       out: bit 31 = sign of the result (divisor sign XOR numerator
 *              sign), bits 30..0 = numerator sign replicated. Consumed by
 *              ARM_SDIV32_POST.
 * Clobbers the condition flags.
 */
.macro ARM_SDIV32_PRE numerator, divisor, sign
    /* sign[31] = divisor sign; Z is set when the divisor is non-negative */
    ands    \sign, \divisor, #1<<31
    /* Non-negative divisor: negate it, so the divisor always leaves <= 0 */
    rsbeq   \divisor, \divisor, #0
    /* sign[31] = result sign, sign[0:30], C = numerator sign */
    eors    \sign, \sign, \numerator, asr #32
    /* Negative numerator (C set by the shifter): take its absolute value */
    rsbcs   \numerator, \numerator, #0
.endm
|
|
|
44 |
|
|
|
45 |
/* ARM_SDIV32_POST quotient, remainder, sign
 *
 * Signed-division epilogue: applies the signs recorded in \sign (bit 31 =
 * result sign, bit 30 = numerator sign) to the unsigned results.
 *   quotient   in/out, may be "" to skip: negated when sign[31] was set.
 *   remainder  in/out, may be "" to skip: negated when sign[30] was set.
 *   sign       in, destroyed (shifted left by one).
 * Clobbers the condition flags.
 */
.macro ARM_SDIV32_POST quotient, remainder, sign
    /* C = old sign[31] (result sign), N = old sign[30] (numerator sign) */
    movs    \sign, \sign, lsl #1
.ifnc "", "\quotient"
    rsbcs   \quotient, \quotient, #0
.endif
.ifnc "", "\remainder"
    rsbmi   \remainder, \remainder, #0
.endif
.endm
|
|
|
54 |
|
|
|
55 |
#if CPU_ARM_ARCH < 5
|
|
|
56 |
/* ARMV4_UDIV32_BODY numerator, divisor, quotient, remainder, tmp, bits,
 *                   div0label, return
 *
 * Unsigned 32-bit divide for cores without clz (CPU_ARM_ARCH < 5).
 *   numerator  in, clobbered.
 *   divisor    in, clobbered (negated on entry, then shifted).
 *   quotient   out register, or "" when the quotient is not wanted.
 *   remainder  out register, or "" when the remainder is not wanted.
 *   tmp, bits  scratch registers.
 *   div0label  optional; when non-empty, branched to if the divisor is zero.
 *   return     non-zero: every path ends with "bx lr".
 *              zero: every path ends at local label 99 at the macro's end.
 * Uses local labels 10, 20 and (when return is zero) 99. Clobbers flags.
 *
 * The "add pc, pc, \bits, lsl #3" below is a computed jump into the unrolled
 * .rept 30 block: the nop pads the instruction after the add so that the pc
 * value read by the add is the first unrolled step, and each step must stay
 * exactly two instructions (8 bytes) long.
 */
.macro ARMV4_UDIV32_BODY numerator, divisor, quotient, remainder, tmp, bits, div0label, return
.ifnc "", "\div0label"
    rsbs    \divisor, \divisor, #0
    beq     \div0label
.else
    rsb     \divisor, \divisor, #0
.endif
    /* This SWAR divider requires a numerator less than 1<<31, because it must
       be able to shift the remainder left at each step without shifting out
       topmost bit. Since a shift might be needed for the aligned remainder to
       exceed the divisor, the topmost bit must be unset at the start to avoid
       this overflow case. The original numerator is saved so that the result
       can be corrected after the reduced division completes. */
    /* divisor is negated here, so this compares numerator against the
       original divisor: C clear means numerator < divisor. */
    cmn     \numerator, \divisor
.ifc "", "\quotient"
  .ifc "\numerator", "\remainder"
    /* Remainder-only, produced in place: nothing to do when
       numerator < divisor. */
    .if \return
    bxcc    lr
    .else
    /* Must be conditional like the bxcc above; an unconditional branch
       would skip the division for every input. */
    bcc     99f
    .endif
  .else
    bcc     20f
  .endif
.else
    bcc     20f
.endif
    /* Keep the original numerator in tmp; halve it when bit 31 is set */
    movs    \tmp, \numerator
    movmi   \numerator, \numerator, lsr #1
    mov     \bits, #30
    /* Align the (negated) divisor with the numerator, counting the shift */
.set shift, 16
.rept 5
    cmn     \divisor, \numerator, lsr #shift
    subcs   \bits, \bits, #shift
    movcs   \divisor, \divisor, lsl #shift
.set shift, shift >> 1
.endr
    adds    \numerator, \numerator, \divisor
    subcc   \numerator, \numerator, \divisor
    /* Skip \bits of the 30 unrolled steps (8 bytes each) */
    add     pc, pc, \bits, lsl #3
    nop
.rept 30
    adcs    \numerator, \divisor, \numerator, lsl #1
    subcc   \numerator, \numerator, \divisor
.endr
    adc     \numerator, \numerator, \numerator
    /* N = bit 31 of the original numerator, C = its bit 0 */
    movs    \tmp, \tmp, asr #1
    rsb     \bits, \bits, #31
    /* Original numerator had bit 31 set: take the correction path */
    bmi     10f
    /* numerator now holds remainder (high part) and quotient (low part) */
.ifc "", "\quotient"
    mov     \remainder, \numerator, lsr \bits
.else
  .ifc "", "\remainder"
    mov     \divisor, \numerator, lsr \bits
    eor     \quotient, \numerator, \divisor, lsl \bits
  .else
    mov     \remainder, \numerator, lsr \bits
    eor     \quotient, \numerator, \remainder, lsl \bits
  .endif
.endif
.ifne \return
    bx      lr
.else
    b       99f
.endif
10:
    /* Correct the result of the halved division: split remainder/quotient,
       shift the dropped low bit (still in C) back into the remainder and
       perform one more conditional subtract step. */
    mov     \tmp, \numerator, lsr \bits
    eor     \numerator, \numerator, \tmp, lsl \bits
    sub     \bits, \bits, #1
    adc     \tmp, \tmp, \tmp
    adds    \tmp, \tmp, \divisor, asr \bits
.ifnc "", "\quotient"
    adc     \quotient, \numerator, \numerator
.endif
.ifnc "", "\remainder"
    subcc   \remainder, \tmp, \divisor, asr \bits
    movcs   \remainder, \tmp
.endif
.ifne \return
    bx      lr
.else
    b       99f
.endif
20:
    /* numerator < divisor: quotient is zero, remainder is the numerator */
.ifnc "", "\remainder"
  .ifnc "\remainder", "\numerator"
    mov     \remainder, \numerator
  .endif
.endif
.ifnc "", "\quotient"
    mov     \quotient, #0
.endif
.ifne \return
    bx      lr
.else
99:
.endif
.endm
|
|
|
154 |
|
|
|
155 |
/* ARMV4_SDIV32_BODY numerator, divisor, quotient, remainder, bits, sign,
 *                   div0label, return
 *
 * Signed 32-bit divide for cores without clz (CPU_ARM_ARCH < 5).
 *   numerator  in, clobbered.
 *   divisor    in, clobbered.
 *   quotient   out register, or "" when the quotient is not wanted.
 *   remainder  out register, or "" when the remainder is not wanted.
 *   bits, sign scratch registers.
 *   div0label  optional; when non-empty, branched to if the divisor is zero.
 *   return     non-zero: every path applies the result signs and ends with
 *              "bx lr". zero: every path ends at local label 99, where the
 *              result signs are applied, and execution continues after the
 *              macro.
 * Uses local labels 20 and (when return is zero) 99. Clobbers flags.
 *
 * The "add pc, pc, \bits, lsl #3" below is a computed jump into the unrolled
 * .rept 30 block; the nop and the two-instruction step size must not change.
 */
.macro ARMV4_SDIV32_BODY numerator, divisor, quotient, remainder, bits, sign, div0label, return
    /* ARM_SDIV32_PRE takes the absolute value of the numerator, leaves the
       divisor negated (as the divider below expects) and records the
       result signs in \sign. */
    ARM_SDIV32_PRE \numerator, \divisor, \sign
.ifnc "", "\div0label"
    tst     \divisor, \divisor
    beq     \div0label
.endif
    /* This SWAR divider requires a numerator less than 1<<31, because it must
       be able to shift the remainder left at each step without shifting out
       topmost bit. With signed inputs, whose absolute value may not exceed
       1<<31, this may be accomplished simply by subtracting the divisor before
       beginning division, and adding 1 to the quotient. */
    adds    \numerator, \numerator, \divisor
    /* C clear: |numerator| < |divisor| */
    bcc     20f
    mov     \bits, #30
    /* Align the (negated) divisor with the numerator, counting the shift */
.set shift, 16
.rept 5
    cmn     \divisor, \numerator, lsr #shift
    subcs   \bits, \bits, #shift
    movcs   \divisor, \divisor, lsl #shift
.set shift, shift >> 1
.endr
    adds    \numerator, \numerator, \divisor
    subcc   \numerator, \numerator, \divisor
    /* Skip \bits of the 30 unrolled steps (8 bytes each) */
    add     pc, pc, \bits, lsl #3
    nop
.rept 30
    adcs    \numerator, \divisor, \numerator, lsl #1
    subcc   \numerator, \numerator, \divisor
.endr
    rsb     \bits, \bits, #31
    adc     \numerator, \numerator, \numerator
    /* numerator now holds remainder (high part) and quotient (low part);
       the "add #1" compensates for the divisor subtracted up front. */
.ifc "", "\quotient"
    mov     \remainder, \numerator, lsr \bits
.else
  .ifc "", "\remainder"
    mov     \divisor, \numerator, lsr \bits
    add     \numerator, \numerator, #1
    sub     \quotient, \numerator, \divisor, lsl \bits
  .else
    mov     \remainder, \numerator, lsr \bits
    add     \numerator, \numerator, #1
    sub     \quotient, \numerator, \remainder, lsl \bits
  .endif
.endif
.ifne \return
    ARM_SDIV32_POST \quotient, \remainder, \sign
    bx      lr
.else
    b       99f
.endif
20:
    /* |numerator| < |divisor|: quotient is zero; undo the up-front
       subtraction to recover the remainder. */
.ifnc "", "\remainder"
    sub     \remainder, \numerator, \divisor
.endif
.ifnc "", "\quotient"
    mov     \quotient, #0
.endif
.ifne \return
    /* Quotient is zero, so only the remainder needs its sign applied */
    ARM_SDIV32_POST "", \remainder, \sign
    bx      lr
.else
99:
    ARM_SDIV32_POST \quotient, \remainder, \sign
.endif
.endm
|
|
|
222 |
|
|
|
223 |
#else
|
|
|
224 |
/* ARMV5_UDIV32_BODY numerator, divisor, quotient, remainder, bits, inv, neg,
 *                   div0label, return
 *
 * Unsigned 32-bit divide for CPU_ARM_ARCH >= 5, using clz, a table-driven
 * reciprocal estimate (.L_udiv_est_table) and Newton-Raphson refinement.
 *   numerator  in, clobbered.
 *   divisor    in, clobbered.
 *   quotient   out register, or "" when the quotient is not wanted.
 *   remainder  out register, or "" when the remainder is not wanted.
 *   bits, inv, neg  scratch registers.
 *   div0label  optional; when non-empty, branched to if the divisor is zero.
 *   return     non-zero: every path ends with "bx lr".
 *              zero: every path ends at local label 99 at the macro's end.
 * Uses local labels 10, 20, 30, 40 (ARMv6 only) and 99. Clobbers flags.
 *
 * The table lookup is pc-relative: the "add \neg, pc, ..." must stay exactly
 * two instructions before the ldrb that uses "#.L_udiv_est_table-.-64", and
 * the table must be assembled into the same section as this code.
 */
.macro ARMV5_UDIV32_BODY numerator, divisor, quotient, remainder, bits, inv, neg, div0label, return
    cmp     \numerator, \divisor
    clz     \bits, \divisor
    bcc     30f
    /* Normalise the divisor so its top bit is set (zero stays zero) */
    mov     \inv, \divisor, lsl \bits
    add     \neg, pc, \inv, lsr #25
    /* Test whether divisor is 2^N */
    cmp     \inv, #1<<31
    /* Load approximate reciprocal */
    ldrbhi  \inv, [\neg, #.L_udiv_est_table-.-64]
    bls     20f
    subs    \bits, \bits, #7
    rsb     \neg, \divisor, #0
    /* Scale approximate reciprocal, or else branch to large-divisor path */
    movpl   \divisor, \inv, lsl \bits
    bmi     10f
    /* Newton-Raphson iteration to improve reciprocal accuracy */
    mul     \inv, \divisor, \neg
    smlawt  \divisor, \divisor, \inv, \divisor
    mul     \inv, \divisor, \neg
    /* Complete N-R math and produce approximate quotient. Use smmla/smmul on
       ARMv6. */
#if CPU_ARM_ARCH >= 6
    tst     \numerator, \numerator
    smmla   \divisor, \divisor, \inv, \divisor
    /* Branch to large-numerator handler, or else use smmul if sign bit is not
       set. This wins on average with random numerators, and should be no
       slower than using umull for small numerator, even if prediction fails.
    */
    bmi     40f
    smmul   \inv, \numerator, \divisor
#else
    /* ARMv5e lacks smmul, so always uses umull. */
    mov     \bits, #0
    smlal   \bits, \divisor, \inv, \divisor
    umull   \bits, \inv, \numerator, \divisor
#endif
    /* Calculate remainder and correct result. */
    add     \numerator, \numerator, \neg
.ifnc "", "\remainder"
    mla     \remainder, \inv, \neg, \numerator
  .ifnc "", "\quotient"
    mov     \quotient, \inv
    cmn     \remainder, \neg
    subcs   \remainder, \remainder, \neg
    addpl   \remainder, \remainder, \neg, lsl #1
    addcc   \quotient, \quotient, #1
    addpl   \quotient, \quotient, #2
  .else
    cmn     \remainder, \neg
    subcs   \remainder, \remainder, \neg
    addpl   \remainder, \remainder, \neg, lsl #1
  .endif
.else
    mla     \divisor, \inv, \neg, \numerator
    mov     \quotient, \inv
    cmn     \divisor, \neg
    addcc   \quotient, \quotient, #1
    addpl   \quotient, \quotient, #2
.endif
.if \return
    bx      lr
.else
    b       99f
.endif
10:
    /* Very large divisors can be handled without further improving the
       reciprocal. First the reciprocal must be reduced to ensure that it
       underestimates the correct value. */
    rsb     \bits, \bits, #0
    sub     \inv, \inv, #4
    mov     \divisor, \inv, lsr \bits
    /* Calculate approximate quotient and remainder */
    umull   \bits, \inv, \numerator, \divisor
    /* Correct quotient and remainder */
.ifnc "", "\remainder"
    mla     \remainder, \inv, \neg, \numerator
  .ifnc "", "\quotient"
    mov     \quotient, \inv
    cmn     \neg, \remainder, lsr #1
    addcs   \remainder, \remainder, \neg, lsl #1
    addcs   \quotient, \quotient, #2
    cmn     \neg, \remainder
    addcs   \remainder, \remainder, \neg
    addcs   \quotient, \quotient, #1
  .else
    cmn     \neg, \remainder, lsr #1
    addcs   \remainder, \remainder, \neg, lsl #1
    cmn     \neg, \remainder
    addcs   \remainder, \remainder, \neg
  .endif
.else
    mla     \divisor, \inv, \neg, \numerator
    mov     \quotient, \inv
    cmn     \neg, \divisor, lsr #1
    addcs   \divisor, \divisor, \neg, lsl #1
    addcs   \quotient, \quotient, #2
    cmn     \neg, \divisor
    addcs   \quotient, \quotient, #1
.endif
.if \return
    bx      lr
.else
    b       99f
.endif
20:
    /* Handle division by powers of two by shifting right. Mod is handled
       by using divisor-1 as a bitmask. The flags still come from the
       "cmp \inv, #1<<31" above: NE here means the normalised divisor was
       zero, i.e. division by zero. */
.ifnc "", "\remainder"
  .ifnc "", "\div0label"
    bne     \div0label
  .endif
  .ifnc "", "\quotient"
    sub     \divisor, \divisor, #1
    rsb     \bits, \bits, #31
    and     \remainder, \numerator, \divisor
    mov     \quotient, \numerator, lsr \bits
  .else
    sub     \divisor, \divisor, #1
    and     \remainder, \numerator, \divisor
  .endif
.else
    rsb     \bits, \bits, #31
  .ifnc "", "\div0label"
    bne     \div0label
  .endif
    mov     \quotient, \numerator, lsr \bits
.endif
.if \return
    bx      lr
.else
    b       99f
.endif
30:
    /* Handle numerator < divisor - quotient is zero, remainder is the
       (still unmodified) numerator. */
.ifnc "", "\remainder"
    mov     \remainder, \numerator
.endif
.ifnc "", "\quotient"
    mov     \quotient, #0
.endif
.if \return
    bx      lr
#if CPU_ARM_ARCH >= 6
.else
    /* Do not fall through into the large-numerator handler below */
    b       99f
#endif
.endif
#if CPU_ARM_ARCH >= 6
40:
    /* Handle large (sign bit set) numerators. Works exactly as the ARMv5e
       main path above 10:, using umull instead of smmul. */
    umull   \bits, \inv, \numerator, \divisor
    add     \numerator, \numerator, \neg
.ifnc "", "\remainder"
    mla     \remainder, \inv, \neg, \numerator
  .ifnc "", "\quotient"
    mov     \quotient, \inv
    cmn     \remainder, \neg
    subcs   \remainder, \remainder, \neg
    addpl   \remainder, \remainder, \neg, lsl #1
    addcc   \quotient, \quotient, #1
    addpl   \quotient, \quotient, #2
  .else
    cmn     \remainder, \neg
    subcs   \remainder, \remainder, \neg
    addpl   \remainder, \remainder, \neg, lsl #1
  .endif
.else
    mla     \divisor, \inv, \neg, \numerator
    mov     \quotient, \inv
    cmn     \divisor, \neg
    addcc   \quotient, \quotient, #1
    addpl   \quotient, \quotient, #2
.endif
.if \return
    bx      lr
.else
    b       99f
.endif
#endif
99:
.endm
|
|
|
405 |
|
|
|
406 |
/* ARMV5_SDIV32_BODY numerator, divisor, quotient, remainder, bits, inv, neg,
 *                   sign, div0label, return
 *
 * Signed 32-bit divide for CPU_ARM_ARCH >= 5, using clz, a table-driven
 * reciprocal estimate (.L_udiv_est_table) and Newton-Raphson refinement.
 *   numerator  in, clobbered.
 *   divisor    in, clobbered.
 *   quotient   out register, or "" when the quotient is not wanted.
 *   remainder  out register, or "" when the remainder is not wanted.
 *   bits, inv, neg, sign  scratch registers.
 *   div0label  optional; when non-empty, branched to if the divisor is zero.
 *   return     an instruction (as a string) emitted at the end of every
 *              path to return to the caller, or "" to end every path at
 *              local label 99 at the macro's end instead.
 * Uses local labels 10, 20, 30 and 99. Clobbers flags.
 *
 * The table lookup is pc-relative: the "add \inv, pc, ..." must stay exactly
 * two instructions before the ldrb that uses "#.L_udiv_est_table-.-64", and
 * the table must be assembled into the same section as this code.
 */
.macro ARMV5_SDIV32_BODY numerator, divisor, quotient, remainder, bits, inv, neg, sign, div0label, return
    /* sign[31] = divisor sign */
    ands    \sign, \divisor, #1<<31
    /* Negative divisor: take its absolute value */
    rsbne   \divisor, \divisor, #0
    /* sign[31] = result sign, sign[0:30], C = numerator sign */
    eors    \sign, \sign, \numerator, asr #32
    clz     \bits, \divisor
    rsbcs   \numerator, \numerator, #0
    /* On ARMv6, subtract divisor before performing division, which ensures
       numerator sign bit is clear and smmul may be used in place of umull. The
       fixup for the results can be fit entirely into existing delay slots on
       the main division paths. It costs 1c in the num<div path if the
       remainder is to be produced in the numerator's register, and 1c in
       the power-of-2-divisor path only if producing both remainder and
       quotient. */
#if CPU_ARM_ARCH >= 6
    subs    \numerator, \numerator, \divisor
#else
    cmp     \numerator, \divisor
#endif
    /* Normalise the divisor so its top bit is set (zero stays zero) */
    movcs   \inv, \divisor, lsl \bits
    bcc     30f
    /* Test whether divisor is 2^N */
    cmp     \inv, #1<<31
    add     \inv, pc, \inv, lsr #25
    bls     20f
    /* Load approximate reciprocal */
    ldrb    \inv, [\inv, #.L_udiv_est_table-.-64]
    subs    \bits, \bits, #7
    rsb     \neg, \divisor, #0
    /* Scale approximate reciprocal, or else branch to large-divisor path */
    movpl   \divisor, \inv, lsl \bits
    bmi     10f
    /* Newton-Raphson iteration to improve reciprocal accuracy */
    mul     \inv, \divisor, \neg
    smlawt  \divisor, \divisor, \inv, \divisor
    mul     \inv, \divisor, \neg
    /* Complete N-R math and produce approximate quotient. Use smmla/smmul on
       ARMv6. */
#if CPU_ARM_ARCH >= 6
    smmla   \divisor, \divisor, \inv, \divisor
    smmul   \inv, \numerator, \divisor
#else
    mov     \bits, #0
    smlal   \bits, \divisor, \inv, \divisor
    umull   \bits, \inv, \numerator, \divisor
#endif
    /* Calculate remainder and correct quotient. On ARMv6 the quotient gets
       an extra +1 to account for the divisor subtracted up front. */
    add     \numerator, \numerator, \neg
.ifnc "", "\remainder"
    mla     \remainder, \inv, \neg, \numerator
  .ifnc "", "\quotient"
#if CPU_ARM_ARCH >= 6
    add     \quotient, \inv, #1
#else
    mov     \quotient, \inv
#endif
    cmn     \remainder, \neg
    subcs   \remainder, \remainder, \neg
    addpl   \remainder, \remainder, \neg, lsl #1
    addcc   \quotient, \quotient, #1
    addpl   \quotient, \quotient, #2
  .else
    cmn     \remainder, \neg
    subcs   \remainder, \remainder, \neg
    addpl   \remainder, \remainder, \neg, lsl #1
  .endif
.else
    mla     \divisor, \inv, \neg, \numerator
#if CPU_ARM_ARCH >= 6
    add     \quotient, \inv, #1
#else
    mov     \quotient, \inv
#endif
    cmn     \divisor, \neg
    addcc   \quotient, \quotient, #1
    addpl   \quotient, \quotient, #2
.endif
    ARM_SDIV32_POST \quotient, \remainder, \sign
.ifnc "", "\return"
    \return
.else
    b       99f
.endif
10:
    /* Very large divisors can be handled without further improving the
       reciprocal. First the reciprocal must be reduced to ensure that it
       underestimates the correct value. */
    rsb     \bits, \bits, #0
    sub     \inv, \inv, #4
    mov     \divisor, \inv, lsr \bits
    /* Calculate approximate quotient and remainder */
#if CPU_ARM_ARCH >= 6
    smmul   \inv, \numerator, \divisor
#else
    umull   \bits, \inv, \numerator, \divisor
#endif
    /* Correct quotient and remainder */
.ifnc "", "\remainder"
    mla     \remainder, \inv, \neg, \numerator
  .ifnc "", "\quotient"
    /* Same architecture test as every other path: the ARMv6 code divided
       (numerator - divisor), so the quotient needs the extra +1 here too. */
#if CPU_ARM_ARCH >= 6
    add     \quotient, \inv, #1
#else
    mov     \quotient, \inv
#endif
    cmn     \neg, \remainder, lsr #1
    addcs   \remainder, \remainder, \neg, lsl #1
    addcs   \quotient, \quotient, #2
    cmn     \neg, \remainder
    addcs   \remainder, \remainder, \neg
    addcs   \quotient, \quotient, #1
  .else
    cmn     \neg, \remainder, lsr #1
    addcs   \remainder, \remainder, \neg, lsl #1
    cmn     \neg, \remainder
    addcs   \remainder, \remainder, \neg
  .endif
.else
    mla     \divisor, \inv, \neg, \numerator
#if CPU_ARM_ARCH >= 6
    add     \quotient, \inv, #1
#else
    mov     \quotient, \inv
#endif
    cmn     \neg, \divisor, lsr #1
    addcs   \divisor, \divisor, \neg, lsl #1
    addcs   \quotient, \quotient, #2
    cmn     \neg, \divisor
    addcs   \quotient, \quotient, #1
.endif
    ARM_SDIV32_POST \quotient, \remainder, \sign
.ifnc "", "\return"
    \return
.else
    b       99f
.endif
20:
    /* Handle division by powers of two by shifting right. Mod is handled
       by using divisor-1 as a bitmask. The flags still come from the
       "cmp \inv, #1<<31" above: NE here means the normalised divisor was
       zero, i.e. division by zero. */
.ifnc "", "\div0label"
    bne     \div0label
.endif
.ifnc "", "\remainder"
  .ifnc "", "\quotient"
    rsb     \bits, \bits, #31
#if CPU_ARM_ARCH >= 6
    /* Undo the up-front subtraction before shifting out the quotient */
    add     \numerator, \numerator, \divisor
#endif
    sub     \divisor, \divisor, #1
    and     \remainder, \numerator, \divisor
    mov     \quotient, \numerator, lsr \bits
  .else
    sub     \divisor, \divisor, #1
    and     \remainder, \numerator, \divisor
  .endif
.else
    rsb     \bits, \bits, #31
#if CPU_ARM_ARCH >= 6
    /* Undo the up-front subtraction before shifting out the quotient */
    add     \numerator, \numerator, \divisor
#endif
    mov     \quotient, \numerator, lsr \bits
.endif
    ARM_SDIV32_POST \quotient, \remainder, \sign
.ifnc "", "\return"
    \return
.else
    b       99f
.endif
30:
    /* Handle numerator < divisor - quotient is zero, remainder is numerator,
       which must be restored to its original value on ARMv6. */
.ifnc "", "\remainder"
#if CPU_ARM_ARCH >= 6
    add     \remainder, \numerator, \divisor
#else
  .ifnc "\remainder", "\numerator"
    mov     \remainder, \numerator
  .endif
#endif
.endif
.ifnc "", "\quotient"
    mov     \quotient, #0
.endif
.ifnc "", "\remainder"
    /* Quotient is zero, so only the remainder needs its sign applied */
    ARM_SDIV32_POST "", \remainder, \sign
.endif
.ifnc "", "\return"
    \return
.endif
99:
.endm
|
|
|
598 |
#endif
|
|
|
599 |
|
|
|
600 |
/* Division-by-zero trampoline passed as div0label to ARMV5_SDIV32_BODY.
   The ARMv5+ __aeabi_idivmod stores lr at [sp, #-4] without moving sp;
   this moves sp down over that slot and then branches to __div0.
   NOTE(review): presumably __div0 expects the caller's return address on
   top of the stack - confirm against its definition. */
    .section .text.__div0_wrap_s
__div0_wrap_s:
    sub     sp, sp, #4
    b       __div0
    .size __div0_wrap_s, . - __div0_wrap_s
|
|
|
605 |
|
|
|
606 |
/* Division-by-zero trampoline for the division bodies that have not saved
   lr themselves: pushes lr (pre-decrementing sp by 4) and then branches to
   __div0.
   NOTE(review): presumably __div0 expects the caller's return address on
   top of the stack - confirm against its definition. */
    .section .text.__div0_wrap
__div0_wrap:
    str     lr, [sp, #-4]!
    b       __div0
    .size __div0_wrap, . - __div0_wrap
|
|
|
611 |
|
|
|
612 |
/* The div+mod averages a fraction of a cycle worse for signed values, and
   slightly better for unsigned, so just alias div to divmod. */
    /* Unsigned: __aeabi_uidiv is the same entry point as __aeabi_uidivmod */
    .global __aeabi_uidivmod
    .type   __aeabi_uidivmod,%function
    .global __aeabi_uidiv
    .type   __aeabi_uidiv,%function
    .set    __aeabi_uidiv,__aeabi_uidivmod
    /* Signed: __aeabi_idiv is the same entry point as __aeabi_idivmod */
    .global __aeabi_idivmod
    .type   __aeabi_idivmod,%function
    .global __aeabi_idiv
    .type   __aeabi_idiv,%function
    .set    __aeabi_idiv,__aeabi_idivmod
|
|
|
624 |
|
|
|
625 |
|
|
|
626 |
#if CPU_ARM_ARCH < 5
|
|
|
627 |
/* __clzsi2 for cores without the clz instruction.
   In:  r0 = value.  Out: r0 = byte fetched from the 64-entry table below
   (the table holds the values 0..32 in hashed order).  Clobbers nothing
   else.
   The value is smeared/reduced with orr/bic, multiplied by a constant via
   the three rsb steps, and the top six bits of the product index the table.
   The ldrb is pc-relative: pc reads as the address two instructions ahead,
   so the table MUST start immediately after the "bx lr". */
    .section .text.__clzsi2
    .global __clzsi2
    .type   __clzsi2, %function

__clzsi2:
    orr     r0, r0, r0, lsr #8
    orr     r0, r0, r0, lsr #4
    orr     r0, r0, r0, lsr #2
    orr     r0, r0, r0, lsr #1
    bic     r0, r0, r0, lsr #16
    rsb     r0, r0, r0, lsl #14
    rsb     r0, r0, r0, lsl #11
    rsb     r0, r0, r0, lsl #9
    /* index = top 6 bits of the hash */
    ldrb    r0, [pc, r0, lsr #26]
    bx      lr
    .byte   32, 20, 19,  0,  0, 18,  0,  7, 10, 17,  0,  0, 14,  0,  6,  0
    .byte    0,  9,  0, 16,  0,  0,  1, 26,  0, 13,  0,  0, 24,  5,  0,  0
    .byte    0, 21,  0,  8, 11,  0, 15,  0,  0,  0,  0,  2, 27,  0, 25,  0
    .byte   22,  0, 12,  0,  0,  3, 28,  0, 23,  0,  4, 29,  0,  0, 30, 31
    .size __clzsi2, .-__clzsi2
|
|
|
647 |
|
|
|
648 |
.section .text.__divisionhelpers
|
|
|
649 |
/* Unsigned divide/modulo (also reached as __aeabi_uidiv).
   In:  r0 = numerator, r1 = divisor.
   Out: r0 = quotient,  r1 = remainder.
   r2 and r3 are used as scratch; a zero divisor branches to __div0_wrap. */
__aeabi_uidivmod:
    ARMV4_UDIV32_BODY r0, r1, r0, r1, r2, r3, __div0_wrap, 1
    .size __aeabi_uidivmod, . - __aeabi_uidivmod
|
|
|
652 |
|
|
|
653 |
/* Signed divide/modulo (also reached as __aeabi_idiv).
   In:  r0 = numerator, r1 = divisor.
   Out: r0 = quotient,  r1 = remainder.
   r2 and r3 are used as scratch; a zero divisor branches to __div0_wrap. */
__aeabi_idivmod:
    ARMV4_SDIV32_BODY r0, r1, r0, r1, r2, r3, __div0_wrap, 1
    .size __aeabi_idivmod, . - __aeabi_idivmod
|
|
|
656 |
|
|
|
657 |
#else
|
|
|
658 |
/* __clzsi2 for cores that have the clz instruction.
   In:  r0 = value.  Out: r0 = clz of the value. */
    .global __clzsi2
    .type   __clzsi2, %function

__clzsi2:
    clz     r0, r0
    bx      lr
    .size __clzsi2, . - __clzsi2
|
|
|
664 |
|
|
|
665 |
/* Unsigned divide/modulo (also reached as __aeabi_uidiv).
   In:  r0 = numerator, r1 = divisor.
   Out: r0 = quotient,  r1 = remainder.
   r2, r3 and ip are used as scratch; a zero divisor branches to
   __div0_wrap. Must stay in the same section as .L_udiv_est_table. */
__aeabi_uidivmod:
    ARMV5_UDIV32_BODY r0, r1, r0, r1, r2, r3, ip, __div0_wrap, 1
    .size __aeabi_uidivmod, . - __aeabi_uidivmod
|
|
|
668 |
|
|
|
669 |
/* Signed divide/modulo (also reached as __aeabi_idiv).
   In:  r0 = numerator, r1 = divisor.
   Out: r0 = quotient,  r1 = remainder.
   r2, r3 and ip are used as scratch. lr is needed as a fourth scratch
   register, so the return address is parked at [sp, #-4] (sp itself is not
   moved) and every exit reloads pc from that slot. A zero divisor branches
   to __div0_wrap_s, which moves sp down over the parked word.
   Must stay in the same section as .L_udiv_est_table. */
__aeabi_idivmod:
    str     lr, [sp, #-4]
    ARMV5_SDIV32_BODY r0, r1, r0, r1, r2, lr, ip, r3, __div0_wrap_s, "ldr pc, [sp, #-4]"
    .size __aeabi_idivmod, . - __aeabi_idivmod
|
|
|
673 |
|
|
|
674 |
/* Reciprocal estimates for the ARMv5+ dividers. The divisor is normalised
   so that its top bit is set and the table is indexed by
   (normalised divisor >> 25) - 64, i.e. 64 entries. Entry i equals
   floor(2^14 / (64 + i)), with entry 0 clamped to 0xff.
   The dividers address this table pc-relative with an assemble-time
   constant offset, so it must stay in the same section as their code. */
.L_udiv_est_table:
    .byte   0xff, 0xfc, 0xf8, 0xf4, 0xf0, 0xed, 0xea, 0xe6
    .byte   0xe3, 0xe0, 0xdd, 0xda, 0xd7, 0xd4, 0xd2, 0xcf
    .byte   0xcc, 0xca, 0xc7, 0xc5, 0xc3, 0xc0, 0xbe, 0xbc
    .byte   0xba, 0xb8, 0xb6, 0xb4, 0xb2, 0xb0, 0xae, 0xac
    .byte   0xaa, 0xa8, 0xa7, 0xa5, 0xa3, 0xa2, 0xa0, 0x9f
    .byte   0x9d, 0x9c, 0x9a, 0x99, 0x97, 0x96, 0x94, 0x93
    .byte   0x92, 0x90, 0x8f, 0x8e, 0x8d, 0x8c, 0x8a, 0x89
    .byte   0x88, 0x87, 0x86, 0x85, 0x84, 0x83, 0x82, 0x81
|
|
|
683 |
#endif
|