/* Rev 147 (repository web-viewer navigation text removed) */
/***************************************************************************
 *             __________               __   ___.
 *   Open      \______   \ ____   ____ |  | _\_ |__   _______  ___
 *   Source     |       _//  _ \_/ ___\|  |/ /| __ \ /  _ \  \/  /
 *   Jukebox    |    |   (  <_> )  \___|    < | \_\ (  <_> > <  <
 *   Firmware   |____|_  /\____/ \___  >__|_ \|___  /\____/__/\_ \
 *                     \/            \/     \/    \/            \/
 * $Id$
 *
 * Copyright (C) 2008 by Jens Arnold
 * Copyright (C) 2009 by Andrew Mahone
 *
 * Optimised replacements for libgcc functions
 *
 * Based on: libgcc routines for ARM cpu, additional algorithms from ARM System
 *           Developer's Guide
 * Division routines, written by Richard Earnshaw, (rearnsha@armltd.co.uk)
 * Copyright 1995, 1996, 1998, 1999, 2000, 2003, 2004, 2005
 * Free Software Foundation, Inc.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
 * KIND, either express or implied.
 *
 ****************************************************************************/

/* Syntax: GNU as (ARM mode), passed through the C preprocessor. */

#define ASM_FILE
#include "global.h"

/*
 * ARM_SDIV32_PRE numerator, divisor, sign
 *
 * Shared prologue for the ARMv4 signed divider.
 *   - \divisor is negated when it is non-negative, so it leaves this macro
 *     as -|divisor| (the ARMv4 bodies work by adding a negated divisor).
 *   - \numerator is replaced by its absolute value.
 *   - \sign[31] holds the sign of the result (quotient), \sign[30:0] holds
 *     the numerator sign replicated, for use by ARM_SDIV32_POST.
 * Clobbers: flags.
 */
.macro ARM_SDIV32_PRE numerator, divisor, sign
    /* sign[31] = divisor sign */
    ands    \sign, \divisor, #1<<31
    rsbeq   \divisor, \divisor, #0
    /* sign[31] = result sign, sign[0:30], C = numerator sign */
    eors    \sign, \sign, \numerator, asr #32
    rsbcs   \numerator, \numerator, #0
.endm

/*
 * ARM_SDIV32_POST quotient, remainder, sign
 *
 * Shared epilogue for the signed dividers: applies the signs recorded in
 * \sign to the unsigned results.  Either \quotient or \remainder may be
 * passed as "" to skip the corresponding fixup.
 * Clobbers: \sign, flags.
 */
.macro ARM_SDIV32_POST quotient, remainder, sign
    /* C = sign[31] (result sign), N = sign[30] (numerator sign) */
    movs    \sign, \sign, lsl #1
.ifnc "", "\quotient"
    rsbcs   \quotient, \quotient, #0
.endif
.ifnc "", "\remainder"
    rsbmi   \remainder, \remainder, #0
.endif
.endm

#if ARM_ARCH < 5
/*
 * ARMV4_UDIV32_BODY numerator, divisor, quotient, remainder, tmp, bits,
 *                   div0label, return
 *
 * Unsigned 32-bit divide for cores without clz / long-multiply helpers.
 *   numerator, divisor : input registers (both are clobbered)
 *   quotient, remainder: output registers; either may be "" if unwanted
 *   tmp, bits          : scratch registers
 *   div0label          : branch target taken when divisor == 0, or "" to
 *                        omit the test
 *   return             : nonzero -> each exit path ends with "bx lr";
 *                        zero    -> exit paths converge on local label 99
 *                                   at the end of the expansion
 * Clobbers: flags.
 */
.macro ARMV4_UDIV32_BODY numerator, divisor, quotient, remainder, tmp, bits, div0label, return
    /* The body works with the negated divisor throughout. */
.ifnc "", "\div0label"
    rsbs    \divisor, \divisor, #0
    beq     \div0label
.else
    rsb     \divisor, \divisor, #0
.endif
    /* This SWAR divider requires a numerator less than 1<<31, because it must
       be able to shift the remainder left at each step without shifting out
       topmost bit. Since a shift might be needed for the aligned remainder to
       exceed the divisor, the topmost bit must be unset at the start to avoid
       this overflow case. The original numerator is saved so that the result
       can be corrected after the reduced division completes. */
    cmn     \numerator, \divisor
    /* Carry clear here means numerator < divisor: quotient is 0 and the
       remainder is the numerator itself. */
.ifc "", "\quotient"
.ifc "\numerator", "\remainder"
    /* Remainder-only, produced in place: nothing to move, just leave. */
.if \return
    bxcc    lr
.else
    bcc     99f
.endif
.else
    bcc     20f
.endif
.else
    bcc     20f
.endif
    movs    \tmp, \numerator
    movmi   \numerator, \numerator, lsr #1
    mov     \bits, #30
    /* Align the (negated) divisor with the numerator, counting in \bits how
       many of the 30 unrolled steps below can be skipped. */
.set shift, 16
.rept 5
    cmn     \divisor, \numerator, lsr #shift
    subcs   \bits, \bits, #shift
    movcs   \divisor, \divisor, lsl #shift
.set shift, shift >> 1
.endr
    adds    \numerator, \numerator, \divisor
    subcc   \numerator, \numerator, \divisor
    /* Computed jump into the unrolled loop: each step is two instructions
       (8 bytes), and pc reads as the address of the first step. */
    add     pc, pc, \bits, lsl #3
    nop
.rept 30
    adcs    \numerator, \divisor, \numerator, lsl #1
    subcc   \numerator, \numerator, \divisor
.endr
    adc     \numerator, \numerator, \numerator
    movs    \tmp, \tmp, asr #1
    rsb     \bits, \bits, #31
    /* N set: the original numerator had its top bit set and was halved. */
    bmi     10f
.ifc "", "\quotient"
    mov     \remainder, \numerator, lsr \bits
.else
.ifc "", "\remainder"
    mov     \divisor, \numerator, lsr \bits
    eor     \quotient, \numerator, \divisor, lsl \bits
.else
    mov     \remainder, \numerator, lsr \bits
    eor     \quotient, \numerator, \remainder, lsl \bits
.endif
.endif
.ifne \return
    bx      lr
.else
    b       99f
.endif
10:
    /* Correct the results of the reduced (halved-numerator) division. */
    mov     \tmp, \numerator, lsr \bits
    eor     \numerator, \numerator, \tmp, lsl \bits
    sub     \bits, \bits, #1
    adc     \tmp, \tmp, \tmp
    adds    \tmp, \tmp, \divisor, asr \bits
.ifnc "", "\quotient"
    adc     \quotient, \numerator, \numerator
.endif
.ifnc "", "\remainder"
    subcc   \remainder, \tmp, \divisor, asr \bits
    movcs   \remainder, \tmp
.endif
.ifne \return
    bx      lr
.else
    b       99f
.endif
20:
    /* numerator < divisor: quotient = 0, remainder = numerator. */
.ifnc "", "\remainder"
.ifnc "\remainder", "\numerator"
    mov     \remainder, \numerator
.endif
.endif
.ifnc "", "\quotient"
    mov     \quotient, #0
.endif
.ifne \return
    bx      lr
.else
99:
.endif
.endm

/*
 * ARMV4_SDIV32_BODY numerator, divisor, quotient, remainder, bits, sign,
 *                   div0label, return
 *
 * Signed 32-bit divide for cores without clz / long-multiply helpers.
 *   numerator, divisor : input registers (both are clobbered)
 *   quotient, remainder: output registers; either may be "" if unwanted
 *   bits, sign         : scratch registers
 *   div0label          : branch target taken when divisor == 0, or "" to
 *                        omit the test
 *   return             : nonzero -> each exit path applies the sign fixup
 *                                   and ends with "bx lr";
 *                        zero    -> exit paths converge on local label 99,
 *                                   where the sign fixup is applied
 * Clobbers: flags.
 */
.macro ARMV4_SDIV32_BODY numerator, divisor, quotient, remainder, bits, sign, div0label, return
    /* When this is wrapped for signed division, the wrapper code will handle
       inverting the divisor, and also the zero divisor test. */
    ARM_SDIV32_PRE \numerator, \divisor, \sign
.ifnc "", "\div0label"
    tst     \divisor, \divisor
    beq     \div0label
.endif
    /* This SWAR divider requires a numerator less than 1<<31, because it must
       be able to shift the remainder left at each step without shifting out
       topmost bit. With signed inputs, whose absolute value may not exceed
       1<<31, this may be accomplished simply by subtracting the divisor before
       beginning division, and adding 1 to the quotient. */
    adds    \numerator, \numerator, \divisor
    bcc     20f
    mov     \bits, #30
.set shift, 16
.rept 5
    cmn     \divisor, \numerator, lsr #shift
    subcs   \bits, \bits, #shift
    movcs   \divisor, \divisor, lsl #shift
.set shift, shift >> 1
.endr
    adds    \numerator, \numerator, \divisor
    subcc   \numerator, \numerator, \divisor
    /* Computed jump into the unrolled loop: each step is two instructions
       (8 bytes), and pc reads as the address of the first step. */
    add     pc, pc, \bits, lsl #3
    nop
.rept 30
    adcs    \numerator, \divisor, \numerator, lsl #1
    subcc   \numerator, \numerator, \divisor
.endr
    rsb     \bits, \bits, #31
    adc     \numerator, \numerator, \numerator
.ifc "", "\quotient"
    mov     \remainder, \numerator, lsr \bits
.else
.ifc "", "\remainder"
    mov     \divisor, \numerator, lsr \bits
    add     \numerator, \numerator, #1
    sub     \quotient, \numerator, \divisor, lsl \bits
.else
    mov     \remainder, \numerator, lsr \bits
    add     \numerator, \numerator, #1
    sub     \quotient, \numerator, \remainder, lsl \bits
.endif
.endif
.ifne \return
    ARM_SDIV32_POST \quotient, \remainder, \sign
    bx      lr
.else
    b       99f
.endif
20:
    /* |numerator| < |divisor|: undo the initial subtraction to recover the
       remainder; the quotient is zero. */
.ifnc "", "\remainder"
    sub     \remainder, \numerator, \divisor
.endif
.ifnc "", "\quotient"
    mov     \quotient, #0
.endif
.ifne \return
    ARM_SDIV32_POST "", \remainder, \sign
    bx      lr
.else
99:
    ARM_SDIV32_POST \quotient, \remainder, \sign
.endif
.endm

#else
/*
 * ARMV5_UDIV32_BODY numerator, divisor, quotient, remainder, bits, inv, neg,
 *                   div0label, return
 *
 * Unsigned 32-bit divide using clz, a 64-entry reciprocal estimate table
 * (.L_udiv_est_table) and Newton-Raphson refinement.
 *   numerator, divisor : input registers (both are clobbered)
 *   quotient, remainder: output registers; either may be "" if unwanted
 *   bits, inv, neg     : scratch registers
 *   div0label          : branch target taken when divisor == 0, or "" to
 *                        omit the test
 *   return             : nonzero -> each exit path ends with "bx lr";
 *                        zero    -> exit paths converge on local label 99
 *                                   at the end of the expansion
 * The table is addressed pc-relative, so .L_udiv_est_table must be in the
 * same section as the expansion and within ldrb immediate range.
 * Clobbers: flags.
 */
.macro ARMV5_UDIV32_BODY numerator, divisor, quotient, remainder, bits, inv, neg, div0label, return
    cmp     \numerator, \divisor
    clz     \bits, \divisor
    bcc     30f
    /* Normalise the divisor so that its top set bit is bit 31. */
    mov     \inv, \divisor, lsl \bits
    /* pc reads as the address of the ldrhib below; bits 30:25 of the
       normalised divisor select the table entry. */
    add     \neg, pc, \inv, lsr #25
    /* Test whether divisor is 2^N */
    cmp     \inv, #1<<31
    /* Load approximate reciprocal */
    ldrhib  \inv, [\neg, #.L_udiv_est_table-.-64]
    bls     20f
    subs    \bits, \bits, #7
    rsb     \neg, \divisor, #0
    /* Scale approximate reciprocal, or else branch to large-divisor path */
    movpl   \divisor, \inv, lsl \bits
    bmi     10f
    /* Newton-Raphson iteration to improve reciprocal accuracy */
    mul     \inv, \divisor, \neg
    smlawt  \divisor, \divisor, \inv, \divisor
    mul     \inv, \divisor, \neg
    /* Complete N-R math and produce approximate quotient. Use smmla/smmul on
       ARMv6. */
#if ARM_ARCH >= 6
    tst     \numerator, \numerator
    smmla   \divisor, \divisor, \inv, \divisor
    /* Branch to large-numerator handler, or else use smmul if sign bit is not
       set. This wins on average with random numerators, and should be no
       slower than using umull for small numerator, even if prediction fails.
    */
    bmi     40f
    smmul   \inv, \numerator, \divisor
#else
    /* ARMv5e lacks smmul, so always uses umull. */
    mov     \bits, #0
    smlal   \bits, \divisor, \inv, \divisor
    umull   \bits, \inv, \numerator, \divisor
#endif
    /* Calculate remainder and correct result. */
    add     \numerator, \numerator, \neg
.ifnc "", "\remainder"
    mla     \remainder, \inv, \neg, \numerator
.ifnc "", "\quotient"
    mov     \quotient, \inv
    cmn     \remainder, \neg
    subcs   \remainder, \remainder, \neg
    addpl   \remainder, \remainder, \neg, lsl #1
    addcc   \quotient, \quotient, #1
    addpl   \quotient, \quotient, #2
.else
    cmn     \remainder, \neg
    subcs   \remainder, \remainder, \neg
    addpl   \remainder, \remainder, \neg, lsl #1
.endif
.else
    mla     \divisor, \inv, \neg, \numerator
    mov     \quotient, \inv
    cmn     \divisor, \neg
    addcc   \quotient, \quotient, #1
    addpl   \quotient, \quotient, #2
.endif
.if \return
    bx      lr
.else
    b       99f
.endif
10:
    /* Very large divisors can be handled without further improving the
       reciprocal. First the reciprocal must be reduced to ensure that it
       underestimates the correct value. */
    rsb     \bits, \bits, #0
    sub     \inv, \inv, #4
    mov     \divisor, \inv, lsr \bits
    /* Calculate approximate quotient and remainder */
    umull   \bits, \inv, \numerator, \divisor
    /* Correct quotient and remainder */
.ifnc "", "\remainder"
    mla     \remainder, \inv, \neg, \numerator
.ifnc "", "\quotient"
    mov     \quotient, \inv
    cmn     \neg, \remainder, lsr #1
    addcs   \remainder, \remainder, \neg, lsl #1
    addcs   \quotient, \quotient, #2
    cmn     \neg, \remainder
    addcs   \remainder, \remainder, \neg
    addcs   \quotient, \quotient, #1
.else
    cmn     \neg, \remainder, lsr #1
    addcs   \remainder, \remainder, \neg, lsl #1
    cmn     \neg, \remainder
    addcs   \remainder, \remainder, \neg
.endif
.else
    mla     \divisor, \inv, \neg, \numerator
    mov     \quotient, \inv
    cmn     \neg, \divisor, lsr #1
    addcs   \divisor, \divisor, \neg, lsl #1
    addcs   \quotient, \quotient, #2
    cmn     \neg, \divisor
    addcs   \quotient, \quotient, #1
.endif
.if \return
    bx      lr
.else
    b       99f
.endif
20:
    /* Handle division by powers of two by shifting right. Mod is handled
       by using divisor-1 as a bitmask.
       Flags still come from "cmp \inv, #1<<31": NE here means the
       normalised divisor was below 1<<31, i.e. the divisor was zero. */
.ifnc "", "\remainder"
.ifnc "", "\div0label"
    bne     \div0label
.endif
.ifnc "", "\quotient"
    sub     \divisor, \divisor, #1
    rsb     \bits, \bits, #31
    and     \remainder, \numerator, \divisor
    mov     \quotient, \numerator, lsr \bits
.else
    sub     \divisor, \divisor, #1
    and     \remainder, \numerator, \divisor
.endif
.else
    rsb     \bits, \bits, #31
.ifnc "", "\div0label"
    bne     \div0label
.endif
    mov     \quotient, \numerator, lsr \bits
.endif
.if \return
    bx      lr
.else
    b       99f
.endif
30:
    /* Handle numerator < divisor - quotient is zero, remainder is the
       (still unmodified) numerator. */
.ifnc "", "\remainder"
    mov     \remainder, \numerator
.endif
.ifnc "", "\quotient"
    mov     \quotient, #0
.endif
.if \return
    bx      lr
.else
#if ARM_ARCH >= 6
    /* Skip over the large-numerator handler that follows on ARMv6. */
    b       99f
#endif
.endif
#if ARM_ARCH >= 6
40:
    /* Handle large (sign bit set) numerators. Works exactly as the ARMv5e code
       above 10:. */
    umull   \bits, \inv, \numerator, \divisor
    add     \numerator, \numerator, \neg
.ifnc "", "\remainder"
    mla     \remainder, \inv, \neg, \numerator
.ifnc "", "\quotient"
    mov     \quotient, \inv
    cmn     \remainder, \neg
    subcs   \remainder, \remainder, \neg
    addpl   \remainder, \remainder, \neg, lsl #1
    addcc   \quotient, \quotient, #1
    addpl   \quotient, \quotient, #2
.else
    cmn     \remainder, \neg
    subcs   \remainder, \remainder, \neg
    addpl   \remainder, \remainder, \neg, lsl #1
.endif
.else
    mla     \divisor, \inv, \neg, \numerator
    mov     \quotient, \inv
    cmn     \divisor, \neg
    addcc   \quotient, \quotient, #1
    addpl   \quotient, \quotient, #2
.endif
.if \return
    bx      lr
.else
    b       99f
.endif
#endif
99:
.endm

/*
 * ARMV5_SDIV32_BODY numerator, divisor, quotient, remainder, bits, inv, neg,
 *                   sign, div0label, return
 *
 * Signed 32-bit divide using clz, the reciprocal estimate table and
 * Newton-Raphson refinement.
 *   numerator, divisor : input registers (both are clobbered)
 *   quotient, remainder: output registers; either may be "" if unwanted
 *   bits, inv, neg, sign: scratch registers
 *   div0label          : branch target taken when divisor == 0, or "" to
 *                        omit the test
 *   return             : text of the instruction to emit at every exit
 *                        path, or "" to make exit paths converge on local
 *                        label 99 at the end of the expansion
 * The table is addressed pc-relative, so .L_udiv_est_table must be in the
 * same section as the expansion and within ldrb immediate range.
 * Clobbers: flags.
 */
.macro ARMV5_SDIV32_BODY numerator, divisor, quotient, remainder, bits, inv, neg, sign, div0label, return
    /* sign[31] = divisor sign */
    ands    \sign, \divisor, #1<<31
    rsbne   \divisor, \divisor, #0
    /* sign[31] = result sign, sign[0:30], C = numerator sign */
    eors    \sign, \sign, \numerator, asr #32
    clz     \bits, \divisor
    rsbcs   \numerator, \numerator, #0
    /* On ARMv6, subtract divisor before performing division, which ensures
       numerator sign bit is clear and smmul may be used in place of umull. The
       fixup for the results can be fit entirely into existing delay slots on
       the main division paths. It costs 1c in the num<div path if the
       remainder is to be produced in the numerator's register, and 1c in
       the power-of-2-divisor path only if producing both remainder and
       quotient. */
#if ARM_ARCH >= 6
    subs    \numerator, \numerator, \divisor
#else
    cmp     \numerator, \divisor
#endif
    movcs   \inv, \divisor, lsl \bits
    bcc     30f
    /* Test whether divisor is 2^N */
    cmp     \inv, #1<<31
    /* pc reads as the address of the ldrb below; bits 30:25 of the
       normalised divisor select the table entry. */
    add     \inv, pc, \inv, lsr #25
    bls     20f
    /* Load approximate reciprocal */
    ldrb    \inv, [\inv, #.L_udiv_est_table-.-64]
    subs    \bits, \bits, #7
    rsb     \neg, \divisor, #0
    /* Scale approximate reciprocal, or else branch to large-divisor path */
    movpl   \divisor, \inv, lsl \bits
    bmi     10f
    /* Newton-Raphson iteration to improve reciprocal accuracy */
    mul     \inv, \divisor, \neg
    smlawt  \divisor, \divisor, \inv, \divisor
    mul     \inv, \divisor, \neg
    /* Complete N-R math and produce approximate quotient. Use smmla/smmul on
       ARMv6. */
#if ARM_ARCH >= 6
    smmla   \divisor, \divisor, \inv, \divisor
    smmul   \inv, \numerator, \divisor
#else
    mov     \bits, #0
    smlal   \bits, \divisor, \inv, \divisor
    umull   \bits, \inv, \numerator, \divisor
#endif
    /* Calculate remainder and correct quotient. */
    add     \numerator, \numerator, \neg
.ifnc "", "\remainder"
    mla     \remainder, \inv, \neg, \numerator
.ifnc "", "\quotient"
#if ARM_ARCH >= 6
    /* +1 compensates for the divisor subtracted from the numerator above. */
    add     \quotient, \inv, #1
#else
    mov     \quotient, \inv
#endif
    cmn     \remainder, \neg
    subcs   \remainder, \remainder, \neg
    addpl   \remainder, \remainder, \neg, lsl #1
    addcc   \quotient, \quotient, #1
    addpl   \quotient, \quotient, #2
.else
    cmn     \remainder, \neg
    subcs   \remainder, \remainder, \neg
    addpl   \remainder, \remainder, \neg, lsl #1
.endif
.else
    mla     \divisor, \inv, \neg, \numerator
#if ARM_ARCH >= 6
    add     \quotient, \inv, #1
#else
    mov     \quotient, \inv
#endif
    cmn     \divisor, \neg
    addcc   \quotient, \quotient, #1
    addpl   \quotient, \quotient, #2
.endif
    ARM_SDIV32_POST \quotient, \remainder, \sign
.ifnc "", "\return"
    \return
.else
    b       99f
.endif
10:
    /* Very large divisors can be handled without further improving the
       reciprocal. First the reciprocal must be reduced to ensure that it
       underestimates the correct value. */
    rsb     \bits, \bits, #0
    sub     \inv, \inv, #4
    mov     \divisor, \inv, lsr \bits
    /* Calculate approximate quotient and remainder */
#if ARM_ARCH >= 6
    smmul   \inv, \numerator, \divisor
#else
    umull   \bits, \inv, \numerator, \divisor
#endif
    /* Correct quotient and remainder */
.ifnc "", "\remainder"
    mla     \remainder, \inv, \neg, \numerator
.ifnc "", "\quotient"
#if ARM_ARCH >= 6
    add     \quotient, \inv, #1
#else
    mov     \quotient, \inv
#endif
    cmn     \neg, \remainder, lsr #1
    addcs   \remainder, \remainder, \neg, lsl #1
    addcs   \quotient, \quotient, #2
    cmn     \neg, \remainder
    addcs   \remainder, \remainder, \neg
    addcs   \quotient, \quotient, #1
.else
    cmn     \neg, \remainder, lsr #1
    addcs   \remainder, \remainder, \neg, lsl #1
    cmn     \neg, \remainder
    addcs   \remainder, \remainder, \neg
.endif
.else
    mla     \divisor, \inv, \neg, \numerator
#if ARM_ARCH >= 6
    add     \quotient, \inv, #1
#else
    mov     \quotient, \inv
#endif
    cmn     \neg, \divisor, lsr #1
    addcs   \divisor, \divisor, \neg, lsl #1
    addcs   \quotient, \quotient, #2
    cmn     \neg, \divisor
    addcs   \quotient, \quotient, #1
.endif
    ARM_SDIV32_POST \quotient, \remainder, \sign
.ifnc "", "\return"
    \return
.else
    b       99f
.endif
20:
    /* Handle division by powers of two by shifting right. Mod is handled
       by using divisor-1 as a bitmask.
       Flags still come from "cmp \inv, #1<<31": NE here means the
       normalised divisor was below 1<<31, i.e. the divisor was zero. */
.ifnc "", "\div0label"
    bne     \div0label
.endif
.ifnc "", "\remainder"
.ifnc "", "\quotient"
    rsb     \bits, \bits, #31
#if ARM_ARCH >= 6
    /* Undo the up-front divisor subtraction before shifting. */
    add     \numerator, \numerator, \divisor
#endif
    sub     \divisor, \divisor, #1
    and     \remainder, \numerator, \divisor
    mov     \quotient, \numerator, lsr \bits
.else
    sub     \divisor, \divisor, #1
    and     \remainder, \numerator, \divisor
.endif
.else
    rsb     \bits, \bits, #31
#if ARM_ARCH >= 6
    add     \numerator, \numerator, \divisor
#endif
    mov     \quotient, \numerator, lsr \bits
.endif
    ARM_SDIV32_POST \quotient, \remainder, \sign
.ifnc "", "\return"
    \return
.else
    b       99f
.endif
30:
    /* Handle numerator < divisor - quotient is zero, remainder is numerator,
       which must be restored to its original value on ARMv6. */
.ifnc "", "\remainder"
#if ARM_ARCH >= 6
    add     \remainder, \numerator, \divisor
#else
.ifnc "\remainder", "\numerator"
    mov     \remainder, \numerator
.endif
#endif
.endif
.ifnc "", "\quotient"
    mov     \quotient, #0
.endif
.ifnc "", "\remainder"
    ARM_SDIV32_POST "", \remainder, \sign
.endif
.ifnc "", "\return"
    \return
.endif
99:
.endm
#endif

/* Division-by-zero trampoline for the ARMv5+ signed entry points, which
   store lr at [sp, #-4] without moving sp: move sp over that slot, then
   hand off to __div0. */
.section .text.__div0_wrap_s
__div0_wrap_s:
    sub     sp, sp, #4
    b       __div0
    .size __div0_wrap_s, . - __div0_wrap_s

/* Division-by-zero trampoline for entry points that have not saved lr:
   push lr, then hand off to __div0. */
.section .text.__div0_wrap
__div0_wrap:
    str     lr, [sp, #-4]!
    b       __div0
    .size __div0_wrap, . - __div0_wrap

#ifndef __ARM_EABI__
.global __divsi3
.type   __divsi3,%function
.global __udivsi3
.type   __udivsi3,%function
#else
/* The div+mod averages a fraction of a cycle worse for signed values, and
   slightly better for unsigned, so just alias div to divmod. */
.global __aeabi_uidivmod
.type   __aeabi_uidivmod,%function
.global __aeabi_uidiv
.type   __aeabi_uidiv,%function
.set    __aeabi_uidiv,__aeabi_uidivmod
.global __aeabi_idivmod
.type   __aeabi_idivmod,%function
.global __aeabi_idiv
.type   __aeabi_idiv,%function
.set    __aeabi_idiv,__aeabi_idivmod
#endif

#if ARM_ARCH < 5
/* __clzsi2: r0 = count of leading zero bits in r0 (table entry 0 yields 32
   for a zero input).  The value is hashed into a 6-bit index and looked up
   in the byte table that immediately follows the code. */
.section .text.__clzsi2
.global __clzsi2
.type   __clzsi2, %function
__clzsi2:
    orr     r0, r0, r0, lsr #8
    orr     r0, r0, r0, lsr #4
    orr     r0, r0, r0, lsr #2
    orr     r0, r0, r0, lsr #1
    bic     r0, r0, r0, lsr #16
    rsb     r0, r0, r0, lsl #14
    rsb     r0, r0, r0, lsl #11
    rsb     r0, r0, r0, lsl #9
    /* pc reads as the address of the table (two instructions ahead); do not
       insert anything between this load and the table. */
    ldrb    r0, [pc, r0, lsr #26]
    bx      lr
    .byte 32, 20, 19,  0,  0, 18,  0,  7, 10, 17,  0,  0, 14,  0,  6,  0
    .byte  0,  9,  0, 16,  0,  0,  1, 26,  0, 13,  0,  0, 24,  5,  0,  0
    .byte  0, 21,  0,  8, 11,  0, 15,  0,  0,  0,  0,  2, 27,  0, 25,  0
    .byte 22,  0, 12,  0,  0,  3, 28,  0, 23,  0,  4, 29,  0,  0, 30, 31
    .size __clzsi2, .-__clzsi2

.section .text.__divisionhelpers

#ifndef __ARM_EABI__
/* r0 = r0 / r1 (unsigned) */
__udivsi3:
    ARMV4_UDIV32_BODY r0, r1, r0, "", r2, r3, __div0_wrap, 1
    .size __udivsi3, . - __udivsi3

/* r0 = r0 / r1 (signed) */
__divsi3:
    ARMV4_SDIV32_BODY r0, r1, r0, "", r2, r3, __div0_wrap, 1
    .size __divsi3, . - __divsi3

#else
/* r0 = r0 / r1, r1 = r0 % r1 (unsigned) */
__aeabi_uidivmod:
    ARMV4_UDIV32_BODY r0, r1, r0, r1, r2, r3, __div0_wrap, 1
    .size __aeabi_uidivmod, . - __aeabi_uidivmod

/* r0 = r0 / r1, r1 = r0 % r1 (signed) */
__aeabi_idivmod:
    ARMV4_SDIV32_BODY r0, r1, r0, r1, r2, r3, __div0_wrap, 1
    .size __aeabi_idivmod, . - __aeabi_idivmod
#endif

#else
/* __clzsi2: r0 = count of leading zero bits in r0. */
.global __clzsi2
.type   __clzsi2, %function
__clzsi2:
    clz     r0, r0
    bx      lr
    .size __clzsi2, . - __clzsi2

#ifndef __ARM_EABI__
/* r0 = r0 / r1 (unsigned) */
__udivsi3:
    ARMV5_UDIV32_BODY r0, r1, r0, "", r2, r3, ip, __div0_wrap, 1
    .size __udivsi3, . - __udivsi3

/* r0 = r0 / r1 (signed).  lr is used as a scratch register by the body, so
   the return address is parked just below sp and reloaded into pc on exit. */
__divsi3:
    str     lr, [sp, #-4]
    ARMV5_SDIV32_BODY r0, r1, r0, "", r2, lr, ip, r3, __div0_wrap_s, "ldr pc, [sp, #-4]"
    .size __divsi3, . - __divsi3

#else
/* r0 = r0 / r1, r1 = r0 % r1 (unsigned) */
__aeabi_uidivmod:
    ARMV5_UDIV32_BODY r0, r1, r0, r1, r2, r3, ip, __div0_wrap, 1
    .size __aeabi_uidivmod, . - __aeabi_uidivmod

/* r0 = r0 / r1, r1 = r0 % r1 (signed).  lr is used as a scratch register by
   the body, so the return address is parked just below sp and reloaded into
   pc on exit. */
__aeabi_idivmod:
    str     lr, [sp, #-4]
    ARMV5_SDIV32_BODY r0, r1, r0, r1, r2, lr, ip, r3, __div0_wrap_s, "ldr pc, [sp, #-4]"
    .size __aeabi_idivmod, . - __aeabi_idivmod
#endif

/* Reciprocal estimates, indexed by bits 30:25 of the normalised divisor
   (the ARMV5_*DIV32_BODY macros bias the index by -64). */
.L_udiv_est_table:
    .byte 0xff, 0xfc, 0xf8, 0xf4, 0xf0, 0xed, 0xea, 0xe6
    .byte 0xe3, 0xe0, 0xdd, 0xda, 0xd7, 0xd4, 0xd2, 0xcf
    .byte 0xcc, 0xca, 0xc7, 0xc5, 0xc3, 0xc0, 0xbe, 0xbc
    .byte 0xba, 0xb8, 0xb6, 0xb4, 0xb2, 0xb0, 0xae, 0xac
    .byte 0xaa, 0xa8, 0xa7, 0xa5, 0xa3, 0xa2, 0xa0, 0x9f
    .byte 0x9d, 0x9c, 0x9a, 0x99, 0x97, 0x96, 0x94, 0x93
    .byte 0x92, 0x90, 0x8f, 0x8e, 0x8d, 0x8c, 0x8a, 0x89
    .byte 0x88, 0x87, 0x86, 0x85, 0x84, 0x83, 0x82, 0x81
#endif