MloFloatFix.h

00001 /*****************************************************************
00002 |
00003 |    Copyright 2004-2006 Axiomatic Systems LLC
00004 |
00005 |    This file is part of Melo (Melo AAC Decoder).
00006 |
00007 |    Unless you have obtained Melo under a different license,
00008 |    this version of Melo is Melo|GPL.
00009 |    Melo|GPL is free software; you can redistribute it and/or modify
00010 |    it under the terms of the GNU General Public License as published by
00011 |    the Free Software Foundation; either version 2, or (at your option)
00012 |    any later version.
00013 |
00014 |    Melo|GPL is distributed in the hope that it will be useful,
00015 |    but WITHOUT ANY WARRANTY; without even the implied warranty of
00016 |    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00017 |    GNU General Public License for more details.
00018 |
00019 |    You should have received a copy of the GNU General Public License
00020 |    along with Melo|GPL; see the file COPYING.  If not, write to the
00021 |    Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
00022 |    02111-1307, USA.
00023 |
00024  ****************************************************************/
00025 
00026 #ifndef _MLO_FLOAT_FIX_H_
00027 #define _MLO_FLOAT_FIX_H_
00028 
00029 
00030 
00031 #if ! defined (_MLO_FLOAT_H_)
00032 #error This header can be included only by MloFloat.h
00033 #endif
00034 
00035 
00036 
00037 /*----------------------------------------------------------------------
00038 |       Includes
00039 +---------------------------------------------------------------------*/
00040 
00041 
00042 
00043 #include "MloConfig.h"
00044 #include "MloDebug.h"
00045 
00046 
00047 
00048 /*----------------------------------------------------------------------
00049 |       Definitions
00050 +---------------------------------------------------------------------*/
00051 
00052 
00053 
00054 #if ! defined (MLO_CONFIG_HAVE_INT64)
00055 #error This implementation requires 64-bit integer type
00056 #endif
00057 
00058 
00059 
00060 typedef  MLO_CONFIG_INT64_TYPE   MLO_Float;
00061 
00062 
00063 
00064 /* Resolution in bits */
00065 enum {   MLO_FLOAT_BD   = 64  };
00066 
00067 /* Number of bits of the fractional part. Must be even */
00068 enum {   MLO_FLOAT_FRAC = 30  };
00069 
00070 #define  MLO_FLOAT_C(x) ((MLO_Float) ((x) * (65536.0 * (double) (1L << (MLO_FLOAT_FRAC - 16)))))
00071 
00072 static const MLO_Float  MLO_Float_one = ((MLO_Float) 1) << MLO_FLOAT_FRAC;
00073 static const MLO_Float  MLO_Float_frac_mask = (((MLO_Float) 1) << MLO_FLOAT_FRAC) - 1;
00074 static const MLO_Float  MLO_Float_max = ~((MLO_Float) 1) ^ (((MLO_Float)1) << (MLO_FLOAT_BD - 1));
00075 
00076 #if ! defined (NDEBUG)
00077 #define  MLO_FLOAT_IS_SAME_SIGN(a, b)  (((a) <= 0 && (b) <= 0) || ((a) >= 0 && (b) >= 0))
00078 #endif
00079 
00080 
00081 
00082 /*----------------------------------------------------------------------
00083 |       Private functions
00084 +---------------------------------------------------------------------*/
00085 static inline MLO_CONFIG_INT64_TYPE MLO_Float_Mul3232To64 (MLO_Int32 a, MLO_Int32 b)
00086 {
00087 #if defined (_MSC_VER) && defined(_X86_)
00088 
00089    __asm
00090    {
00091       mov         eax, a
00092       mov         edx, b
00093       imul        edx
00094    }
00095    /* No return */
00096 
00097 #elif defined (__GNUC__) && defined (__i386__)
00098 
00099    MLO_CONFIG_INT64_TYPE   ret_val;
00100     __asm__ (
00101       "imul %2"
00102       : "=A" (ret_val)
00103       : "a" (a), "r" (b)
00104    );
00105    return (ret_val);
00106 
00107 #else
00108 
00109    return (((MLO_CONFIG_INT64_TYPE) a) * ((MLO_CONFIG_INT64_TYPE) b));
00110 
00111 #endif
00112 }
00113 
00114 /*----------------------------------------------------------------------
00115 |       Public functions
00116 +---------------------------------------------------------------------*/
00117 static inline MLO_Float MLO_Float_ConvIntToFloat (int a)
00118 {
00119    return (((MLO_Float) a) << MLO_FLOAT_FRAC);
00120 }
00121 
00122 static inline int MLO_Float_RoundInt (MLO_Float a)
00123 {
00124    const MLO_Float   half = ((MLO_Float) 1) << (MLO_FLOAT_FRAC - 1);
00125    return ((int) ((a + half) >> MLO_FLOAT_FRAC));
00126 }
00127 
00128 static inline MLO_Float MLO_Float_Add (MLO_Float a, MLO_Float b)
00129 {
00130    return (a + b);
00131 }
00132 
00133 static inline MLO_Float MLO_Float_Sub (MLO_Float a, MLO_Float b)
00134 {
00135    return (a - b);
00136 }
00137 
00138 static inline MLO_Float MLO_Float_Neg (MLO_Float a)
00139 {
00140    return (-a);
00141 }
00142 
00143 static inline MLO_Float MLO_Float_Mul (MLO_Float a, MLO_Float b)
00144 {
00145    const MLO_Int32   a_f = ((MLO_Int32) a) & ((MLO_Int32) MLO_Float_frac_mask);
00146    const MLO_Int32   a_i = (MLO_Int32) (a >> MLO_FLOAT_FRAC);
00147    const MLO_Int32   b_f = ((MLO_Int32) b) & ((MLO_Int32) MLO_Float_frac_mask);
00148    const MLO_Int32   b_i = (MLO_Int32) (b >> MLO_FLOAT_FRAC);
00149 
00150 #if 1
00151 
00152    const MLO_Float   ab_ff = MLO_Float_Mul3232To64 (a_f, b_f) >> MLO_FLOAT_FRAC;
00153    const MLO_Float   ab_if = MLO_Float_Mul3232To64 (a_i, b_f) + MLO_Float_Mul3232To64 (a_f, b_i);
00154    const MLO_Float   ab_ii = MLO_Float_Mul3232To64 (a_i, b_i) << MLO_FLOAT_FRAC;
00155 
00156    return (ab_ii + ab_if + ab_ff);
00157 
00158 #else
00159 
00160    const MLO_Float   a_bi = a * b_i;
00161    const MLO_Float   af_bf = MLO_Float_Mul3232To64 (a_f, b_f) >> MLO_FLOAT_FRAC;
00162    const MLO_Float   ai_bf = MLO_Float_Mul3232To64 (a_i, b_f);
00163 
00164    MLO_ASSERT (MLO_FLOAT_IS_SAME_SIGN (a, a_i));
00165    MLO_ASSERT (MLO_FLOAT_IS_SAME_SIGN (b, b_i));
00166 
00167    return (a_bi + af_bf + ai_bf);
00168 
00169 #endif
00170 }
00171 
00172 static inline MLO_Float MLO_Float_MulInt (MLO_Float a, int b)
00173 {
00174    return (a * b);
00175 }
00176 
00177 /* Only for positive operands */
00178 static inline MLO_Float MLO_Float_Div (MLO_Float a, MLO_Float b)
00179 {
00180    MLO_CONFIG_INT64_TYPE   d;
00181    MLO_Float      sum;
00182 
00183    MLO_ASSERT (a >= 0);
00184    MLO_ASSERT (b > 0);
00185    MLO_ASSERT (b < (((MLO_CONFIG_INT64_TYPE)1) << (64-1 - 8)));
00186 
00187    d = a / b;  /* Integer part */
00188    sum = d << MLO_FLOAT_FRAC;
00189    a -= d * b; /* Remainder */
00190 
00191    a <<= MLO_FLOAT_FRAC - 24;
00192    d = a / b;
00193    sum += d << 24;
00194    a -= d * b;
00195 
00196    a <<= 24 - 16;
00197    d = a / b;
00198    sum += d << 16;
00199    a -= d * b;
00200 
00201    a <<= 16 - 8;
00202    d = a / b;
00203    sum += d << 8;
00204    a -= d * b;
00205 
00206    a <<= 8 - 0;
00207    d = a / b;
00208    sum += d;
00209    a -= d * b;
00210 
00211    return (sum);
00212 }
00213 
00214 static inline MLO_Float MLO_Float_DivInt (MLO_Float a, int b)
00215 {
00216    MLO_ASSERT (b > 0);
00217 
00218    return (a / b);
00219 }
00220 
00221 static inline MLO_Float MLO_Float_ScaleP2 (MLO_Float a, int b)
00222 {
00223    if (b < 0)
00224    {
00225       if (b > -64)
00226       {
00227          a >>= -b;
00228       }
00229       else
00230       {
00231          a = 0;
00232       }
00233    }
00234    else
00235    {
00236       MLO_ASSERT (b < 64);
00237 
00238       a <<= b;
00239    }
00240 
00241    return (a);
00242 }
00243 
00244 /*
00245 
00246 Algorithm based on the binary decomposition of a square:
00247 Input: r
00248 Output: a = r*r
00249 
00250 for (i = 0; i < n; ++i)
00251 {
00252    if ((r & (1 << i)) != 0)
00253    {
00254       r -= 1 << i;
00255       a += (1 << (2*i)) + (r << (i+1))
00256    }
00257 }
00258 
00259 Inverted to obtain the square root:
00260 Input: a = r*r
00261 Output: r
00262 
00263 for (i = n-1; i >= 0; --i)
00264 {
00265    an = a - (1 << (2*i)) - (r << (i+1))
00266    if (an >= 0)
00267    {
00268       a = an;
00269       r += 1 << i;
00270    }
00271 }
00272 
00273 */
00274 static inline MLO_Float MLO_Float_Sqrt (MLO_Float a)
00275 {
00276    MLO_Float      r = 0;
00277 
00278    MLO_CHECK_CST (Even_precision_requested, (MLO_FLOAT_FRAC & 1) == 0);
00279 
00280 #if 1 /* Unrolled loop */
00281 
00282 #define  MLO_FLOAT_SQRT_ITERATE(a, r, i)  do \
00283    {  \
00284       const MLO_Float   tmp = a - (((MLO_Float)1) << (2*i)) - (r << (i+1)); \
00285       if (tmp >= 0)   \
00286       {  \
00287          a = tmp;  \
00288          r += ((MLO_Float)1) << i;  \
00289       }  \
00290    } while (0)
00291 
00292    if (a >= ((MLO_Float) 1) << (30+8))
00293    {
00294       MLO_FLOAT_SQRT_ITERATE (a, r, 30);
00295       MLO_FLOAT_SQRT_ITERATE (a, r, 29);
00296       MLO_FLOAT_SQRT_ITERATE (a, r, 28);
00297       MLO_FLOAT_SQRT_ITERATE (a, r, 27);
00298       MLO_FLOAT_SQRT_ITERATE (a, r, 26);
00299       MLO_FLOAT_SQRT_ITERATE (a, r, 25);
00300       MLO_FLOAT_SQRT_ITERATE (a, r, 24);
00301       MLO_FLOAT_SQRT_ITERATE (a, r, 23);
00302       MLO_FLOAT_SQRT_ITERATE (a, r, 22);
00303       MLO_FLOAT_SQRT_ITERATE (a, r, 21);
00304       MLO_FLOAT_SQRT_ITERATE (a, r, 20);
00305       MLO_FLOAT_SQRT_ITERATE (a, r, 19);
00306    }
00307    MLO_FLOAT_SQRT_ITERATE (a, r, 18);
00308    MLO_FLOAT_SQRT_ITERATE (a, r, 17);
00309    MLO_FLOAT_SQRT_ITERATE (a, r, 16);
00310    MLO_FLOAT_SQRT_ITERATE (a, r, 15);
00311    MLO_FLOAT_SQRT_ITERATE (a, r, 14);
00312    MLO_FLOAT_SQRT_ITERATE (a, r, 13);
00313    MLO_FLOAT_SQRT_ITERATE (a, r, 12);
00314    MLO_FLOAT_SQRT_ITERATE (a, r, 11);
00315    MLO_FLOAT_SQRT_ITERATE (a, r, 10);
00316    MLO_FLOAT_SQRT_ITERATE (a, r,  9);
00317    MLO_FLOAT_SQRT_ITERATE (a, r,  8);
00318    MLO_FLOAT_SQRT_ITERATE (a, r,  7);
00319    MLO_FLOAT_SQRT_ITERATE (a, r,  6);
00320    MLO_FLOAT_SQRT_ITERATE (a, r,  5);
00321    MLO_FLOAT_SQRT_ITERATE (a, r,  4);
00322    MLO_FLOAT_SQRT_ITERATE (a, r,  3);
00323    MLO_FLOAT_SQRT_ITERATE (a, r,  2);
00324    MLO_FLOAT_SQRT_ITERATE (a, r,  1);
00325    if (r < a)
00326    {
00327       ++r;
00328    }
00329 
00330    return (r << (MLO_FLOAT_FRAC / 2));
00331 
00332 #undef   MLO_FLOAT_SQRT_ITERATE
00333 
00334 #else /* Naive implementation */
00335 
00336    MLO_Float      m = ((MLO_Float)1) << 60;
00337 
00338    MLO_ASSERT (a >= 0);
00339 
00340    do
00341    {
00342       const MLO_Float   a_new = a - m - r;
00343       r >>= 1;
00344       if (a_new >= 0)
00345       {
00346          a = a_new;
00347          r += m;
00348       }
00349       m >>= 2;
00350    }
00351    while (m > 0);
00352 
00353    return (r << (MLO_FLOAT_FRAC / 2));
00354 
00355 #endif
00356 }
00357 
00358 static inline MLO_Float MLO_Float_Lerp (MLO_Float a, MLO_Float b, int k, int bits)
00359 {
00360    const MLO_Float   diff = MLO_Float_Sub (b, a);
00361 
00362    MLO_ASSERT (k >= 0);
00363    MLO_ASSERT (k < (1L << bits));
00364    
00365    return (MLO_Float_Add (a, MLO_Float_MulInt (diff, k) >> bits));
00366 }
00367 
00368 
00369 
00370 #endif   /* _MLO_FLOAT_FIX_H_ */