POK
e_sqrt.c
1 /*
2  * POK header
3  *
4  * The following file is a part of the POK project. Any modification should
5  * made according to the POK licence. You CANNOT use this file or a part of
6  * this file is this part of a file for your own project
7  *
8  * For more information on the POK licence, please see our LICENCE FILE
9  *
10  * Please follow the coding guidelines described in doc/CODING_GUIDELINES
11  *
12  * Copyright (c) 2007-2009 POK team
13  *
14  * Created by julien on Fri Jan 30 13:44:27 2009
15  */
16 
17 /* @(#)e_sqrt.c 5.1 93/09/24 */
18 /*
19  * ====================================================
20  * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
21  *
22  * Developed at SunPro, a Sun Microsystems, Inc. business.
23  * Permission to use, copy, modify, and distribute this
24  * software is freely granted, provided that this notice
25  * is preserved.
26  * ====================================================
27  */
28 
29 /* __ieee754_sqrt(x)
30  * Return correctly rounded sqrt.
31  * ------------------------------------------
32  * | Use the hardware sqrt if you have one |
33  * ------------------------------------------
34  * Method:
35  * Bit by bit method using integer arithmetic. (Slow, but portable)
36  * 1. Normalization
37  * Scale x to y in [1,4) with even powers of 2:
38  * find an integer k such that 1 <= (y=x*2^(2k)) < 4, then
39  * sqrt(x) = 2^k * sqrt(y)
40  * 2. Bit by bit computation
41  * Let q = sqrt(y) truncated to i bit after binary point (q = 1),
42  * i 0
43  * i+1 2
44  * s = 2*q , and y = 2 * ( y - q ). (1)
45  * i i i i
46  *
47  * To compute q from q , one checks whether
48  * i+1 i
49  *
50  * -(i+1) 2
51  * (q + 2 ) <= y. (2)
52  * i
53  * -(i+1)
54  * If (2) is false, then q = q ; otherwise q = q + 2 .
55  * i+1 i i+1 i
56  *
57  * With some algebric manipulation, it is not difficult to see
58  * that (2) is equivalent to
59  * -(i+1)
60  * s + 2 <= y (3)
61  * i i
62  *
63  * The advantage of (3) is that s and y can be computed by
64  * i i
65  * the following recurrence formula:
66  * if (3) is false
67  *
68  * s = s , y = y ; (4)
69  * i+1 i i+1 i
70  *
71  * otherwise,
72  * -i -(i+1)
73  * s = s + 2 , y = y - s - 2 (5)
74  * i+1 i i+1 i i
75  *
76  * One may easily use induction to prove (4) and (5).
77  * Note. Since the left hand side of (3) contain only i+2 bits,
78  * it does not necessary to do a full (53-bit) comparison
79  * in (3).
80  * 3. Final rounding
81  * After generating the 53 bits result, we compute one more bit.
82  * Together with the remainder, we can decide whether the
83  * result is exact, bigger than 1/2ulp, or less than 1/2ulp
84  * (it will never equal to 1/2ulp).
85  * The rounding mode can be detected by checking whether
86  * huge + tiny is equal to huge, and whether huge - tiny is
87  * equal to huge for some floating point number "huge" and "tiny".
88  *
89  * Special cases:
90  * sqrt(+-0) = +-0 ... exact
91  * sqrt(inf) = inf
92  * sqrt(-ve) = NaN ... with invalid signal
93  * sqrt(NaN) = NaN ... with invalid signal for signaling NaN
94  *
95  * Other methods : see the appended file at the end of the program below.
96  *---------------
97  */
98 
99 #ifdef POK_NEEDS_LIBMATH
100 
101 #include <types.h>
102 #include "math_private.h"
103 
104 static const double one = 1.0, tiny=1.0e-300;
105 
106 double
107 __ieee754_sqrt(double x)
108 {
109  double z;
110  int32_t sign = (int)0x80000000;
111  int32_t ix0,s0,q,m,t,i;
112  uint32_t r,t1,s1,ix1,q1;
113 
114  EXTRACT_WORDS(ix0,ix1,x);
115 
116  /* take care of Inf and NaN */
117  if((ix0&0x7ff00000)==0x7ff00000) {
118  return x*x+x; /* sqrt(NaN)=NaN, sqrt(+inf)=+inf
119  sqrt(-inf)=sNaN */
120  }
121  /* take care of zero */
122  if(ix0<=0) {
123  if(((ix0&(~sign))|ix1)==0) return x;/* sqrt(+-0) = +-0 */
124  else if(ix0<0)
125  return (x-x)/(x-x); /* sqrt(-ve) = sNaN */
126  }
127  /* normalize x */
128  m = (ix0>>20);
129  if(m==0) { /* subnormal x */
130  while(ix0==0) {
131  m -= 21;
132  ix0 |= (ix1>>11); ix1 <<= 21;
133  }
134  for(i=0;(ix0&0x00100000)==0;i++) ix0<<=1;
135  m -= i-1;
136  ix0 |= (ix1>>(32-i));
137  ix1 <<= i;
138  }
139  m -= 1023; /* unbias exponent */
140  ix0 = (ix0&0x000fffff)|0x00100000;
141  if(m&1){ /* odd m, double x to make it even */
142  ix0 += ix0 + ((ix1&sign)>>31);
143  ix1 += ix1;
144  }
145  m >>= 1; /* m = [m/2] */
146 
147  /* generate sqrt(x) bit by bit */
148  ix0 += ix0 + ((ix1&sign)>>31);
149  ix1 += ix1;
150  q = q1 = s0 = s1 = 0; /* [q,q1] = sqrt(x) */
151  r = 0x00200000; /* r = moving bit from right to left */
152 
153  while(r!=0) {
154  t = s0+r;
155  if(t<=ix0) {
156  s0 = t+r;
157  ix0 -= t;
158  q += r;
159  }
160  ix0 += ix0 + ((ix1&sign)>>31);
161  ix1 += ix1;
162  r>>=1;
163  }
164 
165  r = sign;
166  while(r!=0) {
167  t1 = s1+r;
168  t = s0;
169  if((t<ix0)||((t==ix0)&&(t1<=ix1))) {
170  s1 = t1+r;
171  if(((t1&sign)==(uint32_t)sign)&&(s1&sign)==0) s0 += 1;
172  ix0 -= t;
173  if (ix1 < t1) ix0 -= 1;
174  ix1 -= t1;
175  q1 += r;
176  }
177  ix0 += ix0 + ((ix1&sign)>>31);
178  ix1 += ix1;
179  r>>=1;
180  }
181 
182  /* use floating add to find out rounding direction */
183  if((ix0|ix1)!=0) {
184  z = one-tiny; /* trigger inexact flag */
185  if (z>=one) {
186  z = one+tiny;
187  if (q1==(uint32_t)0xffffffff) { q1=0; q += 1;}
188  else if (z>one) {
189  if (q1==(uint32_t)0xfffffffe) q+=1;
190  q1+=2;
191  } else
192  q1 += (q1&1);
193  }
194  }
195  ix0 = (q>>1)+0x3fe00000;
196  ix1 = q1>>1;
197  if ((q&1)==1) ix1 |= sign;
198  ix0 += (m <<20);
199  INSERT_WORDS(z,ix0,ix1);
200  return z;
201 }
202 
203 /*
204 Other methods (use floating-point arithmetic)
205 -------------
206 (This is a copy of a drafted paper by Prof W. Kahan
207 and K.C. Ng, written in May, 1986)
208 
209  Two algorithms are given here to implement sqrt(x)
210  (IEEE double precision arithmetic) in software.
211  Both supply sqrt(x) correctly rounded. The first algorithm (in
212  Section A) uses newton iterations and involves four divisions.
213  The second one uses reciproot iterations to avoid division, but
214  requires more multiplications. Both algorithms need the ability
215  to chop results of arithmetic operations instead of round them,
216  and the INEXACT flag to indicate when an arithmetic operation
217  is executed exactly with no roundoff error, all part of the
218  standard (IEEE 754-1985). The ability to perform shift, add,
219  subtract and logical AND operations upon 32-bit words is needed
220  too, though not part of the standard.
221 
222 A. sqrt(x) by Newton Iteration
223 
224  (1) Initial approximation
225 
226  Let x0 and x1 be the leading and the trailing 32-bit words of
227  a floating point number x (in IEEE double format) respectively
228 
229  1 11 52 ...widths
230  ------------------------------------------------------
231  x: |s| e | f |
232  ------------------------------------------------------
233  msb lsb msb lsb ...order
234 
235 
236  ------------------------ ------------------------
237  x0: |s| e | f1 | x1: | f2 |
238  ------------------------ ------------------------
239 
240  By performing shifts and subtracts on x0 and x1 (both regarded
241  as integers), we obtain an 8-bit approximation of sqrt(x) as
242  follows.
243 
244  k := (x0>>1) + 0x1ff80000;
245  y0 := k - T1[31&(k>>15)]. ... y ~ sqrt(x) to 8 bits
246  Here k is a 32-bit integer and T1[] is an integer array containing
247  correction terms. Now magically the floating value of y (y's
248  leading 32-bit word is y0, the value of its trailing word is 0)
249  approximates sqrt(x) to almost 8-bit.
250 
251  Value of T1:
252  static int T1[32]= {
253  0, 1024, 3062, 5746, 9193, 13348, 18162, 23592,
254  29598, 36145, 43202, 50740, 58733, 67158, 75992, 85215,
255  83599, 71378, 60428, 50647, 41945, 34246, 27478, 21581,
256  16499, 12183, 8588, 5674, 3403, 1742, 661, 130,};
257 
258  (2) Iterative refinement
259 
260  Apply Heron's rule three times to y, we have y approximates
261  sqrt(x) to within 1 ulp (Unit in the Last Place):
262 
263  y := (y+x/y)/2 ... almost 17 sig. bits
264  y := (y+x/y)/2 ... almost 35 sig. bits
265  y := y-(y-x/y)/2 ... within 1 ulp
266 
267 
268  Remark 1.
269  Another way to improve y to within 1 ulp is:
270 
271  y := (y+x/y) ... almost 17 sig. bits to 2*sqrt(x)
272  y := y - 0x00100006 ... almost 18 sig. bits to sqrt(x)
273 
274  2
275  (x-y )*y
276  y := y + 2* ---------- ...within 1 ulp
277  2
278  3y + x
279 
280 
281  This formula has one division fewer than the one above; however,
282  it requires more multiplications and additions. Also x must be
283  scaled in advance to avoid spurious overflow in evaluating the
284  expression 3y*y+x. Hence it is not recommended uless division
285  is slow. If division is very slow, then one should use the
286  reciproot algorithm given in section B.
287 
288  (3) Final adjustment
289 
290  By twiddling y's last bit it is possible to force y to be
291  correctly rounded according to the prevailing rounding mode
292  as follows. Let r and i be copies of the rounding mode and
293  inexact flag before entering the square root program. Also we
294  use the expression y+-ulp for the next representable floating
295  numbers (up and down) of y. Note that y+-ulp = either fixed
296  point y+-1, or multiply y by nextafter(1,+-inf) in chopped
297  mode.
298 
299  I := FALSE; ... reset INEXACT flag I
300  R := RZ; ... set rounding mode to round-toward-zero
301  z := x/y; ... chopped quotient, possibly inexact
302  If(not I) then { ... if the quotient is exact
303  if(z=y) {
304  I := i; ... restore inexact flag
305  R := r; ... restore rounded mode
306  return sqrt(x):=y.
307  } else {
308  z := z - ulp; ... special rounding
309  }
310  }
311  i := TRUE; ... sqrt(x) is inexact
312  If (r=RN) then z=z+ulp ... rounded-to-nearest
313  If (r=RP) then { ... round-toward-+inf
314  y = y+ulp; z=z+ulp;
315  }
316  y := y+z; ... chopped sum
317  y0:=y0-0x00100000; ... y := y/2 is correctly rounded.
318  I := i; ... restore inexact flag
319  R := r; ... restore rounded mode
320  return sqrt(x):=y.
321 
322  (4) Special cases
323 
324  Square root of +inf, +-0, or NaN is itself;
325  Square root of a negative number is NaN with invalid signal.
326 
327 
328 B. sqrt(x) by Reciproot Iteration
329 
330  (1) Initial approximation
331 
332  Let x0 and x1 be the leading and the trailing 32-bit words of
333  a floating point number x (in IEEE double format) respectively
334  (see section A). By performing shifs and subtracts on x0 and y0,
335  we obtain a 7.8-bit approximation of 1/sqrt(x) as follows.
336 
337  k := 0x5fe80000 - (x0>>1);
338  y0:= k - T2[63&(k>>14)]. ... y ~ 1/sqrt(x) to 7.8 bits
339 
340  Here k is a 32-bit integer and T2[] is an integer array
341  containing correction terms. Now magically the floating
342  value of y (y's leading 32-bit word is y0, the value of
343  its trailing word y1 is set to zero) approximates 1/sqrt(x)
344  to almost 7.8-bit.
345 
346  Value of T2:
347  static int T2[64]= {
348  0x1500, 0x2ef8, 0x4d67, 0x6b02, 0x87be, 0xa395, 0xbe7a, 0xd866,
349  0xf14a, 0x1091b,0x11fcd,0x13552,0x14999,0x15c98,0x16e34,0x17e5f,
350  0x18d03,0x19a01,0x1a545,0x1ae8a,0x1b5c4,0x1bb01,0x1bfde,0x1c28d,
351  0x1c2de,0x1c0db,0x1ba73,0x1b11c,0x1a4b5,0x1953d,0x18266,0x16be0,
352  0x1683e,0x179d8,0x18a4d,0x19992,0x1a789,0x1b445,0x1bf61,0x1c989,
353  0x1d16d,0x1d77b,0x1dddf,0x1e2ad,0x1e5bf,0x1e6e8,0x1e654,0x1e3cd,
354  0x1df2a,0x1d635,0x1cb16,0x1be2c,0x1ae4e,0x19bde,0x1868e,0x16e2e,
355  0x1527f,0x1334a,0x11051,0xe951, 0xbe01, 0x8e0d, 0x5924, 0x1edd,};
356 
357  (2) Iterative refinement
358 
359  Apply Reciproot iteration three times to y and multiply the
360  result by x to get an approximation z that matches sqrt(x)
361  to about 1 ulp. To be exact, we will have
362  -1ulp < sqrt(x)-z<1.0625ulp.
363 
364  ... set rounding mode to Round-to-nearest
365  y := y*(1.5-0.5*x*y*y) ... almost 15 sig. bits to 1/sqrt(x)
366  y := y*((1.5-2^-30)+0.5*x*y*y)... about 29 sig. bits to 1/sqrt(x)
367  ... special arrangement for better accuracy
368  z := x*y ... 29 bits to sqrt(x), with z*y<1
369  z := z + 0.5*z*(1-z*y) ... about 1 ulp to sqrt(x)
370 
371  Remark 2. The constant 1.5-2^-30 is chosen to bias the error so that
372  (a) the term z*y in the final iteration is always less than 1;
373  (b) the error in the final result is biased upward so that
374  -1 ulp < sqrt(x) - z < 1.0625 ulp
375  instead of |sqrt(x)-z|<1.03125ulp.
376 
377  (3) Final adjustment
378 
379  By twiddling y's last bit it is possible to force y to be
380  correctly rounded according to the prevailing rounding mode
381  as follows. Let r and i be copies of the rounding mode and
382  inexact flag before entering the square root program. Also we
383  use the expression y+-ulp for the next representable floating
384  numbers (up and down) of y. Note that y+-ulp = either fixed
385  point y+-1, or multiply y by nextafter(1,+-inf) in chopped
386  mode.
387 
388  R := RZ; ... set rounding mode to round-toward-zero
389  switch(r) {
390  case RN: ... round-to-nearest
391  if(x<= z*(z-ulp)...chopped) z = z - ulp; else
392  if(x<= z*(z+ulp)...chopped) z = z; else z = z+ulp;
393  break;
394  case RZ:case RM: ... round-to-zero or round-to--inf
395  R:=RP; ... reset rounding mod to round-to-+inf
396  if(x<z*z ... rounded up) z = z - ulp; else
397  if(x>=(z+ulp)*(z+ulp) ...rounded up) z = z+ulp;
398  break;
399  case RP: ... round-to-+inf
400  if(x>(z+ulp)*(z+ulp)...chopped) z = z+2*ulp; else
401  if(x>z*z ...chopped) z = z+ulp;
402  break;
403  }
404 
405  Remark 3. The above comparisons can be done in fixed point. For
406  example, to compare x and w=z*z chopped, it suffices to compare
407  x1 and w1 (the trailing parts of x and w), regarding them as
408  two's complement integers.
409 
410  ...Is z an exact square root?
411  To determine whether z is an exact square root of x, let z1 be the
412  trailing part of z, and also let x0 and x1 be the leading and
413  trailing parts of x.
414 
415  If ((z1&0x03ffffff)!=0) ... not exact if trailing 26 bits of z!=0
416  I := 1; ... Raise Inexact flag: z is not exact
417  else {
418  j := 1 - [(x0>>20)&1] ... j = logb(x) mod 2
419  k := z1 >> 26; ... get z's 25-th and 26-th
420  fraction bits
421  I := i or (k&j) or ((k&(j+j+1))!=(x1&3));
422  }
423  R:= r ... restore rounded mode
424  return sqrt(x):=z.
425 
426  If multiplication is cheaper than the foregoing red tape, the
427  Inexact flag can be evaluated by
428 
429  I := i;
430  I := (z*z!=x) or I.
431 
432  Note that z*z can overwrite I; this value must be sensed if it is
433  True.
434 
435  Remark 4. If z*z = x exactly, then bit 25 to bit 0 of z1 must be
436  zero.
437 
438  --------------------
439  z1: | f2 |
440  --------------------
441  bit 31 bit 0
442 
443  Further more, bit 27 and 26 of z1, bit 0 and 1 of x1, and the odd
444  or even of logb(x) have the following relations:
445 
446  -------------------------------------------------
447  bit 27,26 of z1 bit 1,0 of x1 logb(x)
448  -------------------------------------------------
449  00 00 odd and even
450  01 01 even
451  10 10 odd
452  10 00 even
453  11 01 even
454  -------------------------------------------------
455 
456  (4) Special cases (see (4) of Section A).
457 
458  */
459 
460 #endif
461