EIC Software
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
half.h
Go to the documentation of this file. Or view the newest version in sPHENIX GitHub for file half.h
1 
2 //
3 // Copyright (c) 2002, Industrial Light & Magic, a division of Lucas
4 // Digital Ltd. LLC
5 //
6 // All rights reserved.
7 //
8 // Redistribution and use in source and binary forms, with or without
9 // modification, are permitted provided that the following conditions are
10 // met:
11 // * Redistributions of source code must retain the above copyright
12 // notice, this list of conditions and the following disclaimer.
13 // * Redistributions in binary form must reproduce the above
14 // copyright notice, this list of conditions and the following disclaimer
15 // in the documentation and/or other materials provided with the
16 // distribution.
17 // * Neither the name of Industrial Light & Magic nor the names of
18 // its contributors may be used to endorse or promote products derived
19 // from this software without specific prior written permission.
20 //
21 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 //
34 
35 // Primary authors:
36 // Florian Kainz <kainz@ilm.com>
37 // Rod Bogart <rgb@ilm.com>
38 
39 //---------------------------------------------------------------------------
40 //
41 // half -- a 16-bit floating point number class:
42 //
43 // Type half can represent positive and negative numbers whose
44 // magnitude is between roughly 6.1e-5 and 6.5e+4 with a relative
45 // error of 9.8e-4; numbers smaller than 6.1e-5 can be represented
46 // with an absolute error of 6.0e-8. All integers from -2048 to
47 // +2048 can be represented exactly.
48 //
49 // Type half behaves (almost) like the built-in C++ floating point
50 // types. In arithmetic expressions, half, float and double can be
51 // mixed freely. Here are a few examples:
52 //
53 // half a (3.5);
54 // float b (a + sqrt (a));
55 // a += b;
56 // b += a;
57 // b = a + 7;
58 //
59 // Conversions from half to float are lossless; all half numbers
60 // are exactly representable as floats.
61 //
62 // Conversions from float to half may not preserve a float's value
63 // exactly. If a float is not representable as a half, then the
64 // float value is rounded to the nearest representable half. If a
65 // float value is exactly in the middle between the two closest
66 // representable half values, then the float value is rounded to
67 // the closest half whose least significant bit is zero.
68 //
69 // Overflows during float-to-half conversions cause arithmetic
70 // exceptions. An overflow occurs when the float value to be
71 // converted is too large to be represented as a half, or if the
72 // float value is an infinity or a NAN.
73 //
74 // The implementation of type half makes the following assumptions
75 // about the implementation of the built-in C++ types:
76 //
77 // float is an IEEE 754 single-precision number
78 // sizeof (float) == 4
79 // sizeof (unsigned int) == sizeof (float)
80 // alignof (unsigned int) == alignof (float)
81 // sizeof (unsigned short) == 2
82 //
83 //---------------------------------------------------------------------------
84 
85 #ifndef _HALF_H_
86 #define _HALF_H_
87 
88 #include <iostream>
89 
90 #if defined(OPENEXR_DLL) // DLL build: HALF_EXPORT expands to a __declspec
91  #if defined(HALF_EXPORTS) // presumably defined only while building the half DLL itself - confirm in the build files
92  #define HALF_EXPORT __declspec(dllexport)
93  #else
94  #define HALF_EXPORT __declspec(dllimport)
95  #endif
96  #define HALF_EXPORT_CONST // expands to nothing: the static lookup tables are declared without const
97 #else // non-DLL build: no decoration, and the static lookup tables are const
98  #define HALF_EXPORT
99  #define HALF_EXPORT_CONST const
100 #endif
101 
103 {
104  public:
105 
106  //-------------
107  // Constructors
108  //-------------
109 
110  half (); // no initialization
111  half (float f);
112 
113 
114  //--------------------
115  // Conversion to float
116  //--------------------
117 
118  operator float () const;
119 
120 
121  //------------
122  // Unary minus
123  //------------
124 
125  half operator - () const;
126 
127 
128  //-----------
129  // Assignment
130  //-----------
131 
132  half & operator = (half h);
133  half & operator = (float f);
134 
135  half & operator += (half h);
136  half & operator += (float f);
137 
138  half & operator -= (half h);
139  half & operator -= (float f);
140 
141  half & operator *= (half h);
142  half & operator *= (float f);
143 
144  half & operator /= (half h);
145  half & operator /= (float f);
146 
147 
148  //---------------------------------------------------------
149  // Round to n-bit precision (n should be between 0 and 10).
150  // After rounding, the significand's 10-n least significant
151  // bits will be zero.
152  //---------------------------------------------------------
153 
154  half round (unsigned int n) const;
155 
156 
157  //--------------------------------------------------------------------
158  // Classification:
159  //
160  // h.isFinite() returns true if h is a normalized number,
161  // a denormalized number or zero
162  //
163  // h.isNormalized() returns true if h is a normalized number
164  //
165  // h.isDenormalized() returns true if h is a denormalized number
166  //
167  // h.isZero() returns true if h is zero
168  //
169  // h.isNan() returns true if h is a NAN
170  //
171  // h.isInfinity() returns true if h is a positive
172  // or a negative infinity
173  //
174  // h.isNegative() returns true if the sign bit of h
175  // is set (negative)
176  //--------------------------------------------------------------------
177 
178  bool isFinite () const;
179  bool isNormalized () const;
180  bool isDenormalized () const;
181  bool isZero () const;
182  bool isNan () const;
183  bool isInfinity () const;
184  bool isNegative () const;
185 
186 
187  //--------------------------------------------
188  // Special values
189  //
190  // posInf() returns +infinity
191  //
192  // negInf() returns -infinity
193  //
194  // qNan() returns a NAN with the bit
195  // pattern 0111111111111111
196  //
197  // sNan() returns a NAN with the bit
198  // pattern 0111110111111111
199  //--------------------------------------------
200 
201  static half posInf ();
202  static half negInf ();
203  static half qNan ();
204  static half sNan ();
205 
206 
207  //--------------------------------------
208  // Access to the internal representation
209  //--------------------------------------
210 
211  unsigned short bits () const;
212  void setBits (unsigned short bits);
213 
214 
215  public:
216 
217  union uif
218  {
219  unsigned int i;
220  float f;
221  };
222 
223  private:
224 
225  static short convert (int i);
226  static float overflow ();
227 
228  unsigned short _h;
229 
230  static HALF_EXPORT_CONST uif _toFloat[1 << 16];
231  static HALF_EXPORT_CONST unsigned short _eLut[1 << 9];
232 };
233 
234 //-----------
235 // Stream I/O
236 //-----------
237 // Declarations only; no definition of these operators appears in this header.
238 HALF_EXPORT std::ostream & operator << (std::ostream &os, half h); // h is passed by value
239 HALF_EXPORT std::istream & operator >> (std::istream &is, half &h); // h is an output parameter
240 
241 
242 //----------
243 // Debugging
244 //----------
245 // Declarations only; no definition of printBits appears in this header.
246 HALF_EXPORT void printBits (std::ostream &os, half h);
247 HALF_EXPORT void printBits (std::ostream &os, float f);
248 HALF_EXPORT void printBits (char c[19], half h); // [19] is advisory: the parameter is a plain char *; presumably 16 bits + 2 separators + terminator - confirm
249 HALF_EXPORT void printBits (char c[35], float f); // [35] is advisory: presumably 32 bits + 2 separators + terminator - confirm
250 
251 
252 //-------------------------------------------------------------------------
253 // Limits
254 //
255 // Visual C++ will complain if HALF_MIN, HALF_NRM_MIN etc. are not float
256 // constants, but at least one other compiler (gcc 2.96) produces incorrect
257 // results if they are.
258 //-------------------------------------------------------------------------
259 
260 #if (defined _WIN32 || defined _WIN64) && defined _MSC_VER // Visual C++: constants carry an f suffix (float)
261 
262  #define HALF_MIN 5.96046448e-08f // Smallest positive half
263 
264  #define HALF_NRM_MIN 6.10351562e-05f // Smallest positive normalized half
265 
266  #define HALF_MAX 65504.0f // Largest positive half
267 
268  #define HALF_EPSILON 0.00097656f // Smallest positive e for which
269  // half (1.0 + e) != half (1.0)
270 #else // every other compiler: same values as unsuffixed (double) literals
271 
272  #define HALF_MIN 5.96046448e-08 // Smallest positive half
273 
274  #define HALF_NRM_MIN 6.10351562e-05 // Smallest positive normalized half
275 
276  #define HALF_MAX 65504.0 // Largest positive half
277 
278  #define HALF_EPSILON 0.00097656 // Smallest positive e for which
279  // half (1.0 + e) != half (1.0)
280 #endif
281 
282 
283 #define HALF_MANT_DIG 11 // Number of digits in mantissa
284  // (significand + hidden leading 1)
285 
286 #define HALF_DIG 2 // Number of base 10 digits that
287  // can be represented without change
288  // NOTE(review): the <float.h> *_DIG formula, floor ((HALF_MANT_DIG - 1) * log10 (2)), gives 3 - confirm that 2 is intended
289 #define HALF_RADIX 2 // Base of the exponent
290 
291 #define HALF_MIN_EXP -13 // Minimum negative integer such that
292  // HALF_RADIX raised to the power of
293  // one less than that integer is a
294  // normalized half
295 
296 #define HALF_MAX_EXP 16 // Maximum positive integer such that
297  // HALF_RADIX raised to the power of
298  // one less than that integer is a
299  // normalized half
300 
301 #define HALF_MIN_10_EXP -4 // Minimum negative integer such
302  // that 10 raised to that power is
303  // a normalized half
304 
305 #define HALF_MAX_10_EXP 4 // Maximum positive integer such
306  // that 10 raised to that power is
307  // a normalized half
308 
309 
310 //---------------------------------------------------------------------------
311 //
312 // Implementation --
313 //
314 // Representation of a float:
315 //
316 // We assume that a float, f, is an IEEE 754 single-precision
317 // floating point number, whose bits are arranged as follows:
318 //
319 //     31 (msb)
320 //     |
321 //     | 30     23
322 //     | |      |
323 //     | |      | 22                    0 (lsb)
324 //     | |      | |                     |
325 //     X XXXXXXXX XXXXXXXXXXXXXXXXXXXXXXX
326 //
327 //     s e        m
328 //
329 // s is the sign-bit, e is the exponent and m is the significand.
330 //
331 // If e is between 1 and 254, f is a normalized number:
332 //
333 //             s    e-127
334 //     f = (-1)  * 2      * 1.m
335 //
336 // If e is 0, and m is not zero, f is a denormalized number:
337 //
338 //             s    -126
339 //     f = (-1)  * 2      * 0.m
340 //
341 // If e and m are both zero, f is zero:
342 //
343 // f = 0.0
344 //
345 // If e is 255, f is an "infinity" or "not a number" (NAN),
346 // depending on whether m is zero or not.
347 //
348 // Examples:
349 //
350 // 0 00000000 00000000000000000000000 = 0.0
351 // 0 01111110 00000000000000000000000 = 0.5
352 // 0 01111111 00000000000000000000000 = 1.0
353 // 0 10000000 00000000000000000000000 = 2.0
354 // 0 10000000 10000000000000000000000 = 3.0
355 // 1 10000101 11110000010000000000000 = -124.0625
356 // 0 11111111 00000000000000000000000 = +infinity
357 // 1 11111111 00000000000000000000000 = -infinity
358 // 0 11111111 10000000000000000000000 = NAN
359 // 1 11111111 11111111111111111111111 = NAN
360 //
361 // Representation of a half:
362 //
363 // Here is the bit-layout for a half number, h:
364 //
365 //     15 (msb)
366 //     |
367 //     | 14  10
368 //     | |   |
369 //     | |   | 9        0 (lsb)
370 //     | |   | |        |
371 //     X XXXXX XXXXXXXXXX
372 //
373 //     s e     m
374 //
375 // s is the sign-bit, e is the exponent and m is the significand.
376 //
377 // If e is between 1 and 30, h is a normalized number:
378 //
379 //             s    e-15
380 //     h = (-1)  * 2      * 1.m
381 //
382 // If e is 0, and m is not zero, h is a denormalized number:
383 //
384 //             s    -14
385 //     h = (-1)  * 2      * 0.m
386 //
387 // If e and m are both zero, h is zero:
388 //
389 // h = 0.0
390 //
391 // If e is 31, h is an "infinity" or "not a number" (NAN),
392 // depending on whether m is zero or not.
393 //
394 // Examples:
395 //
396 // 0 00000 0000000000 = 0.0
397 // 0 01110 0000000000 = 0.5
398 // 0 01111 0000000000 = 1.0
399 // 0 10000 0000000000 = 2.0
400 // 0 10000 1000000000 = 3.0
401 // 1 10101 1111000001 = -124.0625
402 // 0 11111 0000000000 = +infinity
403 // 1 11111 0000000000 = -infinity
404 // 0 11111 1000000000 = NAN
405 // 1 11111 1111111111 = NAN
406 //
407 // Conversion:
408 //
409 // Converting from a float to a half requires some non-trivial bit
410 // manipulations. In some cases, this makes conversion relatively
411 // slow, but the most common case is accelerated via table lookups.
412 //
413 // Converting back from a half to a float is easier because we don't
414 // have to do any rounding. In addition, there are only 65536
415 // different half numbers; we can convert each of those numbers once
416 // and store the results in a table. Later, all conversions can be
417 // done using only simple table lookups.
418 //
419 //---------------------------------------------------------------------------
420 
421 
422 //--------------------
423 // Simple constructors
424 //--------------------
425 
426 inline
428 {
429  // no initialization
430 }
431 
432 
433 //----------------------------
434 // Half-from-float constructor
435 //----------------------------
436 // Stores in _h the half bit pattern for f (assumes f is an IEEE 754 float).
437 inline
438 half::half (float f)
439 {
440  uif x; // x.f is written, then x.i is read to get f's raw bits
441 
442  x.f = f;
443 
444  if (f == 0)
445  {
446  //
447  // Common special case - zero.
448  // Preserve the zero's sign bit.
449  //
450 
451  _h = (x.i >> 16); // +0 gives 0x0000, -0 gives 0x8000
452  }
453  else
454  {
455  //
456  // We extract the combined sign and exponent, e, from our
457  // floating-point number, f. Then we convert e to the sign
458  // and exponent of the half number via a table lookup.
459  //
460  // For the most common case, where a normalized half is produced,
461  // the table lookup returns a non-zero value; in this case, all
462  // we have to do is round f's significand to 10 bits and combine
463  // the result with e.
464  //
465  // For all other cases (overflow, zeroes, denormalized numbers
466  // resulting from underflow, infinities and NANs), the table
467  // lookup returns zero, and we call a longer, non-inline function
468  // to do the float-to-half conversion.
469  //
470 
471  int e = (x.i >> 23) & 0x000001ff; // bits 31..23: sign bit and 8-bit exponent
472 
473  e = _eLut[e]; // e now holds the table entry, not the float's exponent
474 
475  if (e)
476  {
477  //
478  // Simple case - round the significand, m, to 10
479  // bits and combine it with the sign and exponent.
480  //
481 
482  int m = x.i & 0x007fffff; // bits 22..0: 23-bit significand
483  _h = e + ((m + 0x00000fff + ((m >> 13) & 1)) >> 13); // drops 13 bits, rounding to nearest; the (m >> 13) & 1 term sends ties to the even value
484  }
485  else
486  {
487  //
488  // Difficult case - call a function.
489  //
490 
491  _h = convert (x.i);
492  }
493  }
494 }
495 
496 
497 //------------------------------------------
498 // Half-to-float conversion via table lookup
499 //------------------------------------------
500 // The 16-bit pattern _h is used directly as the index into _toFloat.
501 inline
502 half::operator float () const
503 {
504  return _toFloat[_h].f; // _toFloat has 1 << 16 entries, one per possible value of _h
505 }
506 
507 
508 //-------------------------
509 // Round to n-bit precision
510 //-------------------------
511 // For n < 10, returns a copy whose 10-n least significant bits are zero; for n >= 10, returns *this unchanged.
512 inline half
513 half::round (unsigned int n) const
514 {
515  //
516  // Parameter check.
517  //
518 
519  if (n >= 10)
520  return *this;
521 
522  //
523  // Disassemble h into the sign, s,
524  // and the combined exponent and significand, e.
525  //
526 
527  unsigned short s = _h & 0x8000;
528  unsigned short e = _h & 0x7fff;
529 
530  //
531  // Round the exponent and significand to the nearest value
532  // whose (10-n) least significant bits are zero.
533  // Note that the exponent adjusts automatically if rounding
534  // up causes the significand to overflow.
535  //
536 
537  e >>= 9 - n; // bit 0 is now the highest of the bits being discarded
538  e += e & 1; // if that bit is set, round the magnitude up (ties included)
539  e <<= 9 - n;
540 
541  //
542  // Check for exponent overflow.
543  //
544 
545  if (e >= 0x7c00) // 0x7c00: exponent field (bits 14..10) all ones
546  {
547  //
548  // Overflow occurred -- truncate instead of rounding.
549  //
550 
551  e = _h; // includes the sign bit; it is OR-ed back in below as well
552  e >>= 10 - n;
553  e <<= 10 - n;
554  }
555 
556  //
557  // Put the original sign bit back.
558  //
559 
560  half h;
561  h._h = s | e;
562 
563  return h;
564 }
565 
566 
567 //-----------------------
568 // Other inline functions
569 //-----------------------
570 
571 inline half
573 {
574  half h;
575  h._h = _h ^ 0x8000;
576  return h;
577 }
578 
579 
580 inline half &
582 {
583  _h = h._h;
584  return *this;
585 }
586 
587 
588 inline half &
590 {
591  *this = half (f);
592  return *this;
593 }
594 
595 
596 inline half &
598 {
599  *this = half (float (*this) + float (h));
600  return *this;
601 }
602 
603 
604 inline half &
606 {
607  *this = half (float (*this) + f);
608  return *this;
609 }
610 
611 
612 inline half &
614 {
615  *this = half (float (*this) - float (h));
616  return *this;
617 }
618 
619 
620 inline half &
622 {
623  *this = half (float (*this) - f);
624  return *this;
625 }
626 
627 
628 inline half &
630 {
631  *this = half (float (*this) * float (h));
632  return *this;
633 }
634 
635 
636 inline half &
638 {
639  *this = half (float (*this) * f);
640  return *this;
641 }
642 
643 
644 inline half &
646 {
647  *this = half (float (*this) / float (h));
648  return *this;
649 }
650 
651 
652 inline half &
654 {
655  *this = half (float (*this) / f);
656  return *this;
657 }
658 
659 
660 inline bool
662 {
663  unsigned short e = (_h >> 10) & 0x001f;
664  return e < 31;
665 }
666 
667 
668 inline bool
670 {
671  unsigned short e = (_h >> 10) & 0x001f;
672  return e > 0 && e < 31;
673 }
674 
675 
676 inline bool
678 {
679  unsigned short e = (_h >> 10) & 0x001f;
680  unsigned short m = _h & 0x3ff;
681  return e == 0 && m != 0;
682 }
683 
684 
685 inline bool
686 half::isZero () const
687 {
688  return (_h & 0x7fff) == 0; // sign bit is masked off, so both +0 and -0 count as zero
689 }
690 
691 
692 inline bool
693 half::isNan () const
694 {
695  unsigned short e = (_h >> 10) & 0x001f; // bits 14..10: exponent field
696  unsigned short m = _h & 0x3ff; // bits 9..0: significand field
697  return e == 31 && m != 0; // all-ones exponent with a non-zero significand
698 }
699 
700 
701 inline bool
703 {
704  unsigned short e = (_h >> 10) & 0x001f;
705  unsigned short m = _h & 0x3ff;
706  return e == 31 && m == 0;
707 }
708 
709 
710 inline bool
712 {
713  return (_h & 0x8000) != 0;
714 }
715 
716 
717 inline half
719 {
720  half h;
721  h._h = 0x7c00;
722  return h;
723 }
724 
725 
726 inline half
728 {
729  half h;
730  h._h = 0xfc00;
731  return h;
732 }
733 
734 
735 inline half
737 {
738  half h;
739  h._h = 0x7fff;
740  return h;
741 }
742 
743 
744 inline half
746 {
747  half h;
748  h._h = 0x7dff;
749  return h;
750 }
751 
752 
753 inline unsigned short
754 half::bits () const
755 {
756  return _h; // the stored 16-bit pattern, returned as-is
757 }
758 
759 
760 inline void
761 half::setBits (unsigned short bits)
762 {
763  _h = bits; // stored verbatim: no conversion or validation of the pattern
764 }
765 
766 #endif