slvm/float/
float_56.rs

1//! This module contains F56, a 7-byte struct that represents a 56-bit floating point number.
2//! The SLVM uses 8-byte Values but the first byte stores the type of the Value so 7 bytes remain for the float.
3//! Check out value.rs to see how the F56 is one of the enum variants of the Value struct.
4//!
//! There are open questions about the benefits of using F56 over f32 that will depend on some performance benchmarking.
6//! f32 is simpler and faster.
7//! Additionally, there are questions about whether to impl the Eq and Hash Traits <https://github.com/sl-sh-dev/sl-sh/issues/125>
8
9use bridge_types::LooseFloat;
10use std::fmt::{Display, Formatter};
11use std::hash::{Hash, Hasher};
12use std::str::FromStr;
13
// NOTE(review): presumably needed because the bracketed bit-layout diagram in the
// docs below looks like intra-doc links to rustdoc -- confirm before removing.
#[allow(rustdoc::broken_intra_doc_links)]
/// The F56 struct represents a 56-bit floating point number using 7 bytes.
/// Most operations on F56 are done by converting to f64, performing the operation, and then converting back to F56
///
/// F56 uses 1 bit for the sign, 10 bits for the exponent, and 45 bits for the mantissa.
/// Compared to f32, it has +2 exponent bits and +22 mantissa bits.
/// Compared to f64, it has -1 exponent bit and -7 mantissa bits.
///
/// The bytes are stored big-endian (most significant byte first):
///
/// ```text
///   Byte 0    Byte 1    Byte 2    Byte 3    Byte 4    Byte 5    Byte 6
/// [sEEEEEEE][EEEmmmmm][mmmmmmmm][mmmmmmmm][mmmmmmmm][mmmmmmmm][mmmmmmmm]
/// ```
///
/// Exponent bits range from 0 to 1023
/// they represent -511 to +512 but are stored biased by +511
/// the exponent of -511 is reserved for the number 0 and subnormal numbers
/// the exponent of +512 is reserved for infinity and NaN
/// so normal exponents range from -510 to +511
///
/// smallest positive subnormal value is 8.48e-168 (2^-555)
/// smallest positive normal value is 2.98e-154 (2^-510)
/// maximum finite value is 1.34e154
///
/// When displayed, values are rounded to `F56::DIGITS` (12) significant decimal digits:
/// An f64 number like 1.00000000001 with 12 significant digits is displayed as 1.00000000001
/// An f64 number like 1.000000000001 with 13 significant digits is displayed as 1
#[derive(Copy, Clone)]
pub struct F56(pub [u8; 7]);
38
39impl Eq for F56 {}
40
41/// appropriate for `identical?` comparison
42impl PartialEq for F56 {
43    fn eq(&self, other: &Self) -> bool {
44        self.strictest_equal(other)
45    }
46}
47
48/// In order to use F56 as a key in a hash map, we need to ensure:
49/// If a == b then hash(a) == hash(b)
50impl Hash for F56 {
51    fn hash<H: Hasher>(&self, state: &mut H) {
52        state.write_u64(self.hash_for_strictest_equal())
53    }
54}
55
/// Debug output shows the raw seven bytes (e.g. `F56([63, 224, 0, 0, 0, 0, 0])`)
/// rather than the decimal value; use `Display` for the numeric form.
impl std::fmt::Debug for F56 {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "F56({:?})", self.0)
    }
}
61
62impl Display for F56 {
63    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
64        // write!(f, "{}", f64::from(*self))
65        write!(f, "{}", F56::round_f64_to_f56_precision(f64::from(*self)))
66    }
67}
68
69impl From<f64> for F56 {
70    fn from(f: f64) -> F56 {
71        let f64_bytes = f.to_be_bytes();
72        let f64_word = u64::from_be_bytes(f64_bytes);
73        let f64_sign: u8 = (f64_word >> 63) as u8; // first bit
74        let f64_biased_exponent: u16 = ((f64_word >> 52) & 0b111_1111_1111) as u16; // first 11 bits after the sign bit
75        let true_exponent: i16 = f64_biased_exponent as i16 - 1023i16; // remove the bias of 2^10-1
76        let f64_mantissa = f64_word & 0x000f_ffff_ffff_ffff; // everything after first 12 bits
77        let mut f56_mantissa = f64_mantissa >> 7; // we lose 7 bits in mantissa
78        if F56::ROUNDUP_ENABLED {
79            let round_up_bit = f64_mantissa & 0b0100_0000u64 > 0; // the highest bit we lost (7th bit from the end)
80            f56_mantissa += if round_up_bit { 1 } else { 0 }; // round up if the 7th bit is 1
81        }
82
83        let f56_biased_exponent = match f64_biased_exponent {
84            0b111_1111_1111 => {
85                // Special case meaning the f64 is NaN or Infinity
86                // NaN's mantissa has at least one [1]
87                // Infinity's mantissa is all [0]s
88                // We need to make sure that the lost bits from the f64 mantissa don't change it from NaN to Infinity
89                if f64_mantissa == 0u64 {
90                    f56_mantissa = 0u64; // mantissa must be all [0]s to represent Infinity
91                } else {
92                    f56_mantissa = 0b11_1111_1111u64; // mantissa must be all [1]s to represent NaN
93                }
94                0b11_1111_1111u64 // 10 bits of all 1s
95            }
96            0b000_0000_0000 => {
97                // Special case meaning the f64 is 0 or subnormal
98                // in both cases the f56 will be 0
99                // F56 cannot represent any numbers that are subnormal in F64
100                // The smallest positive F56 number is 8e-168 and F64 subnormals start at 1e-308
101                f56_mantissa = 0u64;
102                0b00_0000_0000u64
103            }
104            _ if true_exponent > 511 => {
105                // TOO LARGE TO PROPERLY REPRESENT
106                // standard behavior converting from f64 to f32 is to represent this as Infinity rather than panicking
107                f56_mantissa = 0u64; // mantissa must be all [0]s to represent Infinity
108                0b11_1111_1111u64 // exponent for Infinity
109            }
110            _ if true_exponent < -510 => {
111                // This will be either a subnormal or 0
112                // Requires a subnormal f56 which will lose precision as we near 8.48e-168
113
114                // to calculate a 45 bit subnormal mantissa as 0.fraction,
115                // take the 45 bits and treat them like an unsigned int and then divide by 2^45
116
117                // value of subnormal f56 = value of f64
118                // value of subnormal f56 = 2^-510 * 0.fraction
119                // value of f64 = 2^-510 * (u45 / 2^45)
120                // value of f64 * 2^555 = u45
121
122                // multiplying the f64 by 2^555 can be done by adding 555 to the exponent
123                // we can do this safely because the max biased exponent is 2047
124                // and we know that the current biased exponent is < 513 (corresponding to true exponent of -510)
125                let new_f64_exponent = (f64_biased_exponent + 555) as u64;
126                let new_f64_word = (f64_sign as u64) << 63 | new_f64_exponent << 52 | f64_mantissa;
127                let new_f64 = f64::from_bits(new_f64_word);
128                let u45 = new_f64 as u64; // we only care about the integer part
129                f56_mantissa = u45; // mantissa is set to u45
130
131                0b00_0000_0000u64 // exponent is set to 0
132            }
133            _ => {
134                // Generic case
135                (true_exponent + 511) as u64 // add in the bias for F56
136            }
137        };
138
139        let f56_sign: u64 = f64_sign as u64;
140        let word = f56_sign << 55 | f56_biased_exponent << 45 | f56_mantissa;
141        let f56_bytes = word.to_be_bytes();
142        F56([
143            f56_bytes[1],
144            f56_bytes[2],
145            f56_bytes[3],
146            f56_bytes[4],
147            f56_bytes[5],
148            f56_bytes[6],
149            f56_bytes[7],
150        ])
151    }
152}
153
/// Wraps the bytes of a `bridge_types::LooseFloat` as an F56 without changing them.
impl From<LooseFloat> for F56 {
    fn from(value: LooseFloat) -> Self {
        F56(value.0)
    }
}
159
/// Hands the 7 raw bytes of an F56 to a `bridge_types::LooseFloat` without changing them.
impl From<F56> for LooseFloat {
    fn from(value: F56) -> Self {
        LooseFloat(value.0)
    }
}
165
166impl From<f32> for F56 {
167    fn from(f: f32) -> F56 {
168        f64::from(f).into()
169    }
170}
171
172impl From<F56> for f64 {
173    fn from(f: F56) -> f64 {
174        // f64 has 1 sign bit, 11 exponent bits, and 52 mantissa bits
175        // f56 has 1 sign bit, 10 exponent bits, and 45 mantissa bits
176        let bytes7 = f.0;
177        let f56_word = u64::from_be_bytes([
178            0, bytes7[0], bytes7[1], bytes7[2], bytes7[3], bytes7[4], bytes7[5], bytes7[6],
179        ]);
180        let f56_sign: u8 = (f56_word >> 55) as u8; // first bit
181        let f56_biased_exponent: u16 = (f56_word >> 45) as u16 & 0x3FF; // first 10 bits after the sign bit
182        let f56_mantissa: u64 = f56_word & 0x1FFF_FFFF_FFFF; // the rightmost 45 bits
183        let true_exponent = f56_biased_exponent as i16 - 511; // remove the bias of 2^9-1
184
185        let f64_biased_exponent: u64 = match f56_biased_exponent {
186            // NaN or Infinity
187            // Either way the f64 will also have an exponent of all [1]s
188            0b11_1111_1111 => 0b111_1111_1111_u64,
189
190            // Zero
191            _ if f56_biased_exponent == 0b00_0000_0000 && f56_mantissa == 0u64 => {
192                0b000_0000_0000_u64
193            }
194            // Subnormal
195            _ if f56_biased_exponent == 0b00_0000_0000 && f56_mantissa > 0u64 => {
196                // the f56's exponent is actually representing -510 instead of 0
197                // note that -510 exponent would also represented by 0x1
198                // which is why we only need to add 1022 instead of 1023 to bias this for f64
199                (true_exponent + 1022) as u64
200            }
201
202            // Generic case
203            _ => {
204                (true_exponent + 1023) as u64 // add in the bias for F64
205            }
206        };
207
208        let f64_sign = f56_sign as u64;
209        let f64_mantissa = f56_mantissa << 7_u64; // we add 7 bits in mantissa, but they're all zeros
210        let word: u64 = f64_sign << 63 | f64_biased_exponent << 52 | f64_mantissa;
211        f64::from_be_bytes(word.to_be_bytes())
212    }
213}
214
215impl From<F56> for f32 {
216    fn from(f: F56) -> f32 {
217        f64::from(f) as f32
218    }
219}
220
221impl FromStr for F56 {
222    type Err = std::num::ParseFloatError;
223    fn from_str(s: &str) -> Result<Self, Self::Err> {
224        f64::from_str(s).map(F56::from)
225    }
226}
227
228impl F56 {
    /// Largest finite F56, roughly 1.34e154
    /// (biased exponent 1022, all 45 mantissa bits set)
    pub const MAX: F56 = F56([0x7F, 0xDF, 0xff, 0xff, 0xff, 0xff, 0xff]);
    /// Smallest positive normal F56, roughly 2.98e-154
    /// (biased exponent 1, mantissa 0)
    pub const MIN_POSITIVE: F56 = F56([0x00, 0b0010_0000, 0x00, 0x00, 0x00, 0x00, 0x00]);
    /// Smallest positive subnormal F56, roughly 8.48e-168
    /// (biased exponent 0, only the lowest mantissa bit set)
    pub const MIN_POSITIVE_SUBNORMAL: F56 = F56([0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01]);
    /// Minimum number of decimal digits of precision (experimentally derived)
    /// for comparison, f32 has 6-9 decimal digits of precision and f64 has 15-17. I believe F56 has 12-14
    pub const DIGITS: usize = 12;
    /// Cutoff for relative difference between an f64 and the F56's approximation
    pub const EPSILON: f64 = 1e-10;
    /// When converting from f64 to F56 we truncate 7 bits of the mantissa
    /// We could round up if the 7th bit is 1, but this might cause issues.
    /// Mantissas like 0xFFFF_FFFF_... can catastrophically round to 0x0000_0000_...
    pub const ROUNDUP_ENABLED: bool = false;
244
245    pub fn round_f64_to_7_sig_figs(raw_f64: f64) -> f64 {
246        if raw_f64.is_nan() || raw_f64.is_infinite() || raw_f64 == 0.0 {
247            return raw_f64;
248        }
249        let orig_exponent_value = raw_f64.abs().log10().floor() as i32; // the number after 'e' in scientific notation
250        let target_exponent_value = 2; // exponent that we will shift this number to
251        let scale_factor = 10f64.powi(target_exponent_value - orig_exponent_value);
252        let scaled_and_rounded = (raw_f64 * scale_factor).round();
253        scaled_and_rounded / scale_factor
254    }
255
256    pub fn round_f64_to_f56_precision(raw_f64: f64) -> f64 {
257        if raw_f64.is_nan() || raw_f64.is_infinite() || raw_f64 == 0.0 {
258            return raw_f64;
259        }
260        // round to a max of F56::DIGITS sig figs
261        let orig_exponent_value = raw_f64.abs().log10().floor() as i32; // the number after 'e' in scientific notation
262        let target_exponent_value = F56::DIGITS as i32 - 1; // exponent that we will shift this number to
263        let scale_factor = 10f64.powi(target_exponent_value - orig_exponent_value);
264        let scaled_and_rounded = (raw_f64 * scale_factor).round();
265
266        scaled_and_rounded / scale_factor
267    }
268
269    /// Returns true if the two F56s's decimal forms are equal to 7 significant figures
270    /// This is a lenient type of equality suitable for human use
271    /// It preserves transitivity, reflexivity, and symmetry
272    /// It meets the requirements of the Eq trait
273    pub fn roughly_equal_using_rounding_sig_figs(&self, other: &F56) -> bool {
274        println!("Rounding two numbers to 7 sig figs, {} and {}", self, other);
275        let self_as_f64 = f64::from(*self);
276        let other_as_f64 = f64::from(*other);
277        // NaNs are considered equal, deviating from IEEE 754 floats, but allowing us to use this as a hash key
278        if self_as_f64.is_nan() && other_as_f64.is_nan() {
279            return true;
280        }
281        F56::round_f64_to_7_sig_figs(self_as_f64) == F56::round_f64_to_7_sig_figs(other_as_f64)
282    }
283
284    /// Returns true if the relative difference between the two F56s is less than F56::EPSILON
285    /// This is a lenient type of equality suitable for human use
286    /// It preserves reflexivity, and symmetry but not transitivity
287    /// It does not meet the requirements of the Eq trait
288    /// The relative difference is the absolute difference divided by the larger of the two numbers
289    pub fn roughly_equal_using_relative_difference(&self, other: &F56) -> bool {
290        if self == other {
291            return true;
292        }
293        let self_as_f64 = f64::from(*self);
294        let other_as_f64 = f64::from(*other);
295        if self_as_f64.is_nan() && other_as_f64.is_nan() {
296            return true;
297        }
298        if self_as_f64.is_infinite() && other_as_f64.is_infinite() {
299            return self_as_f64.is_sign_positive() == other_as_f64.is_sign_positive();
300        }
301        let larger = self_as_f64.abs().max(other_as_f64.abs());
302        if larger == 0.0 {
303            return true;
304        }
305        let relative_difference = (self_as_f64 - other_as_f64).abs() / larger;
306        relative_difference < F56::EPSILON
307    }
308
    /// Returns true if the two F56s are bitwise identical
    ///
    /// Because only the 7 raw bytes are compared, `0.0` and `-0.0` are not
    /// equal, and two NaNs are equal only when they share the same bit pattern.
    pub fn strictest_equal(&self, other: &F56) -> bool {
        self.0 == other.0
    }
    /// Returns the 56-bit pattern widened to a `u64` (top byte zero).
    /// Bitwise-identical F56s therefore produce identical values, which is
    /// what `strictest_equal` requires of a matching hash.
    pub fn hash_for_strictest_equal(&self) -> u64 {
        u64::from_be_bytes([
            0, self.0[0], self.0[1], self.0[2], self.0[3], self.0[4], self.0[5], self.0[6],
        ])
    }
318
319    /// TODO PC #125 this ticket is outdated now but this may need to be used somewhere?
320    /// Returns true if the two F56s are bitwise identical OR if they are both NaN or both 0
321    pub fn strictly_equal_except_nan_and_0(&self, other: &F56) -> bool {
322        // if the bit patterns are identical, then they are equal
323        if self.0 == other.0 {
324            return true;
325        }
326        // if both are 0 or -0 return true
327        if self.0 == [0, 0, 0, 0, 0, 0, 0] && other.0 == [0x80, 0, 0, 0, 0, 0, 0] {
328            return true;
329        }
330        if self.0 == [0x80, 0, 0, 0, 0, 0, 0] && other.0 == [0, 0, 0, 0, 0, 0, 0] {
331            return true;
332        }
333
334        // if both are NaN return true
335        if self.is_nan() && other.is_nan() {
336            return true;
337        }
338
339        false
340    }
341
    /// TODO PC #125 this ticket is outdated now but this may need to be used somewhere?
    /// Hash value that agrees with `strictly_equal_except_nan_and_0`:
    /// `0.0` and `-0.0` share one value, every NaN shares another, and all other
    /// values hash to their 56-bit pattern widened to a `u64`.
    pub fn hash_for_strictly_equal_except_nan_and_0(&self) -> u64 {
        let f56_word = u64::from_be_bytes([
            0, self.0[0], self.0[1], self.0[2], self.0[3], self.0[4], self.0[5], self.0[6],
        ]);
        // Special Case 1: Convert any 0 to the same bit pattern
        // (0x0080_0000_0000_0000 is -0: only the F56 sign bit, bit 55, is set)
        if f56_word == 0x0080000000000000u64 {
            return 0x0000000000000000u64;
        }
        // Special Case 2: Convert any NaN to the same bit pattern
        // (the constant has bits above bit 55 set, so no non-NaN word can equal it)
        if self.is_nan() {
            return 0x7FF8000000000001u64;
        }
        f56_word
    }
357
358    /// Returns true if the F56 is NaN. Note that there are many bit patterns that represent NaN
359    pub fn is_nan(&self) -> bool {
360        let bytes7 = self.0;
361        let f56_word = u64::from_be_bytes([
362            0, bytes7[0], bytes7[1], bytes7[2], bytes7[3], bytes7[4], bytes7[5], bytes7[6],
363        ]);
364        let f56_biased_exponent: u16 = (f56_word >> 45) as u16 & 0x3FF; // first 10 bits after the sign bit
365        let f56_mantissa: u64 = f56_word & 0x1FFF_FFFF_FFFF; // the rightmost 45 bits
366        // all 1s in the exponent and a nonzero mantissa means NaN
367        f56_biased_exponent == 0b11_1111_1111 && f56_mantissa > 0u64
368    }
369}
370
371#[cfg(test)]
372mod tests {
373
374    use crate::float::F56;
375
376    const MAXIMUM_ACCEPTABLE_RELATIVE_DIFFERENCE: f64 = 1e-10;
377
378    pub fn log_f32(f: f32) -> String {
379        let bytes = f.to_be_bytes();
380        let word = u32::from_be_bytes([bytes[0], bytes[1], bytes[2], bytes[3]]);
381        let f32_biased_exponent: u8 = (word >> 23) as u8; // first 8 bits after the sign bit
382        let mut true_exponent: i16 = (f32_biased_exponent as i16) - 127; // remove the bias of 2^7-1
383        if f32_biased_exponent == 0 {
384            true_exponent = 0;
385        }
386        let f32_mantissa = word & 0x007f_ffff; // everything after first 9 bits
387        // print the f32 in scientific notation, the true exponent in decimal, and the mantissa in decimal, and the hex word
388        format!(
389            "f32: {:.5e}, true exponent: {}, mantissa: {:016x}, word: {:016x}",
390            f, true_exponent, f32_mantissa, word
391        )
392    }
393    pub fn log_f56(f: F56) -> String {
394        let bytes7 = f.0;
395        let word = u64::from_be_bytes([
396            0, bytes7[0], bytes7[1], bytes7[2], bytes7[3], bytes7[4], bytes7[5], bytes7[6],
397        ]);
398        let f56_biased_exponent: u16 = ((word >> 45) as u16) & 0x3FF; // first 10 bits after the sign bit
399        let f56_mantissa = word & 0x1FFFF_FFFF_FFFF; // everything after first 10 bits
400        let mut true_exponent = f56_biased_exponent as i16 - 511; // remove the bias of 2^9-1
401        if f56_biased_exponent == 0 {
402            true_exponent = 0;
403        }
404        format!(
405            "f56: {:.5e}, true exponent: {}, mantissa: {:016x}, word: {:016x}",
406            f64::from(f),
407            true_exponent,
408            f56_mantissa,
409            word
410        )
411    }
412    pub fn log_f64(f: f64) -> String {
413        let bytes = f.to_be_bytes();
414        let word = u64::from_be_bytes([
415            bytes[0], bytes[1], bytes[2], bytes[3], bytes[4], bytes[5], bytes[6], bytes[7],
416        ]);
417        let f64_biased_exponent: i16 = ((word & 0x7ff0_0000_0000_0000) >> 52) as i16; // first 11 bits after the sign bit
418        let mut true_exponent: i16 = f64_biased_exponent - 1023; // remove the bias of 2^10-1
419        if f64_biased_exponent == 0 {
420            true_exponent = 0;
421        }
422        let f64_mantissa = word & 0x001f_ffff_ffff_ffff; // everything after first 11 bits
423        format!(
424            "f64: {:.5e}, true exponent: {}, mantissa: {:016x}, word: {:016x}",
425            f, true_exponent, f64_mantissa, word
426        )
427    }
428    fn debug(orig_f64: f64, index: usize) {
429        println!("index: {}", index);
430        println!("original f64      : {}", log_f64(orig_f64));
431        println!("f64 -> f32        : {}", log_f32(orig_f64 as f32));
432        println!("f64 -> f32 -> f64 : {}", log_f64((orig_f64 as f32) as f64));
433        println!("f64 -> f56        : {}", log_f56(F56::from(orig_f64)));
434        println!(
435            "f64 -> f56 -> f64 : {}",
436            log_f64(f64::from(F56::from(orig_f64)))
437        );
438        let f32_diff = (f64::from(orig_f64 as f32) - orig_f64).abs();
439        let f32_relative_difference = f32_diff / orig_f64.abs();
440        if f32_relative_difference > 0.0 {
441            println!("f32 relative difference {:.5e}", f32_relative_difference);
442        }
443        let f56_diff: f64 = (f64::from(F56::from(orig_f64)) - orig_f64).abs();
444        let f56_relative_difference = f56_diff / orig_f64.abs();
445        if f56_relative_difference > 0.0 {
446            println!("f56 relative difference {:.5e}", f56_relative_difference);
447        }
448        println!("")
449    }
450
    /// Everyday values: zero (listed twice), small integers, one negative
    /// integer and two non-integers.
    fn get_regular_f64_values() -> [f64; 15] {
        [
            0_f64,
            0.0,
            1.0,
            2.0,
            2.5123,
            3.0,
            4.0,
            5.0,
            6.0,
            7.0,
            8.0,
            9.0,
            10.0,
            -1.0,
            // more digits than an f64 can hold; the literal is rounded to the nearest f64
            -0.33333333333333333333333333333,
        ]
    }
470
    /// A broad mix of values: numbers near the F56 precision limit, large
    /// integers, decimal fractions, negative values and long digit runs.
    ///
    /// NOTE(review): the hexadecimal entries are `u64 as f64` numeric casts, so
    /// each one is the integer's value converted to a float (for example
    /// `0x0000_0000_0000_0001u64 as f64` is `1.0`), not an f64 with that bit
    /// pattern. If NaN / subnormal bit patterns were intended, `f64::from_bits`
    /// would be needed -- confirm the intent.
    /// NOTE(review): several hexadecimal entries appear twice in the list.
    fn get_variety_f64_values() -> [f64; 133] {
        [
            399.999_999_999_58527_f64,
            399.999_999_999_585_f64,
            399.999_999_999_58_f64,
            399.999_999_999_6_f64,
            399.999_999_999_f64,
            0.000_000_012_312_312_412_412_312_3_f64,
            0.000_000_012_312_312_452_57_f64,
            399_999_999.999_58_f64,
            399_999_999_999_600_000_000_000_000_000_000_0_f64,
            399_999_999_999_600_000_000_000_000_000_000_0_f64,
            0x0000_0000_0000_0000u64 as f64,
            0x0000_0000_0000_0001u64 as f64,
            0x8000_0000_0000_0000u64 as f64,
            0x7FFF_FFFF_FFFF_FFFFu64 as f64,
            0x7FFF_FFFF_FFFF_FFFEu64 as f64,
            0xFFFF_FFFF_FFFF_FFFFu64 as f64,
            0x7000_0000_0000_0000u64 as f64,
            0xDEAD_BEEF_DEAD_BEEFu64 as f64,
            0x1234_5678_9ABC_DEF0u64 as f64,
            0x1111_1111_1111_1111u64 as f64,
            0x2222_2222_2222_2222u64 as f64,
            0x3333_3333_3333_3333u64 as f64,
            0x4444_4444_4444_4444u64 as f64,
            0x5555_5555_5555_5555u64 as f64,
            0x6666_6666_6666_6666u64 as f64,
            0x7777_7777_7777_7777u64 as f64,
            0x8888_8888_8888_8888u64 as f64,
            0x9999_9999_9999_9999u64 as f64,
            0xAAAA_AAAA_AAAA_AAAAu64 as f64,
            0xBBBB_BBBB_BBBB_BBBBu64 as f64,
            0xCCCC_CCCC_CCCC_CCCCu64 as f64,
            0xDDDD_DDDD_DDDD_DDDDu64 as f64,
            0xEEEE_EEEE_EEEE_EEEEu64 as f64,
            0xFFFF_FFFF_FFFF_FFFEu64 as f64,
            0xFFF0_0000_0000_0001u64 as f64,
            0xDEAD_BEEF_DEAD_BEEFu64 as f64,
            0x1234_5678_9ABC_DEF0u64 as f64,
            0x1111_1111_1111_1111u64 as f64,
            0x2222_2222_2222_2222u64 as f64,
            0x3333_3333_3333_3333u64 as f64,
            0x4444_4444_4444_4444u64 as f64,
            0x5555_5555_5555_5555u64 as f64,
            0x6666_6666_6666_6666u64 as f64,
            0x7777_7777_7777_7777u64 as f64,
            0x8888_8888_8888_8888u64 as f64,
            0x9999_9999_9999_9999u64 as f64,
            0xAAAA_AAAA_AAAA_AAAAu64 as f64,
            0xBBBB_BBBB_BBBB_BBBBu64 as f64,
            0xCCCC_CCCC_CCCC_CCCCu64 as f64,
            0xDDDD_DDDD_DDDD_DDDDu64 as f64,
            0xEEEE_EEEE_EEEE_EEEEu64 as f64,
            2.3,
            23.0,
            230.0,
            2300.0,
            23000.0,
            230000.0,
            23e5,
            23e6,
            2.3e5,
            0.23,
            0.023,
            0.0023,
            0.00023,
            0.000023,
            0.0000023,
            0.23e-5,
            -1234567890123456789012345678901.0,
            -1.1412314e108,
            -3.33e55,
            -1e44,
            -1337.1337,
            -222.2,
            -0.0,
            -0.1,
            0.0,
            0.1,
            0.01,
            0.001,
            0.249,
            0.999,
            1.0,
            1.001,
            1.01,
            1.1,
            1.999,
            2.0,
            2.2345,
            3.0,
            3.33333333333333333333333333333333,
            4.0,
            4.44,
            5.0,
            5.1,
            6.0,
            6.2,
            7.0,
            8.0,
            9.0,
            10.0,
            100.0,
            234.432,
            420.69,
            1234.0,
            12345.0,
            123456.0,
            1234567.0,
            12345678.0,
            123456789.0,
            1234567890.0,
            12345678901.0,
            123456789012.0,
            1234567890123.0,
            12345678901234.0,
            123456789012345.0,
            1234567890123456.0,
            12345678901234567.0,
            123456789012345678.0,
            1234567890123456789.0,
            12345678901234567890.0,
            123456789012345678901.0,
            1234567890123456789012.0,
            12345678901234567890123.0,
            123456789012345678901234.0,
            1234567890123456789012345.0,
            12345678901234567890123456.0,
            123456789012345678901234567.0,
            1234567890123456789012345678.0,
            12345678901234567890123456789.0,
            123456789012345678901234567890.0,
            123456789012345678901234567890.1,
            999.999e99,
            1e100,
        ]
    }
608
    /// Boundary values: f64 and f32 extremes, infinities, NaN, negative zero,
    /// values around the f32 range limits and values just below the smallest
    /// F56 subnormal (8.48e-168).
    ///
    /// NOTE(review): the hexadecimal entries are `u64 as f64` numeric casts
    /// (integer value converted to float), not f64 bit patterns; use
    /// `f64::from_bits` if bit patterns were intended -- confirm the intent.
    /// NOTE(review): some entries (f64::MIN, f64::MAX, infinities, NaN,
    /// f64::MIN_POSITIVE) are listed twice.
    fn get_edge_case_f64_values() -> [f64; 39] {
        [
            f64::MIN,
            f64::MAX,
            f64::MIN_POSITIVE,
            f64::INFINITY,
            f64::NEG_INFINITY,
            f64::NAN,
            -0.0,
            f32::MIN_POSITIVE as f64, // 42
            f32::MIN_POSITIVE as f64 / 3.0,
            f32::MIN_POSITIVE as f64 / 7e5,
            f32::MIN_POSITIVE as f64 / 7e6,
            f32::MIN_POSITIVE as f64 / 7e7,
            f32::MIN_POSITIVE as f64 / 7e8,
            f32::MIN_POSITIVE as f64 / 4.123e14,
            f32::MAX as f64,
            f32::MAX as f64 * 3.0,
            f32::MAX as f64 + 99.0,
            f64::MIN_POSITIVE,
            f64::MIN_POSITIVE / 2.0,
            f64::MIN_POSITIVE / 10.0,
            f64::MIN,
            f64::MAX,
            f64::INFINITY,
            f64::NEG_INFINITY,
            f64::NAN,
            F56::EPSILON,
            F56::EPSILON / 3.0,
            8.4e-168,
            8.4e-169,
            8.4e-170,
            0xFFFF_FFFF_FFFF_FFFEu64 as f64,
            0xFFF0_0000_0000_0001u64 as f64,
            0x0000_0000_0000_0000u64 as f64,
            0x0000_0000_0000_0001u64 as f64,
            0x8000_0000_0000_0000u64 as f64,
            0x7FFF_FFFF_FFFF_FFFFu64 as f64,
            0x7FFF_FFFF_FFFF_FFFEu64 as f64,
            0xFFFF_FFFF_FFFF_FFFFu64 as f64,
            0x7000_0000_0000_0000u64 as f64,
        ]
    }
652
653    fn relative_difference(a: f64, b: f64) -> f64 {
654        (a - b).abs() / b.abs()
655    }
656
657    // TODO: test the F56::MAX, F56::MIN_POSITIVE, F56::MIN_POSITIVE_SUBNORMAL cases
658
659    #[test]
660    fn f56_strings_match_f64_strings() {
661        let string_test_closure = |f64_value: &f64| {
662            let f64_string = format!("{}", f64_value);
663            let f56_value = F56::from(*f64_value);
664            let f56_string = format!("{}", f56_value);
665            if f56_string == f64_string {
666                return;
667            }
668            println!("f64: {} not quite equal\nF56: {}", f64_string, f56_string);
669            // let abs: f64 = f64_value.abs();
670            if f64_value.abs() > f64::from(F56::MAX) && f56_string.contains("inf") {
671                println!(
672                    "But F56 is expected to be infinite if f64 is outside of its range. {:.0e} > {:.0e}\n",
673                    f64_value,
674                    f64::from(F56::MAX)
675                );
676                return;
677            }
678            if f64_value.abs() < F56::MIN_POSITIVE_SUBNORMAL.into() && f64::from(f56_value) == 0.0 {
679                println!(
680                    "But F56 is expected to be 0 if f64 is outside of its range. {:.0e} < {:.0e}\n",
681                    f64_value,
682                    f64::from(F56::MIN_POSITIVE_SUBNORMAL)
683                );
684                return;
685            }
686
687            let f56_string_to_f64_value = f56_string.parse::<f64>().unwrap();
688            let f64_string_to_f64_value = f64_string.parse::<f64>().unwrap();
689            let relative_difference =
690                relative_difference(f56_string_to_f64_value, f64_string_to_f64_value);
691            if relative_difference < MAXIMUM_ACCEPTABLE_RELATIVE_DIFFERENCE {
692                println!(
693                    "But the relative difference of {} is acceptably below the maximum {}\n",
694                    relative_difference, MAXIMUM_ACCEPTABLE_RELATIVE_DIFFERENCE,
695                );
696                return;
697            }
698            // Failing this test case
699            debug(*f64_value, 0);
700            assert_eq!(
701                f64_string, f56_string,
702                "f64(left) and f56(right) string values must be equal"
703            );
704        };
705        println!("\n\n\n\nRegular f64 values");
706        for f in get_regular_f64_values().iter() {
707            string_test_closure(f);
708        }
709        println!("\n\n\n\nVariety f64 values");
710        for f in get_variety_f64_values().iter() {
711            string_test_closure(f);
712        }
713        println!("\n\n\n\nEdge case f64 values");
714        for f in get_edge_case_f64_values().iter() {
715            string_test_closure(f);
716        }
717    }
718
    /// Arithmetic and ordering are performed on the f64 form of each F56; this
    /// checks addition and comparisons around the limit of F56 precision
    /// (45 mantissa bits, so differences below 2^-45 ~= 2.8e-14 next to 1.0 vanish).
    #[test]
    fn f56_operations() {
        let op1 = "1.1".parse::<F56>().unwrap();
        let op2 = "1.3".parse::<F56>().unwrap();
        let target = "2.4".parse::<F56>().unwrap();

        let op1_f64 = f64::from(op1);
        let op2_f64 = f64::from(op2);
        let target_f64 = f64::from(target);

        let calculated_f64 = op1_f64 + op2_f64;
        assert_eq!(calculated_f64, target_f64);

        // Test > on the edge of F56 precision
        // op1 differs from 1.0 by 1e-13 (representable), op2 by 1e-14 (stored as exactly 1.0)
        let op1 = "1.0000000000001".parse::<F56>().unwrap();
        let op2 = "1.00000000000001".parse::<F56>().unwrap();
        let gt = f64::from(op1) > f64::from(op2);
        assert!(gt);

        // Test < on numbers too precise for F56
        let op1 = "1.000000000000001".parse::<F56>().unwrap(); // 16 digits (stored as 1.0)
        let op2 = "1.00000000000001".parse::<F56>().unwrap(); // 15 digits (stored as 1.0)
        let lt = f64::from(op1) < f64::from(op2);
        let gt = f64::from(op1) > f64::from(op2);
        assert_eq!(op1, op2);
        assert!(!lt);
        assert!(!gt);

        // Test < on numbers straddling the Display precision boundary (F56::DIGITS)
        // Both are stored as values different from 1.0; only their printed forms differ in rounding.
        let op1 = "1.000000000001".parse::<F56>().unwrap(); // 13 digits (Display rounds this to 1)
        let op2 = "1.00000000001".parse::<F56>().unwrap(); // 12 digits (Display keeps 1.00000000001)
        let lt = f64::from(op1) < f64::from(op2);
        let gt = f64::from(op1) > f64::from(op2);
        assert!(lt);
        assert!(!gt);
        assert_ne!(op1, op2);

        // Test op1 - op2 > 0 instead of op1 > op2
        let lt = (f64::from(op1) - f64::from(op2)) < 0.0;
        let gt = (f64::from(op1) - f64::from(op2)) > 0.0;
        assert!(lt);
        assert!(!gt);
        assert_ne!((f64::from(op1) - f64::from(op2)), 0.0);
    }
763}
slvm/float/float_56.rs

slvm/float/
float_56.rs