//! Shared trait and methods for parsing floats.
//!
//! This is adapted from [fast-float-rust](https://github.com/aldanor/fast-float-rust),
//! a port of [fast_float](https://github.com/fastfloat/fast_float) to Rust.
// NOTE: We never want to disable multi-digit optimizations when parsing our floats,
// since the nanoseconds saved on branching are irrelevant once decimal points and
// fractional digits are considered, and the optimizations majorly improve longer floats.
#![doc(hidden)]
#[cfg(not(feature = "compact"))]
use lexical_parse_integer::algorithm;
#[cfg(feature = "f16")]
use lexical_util::bf16::bf16;
use lexical_util::digit::{char_to_digit_const, char_to_valid_digit_const};
use lexical_util::error::Error;
#[cfg(feature = "f16")]
use lexical_util::f16::f16;
use lexical_util::format::NumberFormat;
use lexical_util::iterator::{AsBytes, Bytes, DigitsIter, Iter};
use lexical_util::result::Result;
use lexical_util::step::u64_step;
#[cfg(any(feature = "compact", feature = "radix"))]
use crate::bellerophon::bellerophon;
#[cfg(feature = "power-of-two")]
use crate::binary::{binary, slow_binary};
use crate::float::{extended_to_float, ExtendedFloat80, LemireFloat};
#[cfg(not(feature = "compact"))]
use crate::lemire::lemire;
use crate::number::Number;
use crate::options::Options;
use crate::shared;
use crate::slow::slow_radix;
// API
// ---
/// Check if the radix is a power-of-2.
///
/// Expands to a `matches!` expression that is `true` when `$radix` is one of
/// `2`, `4`, `8`, `16` or `32`, and `false` for every other value.
#[cfg(feature = "power-of-two")]
macro_rules! is_power_two {
($radix:expr) => {
matches!($radix, 2 | 4 | 8 | 16 | 32)
};
}
/// Check if the radix is valid and error otherwise.
///
/// Must be invoked inside a function returning `Result<_>`: on failure the
/// macro `return`s `Err(Error::InvalidRadix)` from the enclosing function.
///
/// The format is rejected when:
/// - `format.error()` is anything other than `Error::Success` (note that
///   every format error is reported as `InvalidRadix` here), or
/// - the mantissa radix and exponent base differ and the
///   `(radix, exponent_base)` pair is not one of `(4, 2)`, `(8, 2)`,
///   `(16, 2)`, `(32, 2)` or `(16, 4)`.
#[cfg(feature = "power-of-two")]
macro_rules! check_radix {
($format:ident) => {{
let format = NumberFormat::<{ $format }> {};
if format.error() != Error::Success {
return Err(Error::InvalidRadix);
} else if format.radix() != format.exponent_base() {
// Mixed radix/base is only accepted for these specific pairs.
let valid_radix = matches!(
(format.radix(), format.exponent_base()),
(4, 2) | (8, 2) | (16, 2) | (32, 2) | (16, 4)
);
if !valid_radix {
return Err(Error::InvalidRadix);
}
}
}};
}
/// Check if the decimal radix is valid and error otherwise.
///
/// Variant used when the `power-of-two` feature is disabled. Must be invoked
/// inside a function returning `Result<_>`: if `format.error()` is anything
/// other than `Error::Success`, the macro `return`s
/// `Err(Error::InvalidRadix)` from the enclosing function.
#[cfg(not(feature = "power-of-two"))]
macro_rules! check_radix {
($format:ident) => {{
let format = NumberFormat::<{ $format }> {};
if format.error() != Error::Success {
return Err(Error::InvalidRadix);
}
}};
}
/// Parse float trait, implemented in terms of the optimized back-end.
///
/// Every provided method first validates `FORMAT` with `check_radix!`
/// (returning early with an error if validation fails) and then forwards
/// to the free function of the same name defined in this module.
pub trait ParseFloat: LemireFloat {
/// Forward complete parser parameters to the backend.
///
/// Returns the parsed float, or an error from format validation or from
/// the backend parser.
#[cfg_attr(not(feature = "compact"), inline(always))]
fn parse_complete<const FORMAT: u128>(bytes: &[u8], options: &Options) -> Result<Self> {
check_radix!(FORMAT);
parse_complete::<Self, FORMAT>(bytes, options)
}
/// Forward partial parser parameters to the backend.
///
/// Returns the parsed float together with the `usize` count reported by
/// the backend partial parser.
#[cfg_attr(not(feature = "compact"), inline(always))]
fn parse_partial<const FORMAT: u128>(bytes: &[u8], options: &Options) -> Result<(Self, usize)> {
check_radix!(FORMAT);
parse_partial::<Self, FORMAT>(bytes, options)
}
/// Forward complete parser parameters to the backend, using only the fast
/// path.
#[cfg_attr(not(feature = "compact"), inline(always))]
fn fast_path_complete<const FORMAT: u128>(bytes: &[u8], options: &Options) -> Result<Self> {
check_radix!(FORMAT);
fast_path_complete::<Self, FORMAT>(bytes, options)
}
/// Forward partial parser parameters to the backend, using only the fast
/// path.
///
/// Returns the parsed float together with the `usize` count reported by
/// the backend partial parser.
#[cfg_attr(not(feature = "compact"), inline(always))]
fn fast_path_partial<const FORMAT: u128>(
bytes: &[u8],
options: &Options,
) -> Result<(Self, usize)> {
check_radix!(FORMAT);
fast_path_partial::<Self, FORMAT>(bytes, options)
}
}
/// Implement `ParseFloat` for each listed type using only the trait's
/// provided (default) methods.
macro_rules! parse_float_impl {
($($t:ty)*) => ($(
impl ParseFloat for $t {}
)*)
}
// The native float types use the default implementations unchanged.
parse_float_impl! { f32 f64 }
/// Implement `ParseFloat` for half-precision types by parsing the input as
/// an `f32` and converting the result with `Self::from_f32`.
///
/// Each method validates `FORMAT` with `check_radix!` before parsing, exactly
/// like the default `ParseFloat` methods do. The backend parser only
/// `debug_assert!`s that the format has already been validated, so skipping
/// the check here would let an invalid format reach the parser unchecked.
#[cfg(feature = "f16")]
macro_rules! parse_float_as_f32 {
    ($($t:ty)*) => ($(
        impl ParseFloat for $t {
            /// Parse a complete float as `f32`, then narrow it to `Self`.
            #[cfg_attr(not(feature = "compact"), inline(always))]
            fn parse_complete<const FORMAT: u128>(bytes: &[u8], options: &Options)
                -> Result<Self>
            {
                check_radix!(FORMAT);
                Ok(Self::from_f32(parse_complete::<f32, FORMAT>(bytes, options)?))
            }

            /// Parse a partial float as `f32`, then narrow it to `Self`.
            #[cfg_attr(not(feature = "compact"), inline(always))]
            fn parse_partial<const FORMAT: u128>(bytes: &[u8], options: &Options)
                -> Result<(Self, usize)>
            {
                check_radix!(FORMAT);
                let (float, count) = parse_partial::<f32, FORMAT>(bytes, options)?;
                Ok((Self::from_f32(float), count))
            }

            /// Fast-path-only complete parse as `f32`, then narrow to `Self`.
            #[cfg_attr(not(feature = "compact"), inline(always))]
            fn fast_path_complete<const FORMAT: u128>(bytes: &[u8], options: &Options)
                -> Result<Self>
            {
                check_radix!(FORMAT);
                Ok(Self::from_f32(fast_path_complete::<f32, FORMAT>(bytes, options)?))
            }

            /// Fast-path-only partial parse as `f32`, then narrow to `Self`.
            #[cfg_attr(not(feature = "compact"), inline(always))]
            fn fast_path_partial<const FORMAT: u128>(bytes: &[u8], options: &Options)
                -> Result<(Self, usize)>
            {
                check_radix!(FORMAT);
                let (float, count) = fast_path_partial::<f32, FORMAT>(bytes, options)?;
                Ok((Self::from_f32(float), count))
            }
        }
    )*)
}

#[cfg(feature = "f16")]
parse_float_as_f32! { bf16 f16 }
// PARSE
// -----
// NOTE:
// The partial and complete parsers are implemented separately because doing
// so provides minor optimizations when parsing invalid input, and because
// their internal logic differs slightly. Most of the code is shared, so only
// about 30 lines are duplicated.
/// Parse the sign from the leading digits (the mantissa sign).
///
/// Delegates to the `parse_sign!` macro, passing the format's
/// `no_positive_mantissa_sign` and `required_mantissa_sign` settings along
/// with the `InvalidPositiveSign` and `MissingSign` error variants.
///
/// NOTE(review): presumably returns `true` when the value is negative, since
/// callers in this module bind the result to `is_negative` — confirm against
/// the `parse_sign!` definition.
#[cfg_attr(not(feature = "compact"), inline(always))]
pub fn parse_mantissa_sign<const FORMAT: u128>(byte: &mut Bytes<'_, FORMAT>) -> Result<bool> {
let format = NumberFormat::<{ FORMAT }> {};
parse_sign!(
byte,
true,
format.no_positive_mantissa_sign(),
format.required_mantissa_sign(),
InvalidPositiveSign,
MissingSign
)
}
/// Parse the sign of the exponent.
///
/// Delegates to the `parse_sign!` macro, passing the format's
/// `no_positive_exponent_sign` and `required_exponent_sign` settings along
/// with the `InvalidPositiveExponentSign` and `MissingExponentSign` error
/// variants.
///
/// NOTE(review): presumably returns `true` when the exponent is negative,
/// since the caller binds the result to `is_negative_exponent` — confirm
/// against the `parse_sign!` definition.
#[cfg_attr(not(feature = "compact"), inline(always))]
pub fn parse_exponent_sign<const FORMAT: u128>(byte: &mut Bytes<'_, FORMAT>) -> Result<bool> {
let format = NumberFormat::<{ FORMAT }> {};
parse_sign!(
byte,
true,
format.no_positive_exponent_sign(),
format.required_exponent_sign(),
InvalidPositiveExponentSign,
MissingExponentSign
)
}
/// Utility to extract the result and handle any errors from parsing a `Number`.
///
/// - `format` - The numerical format as a packed integer
/// - `byte` - The `DigitsIter` iterator
/// - `is_negative` - If the final value is negative
/// - `options` - The parsing options forwarded to both parser functions
/// - `parse_normal` - The function to parse non-special numbers with
/// - `parse_special` - The function to parse special numbers with
///
/// Evaluates to the `Ok` value of `parse_normal`. If `parse_normal` fails,
/// `parse_special` is tried on a fresh clone of `byte`: on a match the macro
/// `return`s `Ok(value)` from the enclosing function, otherwise it `return`s
/// the original error from `parse_normal`.
macro_rules! parse_number {
(
$format:ident,
$byte:ident,
$is_negative:ident,
$options:ident,
$parse_normal:ident,
$parse_special:ident
) => {{
match $parse_normal::<$format>($byte.clone(), $is_negative, $options) {
Ok(n) => n,
Err(e) => {
// Only fall back to special values after normal parsing fails.
if let Some(value) =
$parse_special::<_, $format>($byte.clone(), $is_negative, $options)
{
return Ok(value);
} else {
return Err(e);
}
},
}
}};
}
/// Convert extended float to native.
///
/// - `type` - The native floating point type.
/// - `fp` - The extended floating-point representation.
/// - `is_negative` - If `true`, the converted float is negated.
///
/// Evaluates to the native float produced by `extended_to_float`, with the
/// sign applied.
macro_rules! to_native {
($type:ident, $fp:ident, $is_negative:ident) => {{
let mut float = extended_to_float::<$type>($fp);
if $is_negative {
float = -float;
}
float
}};
}
/// Parse a float from bytes using a complete parser.
#[inline(always)]
#[allow(clippy::missing_inline_in_public_items)] // reason = "only public for testing"
pub fn parse_complete<F: LemireFloat, const FORMAT: u128>(
bytes: &[u8],
options: &Options,
) -> Result<F> {
let mut byte = bytes.bytes::<{ FORMAT }>();
let is_negative = parse_mantissa_sign(&mut byte)?;
if byte.integer_iter().is_consumed() {
if NumberFormat::<FORMAT>::REQUIRED_INTEGER_DIGITS
|| NumberFormat::<FORMAT>::REQUIRED_MANTISSA_DIGITS
{
return Err(Error::Empty(byte.cursor()));
} else {
return Ok(F::ZERO);
}
}
// Parse our a small representation of our number.
let num: Number<'_> =
parse_number!(FORMAT, byte, is_negative, options, parse_complete_number, parse_special);
// Try the fast-path algorithm.
if let Some(value) = num.try_fast_path::<_, FORMAT>() {
return Ok(value);
}
// Now try the moderate path algorithm.
let mut fp = moderate_path::<F, FORMAT>(&num, options.lossy());
// Unable to correctly round the float using the fast or moderate algorithms.
// Fallback to a slower, but always correct algorithm. If we have
// lossy, we can't be here.
if fp.exp < 0 {
debug_assert!(!options.lossy(), "lossy algorithms never use slow algorithms");
// Undo the invalid extended float biasing.
fp.exp -= shared::INVALID_FP;
fp = slow_path::<F, FORMAT>(num, fp);
}
// Convert to native float and return result.
Ok(to_native!(F, fp, is_negative))
}
/// Parse a float using only the fast path as a complete parser.
#[inline(always)]
#[allow(clippy::missing_inline_in_public_items)] // reason = "only public for testing"
pub fn fast_path_complete<F: LemireFloat, const FORMAT: u128>(
bytes: &[u8],
options: &Options,
) -> Result<F> {
let mut byte = bytes.bytes::<{ FORMAT }>();
let is_negative = parse_mantissa_sign(&mut byte)?;
if byte.integer_iter().is_consumed() {
if NumberFormat::<FORMAT>::REQUIRED_INTEGER_DIGITS
|| NumberFormat::<FORMAT>::REQUIRED_MANTISSA_DIGITS
{
return Err(Error::Empty(byte.cursor()));
} else {
return Ok(F::ZERO);
}
}
// Parse our a small representation of our number.
let num =
parse_number!(FORMAT, byte, is_negative, options, parse_complete_number, parse_special);
Ok(num.force_fast_path::<_, FORMAT>())
}
/// Parse a float from bytes using a partial parser.
#[inline(always)]
#[allow(clippy::missing_inline_in_public_items)] // reason = "only public for testing"
pub fn parse_partial<F: LemireFloat, const FORMAT: u128>(
bytes: &[u8],
options: &Options,
) -> Result<(F, usize)> {
let mut byte = bytes.bytes::<{ FORMAT }>();
let is_negative = parse_mantissa_sign(&mut byte)?;
if byte.integer_iter().is_consumed() {
if NumberFormat::<FORMAT>::REQUIRED_INTEGER_DIGITS
|| NumberFormat::<FORMAT>::REQUIRED_MANTISSA_DIGITS
{
return Err(Error::Empty(byte.cursor()));
} else {
return Ok((F::ZERO, byte.cursor()));
}
}
// Parse our a small representation of our number.
let (num, count) = parse_number!(
FORMAT,
byte,
is_negative,
options,
parse_partial_number,
parse_partial_special
);
// Try the fast-path algorithm.
if let Some(value) = num.try_fast_path::<_, FORMAT>() {
return Ok((value, count));
}
// Now try the moderate path algorithm.
let mut fp = moderate_path::<F, FORMAT>(&num, options.lossy());
// Unable to correctly round the float using the fast or moderate algorithms.
// Fallback to a slower, but always correct algorithm. If we have
// lossy, we can't be here.
if fp.exp < 0 {
debug_assert!(!options.lossy(), "lossy algorithms never use slow algorithms");
// Undo the invalid extended float biasing.
fp.exp -= shared::INVALID_FP;
fp = slow_path::<F, FORMAT>(num, fp);
}
// Convert to native float and return result.
Ok((to_native!(F, fp, is_negative), count))
}
/// Parse a float using only the fast path as a partial parser.
#[inline(always)]
#[allow(clippy::missing_inline_in_public_items)] // reason = "only public for testing"
pub fn fast_path_partial<F: LemireFloat, const FORMAT: u128>(
bytes: &[u8],
options: &Options,
) -> Result<(F, usize)> {
let mut byte = bytes.bytes::<{ FORMAT }>();
let is_negative = parse_mantissa_sign(&mut byte)?;
if byte.integer_iter().is_consumed() {
if NumberFormat::<FORMAT>::REQUIRED_INTEGER_DIGITS
|| NumberFormat::<FORMAT>::REQUIRED_MANTISSA_DIGITS
{
return Err(Error::Empty(byte.cursor()));
} else {
return Ok((F::ZERO, byte.cursor()));
}
}
// Parse our a small representation of our number.
let (num, count) = parse_number!(
FORMAT,
byte,
is_negative,
options,
parse_partial_number,
parse_partial_special
);
Ok((num.force_fast_path::<_, FORMAT>(), count))
}
// PATHS
// -----
/// Wrapper for different moderate-path algorithms.
///
/// Selects the backend at compile time from the enabled features and, where
/// more than one backend is available, at run time from the mantissa radix:
/// `lemire` for radix 10 (non-`compact` builds), `binary` for power-of-two
/// radices, and `bellerophon` otherwise.
///
/// Callers in this module treat a negative `exp` in the returned value as
/// "could not be rounded correctly": they subtract `shared::INVALID_FP` from
/// it and fall back to `slow_path`.
///
/// - `num` - The parsed number (mantissa, exponent and digit slices).
/// - `lossy` - Forwarded unchanged to the selected backend.
#[must_use]
#[inline(always)]
pub fn moderate_path<F: LemireFloat, const FORMAT: u128>(
num: &Number,
lossy: bool,
) -> ExtendedFloat80 {
#[cfg(feature = "compact")]
{
#[cfg(feature = "power-of-two")]
{
let format = NumberFormat::<{ FORMAT }> {};
if is_power_two!(format.mantissa_radix()) {
// Implement the power-of-two backends.
binary::<F, FORMAT>(num, lossy)
} else {
bellerophon::<F, FORMAT>(num, lossy)
}
}
#[cfg(not(feature = "power-of-two"))]
{
bellerophon::<F, FORMAT>(num, lossy)
}
}
#[cfg(not(feature = "compact"))]
{
// NOTE(review): exactly one of the three blocks below must survive cfg
// evaluation; this presumably relies on `radix` implying `power-of-two`
// — confirm against the crate's feature definitions.
#[cfg(feature = "radix")]
{
let format = NumberFormat::<{ FORMAT }> {};
let radix = format.mantissa_radix();
if radix == 10 {
lemire::<F>(num, lossy)
} else if is_power_two!(radix) {
// Implement the power-of-two backends.
binary::<F, FORMAT>(num, lossy)
} else {
bellerophon::<F, FORMAT>(num, lossy)
}
}
#[cfg(all(feature = "power-of-two", not(feature = "radix")))]
{
let format = NumberFormat::<{ FORMAT }> {};
let radix = format.mantissa_radix();
debug_assert!(matches!(radix, 2 | 4 | 8 | 10 | 16 | 32));
if radix == 10 {
lemire::<F>(num, lossy)
} else {
// Implement the power-of-two backends.
binary::<F, FORMAT>(num, lossy)
}
}
#[cfg(not(feature = "power-of-two"))]
{
lemire::<F>(num, lossy)
}
}
}
/// Invoke the slow path.
///
/// At this point, the float string has already been validated. With the
/// `power-of-two` feature enabled, power-of-two mantissa radices are handled
/// by `slow_binary`; every other case goes through `slow_radix`.
#[must_use]
#[inline(always)]
pub fn slow_path<F: LemireFloat, const FORMAT: u128>(
    num: Number,
    fp: ExtendedFloat80,
) -> ExtendedFloat80 {
    #[cfg(feature = "power-of-two")]
    {
        let mantissa_radix = NumberFormat::<{ FORMAT }> {}.mantissa_radix();
        if is_power_two!(mantissa_radix) {
            slow_binary::<F, FORMAT>(num)
        } else {
            slow_radix::<F, FORMAT>(num, fp)
        }
    }
    #[cfg(not(feature = "power-of-two"))]
    {
        slow_radix::<F, FORMAT>(num, fp)
    }
}
// NUMBER
// ------
/// Parse a partial, non-special floating point number.
///
/// This creates a representation of the float as the
/// significant digits and the decimal exponent.
///
/// - `byte` - The byte iterator, positioned after the mantissa sign.
/// - `is_negative` - Stored unchanged in the returned `Number`.
/// - `options` - Supplies the decimal point and exponent characters.
/// - `IS_PARTIAL` - Only affects which error is reported when required
///   mantissa digits are missing (see the mantissa check below).
///
/// On success, returns the `Number` and the value of `byte.cursor()` after
/// the mantissa, exponent and optional base suffix have been consumed.
///
/// # Errors
///
/// Returns the format-specific errors produced below (for example
/// `EmptyInteger`, `EmptyFraction`, `EmptyMantissa`, `InvalidDigit`,
/// `EmptyExponent`, `MissingExponent`), or any error from parsing the
/// exponent sign.
#[cfg_attr(not(feature = "compact"), inline(always))]
#[allow(unused_mut)] // reason = "used when format is enabled"
#[allow(clippy::unwrap_used)] // reason = "developer error if we incorrectly assume an overflow"
#[allow(clippy::collapsible_if)] // reason = "more readable uncollapsed"
#[allow(clippy::cast_possible_wrap)] // reason = "no hardware supports buffers >= i64::MAX"
#[allow(clippy::too_many_lines)] // reason = "function is one logical entity"
pub fn parse_number<'a, const FORMAT: u128, const IS_PARTIAL: bool>(
mut byte: Bytes<'a, FORMAT>,
is_negative: bool,
options: &Options,
) -> Result<(Number<'a>, usize)> {
// NOTE:
// There are no satisfactory optimizations to reduce the number
// of multiplications for very long input strings, but this will
// be a small fraction of the performance penalty anyway.
//
// We've tried:
// - checking for explicit overflow, via `overflowing_mul`.
// - counting the max number of steps.
// - subslicing the string, and only processing the first `step`
// digits.
// - pre-computing the maximum power, and only adding until then.
//
// All of these lead to substantial performance penalty.
// If we pre-parse the string, then only process it then, we
// get a performance penalty of ~2.5x (20ns to 50ns) for common
// floats, an unacceptable cost, while only improving performance
// for rare floats 5-25% (9.3µs to 7.5µs for denormal with 6400
// digits, and 7.8µs to 7.4µs for large floats with 6400 digits).
//
// The performance cost is **almost** entirely in this function,
// but additional branching **does** not improve performance,
// and pre-tokenization is a recipe for failure. For halfway
// cases with smaller numbers of digits, the majority of the
// performance cost is in the big integer arithmetic (`pow` and
// `parse_mantissa`), which suggests few optimizations can or should
// be made.
// Config options
let format = NumberFormat::<{ FORMAT }> {};
let decimal_point = options.decimal_point();
let exponent_character = options.exponent();
debug_assert!(format.is_valid(), "should have already checked for an invalid number format");
debug_assert!(!byte.is_buffer_empty(), "should have previously checked for empty input");
// Used to rescale the implicit exponent when the mantissa radix and the
// exponent base differ.
let bits_per_digit = shared::log2(format.mantissa_radix()) as i64;
let bits_per_base = shared::log2(format.exponent_base()) as i64;
// INTEGER
// Check to see if we have a valid base prefix.
#[allow(unused_variables)]
let mut is_prefix = false;
#[cfg(feature = "format")]
{
let base_prefix = format.base_prefix();
let mut iter = byte.integer_iter();
if base_prefix != 0 && iter.read_if_value_cased(b'0').is_some() {
// Check to see if the next character is the base prefix.
// We must have a format like `0x`, `0d`, `0o`.
// NOTE: The check for empty integer digits happens below so
// we don't need a redundant check here.
is_prefix = true;
if iter.read_if_value(base_prefix, format.case_sensitive_base_prefix()).is_some()
&& iter.is_buffer_empty()
&& format.required_integer_digits()
{
return Err(Error::EmptyInteger(iter.cursor()));
}
}
}
// Parse our integral digits.
let mut mantissa = 0_u64;
let start = byte.clone();
#[cfg(not(feature = "compact"))]
parse_8digits::<_, FORMAT>(byte.integer_iter(), &mut mantissa);
parse_digits(byte.integer_iter(), format.mantissa_radix(), |digit| {
mantissa = mantissa.wrapping_mul(format.radix() as u64).wrapping_add(digit as u64);
});
let mut n_digits = byte.current_count() - start.current_count();
#[cfg(feature = "format")]
if format.required_integer_digits() && n_digits == 0 {
return Err(Error::EmptyInteger(byte.cursor()));
}
// Store the integer digits for slow-path algorithms.
// NOTE: We can't use the number of digits to extract the slice for
// non-contiguous iterators, but we also need the number of digits
// for our value calculation. We store both, and let the compiler know
// to optimize it out when not needed.
let b_digits = if cfg!(feature = "format") && !byte.integer_iter().is_contiguous() {
byte.cursor() - start.cursor()
} else {
n_digits
};
debug_assert!(
b_digits <= start.as_slice().len(),
"number of digits parsed must <= buffer length"
);
// SAFETY: safe, since `b_digits <= start.as_slice().len()`.
// This is since `byte.len() >= start.len()` but has to have
// the same end bounds (that is, `start = byte.clone()`), so
// `0 <= byte.current_count() <= start.current_count() <= start.len()`
// so, this will always return only the integer digits.
//
// NOTE: Removing this code leads to ~10% reduction in parsing
// that triggers the Eisell-Lemire algorithm or the digit comp
// algorithms, so don't remove the unsafe indexing.
let integer_digits = unsafe { start.as_slice().get_unchecked(..b_digits) };
// Check if integer leading zeros are disabled.
#[cfg(feature = "format")]
if !is_prefix && format.no_float_leading_zeros() {
if integer_digits.len() > 1 && integer_digits.first() == Some(&b'0') {
return Err(Error::InvalidLeadingZeros(start.cursor()));
}
}
// FRACTION
// Handle decimal point and digits afterwards.
let mut n_after_dot = 0;
let mut exponent = 0_i64;
let mut implicit_exponent: i64;
let int_end = n_digits as i64;
let mut fraction_digits = None;
let has_decimal = byte.first_is_cased(decimal_point);
if has_decimal {
// SAFETY: byte cannot be empty due to `first_is`
unsafe { byte.step_unchecked() };
let before = byte.clone();
#[cfg(not(feature = "compact"))]
parse_8digits::<_, FORMAT>(byte.fraction_iter(), &mut mantissa);
parse_digits(byte.fraction_iter(), format.mantissa_radix(), |digit| {
mantissa = mantissa.wrapping_mul(format.radix() as u64).wrapping_add(digit as u64);
});
n_after_dot = byte.current_count() - before.current_count();
// NOTE: We can't use the number of digits to extract the slice for
// non-contiguous iterators, but we also need the number of digits
// for our value calculation. We store both, and let the compiler know
// to optimize it out when not needed.
let b_after_dot = if cfg!(feature = "format") && !byte.fraction_iter().is_contiguous() {
byte.cursor() - before.cursor()
} else {
n_after_dot
};
// Store the fraction digits for slow-path algorithms.
debug_assert!(
b_after_dot <= before.as_slice().len(),
"digits after dot must be smaller than buffer"
);
// SAFETY: safe, since `b_after_dot <= before.as_slice().len()`.
fraction_digits = Some(unsafe { before.as_slice().get_unchecked(..b_after_dot) });
// Calculate the implicit exponent: the number of digits after the dot.
implicit_exponent = -(n_after_dot as i64);
if format.mantissa_radix() == format.exponent_base() {
exponent = implicit_exponent;
} else {
debug_assert!(bits_per_digit % bits_per_base == 0, "exponent must be a power of base");
exponent = implicit_exponent * bits_per_digit / bits_per_base;
};
#[cfg(feature = "format")]
if format.required_fraction_digits() && n_after_dot == 0 {
return Err(Error::EmptyFraction(byte.cursor()));
}
}
// NOTE: Check if we have our exponent **BEFORE** checking if the
// mantissa is empty, so that both `has_decimal` and `has_exponent` are
// known when choosing which error to report below.
let has_exponent = byte
.first_is(exponent_character, format.case_sensitive_exponent() && cfg!(feature = "format"));
// Check that the mantissa has at least one digit, if the format requires it.
n_digits += n_after_dot;
if format.required_mantissa_digits()
&& (n_digits == 0 || (cfg!(feature = "format") && byte.current_count() == 0))
{
let any_digits = start.clone().integer_iter().peek().is_some();
// NOTE: This is because numbers like `_12.34` have significant digits,
// they just don't have a valid digit (#97).
if has_decimal || has_exponent || !any_digits || IS_PARTIAL {
return Err(Error::EmptyMantissa(byte.cursor()));
} else {
return Err(Error::InvalidDigit(start.cursor()));
}
}
// EXPONENT
// Handle scientific notation.
let mut explicit_exponent = 0_i64;
if has_exponent {
// NOTE: See above for the safety invariant above `required_mantissa_digits`.
// This is separated for correctness concerns, and therefore the two cannot
// be on the same line.
// SAFETY: byte cannot be empty due to `first_is` from `has_exponent`.
unsafe { byte.step_unchecked() };
// Check float format syntax checks.
#[cfg(feature = "format")]
{
// NOTE: We've overstepped for the safety invariant before.
if format.no_exponent_notation() {
return Err(Error::InvalidExponent(byte.cursor() - 1));
}
// Check if we have an exponent without a fraction, which the
// format may disallow.
if format.no_exponent_without_fraction() && fraction_digits.is_none() {
return Err(Error::ExponentWithoutFraction(byte.cursor() - 1));
}
}
let is_negative_exponent = parse_exponent_sign(&mut byte)?;
let before = byte.current_count();
parse_digits(byte.exponent_iter(), format.exponent_radix(), |digit| {
// Digits are still consumed once the cap is reached, but no longer
// accumulated. NOTE(review): presumably this cap keeps the `i64`
// from overflowing on very long exponents — confirm.
if explicit_exponent < 0x10000000 {
explicit_exponent *= format.exponent_radix() as i64;
explicit_exponent += digit as i64;
}
});
if format.required_exponent_digits() && byte.current_count() - before == 0 {
return Err(Error::EmptyExponent(byte.cursor()));
}
// Handle our sign, and get the explicit part of the exponent.
explicit_exponent = if is_negative_exponent {
-explicit_exponent
} else {
explicit_exponent
};
exponent += explicit_exponent;
} else if cfg!(feature = "format") && format.required_exponent_notation() {
return Err(Error::MissingExponent(byte.cursor()));
}
// Check to see if we have a valid base suffix.
// We've already trimmed any leading digit separators here, so we can be safe
// that the first character **is not** a digit separator.
#[allow(unused_variables)]
let base_suffix = format.base_suffix();
#[cfg(feature = "format")]
if base_suffix != 0 {
if byte.first_is(base_suffix, format.case_sensitive_base_suffix()) {
// SAFETY: safe since `byte.len() >= 1`.
unsafe { byte.step_unchecked() };
}
}
// CHECK OVERFLOW
// Get the number of parsed digits (total), and redo if we had overflow.
let end = byte.cursor();
let mut step = u64_step(format.mantissa_radix());
let mut many_digits = false;
#[cfg(feature = "format")]
if !format.required_mantissa_digits() && n_digits == 0 {
exponent = 0;
}
// Few enough digits that the wrapping arithmetic above cannot have overflowed.
if n_digits <= step {
return Ok((
Number {
exponent,
mantissa,
is_negative,
many_digits: false,
integer: integer_digits,
fraction: fraction_digits,
},
end,
));
}
// Check for leading zeros, and to see if we had a false overflow.
n_digits -= step;
let mut zeros = start.clone();
let mut zeros_integer = zeros.integer_iter();
n_digits = n_digits.saturating_sub(zeros_integer.skip_zeros());
if zeros.first_is_cased(decimal_point) {
// SAFETY: safe since zeros cannot be empty due to `first_is`
unsafe { zeros.step_unchecked() };
}
let mut zeros_fraction = zeros.fraction_iter();
n_digits = n_digits.saturating_sub(zeros_fraction.skip_zeros());
// OVERFLOW
// Now, check if we explicitly overflowed.
if n_digits > 0 {
// Have more than 19 significant digits, so we overflowed.
many_digits = true;
// Re-parse from the stored digit slices, taking at most `step` digits.
mantissa = 0;
let mut integer = integer_digits.bytes::<{ FORMAT }>();
// Skip leading zeros, so we can use the step properly.
let mut integer_iter = integer.integer_iter();
integer_iter.skip_zeros();
parse_u64_digits::<_, FORMAT>(integer_iter, &mut mantissa, &mut step);
// NOTE: With the format feature enabled and non-contiguous iterators, we can
// have null fraction digits even if step was not 0. We want to make the
// none check as late in there as possible: any of them should
// short-circuit and should be determined at compile time. So, the
// conditions are either:
// 1. Step == 0
// 2. `cfg!(feature = "format") && !byte.is_contiguous() &&
// fraction_digits.is_none()`
implicit_exponent = if step == 0
|| (cfg!(feature = "format") && !byte.is_contiguous() && fraction_digits.is_none())
{
// Filled our mantissa with just the integer.
int_end - integer.current_count() as i64
} else {
// We know this can't be a None since we had more than 19
// digits previously, so we overflowed a 64-bit integer,
// but parsing only the integral digits produced less
// than 19 digits. That means we must have a decimal
// point, and at least 1 fractional digit.
let mut fraction = fraction_digits.unwrap().bytes::<{ FORMAT }>();
let mut fraction_iter = fraction.fraction_iter();
// Skip leading zeros, so we can use the step properly.
if mantissa == 0 {
fraction_iter.skip_zeros();
}
parse_u64_digits::<_, FORMAT>(fraction_iter, &mut mantissa, &mut step);
-(fraction.current_count() as i64)
};
if format.mantissa_radix() == format.exponent_base() {
exponent = implicit_exponent;
} else {
debug_assert!(bits_per_digit % bits_per_base == 0, "exponent must be a power of base");
exponent = implicit_exponent * bits_per_digit / bits_per_base;
};
// Add back the explicit exponent.
exponent += explicit_exponent;
}
Ok((
Number {
exponent,
mantissa,
is_negative,
many_digits,
integer: integer_digits,
fraction: fraction_digits,
},
end,
))
}
/// Try to parse a partial, non-special floating point number.
///
/// Thin wrapper around `parse_number` with `IS_PARTIAL` set to `true`.
/// Returns the parsed `Number` together with the count reported by
/// `parse_number`; trailing unparsed input is not treated as an error here.
#[inline(always)]
pub fn parse_partial_number<'a, const FORMAT: u128>(
byte: Bytes<'a, FORMAT>,
is_negative: bool,
options: &Options,
) -> Result<(Number<'a>, usize)> {
parse_number::<FORMAT, true>(byte, is_negative, options)
}
/// Try to parse a non-special floating point number.
#[inline(always)]
pub fn parse_complete_number<'a, const FORMAT: u128>(
byte: Bytes<'a, FORMAT>,
is_negative: bool,
options: &Options,
) -> Result<Number<'a>> {
// Then have a const `IsPartial` as well
let length = byte.buffer_length();
let (float, count) = parse_number::<FORMAT, false>(byte, is_negative, options)?;
if count == length {
Ok(float)
} else {
Err(Error::InvalidDigit(count))
}
}
// DIGITS
// ------
/// Iteratively parse and consume digits from bytes.
///
/// Peeks at each byte in turn; while it is a valid digit in `radix`, the
/// digit value is passed to `cb` and the iterator is advanced (and its digit
/// count incremented). Stops at the first non-digit or at the end of input.
#[inline(always)]
pub fn parse_digits<'a, Iter, Cb>(mut iter: Iter, radix: u32, mut cb: Cb)
where
    Iter: DigitsIter<'a>,
    Cb: FnMut(u32),
{
    while let Some(&byte) = iter.peek() {
        // NOTE: Because of the branch on the digit, this would optimize
        // poorly with `read_if`.
        if let Some(digit) = char_to_digit_const(byte, radix) {
            cb(digit);
        } else {
            break;
        }
        // SAFETY: iter cannot be empty due to `iter.peek()`.
        unsafe { iter.step_unchecked() };
        iter.increment_count();
    }
}
/// Iteratively parse and consume digits in intervals of 8.
///
/// Does nothing unless `can_try_parse_multidigit!` allows the multi-digit
/// path for this iterator and radix. The mantissa is updated with wrapping
/// arithmetic, so overflow is silently tolerated here.
///
/// # Preconditions
///
/// The iterator must be of the significant digits, not the exponent.
#[inline(always)]
#[cfg(not(feature = "compact"))]
pub fn parse_8digits<'a, Iter, const FORMAT: u128>(mut iter: Iter, mantissa: &mut u64)
where
    Iter: DigitsIter<'a>,
{
    let format = NumberFormat::<{ FORMAT }> {};
    let radix: u64 = format.radix() as u64;
    if !can_try_parse_multidigit!(iter, radix) {
        return;
    }
    debug_assert!(radix < 16, "radices over 16 will overflow with radix^8");
    let scale = format.radix8() as u64;
    // Can do up to 2 iterations without overflowing, however, for large
    // inputs, this is much faster than any other alternative.
    while let Some(chunk) = algorithm::try_parse_8digits::<u64, _, FORMAT>(&mut iter) {
        *mantissa = mantissa.wrapping_mul(scale).wrapping_add(chunk);
    }
}
/// Iteratively parse and consume digits without overflowing.
///
/// Consumes at most `*step` digits from `iter`, folding them into
/// `*mantissa` and decrementing `*step` by the number of digits consumed.
///
/// # Preconditions
///
/// There must be at least `step` digits left in iterator. The iterator also
/// must be of the significant digits, not the exponent.
#[cfg_attr(not(feature = "compact"), inline(always))]
pub fn parse_u64_digits<'a, Iter, const FORMAT: u128>(
    mut iter: Iter,
    mantissa: &mut u64,
    step: &mut usize,
) where
    Iter: DigitsIter<'a>,
{
    let format = NumberFormat::<{ FORMAT }> {};
    let radix = format.radix() as u64;

    // Take 8 digits at a time while more than 8 may still be consumed.
    #[cfg(not(feature = "compact"))]
    if can_try_parse_multidigit!(iter, radix) {
        debug_assert!(radix < 16, "radices over 16 will overflow with radix^8");
        let scale = format.radix8() as u64;
        while *step > 8 {
            match algorithm::try_parse_8digits::<u64, _, FORMAT>(&mut iter) {
                Some(chunk) => {
                    *mantissa = mantissa.wrapping_mul(scale).wrapping_add(chunk);
                    *step -= 8;
                },
                None => break,
            }
        }
    }

    // Finish one digit at a time. The peek intentionally happens before the
    // step check, matching the order the iterator is observed in.
    while let Some(&byte) = iter.peek() {
        if *step == 0 {
            break;
        }
        let digit = char_to_valid_digit_const(byte, radix as u32);
        *mantissa = *mantissa * radix + digit as u64;
        *step -= 1;
        // SAFETY: safe, since `iter` cannot be empty due to `iter.peek()`.
        unsafe { iter.step_unchecked() };
        iter.increment_count();
    }
}
// SPECIAL
// -------
/// Determine if the input data matches the special string.
///
/// The comparison is case-sensitive only when the `format` feature is
/// enabled and the format requests case-sensitive special strings;
/// otherwise it ignores case.
///
/// If there's no match, returns 0. Otherwise, returns the byte's cursor
/// after the match.
#[must_use]
#[inline(always)]
pub fn is_special_eq<const FORMAT: u128>(mut byte: Bytes<FORMAT>, string: &'static [u8]) -> usize {
    let format = NumberFormat::<{ FORMAT }> {};
    let case_sensitive = cfg!(feature = "format") && format.case_sensitive_special();
    let is_match = if case_sensitive {
        shared::starts_with(byte.special_iter(), string.iter())
    } else {
        shared::starts_with_uncased(byte.special_iter(), string.iter())
    };
    if !is_match {
        return 0;
    }
    // Trim the iterator afterwards.
    byte.special_iter().peek();
    byte.cursor()
}
/// Parse a positive representation of a special, non-finite float.
///
/// Tries the configured NaN string, then the long infinity string, then the
/// short infinity string, in that order. A candidate is only compared when
/// the remaining input is at least as long as the candidate.
///
/// Returns the matched value and the cursor reported by `is_special_eq`, or
/// `None` if special values are disabled by the format or nothing matched.
#[must_use]
#[cfg_attr(not(feature = "compact"), inline(always))]
pub fn parse_positive_special<F, const FORMAT: u128>(
    byte: Bytes<FORMAT>,
    options: &Options,
) -> Option<(F, usize)>
where
    F: LemireFloat,
{
    let format = NumberFormat::<{ FORMAT }> {};
    if cfg!(feature = "format") && format.no_special() {
        return None;
    }

    // Number of bytes still available from the current position.
    let remaining = byte.buffer_length() - byte.cursor();

    if let Some(nan) = options.nan_string().filter(|text| remaining >= text.len()) {
        let end = is_special_eq::<FORMAT>(byte.clone(), nan);
        if end != 0 {
            return Some((F::NAN, end));
        }
    }
    if let Some(infinity) = options.infinity_string().filter(|text| remaining >= text.len()) {
        let end = is_special_eq::<FORMAT>(byte.clone(), infinity);
        if end != 0 {
            return Some((F::INFINITY, end));
        }
    }
    if let Some(inf) = options.inf_string().filter(|text| remaining >= text.len()) {
        let end = is_special_eq::<FORMAT>(byte.clone(), inf);
        if end != 0 {
            return Some((F::INFINITY, end));
        }
    }
    None
}
/// Parse a partial representation of a special, non-finite float.
///
/// Matches the positive special strings via `parse_positive_special` and
/// negates the result when `is_negative` is set. Returns the value together
/// with the count from the positive parser, or `None` if nothing matched.
#[must_use]
#[inline(always)]
pub fn parse_partial_special<F, const FORMAT: u128>(
    byte: Bytes<FORMAT>,
    is_negative: bool,
    options: &Options,
) -> Option<(F, usize)>
where
    F: LemireFloat,
{
    parse_positive_special::<F, FORMAT>(byte, options).map(|(magnitude, count)| {
        let signed = if is_negative {
            -magnitude
        } else {
            magnitude
        };
        (signed, count)
    })
}
/// Try to parse a special, non-finite float, consuming all input.
///
/// Returns the value only when `parse_partial_special` matches and its
/// reported count equals the buffer length; otherwise returns `None`.
#[must_use]
#[inline(always)]
pub fn parse_special<F, const FORMAT: u128>(
    byte: Bytes<FORMAT>,
    is_negative: bool,
    options: &Options,
) -> Option<F>
where
    F: LemireFloat,
{
    // Capture the length up front: `byte` is moved into the partial parser.
    let total = byte.buffer_length();
    match parse_partial_special::<F, FORMAT>(byte, is_negative, options) {
        Some((float, consumed)) if consumed == total => Some(float),
        _ => None,
    }
}