feat: Add functions to coerce int and float types (#28)

This adds functions that the JS processor can use to coerce values into integers and floats, attempting to follow the JavaScript conversion rules. It also limits the types that `mezmo_parse_float()` and `mezmo_parse_int()` handle so that they better match the behavior of `parseFloat()` and `parseInt()`. Ref: LOG-17443
mezmo · Jul 26, 2023 · 093162f · 093162f
1 parent a72673a
commit 093162f
Show file tree

Hide file tree

Showing 7 changed files with 342 additions and 48 deletions.
diff --git a/lib/stdlib/Cargo.toml b/lib/stdlib/Cargo.toml
@@ -169,6 +169,8 @@ default = [
     "mezmo_parse_float",
     "mezmo_parse_int",
     "mezmo_relational_comparison",
+    "mezmo_to_int",
+    "mezmo_to_float",
     "mezmo_to_string",
     "mezmo_string_operations",
     "mod",
@@ -335,9 +337,11 @@ merge = []
 mezmo_arithmetic_operation = ["mezmo_to_string"]
 mezmo_is_truthy = []
 mezmo_parse_int = ["parse_int"]
-mezmo_parse_float = ["to_float"]
+mezmo_parse_float = []
 mezmo_relational_comparison = []
 mezmo_string_operations = ["substring"]
+mezmo_to_int = []
+mezmo_to_float = []
 mezmo_to_string = ["to_string"]
 mod = []
 now = ["dep:chrono"]

diff --git a/lib/stdlib/src/lib.rs b/lib/stdlib/src/lib.rs
@@ -235,6 +235,10 @@ mod mezmo_substring;
 mod mezmo_trim_end;
 #[cfg(feature = "mezmo_string_operations")]
 mod mezmo_trim_start;
+#[cfg(feature = "mezmo_to_int")]
+mod mezmo_to_int;
+#[cfg(feature = "mezmo_to_float")]
+mod mezmo_to_float;
 #[cfg(feature = "mezmo_to_string")]
 mod mezmo_to_string;
 
@@ -585,6 +589,10 @@ pub use mezmo_substring::MezmoSubstring;
 pub use mezmo_trim_end::MezmoTrimEnd;
 #[cfg(feature = "mezmo_string_operations")]
 pub use mezmo_trim_start::MezmoTrimStart;
+#[cfg(feature = "mezmo_to_int")]
+pub use mezmo_to_int::MezmoToInt;
+#[cfg(feature = "mezmo_to_float")]
+pub use mezmo_to_float::MezmoToFloat;
 #[cfg(feature = "mezmo_to_string")]
 pub use mezmo_to_string::MezmoToString;
 #[cfg(feature = "mod")]
@@ -958,6 +966,10 @@ pub fn all() -> Vec<Box<dyn vrl::Function>> {
         Box::new(MezmoTrimEnd),
         #[cfg(feature = "mezmo_string_operations")]
         Box::new(MezmoTrimStart),
+        #[cfg(feature = "mezmo_to_int")]
+        Box::new(MezmoToInt),
+        #[cfg(feature = "mezmo_to_float")]
+        Box::new(MezmoToFloat),
         #[cfg(feature = "mezmo_to_string")]
         Box::new(MezmoToString),
         #[cfg(feature = "mod")]

diff --git a/lib/stdlib/src/mezmo_parse_float.rs b/lib/stdlib/src/mezmo_parse_float.rs
@@ -1,18 +1,26 @@
-use crate::to_float;
 use ::value::Value;
 use vrl::prelude::*;
-
-/// Converts any value to float, defaulting to f64(0).
-fn mezmo_parse_float(value: Value) -> f64 {
-    use Value::Float;
-    let v = to_float::to_float(value).unwrap_or_else(|_| Value::from(0.0));
-    match v {
-        Float(v) => v.into_inner(),
-        _ => 0.0,
+use vrl_core::{conversion::Conversion, Resolved};
+
+fn mezmo_parse_float(value: Value) -> Resolved {
+    use Value::{Bytes, Float, Integer};
+    match value {
+        Float(_) => Ok(value),
+        Integer(v) => Ok(Value::from_f64_or_zero(v as f64)),
+        Bytes(v) => Conversion::Float
+            .convert(v)
+            .map_err(|e| e.to_string().into()),
+        v => Err(format!("unable to parse {} into float", v.kind()).into()),
     }
 }
 
-/// Infallible counterpart of ToFloat
+/// Converts int, float, and string values to a float while matching the behavior
+/// of JavaScript's `parseFloat()`. All other types result in an error. This is
+/// also different from `to_float()` in that fallibility is determined
+/// completely at runtime (there's no `type_def()` check on the parameter).
+/// 
+/// FIXME: This doesn't properly handle whitespace or invalid chars at the end
+/// of the float.
 #[derive(Clone, Copy, Debug)]
 pub struct MezmoParseFloat;
 
@@ -53,11 +61,11 @@ struct ParseFloatFn {
 impl FunctionExpression for ParseFloatFn {
     fn resolve(&self, ctx: &mut Context) -> Resolved {
         let value = self.value.resolve(ctx)?;
-        Ok(mezmo_parse_float(value).into())
+        mezmo_parse_float(value)
     }
 
     fn type_def(&self, _state: &state::TypeState) -> TypeDef {
-        TypeDef::float().infallible()
+        TypeDef::float().fallible()
     }
 }
 
@@ -71,25 +79,25 @@ mod tests {
         float {
             args: func_args![value: 20.5],
             want: Ok(20.5),
-            tdef: TypeDef::float().infallible(),
+            tdef: TypeDef::float().fallible(),
         }
 
         integer {
             args: func_args![value: 100],
             want: Ok(100.0),
-            tdef: TypeDef::float().infallible(),
+            tdef: TypeDef::float().fallible(),
         }
 
         string {
             args: func_args![value: "100.1"],
             want: Ok(100.1),
-            tdef: TypeDef::float().infallible(),
+            tdef: TypeDef::float().fallible(),
         }
 
         null {
             args: func_args![value: value!(null)],
-            want: Ok(0.0),
-            tdef: TypeDef::float().infallible(),
+            want: Err("unable to parse null into float"),
+            tdef: TypeDef::float().fallible(),
         }
     ];
 }
diff --git a/lib/stdlib/src/mezmo_parse_int.rs b/lib/stdlib/src/mezmo_parse_int.rs
@@ -1,25 +1,26 @@
 use crate::parse_int;
 use ::value::Value;
 use vrl::prelude::*;
+use vrl_core::Resolved;
 
-/// Converts any value into an int, defaulting to 0.
-fn mezmo_parse_int(value: Value, base: Option<Value>) -> i64 {
+fn mezmo_parse_int(value: Value, base: Option<Value>) -> Resolved {
     use Value::{Bytes, Float, Integer};
     match value {
-        Integer(v) => v,
-        Float(v) => v.into_inner() as i64,
-        Bytes(_) => {
-            let v = parse_int::parse_int(value, base).unwrap_or_else(|_| Value::from(0));
-            match v {
-                Integer(v) => v,
-                _ => 0,
-            }
-        }
-        _ => 0,
+        Integer(_) => Ok(value),
+        Float(v) => Ok(Integer(v.into_inner() as i64)),
+        Bytes(_) => parse_int::parse_int(value, base),
+        v => Err(format!("unable to parse {} into integer", v.kind()).into()),
     }
 }
 
-/// Infallible counterpart of ParseInt
+/// Converts int, float, and string values to an integer while matching the
+/// behavior of JavaScript's `parseInt()`. All other types result in an
+/// error. This is also different from `to_int()` in that fallibility is
+/// determined completely at runtime (there's no `type_def()` check on the
+/// parameter).
+/// 
+/// FIXME: This doesn't properly handle whitespace or invalid chars at the end
+/// of the integer.
 #[derive(Clone, Copy, Debug)]
 pub struct MezmoParseInt;
 
@@ -74,11 +75,11 @@ impl FunctionExpression for ParseIntFn {
             .as_ref()
             .map(|expr| expr.resolve(ctx))
             .transpose()?;
-        Ok(mezmo_parse_int(value, base).into())
+        mezmo_parse_int(value, base)
     }
 
     fn type_def(&self, _state: &state::TypeState) -> TypeDef {
-        TypeDef::integer().infallible()
+        TypeDef::integer().fallible()
     }
 }
 
@@ -92,67 +93,67 @@ mod tests {
         float {
             args: func_args![value: 20.5],
             want: Ok(20),
-            tdef: TypeDef::integer().infallible(),
+            tdef: TypeDef::integer().fallible(),
         }
 
         integer {
             args: func_args![value: 100],
             want: Ok(100),
-            tdef: TypeDef::integer().infallible(),
+            tdef: TypeDef::integer().fallible(),
         }
 
         null {
             args: func_args![value: value!(null)],
-            want: Ok(0),
-            tdef: TypeDef::integer().infallible(),
+            want: Err("unable to parse null into integer"),
+            tdef: TypeDef::integer().fallible(),
         }
 
         text {
             args: func_args![value: "hello"],
-            want: Ok(0),
-            tdef: TypeDef::integer().infallible(),
+            want: Err("could not parse integer: invalid digit found in string"),
+            tdef: TypeDef::integer().fallible(),
         }
 
         boolean {
             args: func_args![value: true],
-            want: Ok(0),
-            tdef: TypeDef::integer().infallible(),
+            want: Err("unable to parse boolean into integer"),
+            tdef: TypeDef::integer().fallible(),
         }
 
         decimal {
             args: func_args![value: "-42"],
             want: Ok(-42),
-            tdef: TypeDef::integer().infallible(),
+            tdef: TypeDef::integer().fallible(),
         }
 
         binary {
             args: func_args![value: "0b1001"],
             want: Ok(9),
-            tdef: TypeDef::integer().infallible(),
+            tdef: TypeDef::integer().fallible(),
         }
 
         octal {
             args: func_args![value: "042"],
             want: Ok(34),
-            tdef: TypeDef::integer().infallible(),
+            tdef: TypeDef::integer().fallible(),
         }
 
         hexadecimal {
             args: func_args![value: "0x2a"],
             want: Ok(42),
-            tdef: TypeDef::integer().infallible(),
+            tdef: TypeDef::integer().fallible(),
         }
 
         zero {
             args: func_args![value: "0"],
             want: Ok(0),
-            tdef: TypeDef::integer().infallible(),
+            tdef: TypeDef::integer().fallible(),
         }
 
         explicit_hexadecimal {
             args: func_args![value: "2a", base: 16],
             want: Ok(42),
-            tdef: TypeDef::integer().infallible(),
+            tdef: TypeDef::integer().fallible(),
         }
     ];
 }