From 9a961632e3e4f2183e19a32e43437622786dcfc9 Mon Sep 17 00:00:00 2001
From: Jorge Bay
Date: Fri, 30 Jun 2023 17:32:09 +0200
Subject: [PATCH] fix: Avoid panicking on regex error in parse_grok() function

Onig regex library, used by the grok library, panics when it hits a retry
(move to the start) limit. This catches the panic and returns an error
instead.

Ref: LOG-17425
---
 lib/stdlib/src/parse_grok.rs | 17 +++++++++++++++--
 1 file changed, 15 insertions(+), 2 deletions(-)

diff --git a/lib/stdlib/src/parse_grok.rs b/lib/stdlib/src/parse_grok.rs
index f9b0381b4..a97f6eb73 100644
--- a/lib/stdlib/src/parse_grok.rs
+++ b/lib/stdlib/src/parse_grok.rs
@@ -21,14 +21,18 @@ static MEZMO_PATTERNS: &[(&str, &str)] = &[
 mod non_wasm {
     use ::value::Value;
     pub(super) use std::sync::Arc;
-    use std::{collections::BTreeMap, fmt};
+    use std::{collections::BTreeMap, fmt, panic};
     use vrl::prelude::*;
     use vrl::state::TypeState;
     use vrl_diagnostic::{Label, Span};
 
     fn parse_grok(value: Value, pattern: Arc<grok::Pattern>) -> Resolved {
         let bytes = value.try_bytes_utf8_lossy()?;
-        match pattern.match_against(&bytes) {
+
+        // Onig regex library, used by the grok library, panics when it hits a retry-limit-in-match.
+        // Fixing it in the grok library (by using another regex method) can be met
+        // with resistance because it requires a new API function, i.e., pattern.try_match_against()
+        let possible_panic = panic::catch_unwind(|| match pattern.match_against(&bytes) {
             Some(matches) => {
                 let mut result = BTreeMap::new();
 
@@ -39,6 +43,15 @@ mod non_wasm {
                 Ok(Value::from(result))
             }
             None => Err("unable to parse input with grok pattern".into()),
+        });
+
+        match possible_panic {
+            Ok(r) => r,
+            Err(_) => Err(format!(
+                "regex with grok pattern caused a panic. Input: '{}', pattern: {:?}",
+                &bytes, pattern
+            )
+            .into()),
         }
     }
 