From 89c38aa15a9b89fa81c7ce4c25dbb3ece35de018 Mon Sep 17 00:00:00 2001 From: ex5 <176934+ex5@users.noreply.github.com> Date: Mon, 9 Oct 2023 20:07:15 +0200 Subject: [PATCH 1/3] Allow more missing upstream info in parse_nginx_log --- src/stdlib/log_util.rs | 6 +++--- src/stdlib/parse_nginx_log.rs | 22 ++++++++++++++++++++++ 2 files changed, 25 insertions(+), 3 deletions(-) diff --git a/src/stdlib/log_util.rs b/src/stdlib/log_util.rs index 94da92064c..f438608f2c 100644 --- a/src/stdlib/log_util.rs +++ b/src/stdlib/log_util.rs @@ -142,9 +142,9 @@ pub(crate) static REGEX_INGRESS_NGINX_UPSTREAMINFO_LOG: Lazy<Regex> = Lazy::new( \[(?P<proxy_upstream_name>[^\]]+)\]\s+ # Match all characters within square brackets \[(?P<proxy_alternative_upstream_name>[^\]]+)?\]\s+ # Match all characters within square brackets, optional (?P<upstream_addr>\S+)\s+ # Match any non space character - (?P<upstream_response_length>\d+)\s+ # Match numbers - (?P<upstream_response_time>\d+\.\d+)\s+ # Match numbers with dot - (?P<upstream_status>\d+)\s+ # Match numbers + (-|(?P<upstream_response_length>\d+))\s+ # Match `-` or numbers + (-|(?P<upstream_response_time>\d+\.\d+))\s+ # Match `-` or numbers with dot + (-|(?P<upstream_status>\d+))\s+ # Match `-` or numbers (?P<req_id>\S+) # Match any non space character \s*$ # Match any number of whitespaces (to be discarded). "#) diff --git a/src/stdlib/parse_nginx_log.rs b/src/stdlib/parse_nginx_log.rs index 7556046d24..c80331582b 100644 --- a/src/stdlib/parse_nginx_log.rs +++ b/src/stdlib/parse_nginx_log.rs @@ -339,6 +339,28 @@ mod tests { tdef: TypeDef::object(kind_ingress_upstreaminfo()).fallible(), } + ingress_nginx_upstreaminfo_valid_missing_upstream { + args: func_args![ + value: r#"0.0.0.0 - - [18/Mar/2023:15:00:00 +0000] "GET /some/path HTTP/2.0" 200 12312 "https://10.0.0.1/some/referer" "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36" 462 0.050 [some-upstream-service-9000] [] - - - - 752178adb17130b291aefd8c386279e7"#, + format: "ingress_upstreaminfo" + ], + want: Ok(btreemap! 
{ + "remote_addr" => "0.0.0.0", + "timestamp" => Value::Timestamp(DateTime::parse_from_rfc3339("2023-03-18T15:00:00Z").unwrap().into()), + "request" => "GET /some/path HTTP/2.0", + "status" => 200, + "body_bytes_size" => 12312, + "http_referer" => "https://10.0.0.1/some/referer", + "http_user_agent" => "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36", + "request_length" => 462, + "request_time" => 0.050, + "upstream_addr" => "-", + "proxy_upstream_name" => "some-upstream-service-9000", + "req_id" => "752178adb17130b291aefd8c386279e7", + }), + tdef: TypeDef::object(kind_ingress_upstreaminfo()).fallible(), + } + ingress_nginx_upstreaminfo_valid_all_fields { args: func_args![ value: r#"0.0.0.0 - bob [18/Mar/2023:15:00:00 +0000] "GET /some/path HTTP/2.0" 200 12312 "https://10.0.0.1/some/referer" "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36" 462 0.050 [some-upstream-service-9000] [some-other-upstream-5000] 10.0.50.80:9000 19437 0.049 200 752178adb17130b291aefd8c386279e7"#, From dab11cf0f1ba54ddb704b6c423765c3129352f31 Mon Sep 17 00:00:00 2001 From: ex5 <176934+ex5@users.noreply.github.com> Date: Tue, 10 Oct 2023 18:55:07 +0200 Subject: [PATCH 2/3] Add an entry to changelog --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index ead6128c19..60b7449594 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,8 @@ ## unreleased +- `parse_nginx_log` doesn't fail if `upstream_response_length`, `upstream_response_time`, `upstream_status` are missing (https://github.com/vectordotdev/vrl/pull/498) + #### Features - 'from_unix_timestamp' now accepts a new unit: Microseconds. 
From e92c778d62072cf993992167f08f60ca47162abc Mon Sep 17 00:00:00 2001 From: ex5 <176934+ex5@users.noreply.github.com> Date: Tue, 10 Oct 2023 18:56:37 +0200 Subject: [PATCH 3/3] Change wording of the changelog entry --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 60b7449594..effab6042d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,7 +2,7 @@ ## unreleased -- `parse_nginx_log` doesn't fail if `upstream_response_length`, `upstream_response_time`, `upstream_status` are missing (https://github.com/vectordotdev/vrl/pull/498) +- `parse_nginx_log` no longer fails if `upstream_response_length`, `upstream_response_time`, `upstream_status` are missing (https://github.com/vectordotdev/vrl/pull/498) #### Features - 'from_unix_timestamp' now accepts a new unit: Microseconds.