Skip to content

Commit

Permalink
feat(stdlib): Add ingress_upstreaminfo log format to `parse_nginx_l…
Browse files Browse the repository at this point in the history
…og` function
  • Loading branch information
esergion committed May 12, 2023
1 parent 43df337 commit 61af420
Show file tree
Hide file tree
Showing 4 changed files with 167 additions and 3 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@

## unreleased

- The `ingress_upstreaminfo` log format has been added to the `parse_nginx_log` function (https://github.com/vectordotdev/vrl/pull/193)

## `0.4.0` (2023-05-11)
- consolidated all crates into the root `vrl` crate. The external API stayed the same, with the exception of macros, which are now all exported at the root of the `vrl` crate.
- published VRL to crates.io. Standard crate versioning will now be used instead of git tags.
Expand Down
27 changes: 27 additions & 0 deletions benches/stdlib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1736,6 +1736,33 @@ bench_function! {
})),
}

// Benchmark case for the `ingress_upstreaminfo` format: one access-log line
// with `-` as the remote user and empty alternative-upstream brackets, so
// neither `remote_user` nor `proxy_alternative_upstream_name` is expected.
ingress_upstreaminfo {
    args: func_args![
        value: r#"0.0.0.0 - - [18/Mar/2023:15:00:00 +0000] "GET /some/path HTTP/2.0" 200 12312 "https://10.0.0.1/some/referer" "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36" 462 0.050 [some-upstream-service-9000] [] 10.0.50.80:9000 19437 0.049 200 752178adb17130b291aefd8c386279e7"#,
        format: "ingress_upstreaminfo",
    ],
    want: Ok(value!({
        "remote_addr" => "0.0.0.0",
        // `.into()` converts the parsed fixed-offset timestamp into the type
        // wrapped by `Value::Timestamp`, as the unit tests for this format do.
        "timestamp" => Value::Timestamp(DateTime::parse_from_rfc3339("2023-03-18T15:00:00Z").unwrap().into()),
        "request" => "GET /some/path HTTP/2.0",
        "method" => "GET",
        "path" => "/some/path",
        "protocol" => "HTTP/2.0",
        "status" => 200,
        "body_bytes_size" => 12312,
        "http_referer" => "https://10.0.0.1/some/referer",
        "http_user_agent" => "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36",
        "request_length" => 462,
        "request_time" => 0.050,
        "proxy_upstream_name" => "some-upstream-service-9000",
        "upstream_addr" => "10.0.50.80:9000",
        "upstream_response_length" => 19437,
        "upstream_response_time" => 0.049,
        "upstream_status" => 200,
        "req_id" => "752178adb17130b291aefd8c386279e7",
    })),
}

error {
args: func_args![value: r#"2021/04/01 13:02:31 [error] 31#31: *1 open() "/usr/share/nginx/html/not-found" failed (2: No such file or directory), client: 172.17.0.1, server: localhost, request: "POST /not-found HTTP/1.1", host: "localhost:8081""#,
format: "error"
Expand Down
46 changes: 44 additions & 2 deletions src/stdlib/log_util.rs
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,39 @@ pub(crate) static REGEX_NGINX_COMBINED_LOG: Lazy<Regex> = Lazy::new(|| {
.expect("failed compiling regex for Nginx combined log")
});

/// Regex matching a single Ingress Nginx `upstreaminfo` access-log line.
///
/// The pattern is written in verbose mode (`(?x)`) and is anchored at both
/// ends, allowing only surrounding whitespace. Each log field is exposed as a
/// named capture group. For `remote_addr`, `remote_user`, `http_referer` and
/// `http_user_agent` a literal `-` is accepted in place of a value, in which
/// case the named group does not participate in the match. The
/// `proxy_alternative_upstream_name` group is optional: empty brackets (`[]`)
/// match without capturing it.
///
/// The regex is built inside a `Lazy` initializer; the `expect` panics if the
/// pattern fails to compile.
///
/// NOTE(review): `upstream_response_length`, `upstream_response_time` and
/// `upstream_status` each require digits, and `upstream_addr` is a single
/// whitespace-free token. Lines carrying `-` or a multi-valued list in those
/// positions would not match — confirm whether such lines need support.
///
/// - Ingress Nginx docs: https://kubernetes.github.io/ingress-nginx/user-guide/nginx-configuration/log-format/
#[cfg(feature = "stdlib_parse_nginx_log")]
pub(crate) static REGEX_INGRESS_NGINX_UPSTREAMINFO_LOG: Lazy<Regex> = Lazy::new(|| {
Regex::new(
r#"(?x) # Ignore whitespace and comments in the regex expression.
^\s* # Start with any number of whitespaces
(-|(?P<remote_addr>\S+))\s+ # Match `-` or any non space character
\-\s+ # Always a dash
(-|(?P<remote_user>\S+))\s+ # Match `-` or any non space character
\[(?P<timestamp>[^\]]+)\]\s+ # Match date between brackets
"(?P<request>
(?P<method>\w+)\s+ # Match at least a word
(?P<path>\S+)\s+ # Match any non space character
(?P<protocol>[^"]+)
)"\s+ # Match any non double-quote character
(?P<status>\d+)\s+ # Match numbers
(?P<body_bytes_size>\d+)\s+ # Match numbers
"(-|(?P<http_referer>[^"]+))"\s+ # Match `-` or any non double-quote character
"(-|(?P<http_user_agent>[^"]+))"\s+ # Match `-` or any non double-quote character
(?P<request_length>\d+)\s+ # Match numbers
(?P<request_time>\d+\.\d+)\s+ # Match numbers with dot
\[(?P<proxy_upstream_name>[^\]]+)\]\s+ # Match all characters within square brackets
\[(?P<proxy_alternative_upstream_name>[^\]]+)?\]\s+ # Match all characters within square brackets, optional
(?P<upstream_addr>\S+)\s+ # Match any non space character
(?P<upstream_response_length>\d+)\s+ # Match numbers
(?P<upstream_response_time>\d+\.\d+)\s+ # Match numbers with dot
(?P<upstream_status>\d+)\s+ # Match numbers
(?P<req_id>\S+) # Match any non space character
\s*$ # Match any number of whitespaces (to be discarded).
"#)
.expect("failed compiling regex for Ingress Nginx upstreaminfo log")
});

#[cfg(feature = "stdlib_parse_nginx_log")]
pub(crate) static REGEX_NGINX_ERROR_LOG: Lazy<Regex> = Lazy::new(|| {
Regex::new(
Expand Down Expand Up @@ -180,12 +213,21 @@ fn capture_value(
) -> std::result::Result<Value, String> {
Ok(match name {
"timestamp" => Value::Timestamp(parse_time(value, timestamp_format, timezone)?),
"status" | "size" | "pid" | "tid" | "cid" | "port" => Value::Integer(
"status"
| "size"
| "pid"
| "tid"
| "cid"
| "port"
| "body_bytes_size"
| "request_length"
| "upstream_response_length"
| "upstream_status" => Value::Integer(
value
.parse()
.map_err(|_| format!("failed parsing {name}"))?,
),
"excess" => Value::Float(
"excess" | "request_time" | "upstream_response_time" => Value::Float(
value
.parse()
.map_err(|_| format!("failed parsing {name}"))?,
Expand Down
95 changes: 94 additions & 1 deletion src/stdlib/parse_nginx_log.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,11 @@ fn parse_nginx_log(
}

/// Returns the log format names recognised by this module as `Value`s:
/// `combined`, `error` and `ingress_upstreaminfo`.
///
/// Every name listed here has a matching arm in the format-dispatching
/// `match` expressions of this file, which treat any other name as
/// unreachable.
fn variants() -> Vec<Value> {
    vec![
        value!("combined"),
        value!("error"),
        value!("ingress_upstreaminfo"),
    ]
}

#[derive(Clone, Copy, Debug)]
Expand Down Expand Up @@ -100,6 +104,7 @@ impl Function for ParseNginxLog {
fn regex_for_format(format: &[u8]) -> &Regex {
match format {
b"combined" => &log_util::REGEX_NGINX_COMBINED_LOG,
b"ingress_upstreaminfo" => &log_util::REGEX_INGRESS_NGINX_UPSTREAMINFO_LOG,
b"error" => &log_util::REGEX_NGINX_ERROR_LOG,
_ => unreachable!(),
}
Expand All @@ -108,6 +113,7 @@ fn regex_for_format(format: &[u8]) -> &Regex {
fn time_format_for_format(format: &[u8]) -> String {
match format {
b"combined" => "%d/%b/%Y:%T %z".to_owned(),
b"ingress_upstreaminfo" => "%d/%b/%Y:%T %z".to_owned(),
b"error" => "%Y/%m/%d %H:%M:%S".to_owned(),
_ => unreachable!(),
}
Expand Down Expand Up @@ -145,6 +151,7 @@ impl FunctionExpression for ParseNginxLogFn {
fn type_def(&self, _: &state::TypeState) -> TypeDef {
TypeDef::object(match self.format.as_ref() {
b"combined" => kind_combined(),
b"ingress_upstreaminfo" => kind_ingress_upstreaminfo(),
b"error" => kind_error(),
_ => unreachable!(),
})
Expand All @@ -169,6 +176,34 @@ fn kind_combined() -> BTreeMap<Field, Kind> {
])
}

/// Builds the field-name to `Kind` map describing the object produced for the
/// `ingress_upstreaminfo` log format.
///
/// Fields typed as "bytes or undefined" may be missing from the parsed
/// object; every other field is declared with a single concrete kind.
fn kind_ingress_upstreaminfo() -> BTreeMap<Field, Kind> {
    // Shorthand for fields that may be absent from the result.
    let optional_bytes = || Kind::bytes().or_undefined();

    let mut kinds = BTreeMap::new();
    for (name, kind) in [
        ("remote_addr", optional_bytes()),
        ("remote_user", optional_bytes()),
        ("timestamp", Kind::timestamp()),
        ("request", Kind::bytes()),
        ("method", Kind::bytes()),
        ("path", Kind::bytes()),
        ("protocol", Kind::bytes()),
        ("status", Kind::integer()),
        ("body_bytes_size", Kind::integer()),
        ("http_referer", optional_bytes()),
        ("http_user_agent", optional_bytes()),
        ("request_length", Kind::integer()),
        ("request_time", Kind::float()),
        ("proxy_upstream_name", Kind::bytes()),
        ("proxy_alternative_upstream_name", optional_bytes()),
        ("upstream_addr", Kind::bytes()),
        ("upstream_response_length", Kind::integer()),
        ("upstream_response_time", Kind::float()),
        ("upstream_status", Kind::integer()),
        ("req_id", Kind::bytes()),
    ] {
        kinds.insert(name.into(), kind);
    }
    kinds
}

fn kind_error() -> BTreeMap<Field, Kind> {
BTreeMap::from([
("timestamp".into(), Kind::timestamp()),
Expand Down Expand Up @@ -259,6 +294,64 @@ mod tests {
tdef: TypeDef::object(kind_combined()).fallible(),
}

// Valid `ingress_upstreaminfo` line where the remote user is `-` and the
// alternative-upstream brackets are empty (`[]`); the expected object
// therefore has no `remote_user` or `proxy_alternative_upstream_name` key.
ingress_nginx_upstreaminfo_valid_without_optional_fields {
args: func_args![
value: r#"0.0.0.0 - - [18/Mar/2023:15:00:00 +0000] "GET /some/path HTTP/2.0" 200 12312 "https://10.0.0.1/some/referer" "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36" 462 0.050 [some-upstream-service-9000] [] 10.0.50.80:9000 19437 0.049 200 752178adb17130b291aefd8c386279e7"#,
format: "ingress_upstreaminfo"
],
want: Ok(btreemap! {
"remote_addr" => "0.0.0.0",
"timestamp" => Value::Timestamp(DateTime::parse_from_rfc3339("2023-03-18T15:00:00Z").unwrap().into()),
"request" => "GET /some/path HTTP/2.0",
"method" => "GET",
"path" => "/some/path",
"protocol" => "HTTP/2.0",
"status" => 200,
"body_bytes_size" => 12312,
"http_referer" => "https://10.0.0.1/some/referer",
"http_user_agent" => "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36",
"request_length" => 462,
"request_time" => 0.050,
"proxy_upstream_name" => "some-upstream-service-9000",
"upstream_addr" => "10.0.50.80:9000",
"upstream_response_length" => 19437,
"upstream_response_time" => 0.049,
"upstream_status" => 200,
"req_id" => "752178adb17130b291aefd8c386279e7",
}),
tdef: TypeDef::object(kind_ingress_upstreaminfo()).fallible(),
}

// Valid `ingress_upstreaminfo` line with every field populated: the remote
// user is `bob` and the alternative-upstream brackets carry a name, so both
// `remote_user` and `proxy_alternative_upstream_name` appear in the result.
ingress_nginx_upstreaminfo_valid_all_fields {
args: func_args![
value: r#"0.0.0.0 - bob [18/Mar/2023:15:00:00 +0000] "GET /some/path HTTP/2.0" 200 12312 "https://10.0.0.1/some/referer" "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36" 462 0.050 [some-upstream-service-9000] [some-other-upstream-5000] 10.0.50.80:9000 19437 0.049 200 752178adb17130b291aefd8c386279e7"#,
format: "ingress_upstreaminfo"
],
want: Ok(btreemap! {
"remote_addr" => "0.0.0.0",
"remote_user" => "bob",
"timestamp" => Value::Timestamp(DateTime::parse_from_rfc3339("2023-03-18T15:00:00Z").unwrap().into()),
"request" => "GET /some/path HTTP/2.0",
"method" => "GET",
"path" => "/some/path",
"protocol" => "HTTP/2.0",
"status" => 200,
"body_bytes_size" => 12312,
"http_referer" => "https://10.0.0.1/some/referer",
"http_user_agent" => "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36",
"request_length" => 462,
"request_time" => 0.050,
"proxy_upstream_name" => "some-upstream-service-9000",
"proxy_alternative_upstream_name" => "some-other-upstream-5000",
"upstream_addr" => "10.0.50.80:9000",
"upstream_response_length" => 19437,
"upstream_response_time" => 0.049,
"upstream_status" => 200,
"req_id" => "752178adb17130b291aefd8c386279e7",
}),
tdef: TypeDef::object(kind_ingress_upstreaminfo()).fallible(),
}

error_line_valid {
args: func_args![
value: r#"2021/04/01 13:02:31 [error] 31#31: *1 open() "/usr/share/nginx/html/not-found" failed (2: No such file or directory), client: 172.17.0.1, server: localhost, request: "POST /not-found HTTP/1.1", host: "localhost:8081""#,
Expand Down

0 comments on commit 61af420

Please sign in to comment.