Skip to content

Commit

Permalink
feat(stdlib): add punycode encoding functions (#672)
Browse files Browse the repository at this point in the history
* feat(stdlib): add punycode encoding functions

This adds `encode_punycode` and `decode_punycode` functions.
It also adds tests to confirm `parse_url` function behavior when it comes to punycode.

Fixes: #659

* Fix changelog entry PR reference

* Add tests and examples of fully ASCII strings to punycode functions

* Add benches for punycode related functions

* Add VRL tests for punycode encoding

* Make punycode functions fallible

* Rename `err` to `errors` in `map_err`
  • Loading branch information
esensar committed Feb 7, 2024
1 parent 40cbdee commit 114bb3c
Show file tree
Hide file tree
Showing 12 changed files with 278 additions and 0 deletions.
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@ stdlib = [
"dep:hex",
"dep:hmac",
"dep:hostname",
"dep:idna",
"dep:indexmap",
"dep:md-5",
"dep:nom",
Expand Down Expand Up @@ -123,6 +124,7 @@ exitcode = {version = "1", optional = true }
flate2 = { version = "1.0.28", default-features = false, features = ["default"], optional = true }
hex = { version = "0.4", optional = true }
hmac = { version = "0.12.1", optional = true }
idna = { version = "0.5", optional = true }
iana-time-zone = "0.1.59"
indexmap = { version = "~2.2.2", default-features = false, features = ["std"], optional = true}
indoc = {version = "2.0.4", optional = true }
Expand Down
30 changes: 30 additions & 0 deletions benches/stdlib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ criterion_group!(
decode_base16,
decode_base64,
decode_percent,
decode_punycode,
decrypt,
// TODO: Cannot pass a Path to bench_function
//del,
Expand All @@ -35,6 +36,7 @@ criterion_group!(
encode_json,
encode_logfmt,
encode_percent,
encode_punycode,
encrypt,
ends_with,
// TODO: Cannot pass a Path to bench_function
Expand Down Expand Up @@ -301,6 +303,20 @@ bench_function! {
}
}

// Benchmarks `decode_punycode` with two inputs: a host that contains an
// `xn--` label, and a plain ASCII host whose expected output equals its input.
bench_function! {
decode_punycode => vrl::stdlib::DecodePunycode;

// Host with a punycode (`xn--`) label that decodes to a non-ASCII label.
encoded {
args: func_args![value: "www.xn--caf-dma.com"],
want: Ok("www.café.com"),
}

// Host with no `xn--` label; expected to come back unchanged.
non_encoded {
args: func_args![value: "www.cafe.com"],
want: Ok("www.cafe.com"),
}
}

bench_function! {
decode_mime_q => vrl::stdlib::DecodeMimeQ;

Expand Down Expand Up @@ -443,6 +459,20 @@ bench_function! {
}
}

// Benchmarks `encode_punycode` with two inputs: an internationalized host
// (mixed case, non-ASCII) and a plain ASCII host whose expected output equals
// its input.
bench_function! {
encode_punycode => vrl::stdlib::EncodePunycode;

// Mixed-case host with a non-ASCII character; the expected output is
// lowercase with an `xn--` label.
idn {
args: func_args![value: "www.CAFé.com"],
want: Ok("www.xn--caf-dma.com"),
}

// Already-ASCII host; expected to come back unchanged.
ascii {
args: func_args![value: "www.cafe.com"],
want: Ok("www.cafe.com"),
}
}

bench_function! {
ends_with => vrl::stdlib::EndsWith;

Expand Down
1 change: 1 addition & 0 deletions changelog.d/672.feature.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Added `encode_punycode` and `decode_punycode` functions
3 changes: 3 additions & 0 deletions lib/tests/tests/functions/punycode/decode.vrl
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# result: "www.porquénopuedensimplementehablarenespañol.com"

# The punycode label below contains upper-case letters while the expected
# result above is entirely lower-case.
decode_punycode!("www.xn--PorqunopuedensimplementehablarenEspaol-fmd56a.com")
3 changes: 3 additions & 0 deletions lib/tests/tests/functions/punycode/encode.vrl
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# result: "www.xn--ihqwcrb4cv8a8dqg056pqjye.com"

# Only the non-ASCII label is expected to become an `xn--` label - the ASCII
# `www` and `com` labels appear unchanged in the expected result.
encode_punycode!("www.他们为什么不说中文.com")
5 changes: 5 additions & 0 deletions lib/tests/tests/functions/punycode/encode_decode.vrl
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# object: { "host": "www.ليهمابتكلموشعربي؟.他们为什么不说中文" }
# result: "www.ليهمابتكلموشعربي؟.他们为什么不说中文"

# Round trip - encoding the host and decoding it again is expected to
# reproduce the original host value.
encoded = encode_punycode!(.host)
decode_punycode!(encoded)
12 changes: 12 additions & 0 deletions lib/tests/tests/functions/punycode/url_encode_decode.vrl
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# object: { "url": "https://www.CAFé.com" }
# result: { "host": "www.xn--caf-dma.com", "host_decoded": "www.café.com" }

# parse the URL - per the expected result, the parsed host is already in
# lower-case punycode form
parsed_url = parse_url!(.url)

# delete url - no longer needed
del(.url)

# keep the host as parsed, plus its decoded unicode form
.host = parsed_url.host
.host_decoded = decode_punycode!(.host)

# the last expression is the whole event, which is compared against the
# expected result above
.
87 changes: 87 additions & 0 deletions src/stdlib/decode_punycode.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
use crate::compiler::prelude::*;

/// Marker type for the `decode_punycode` stdlib function; its behavior is
/// provided by the `Function` implementation on this type.
#[derive(Clone, Copy, Debug)]
pub struct DecodePunycode;

impl Function for DecodePunycode {
    /// Name under which the function is invoked.
    fn identifier(&self) -> &'static str {
        "decode_punycode"
    }

    /// The function takes exactly one parameter: a required `value` of kind bytes.
    fn parameters(&self) -> &'static [Parameter] {
        const PARAMETERS: &[Parameter] = &[Parameter {
            keyword: "value",
            kind: kind::BYTES,
            required: true,
        }];

        PARAMETERS
    }

    /// Builds the runtime expression from the required `value` argument.
    fn compile(
        &self,
        _state: &state::TypeState,
        _ctx: &mut FunctionCompileContext,
        arguments: ArgumentList,
    ) -> Compiled {
        let expression = DecodePunycodeFn {
            value: arguments.required("value"),
        };

        Ok(expression.as_expr())
    }

    /// Usage examples: one host with an `xn--` label and one plain ASCII host.
    fn examples(&self) -> &'static [Example] {
        &[
            // A punycode label is turned back into its unicode form.
            Example {
                title: "punycode string",
                source: r#"decode_punycode!("www.xn--caf-dma.com")"#,
                result: Ok("www.café.com"),
            },
            // A host without punycode labels is returned as it was given.
            Example {
                title: "ascii string",
                source: r#"decode_punycode!("www.cafe.com")"#,
                result: Ok("www.cafe.com"),
            },
        ]
    }
}

/// Compiled form of a `decode_punycode` call.
#[derive(Clone, Debug)]
struct DecodePunycodeFn {
// Expression producing the string to decode; resolved on every call.
value: Box<dyn Expression>,
}

impl FunctionExpression for DecodePunycodeFn {
    /// Resolves the argument, converts it to a string and decodes it with
    /// `idna::domain_to_unicode`.
    ///
    /// # Errors
    ///
    /// Fails when the argument cannot be resolved or read as bytes, or when
    /// the conversion reports errors (message prefix
    /// `unable to decode punycode: `).
    fn resolve(&self, ctx: &mut Context) -> Resolved {
        let input = self.value.resolve(ctx)?;
        let domain = input.try_bytes_utf8_lossy()?;

        // The conversion hands back a string together with a separate status;
        // the string is only returned to the caller when the status is `Ok`.
        let (decoded, status) = idna::domain_to_unicode(&domain);

        match status {
            Ok(()) => Ok(decoded.into()),
            Err(errors) => Err(format!("unable to decode punycode: {errors}").into()),
        }
    }

    /// The result is bytes, and the call can fail at runtime.
    fn type_def(&self, _: &state::TypeState) -> TypeDef {
        TypeDef::bytes().fallible()
    }
}

// Unit tests for `decode_punycode`, declared through the `test_function!`
// macro: each case lists the arguments, the expected result and the expected
// type definition.
#[cfg(test)]
mod test {
use super::*;
use crate::value;

test_function![
decode_punycode => DecodePunycode;

// A host with an `xn--` label decodes to its unicode form.
demo_string {
args: func_args![value: value!("www.xn--caf-dma.com")],
want: Ok(value!("www.café.com")),
tdef: TypeDef::bytes().fallible(),
}

// A plain ASCII host is returned unchanged.
ascii_string {
args: func_args![value: value!("www.cafe.com")],
want: Ok(value!("www.cafe.com")),
tdef: TypeDef::bytes().fallible(),
}
];
}
98 changes: 98 additions & 0 deletions src/stdlib/encode_punycode.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
use crate::compiler::prelude::*;

/// Marker type for the `encode_punycode` stdlib function; its behavior is
/// provided by the `Function` implementation on this type.
#[derive(Clone, Copy, Debug)]
pub struct EncodePunycode;

impl Function for EncodePunycode {
    /// Name under which the function is invoked.
    fn identifier(&self) -> &'static str {
        "encode_punycode"
    }

    /// The function takes exactly one parameter: a required `value` of kind bytes.
    fn parameters(&self) -> &'static [Parameter] {
        const PARAMETERS: &[Parameter] = &[Parameter {
            keyword: "value",
            kind: kind::BYTES,
            required: true,
        }];

        PARAMETERS
    }

    /// Builds the runtime expression from the required `value` argument.
    fn compile(
        &self,
        _state: &state::TypeState,
        _ctx: &mut FunctionCompileContext,
        arguments: ArgumentList,
    ) -> Compiled {
        let expression = EncodePunycodeFn {
            value: arguments.required("value"),
        };

        Ok(expression.as_expr())
    }

    /// Usage examples: an internationalized host, the same host in mixed
    /// case, and a plain ASCII host.
    fn examples(&self) -> &'static [Example] {
        &[
            // The non-ASCII label becomes an `xn--` label.
            Example {
                title: "IDN string",
                source: r#"encode_punycode!("www.café.com")"#,
                result: Ok("www.xn--caf-dma.com"),
            },
            // Upper-case input yields the same lower-case result.
            Example {
                title: "mixed case string",
                source: r#"encode_punycode!("www.CAFé.com")"#,
                result: Ok("www.xn--caf-dma.com"),
            },
            // An ASCII-only host is returned as it was given.
            Example {
                title: "ascii string",
                source: r#"encode_punycode!("www.cafe.com")"#,
                result: Ok("www.cafe.com"),
            },
        ]
    }
}

/// Compiled form of an `encode_punycode` call.
#[derive(Clone, Debug)]
struct EncodePunycodeFn {
// Expression producing the string to encode; resolved on every call.
value: Box<dyn Expression>,
}

impl FunctionExpression for EncodePunycodeFn {
    /// Resolves the argument, converts it to a string and encodes it with
    /// `idna::domain_to_ascii`.
    ///
    /// # Errors
    ///
    /// Fails when the argument cannot be resolved or read as bytes, or when
    /// the conversion fails (message prefix
    /// `unable to encode to punycode: `).
    fn resolve(&self, ctx: &mut Context) -> Resolved {
        let input = self.value.resolve(ctx)?;
        let domain = input.try_bytes_utf8_lossy()?;

        match idna::domain_to_ascii(&domain) {
            Ok(ascii) => Ok(ascii.into()),
            Err(errors) => Err(format!("unable to encode to punycode: {errors}").into()),
        }
    }

    /// The result is bytes, and the call can fail at runtime.
    fn type_def(&self, _: &state::TypeState) -> TypeDef {
        TypeDef::bytes().fallible()
    }
}

// Unit tests for `encode_punycode`, declared through the `test_function!`
// macro: each case lists the arguments, the expected result and the expected
// type definition.
#[cfg(test)]
mod test {
use super::*;
use crate::value;

test_function![
encode_punycode => EncodePunycode;

// A host with a non-ASCII label is encoded to an `xn--` label.
idn_string {
args: func_args![value: value!("www.café.com")],
want: Ok(value!("www.xn--caf-dma.com")),
tdef: TypeDef::bytes().fallible(),
}

// Upper-case input produces the same lower-case output as `idn_string`.
mixed_case {
args: func_args![value: value!("www.CAFé.com")],
want: Ok(value!("www.xn--caf-dma.com")),
tdef: TypeDef::bytes().fallible(),
}

// A plain ASCII host is returned unchanged.
ascii_string {
args: func_args![value: value!("www.cafe.com")],
want: Ok(value!("www.cafe.com")),
tdef: TypeDef::bytes().fallible(),
}
];
}
6 changes: 6 additions & 0 deletions src/stdlib/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ cfg_if::cfg_if! {
mod decode_gzip;
mod decode_mime_q;
mod decode_percent;
mod decode_punycode;
mod decode_snappy;
mod decode_zlib;
mod decode_zstd;
Expand All @@ -66,6 +67,7 @@ cfg_if::cfg_if! {
mod encode_key_value;
mod encode_logfmt;
mod encode_percent;
mod encode_punycode;
mod encode_snappy;
mod encode_zlib;
mod encode_zstd;
Expand Down Expand Up @@ -220,6 +222,7 @@ cfg_if::cfg_if! {
pub use decode_gzip::DecodeGzip;
pub use decode_mime_q::DecodeMimeQ;
pub use decode_percent::DecodePercent;
pub use decode_punycode::DecodePunycode;
pub use decode_snappy::DecodeSnappy;
pub use decode_zlib::DecodeZlib;
pub use decode_zstd::DecodeZstd;
Expand All @@ -233,6 +236,7 @@ cfg_if::cfg_if! {
pub use encode_key_value::EncodeKeyValue;
pub use encode_logfmt::EncodeLogfmt;
pub use encode_percent::EncodePercent;
pub use encode_punycode::EncodePunycode;
pub use encode_snappy::EncodeSnappy;
pub use encode_zlib::EncodeZlib;
pub use encode_zstd::EncodeZstd;
Expand Down Expand Up @@ -390,6 +394,7 @@ pub fn all() -> Vec<Box<dyn Function>> {
Box::new(DecodeBase64),
Box::new(DecodeGzip),
Box::new(DecodePercent),
Box::new(DecodePunycode),
Box::new(DecodeMimeQ),
Box::new(DecodeSnappy),
Box::new(DecodeZlib),
Expand All @@ -404,6 +409,7 @@ pub fn all() -> Vec<Box<dyn Function>> {
Box::new(EncodeKeyValue),
Box::new(EncodeLogfmt),
Box::new(EncodePercent),
Box::new(EncodePunycode),
Box::new(EncodeSnappy),
Box::new(EncodeZlib),
Box::new(EncodeZstd),
Expand Down
30 changes: 30 additions & 0 deletions src/stdlib/parse_url.rs
Original file line number Diff line number Diff line change
Expand Up @@ -208,5 +208,35 @@ mod tests {
})),
tdef: TypeDef::object(inner_kind()).fallible(),
}

punycode {
args: func_args![value: value!("https://www.café.com")],
want: Ok(value!({
fragment: (),
host: "www.xn--caf-dma.com",
password: "",
path: "/",
port: (),
query: {},
scheme: "https",
username: "",
})),
tdef: TypeDef::object(inner_kind()).fallible(),
}

punycode_mixed_case {
args: func_args![value: value!("https://www.CAFé.com")],
want: Ok(value!({
fragment: (),
host: "www.xn--caf-dma.com",
password: "",
path: "/",
port: (),
query: {},
scheme: "https",
username: "",
})),
tdef: TypeDef::object(inner_kind()).fallible(),
}
];
}

0 comments on commit 114bb3c

Please sign in to comment.