-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat(stdlib): add punycode encoding functions (#672)
* feat(stdlib): add punycode encoding functions

  This adds `encode_punycode` and `decode_punycode` functions. It also adds tests to confirm the behavior of the `parse_url` function when it comes to punycode.

  Fixes: #659
* Fix changelog entry PR reference
* Add tests and examples of fully ASCII strings to punycode functions
* Add benches for punycode related functions
* Add VRL tests for punycode encoding
* Make punycode functions fallible
* Rename `err` to `errors` in `map_err`
- Loading branch information
Showing
12 changed files
with
278 additions
and
0 deletions.
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
Added `encode_punycode` and `decode_punycode` functions |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
# result: "www.porquénopuedensimplementehablarenespañol.com" | ||
|
||
# Decode the punycode label of the host; the expected Unicode output is listed in the `# result:` header. | ||
decode_punycode!("www.xn--PorqunopuedensimplementehablarenEspaol-fmd56a.com") |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
# result: "www.xn--ihqwcrb4cv8a8dqg056pqjye.com" | ||
|
||
# Encode the non-ASCII label of the host; the expected ASCII form is listed in the `# result:` header. | ||
encode_punycode!("www.他们为什么不说中文.com") |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
# object: { "host": "www.ليهمابتكلموشعربي؟.他们为什么不说中文" } | ||
# result: "www.ليهمابتكلموشعربي؟.他们为什么不说中文" | ||
|
||
# Round trip: encode `.host`, then decode the encoded form again. | ||
# The `# result:` header expects the same string that the `# object:` header supplies as `host`. | ||
encoded = encode_punycode!(.host) | ||
decode_punycode!(encoded) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
# object: { "url": "https://www.CAFé.com" } | ||
# result: { "host": "www.xn--caf-dma.com", "host_decoded": "www.café.com" } | ||
|
||
# Parse the URL stored in `.url`. | ||
parsed_url = parse_url!(.url) | ||
|
||
# Remove `.url` from the event; it is no longer needed once it has been parsed. | ||
del(.url) | ||
|
||
# Keep the host reported by `parse_url` (the `# result:` header expects it in punycode form) | ||
# and store its decoded counterpart next to it. | ||
.host = parsed_url.host | ||
.host_decoded = decode_punycode!(.host) | ||
# The bare `.` is the last expression; the `# result:` header lists both fields set above. | ||
. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,87 @@ | ||
use crate::compiler::prelude::*; | ||
|
||
#[derive(Clone, Copy, Debug)] | ||
pub struct DecodePunycode; | ||
|
||
impl Function for DecodePunycode { | ||
fn identifier(&self) -> &'static str { | ||
"decode_punycode" | ||
} | ||
|
||
fn parameters(&self) -> &'static [Parameter] { | ||
&[Parameter { | ||
keyword: "value", | ||
kind: kind::BYTES, | ||
required: true, | ||
}] | ||
} | ||
|
||
fn compile( | ||
&self, | ||
_state: &state::TypeState, | ||
_ctx: &mut FunctionCompileContext, | ||
arguments: ArgumentList, | ||
) -> Compiled { | ||
let value = arguments.required("value"); | ||
|
||
Ok(DecodePunycodeFn { value }.as_expr()) | ||
} | ||
|
||
fn examples(&self) -> &'static [Example] { | ||
&[ | ||
Example { | ||
title: "punycode string", | ||
source: r#"decode_punycode!("www.xn--caf-dma.com")"#, | ||
result: Ok("www.café.com"), | ||
}, | ||
Example { | ||
title: "ascii string", | ||
source: r#"decode_punycode!("www.cafe.com")"#, | ||
result: Ok("www.cafe.com"), | ||
}, | ||
] | ||
} | ||
} | ||
|
||
#[derive(Clone, Debug)] | ||
struct DecodePunycodeFn { | ||
value: Box<dyn Expression>, | ||
} | ||
|
||
impl FunctionExpression for DecodePunycodeFn { | ||
fn resolve(&self, ctx: &mut Context) -> Resolved { | ||
let value = self.value.resolve(ctx)?; | ||
let string = value.try_bytes_utf8_lossy()?; | ||
|
||
let (encoded, result) = idna::domain_to_unicode(&string); | ||
result.map_err(|errors| format!("unable to decode punycode: {errors}"))?; | ||
|
||
Ok(encoded.into()) | ||
} | ||
|
||
fn type_def(&self, _: &state::TypeState) -> TypeDef { | ||
TypeDef::bytes().fallible() | ||
} | ||
} | ||
|
||
// Cases for `decode_punycode` declared through the `test_function!` macro; each case states | ||
// the call arguments (`args`), the expected outcome (`want`) and the expected type definition (`tdef`). | ||
#[cfg(test)] | ||
mod test { | ||
use super::*; | ||
use crate::value; | ||
|
||
test_function![ | ||
decode_punycode => DecodePunycode; | ||
|
||
// A host with a punycode label is expected to come back in its Unicode form. | ||
demo_string { | ||
args: func_args![value: value!("www.xn--caf-dma.com")], | ||
want: Ok(value!("www.café.com")), | ||
tdef: TypeDef::bytes().fallible(), | ||
} | ||
|
||
// A plain ASCII host is expected to be returned unchanged. | ||
ascii_string { | ||
args: func_args![value: value!("www.cafe.com")], | ||
want: Ok(value!("www.cafe.com")), | ||
tdef: TypeDef::bytes().fallible(), | ||
} | ||
]; | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,98 @@ | ||
use crate::compiler::prelude::*; | ||
|
||
#[derive(Clone, Copy, Debug)] | ||
pub struct EncodePunycode; | ||
|
||
impl Function for EncodePunycode { | ||
fn identifier(&self) -> &'static str { | ||
"encode_punycode" | ||
} | ||
|
||
fn parameters(&self) -> &'static [Parameter] { | ||
&[Parameter { | ||
keyword: "value", | ||
kind: kind::BYTES, | ||
required: true, | ||
}] | ||
} | ||
|
||
fn compile( | ||
&self, | ||
_state: &state::TypeState, | ||
_ctx: &mut FunctionCompileContext, | ||
arguments: ArgumentList, | ||
) -> Compiled { | ||
let value = arguments.required("value"); | ||
|
||
Ok(EncodePunycodeFn { value }.as_expr()) | ||
} | ||
|
||
fn examples(&self) -> &'static [Example] { | ||
&[ | ||
Example { | ||
title: "IDN string", | ||
source: r#"encode_punycode!("www.café.com")"#, | ||
result: Ok("www.xn--caf-dma.com"), | ||
}, | ||
Example { | ||
title: "mixed case string", | ||
source: r#"encode_punycode!("www.CAFé.com")"#, | ||
result: Ok("www.xn--caf-dma.com"), | ||
}, | ||
Example { | ||
title: "ascii string", | ||
source: r#"encode_punycode!("www.cafe.com")"#, | ||
result: Ok("www.cafe.com"), | ||
}, | ||
] | ||
} | ||
} | ||
|
||
#[derive(Clone, Debug)] | ||
struct EncodePunycodeFn { | ||
value: Box<dyn Expression>, | ||
} | ||
|
||
impl FunctionExpression for EncodePunycodeFn { | ||
fn resolve(&self, ctx: &mut Context) -> Resolved { | ||
let value = self.value.resolve(ctx)?; | ||
let string = value.try_bytes_utf8_lossy()?; | ||
|
||
let encoded = idna::domain_to_ascii(&string) | ||
.map_err(|errors| format!("unable to encode to punycode: {errors}"))?; | ||
|
||
Ok(encoded.into()) | ||
} | ||
|
||
fn type_def(&self, _: &state::TypeState) -> TypeDef { | ||
TypeDef::bytes().fallible() | ||
} | ||
} | ||
|
||
// Cases for `encode_punycode` declared through the `test_function!` macro; each case states | ||
// the call arguments (`args`), the expected outcome (`want`) and the expected type definition (`tdef`). | ||
#[cfg(test)] | ||
mod test { | ||
use super::*; | ||
use crate::value; | ||
|
||
test_function![ | ||
encode_punycode => EncodePunycode; | ||
|
||
// A host with a non-ASCII label is expected to come back with an `xn--` label. | ||
idn_string { | ||
args: func_args![value: value!("www.café.com")], | ||
want: Ok(value!("www.xn--caf-dma.com")), | ||
tdef: TypeDef::bytes().fallible(), | ||
} | ||
|
||
// Upper-case input is expected to give the same output as the lower-case `idn_string` case. | ||
mixed_case { | ||
args: func_args![value: value!("www.CAFé.com")], | ||
want: Ok(value!("www.xn--caf-dma.com")), | ||
tdef: TypeDef::bytes().fallible(), | ||
} | ||
|
||
// A plain ASCII host is expected to be returned unchanged. | ||
ascii_string { | ||
args: func_args![value: value!("www.cafe.com")], | ||
want: Ok(value!("www.cafe.com")), | ||
tdef: TypeDef::bytes().fallible(), | ||
} | ||
]; | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters