Skip to content

Commit

Permalink
Remove std_unicode::str::is_utf16
Browse files Browse the repository at this point in the history
It was only accessible through the `#[unstable]` crate std_unicode.

It has never been used in the compiler or standard library
since 47e7a05 added it in 2012
“for OS API interop”.
It can be replaced with a one-liner:

```rust
/// Returns `true` when `slice` is well-formed UTF-16, i.e. every code unit
/// decodes without hitting an unpaired surrogate.
fn is_utf16(slice: &[u16]) -> bool {
    // `all` short-circuits on the first decoding error.
    std::char::decode_utf16(slice.iter().cloned()).all(|r| r.is_ok())
}
```
  • Loading branch information
SimonSapin committed Mar 2, 2017
1 parent 031f9b1 commit 24b39c5
Show file tree
Hide file tree
Showing 4 changed files with 1 addition and 89 deletions.
65 changes: 0 additions & 65 deletions src/libcollectionstest/str.rs
Original file line number Diff line number Diff line change
Expand Up @@ -540,71 +540,6 @@ fn from_utf8_mostly_ascii() {
}
}

#[test]
fn test_is_utf16() {
    use std_unicode::str::is_utf16;

    // Asserts that each given slice of code units is accepted by `is_utf16`.
    macro_rules! accepted {
        ($($units:expr),*) => {{
            $(assert!(is_utf16($units));)*
        }}
    }

    // Asserts that each given slice of code units is rejected by `is_utf16`.
    macro_rules! rejected {
        ($($units:expr),*) => {{
            $(assert!(!is_utf16($units));)*
        }}
    }

    // Units outside the surrogate range, including both of its edges.
    accepted!(&[0x0000],
              &[0x0001, 0x0002],
              &[0xD7FF],
              &[0xE000]);

    // Sequences made only of surrogate pairs (randomly generated with
    // Python 3's .encode('utf-16be')).
    accepted!(&[0xdb54, 0xdf16, 0xd880, 0xdee0, 0xdb6a, 0xdd45],
              &[0xd91f, 0xdeb1, 0xdb31, 0xdd84, 0xd8e2, 0xde14],
              &[0xdb9f, 0xdc26, 0xdb6f, 0xde58, 0xd850, 0xdfae]);

    // Surrogate pairs interleaved with ordinary units (also random).
    accepted!(&[0xd921, 0xdcc2, 0x002d, 0x004d, 0xdb32, 0xdf65],
              &[0xdb45, 0xdd2d, 0x006a, 0xdacd, 0xddfe, 0x0006],
              &[0x0067, 0xd8ff, 0xddb7, 0x000f, 0xd900, 0xdc80]);

    // Hand-written malformed inputs.
    rejected!(
        // surrogate + regular unit
        &[0xdb45, 0x0000],
        // surrogate + lead surrogate
        &[0xd900, 0xd900],
        // unterminated surrogate
        &[0xd8ff],
        // trail surrogate without a lead
        &[0xddb7]);

    // Random byte sequences that Python 3's .decode('utf-16be')
    // failed on.
    rejected!(&[0x5b3d, 0x0141, 0xde9e, 0x8fdc, 0xc6e7],
              &[0xdf5a, 0x82a5, 0x62b9, 0xb447, 0x92f3],
              &[0xda4e, 0x42bc, 0x4462, 0xee98, 0xc2ca],
              &[0xbe00, 0xb04a, 0x6ecb, 0xdd89, 0xe278],
              &[0x0465, 0xab56, 0xdbb6, 0xa893, 0x665e],
              &[0x6b7f, 0x0a19, 0x40f4, 0xa657, 0xdcc5],
              &[0x9b50, 0xda5e, 0x24ec, 0x03ad, 0x6dee],
              &[0x8d17, 0xcaa7, 0xf4ae, 0xdf6e, 0xbed7],
              &[0xdaee, 0x2584, 0x7d30, 0xa626, 0x121a],
              &[0xd956, 0x4b43, 0x7570, 0xccd6, 0x4f4a],
              &[0x9dcf, 0x1b49, 0x4ba5, 0xfce9, 0xdffe],
              &[0x6572, 0xce53, 0xb05a, 0xf6af, 0xdacf],
              &[0x1b90, 0x728c, 0x9906, 0xdb68, 0xf46e],
              &[0x1606, 0xbeca, 0xbe76, 0x860f, 0xdfa5],
              &[0x8b4f, 0xde7a, 0xd220, 0x9fac, 0x2b6f],
              &[0xb8fe, 0xebbe, 0xda32, 0x1a5f, 0x8b8b],
              &[0x934b, 0x8956, 0xc434, 0x1881, 0xddf7],
              &[0x5a95, 0x13fc, 0xf116, 0xd89b, 0x93f9],
              &[0xd640, 0x71f1, 0xdd7d, 0x77eb, 0x1cd8],
              &[0x348b, 0xaef0, 0xdb2c, 0xebf1, 0x1282],
              &[0x50d7, 0xd824, 0x5010, 0xb369, 0x22ea]);
}

#[test]
fn test_as_bytes() {
// no null
Expand Down
2 changes: 1 addition & 1 deletion src/libcollectionstest/string.rs
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,7 @@ fn test_from_utf16() {
let s_as_utf16 = s.encode_utf16().collect::<Vec<u16>>();
let u_as_string = String::from_utf16(&u).unwrap();

assert!(::std_unicode::str::is_utf16(&u));
assert!(::std_unicode::char::decode_utf16(u.iter().cloned()).all(|r| r.is_ok()));
assert_eq!(s_as_utf16, u);

assert_eq!(u_as_string, s);
Expand Down
1 change: 0 additions & 1 deletion src/libstd_unicode/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,6 @@ pub mod char;
/// String-related items, re-exported from the `u_str` module under the
/// public name `str`.
// NOTE(review): the `allow(deprecated)` presumably covers a deprecated item
// among these re-exports; which one is not visible from here, so confirm.
#[allow(deprecated)]
pub mod str {
// Whitespace-splitting iterator and the `UnicodeStr` extension trait.
pub use u_str::{SplitWhitespace, UnicodeStr};
// UTF-16 validity check over `&[u16]`.
pub use u_str::is_utf16;
// Iterator adaptor that encodes `char`s to UTF-16.
pub use u_str::Utf16Encoder;
}

Expand Down
22 changes: 0 additions & 22 deletions src/libstd_unicode/u_str.rs
Original file line number Diff line number Diff line change
Expand Up @@ -77,28 +77,6 @@ impl UnicodeStr for str {
}
}

/// Reports whether the `u16` code units in `v` form valid UTF-16.
///
/// A unit that is a scalar value on its own is always accepted. Any other
/// unit (a surrogate) is accepted only as the first half of a pair: it must
/// be a lead surrogate immediately followed by a trail surrogate. An empty
/// slice is valid.
pub fn is_utf16(v: &[u16]) -> bool {
    let mut units = v.iter().cloned();

    while let Some(first) = units.next() {
        // Units that convert to a `char` by themselves need no partner.
        if char::from_u32(first as u32).is_some() {
            continue;
        }

        // `first` is a surrogate, so a second unit is required.
        let second = match units.next() {
            Some(unit) => unit,
            None => return false,
        };

        // Only surrogates reach this point, so in practice the upper bound
        // on `first` is what rejects a stray trail surrogate.
        let first_is_lead = first >= 0xD7FF && first <= 0xDBFF;
        let second_is_trail = second >= 0xDC00 && second <= 0xDFFF;
        if !(first_is_lead && second_is_trail) {
            return false;
        }
    }

    // Ran out of input on a unit boundary: everything decoded cleanly.
    true
}

/// Iterator adaptor for encoding `char`s to UTF-16.
#[derive(Clone)]
pub struct Utf16Encoder<I> {
Expand Down

0 comments on commit 24b39c5

Please sign in to comment.