Skip to content

Commit

Permalink
feat(mezmo_redact): Support delimited credit card matching
Browse files Browse the repository at this point in the history
Update the credit card matching to support delimited
card numbers.
Fix bug in the IPv4 address pattern.
Always redact phone number pattern last to avoid conflicts
with credit card pattern
  • Loading branch information
biblicalph committed Jul 11, 2024
1 parent b456e73 commit ba489d5
Show file tree
Hide file tree
Showing 2 changed files with 182 additions and 149 deletions.
24 changes: 14 additions & 10 deletions src/stdlib/mezmo_patterns.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,19 +10,23 @@ pub(crate) const US_SOCIAL_SECURITY_NUMBER_PATTERN: &str = r#"(?x)
// Verbose-mode regex for an email address.
// - Local part: dot-separated runs of allowed characters, or a double-quoted string.
// - Domain part (after `@`): dot-separated labels, or a bracketed literal such as `[10.0.0.1]`.
// The `i` flag is set for the whole pattern: the inner `(?i:...)` group closes before `@`, so
// on its own it would leave the lowercase-only domain classes unable to match `EXAMPLE.COM`.
// `#` is written as `\#` because a bare `#` starts a comment in verbose mode.
pub(crate) const EMAIL_ADDRESS_PATTERN: &str = r#"(?xi)
(?i:[a-z0-9!\#$%&'*+/=?^_`{|}~-]+(?:\.[a-z0-9!\#$%&'*+/=?^_`{|}~-]+)*|"(?:[\x01-\x08\x0b\x0c\x0e-\x1f\x21\x23-\x5b\x5d-\x7f]|\\[\x01-\x09\x0b\x0c\x0e-\x7f])*")@(?:(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\.)+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?|\[(?:(?:(2(5[0-5]|[0-4][0-9])|1[0-9][0-9]|[1-9]?[0-9]))\.){3}(?:(2(5[0-5]|[0-4][0-9])|1[0-9][0-9]|[1-9]?[0-9])|[a-z0-9-]*[a-z0-9]:(?:[\x01-\x08\x0b\x0c\x0e-\x1f\x21-\x5a\x53-\x7f]|\\[\x01-\x09\x0b\x0c\x0e-\x7f])+)\])
"#;
// See: https://docs.trellix.com/bundle/data-loss-prevention-11.10.x-classification-definitions-reference-guide/page/GUID-B8D29ECE-E70A-401E-B18D-B773F4FF71ED.html
// And: https://baymard.com/checkout-usability/credit-card-patterns
pub(crate) const CREDIT_CARD_PATTERN: &str = r#"(?x)
\b(?:4[0-9]{12}(?:[0-9]{3})?| # visa card numbers (starts with 4 and with a total of 13 or 16 digits)
[25][1-7][0-9]{14}| # mastercard numbers (old range: 51-57, new range: 21-27)
6(?:011|5[0-9][0-9])[0-9]{12}| # discover card numbers (starts with 6011 or 65)
3[47][0-9]{13}| # amex numbers (starts with 340 or 379)
3(?:0[0-5]|[68][0-9])[0-9]{11}| # diners club numbers (starts with 300-305 or 360-389)
(?:2131|1800|35\d{3})\d{11})\b # JCB card numbers (starts with 2131 or 1800 or 35)
"#;
\b(?: # single group around every brand: alternation binds loosest, so without it the leading and trailing word boundaries would guard only the first and last alternatives
(?:4\d{3}[-\s]?\d{4}[-\s]?\d{4}[-\s]?\d{4}|4\d{12})| # visa cards, 13 or 16 digits starting with 4. format pattern for 13 digit is unknown
(?:(?:2[2-7]|5[1-7])\d{2}[-\s]?\d{4}[-\s]?\d{4}[-\s]?\d{4})| # mastercard, 16 digits. newer 2-series prefixes: 22-27, older 5-series prefixes: 51-57
(?:(?:2131|1800|35\d{2})[-\s]?\d{4}[-\s]?\d{4}[-\s]?\d{4})| # JCB cards, 16 digits (starts with 2131, 1800 or 35)
(?:(?:6011|64[4-9]\d|65\d{2})[-\s]?\d{4}[-\s]?\d{4}[-\s]?\d{4})| # Discover cards, 16 digits (starts with 6011, 644, 645, 646, 647, 648, 649 or 65)
(?:3[47]\d{2}[-\s]?\d{6}[-\s]?\d{5})| # AMEX cards, 15 digits starting with 34 or 37
(?:(?:30[0-5]\d|309\d|3[689]\d{2})[-\s]?\d{6}[-\s]?\d{4}) # Diner's club, 14 digits, starting with 300-305 or 309 or 36,38,39
)\b
"#;
// Verbose-mode regex for a dotted IPv4 address: three octets each followed by a period, then a
// final octet, with a word boundary on each side. Each octet alternative covers 250-255,
// 200-249, or one to three digits with an optional leading 0 or 1 (so leading zeros are accepted).
// The pattern lives in a raw string, so escapes take a single backslash (`\b`, `\.`); a doubled
// backslash would be read by the regex engine as a literal backslash.
pub(crate) const IPV4_ADDRESS_PATTERN: &str = r#"(?x)
\\b(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\.){3} # first 3 octets (0.0.0.) with trailing period
(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\b # last octet
\b(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3} # first 3 octets (0.0.0.) with trailing period
(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\b # last octet
"#;
pub(crate) const PHONE_NUMBER_PATTERN: &str = r#"(?mx)
pub(crate) const PHONE_NUMBER_PATTERN: &str = r#"(?x)
(?:
(?: # optional country code
(?:\+\d{1,3}|\b\d{1,3})[\s.-]? # or used to prevent \b from consuming leading +
Expand Down
Loading

0 comments on commit ba489d5

Please sign in to comment.