Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for lazy matchers #185

Merged
merged 1 commit into from
Feb 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
239 changes: 139 additions & 100 deletions setup.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#!/usr/bin/env python
# flake8: noqa
import io
from contextlib import suppress
from contextlib import suppress, contextmanager
from os import fspath
from pathlib import Path
from typing import Optional, List, Dict
Expand Down Expand Up @@ -52,21 +52,6 @@ def run(self) -> None:
f"Unable to find regexes.yaml, should be at {yaml_src!r}"
)

def write_matcher(f, typ: str, fields: List[Optional[object]]):
    """Write a single ``typ(arg, ...),`` line to the binary stream *f*.

    Trailing ``None`` entries of *fields* are omitted from the argument
    list, but at least one field is always kept when *fields* is
    non-empty. Each remaining field is rendered with ``repr``. The
    caller's list is left untouched.

    :param f: writable binary file-like object
    :param typ: name of the callable to emit
    :param fields: positional arguments for the emitted call
    """
    # Work out how many leading fields survive once, instead of building a
    # new shortened list for every trailing None.
    keep = len(fields)
    while keep > 1 and fields[keep - 1] is None:
        keep -= 1

    args = ", ".join(map(repr, fields[:keep]))
    f.write(f" {typ}({args}),\n".encode())

def write_params(fields):
    """Write each field of *fields* on its own ``repr``-formatted line.

    Trailing ``None`` values are skipped, but at least one field is always
    kept when *fields* is non-empty. Output goes to ``fp``, which is taken
    from the enclosing scope. The caller's list is not modified.
    """
    # strip trailing None values (by index, so the argument is not mutated)
    keep = len(fields)
    while keep > 1 and fields[keep - 1] is None:
        keep -= 1

    for field in fields[:keep]:
        fp.write((f" {field!r},\n").encode())

with yaml_src.open("rb") as f:
regexes = yaml.safe_load(f)

Expand All @@ -79,96 +64,150 @@ def write_params(fields):
outdir.mkdir(parents=True, exist_ok=True)

dest = outdir / "_matchers.py"
dest_lazy = outdir / "_lazy.py"
dest_legacy = outdir / "_regexes.py"

with dest.open("wb") as f, dest_legacy.open("wb") as fp:
# fmt: off
f.write(b"""\
with dest.open("wb") as eager, dest_lazy.open("wb") as lazy, dest_legacy.open(
"wb"
) as legacy:
eager = EagerWriter(eager)
lazy = LazyWriter(lazy)
legacy = LegacyWriter(legacy)

for section in ["user_agent_parsers", "os_parsers", "device_parsers"]:
with eager.section(section), lazy.section(section), legacy.section(
section
):
extract = EXTRACTORS[section]
for p in regexes[section]:
el = trim(extract(p))
eager.item(el)
lazy.item(el)
legacy.item(el)
eager.end()
lazy.end()
legacy.end()


def trim(l):
    """Drop trailing ``None`` entries from *l* and return it.

    The list is modified in place and the same object is returned. At
    least one element is always kept, so a list consisting only of
    ``None`` values is reduced to ``[None]``; an empty list stays empty.
    """
    keep = len(l)
    while keep > 1 and l[keep - 1] is None:
        keep -= 1
    # Single truncation instead of popping one element per iteration.
    del l[keep:]
    return l


def _field_extractor(required, *optional):
    """Build a function turning a parser entry into a list of field values.

    The returned function looks up ``required`` with ``[]`` (so a missing
    key raises ``KeyError``) followed by every key in ``optional`` with
    ``.get`` (so missing keys become ``None``), preserving that order.
    """

    def extract(p):
        return [p[required], *(p.get(key) for key in optional)]

    return extract


# Maps each section name to the function extracting, in positional order,
# the fields written out for one entry of that section.
EXTRACTORS = {
    "user_agent_parsers": _field_extractor(
        "regex",
        "family_replacement",
        "v1_replacement",
        "v2_replacement",
    ),
    "os_parsers": _field_extractor(
        "regex",
        "os_replacement",
        "os_v1_replacement",
        "os_v2_replacement",
        "os_v3_replacement",
        "os_v4_replacement",
    ),
    "device_parsers": _field_extractor(
        "regex",
        "regex_flag",
        "device_replacement",
        "brand_replacement",
        "model_replacement",
    ),
}


class Writer:
    """Base class for writers emitting one generated Python module.

    On construction the writer emits an "autogenerated" banner followed by
    ``prefix``. Each section is opened through :meth:`section`, filled with
    :meth:`item` calls, and the file is finished with :meth:`end`.

    Subclasses customise the output through the class-level byte templates
    below; the defaults are neutral so the base class is usable on its own.
    """

    # Written once, right after the banner.
    prefix: bytes = b""
    # Written once by end().
    suffix: bytes = b""
    # Written when a section's ``with`` block exits normally.
    section_end: bytes = b""
    # Section id -> bytes opening that section.
    sections: Dict[str, bytes] = {}
    # Section id -> bytes opening one item (up to and including the "(").
    items: Dict[str, bytes] = {}

    def __init__(self, fp):
        """Bind the writer to the binary stream *fp* and write the header."""
        self.fp = fp
        self.fp.write(
            b"""\
########################################################
# NOTICE: this file is autogenerated from regexes.yaml #
########################################################
"""
        )
        self.fp.write(self.prefix)
        # Id of the most recently opened section; item() uses it to pick
        # the item template.
        self._section = None

    @contextmanager
    def section(self, id):
        """Context manager wrapping the items of section *id*.

        Writes ``sections[id]`` on entry and ``section_end`` on normal
        exit. If the body raises, the exception propagates and
        ``section_end`` is not written.
        """
        self._section = id
        self.fp.write(self.sections[id])
        yield
        self.fp.write(self.section_end)

    def item(self, elements):
        """Write one entry of the current section.

        Output has the shape ``<item template>repr(e1), repr(e2), ...),``
        followed by a newline, e.g. ``DeviceMatcher(re, flag, repl1),``.
        """
        self.fp.write(self.items[self._section])
        self.fp.write(", ".join(map(repr, elements)).encode())
        self.fp.write(b"),\n")

    def end(self):
        """Write ``suffix``, finishing the generated file."""
        self.fp.write(self.suffix)


class LegacyWriter(Writer):
    """Writer for the legacy module layout.

    Produces three module-level lists (``USER_AGENT_PARSERS``,
    ``OS_PARSERS``, ``DEVICE_PARSERS``) whose entries are
    ``UserAgentParser`` / ``OSParser`` / ``DeviceParser`` calls.
    """

    # Module header: public names plus the import of the parser classes.
    prefix = b"""\
__all__ = [
"USER_AGENT_PARSERS",
"DEVICE_PARSERS",
"OS_PARSERS",
]

from .user_agent_parser import UserAgentParser, DeviceParser, OSParser

"""
    # Nothing but a final newline closes the file.
    suffix = b"\n"

    # Each section is its own list assignment; later sections start with
    # blank lines to separate them from the previous closing bracket.
    sections = {
        "user_agent_parsers": b"USER_AGENT_PARSERS = [\n",
        "os_parsers": b"\n\nOS_PARSERS = [\n",
        "device_parsers": b"\n\nDEVICE_PARSERS = [\n",
    }
    # Closes the list opened by the section header.
    section_end = b"]"

    # Constructor call opening each entry of the matching section.
    items = {
        "user_agent_parsers": b" UserAgentParser(",
        "os_parsers": b" OSParser(",
        "device_parsers": b" DeviceParser(",
    }


class EagerWriter(Writer):
    """Writer for the ``MATCHERS`` module layout.

    Emits a single ``MATCHERS`` tuple made of three lists (user agent, OS
    and device matchers), using the matcher classes imported from
    ``.core``.
    """

    # Module header; it ends by opening the tuple and its first list.
    prefix = b"""\
__all__ = ["MATCHERS"]

from typing import Tuple, List
from .core import UserAgentMatcher, OSMatcher, DeviceMatcher

MATCHERS: Tuple[List[UserAgentMatcher], List[OSMatcher], List[DeviceMatcher]] = ([
"""
    # Closes the last list and the tuple.
    suffix = b"])\n"

    # The first list is already opened by the prefix; every following
    # section closes the previous list and opens the next one.
    sections = {
        "user_agent_parsers": b"",
        "os_parsers": b"], [\n",
        "device_parsers": b"], [\n",
    }

    # Constructor call opening each entry of the matching section.
    items = {
        "user_agent_parsers": b" UserAgentMatcher(",
        "os_parsers": b" OSMatcher(",
        "device_parsers": b" DeviceMatcher(",
    }


class LazyWriter(EagerWriter):
prefix = b"""\
__all__ = ["MATCHERS"]

from typing import Tuple, List
from .lazy import UserAgentMatcher, OSMatcher, DeviceMatcher

from .core import Matchers, UserAgentMatcher, OSMatcher, DeviceMatcher

MATCHERS: Matchers = ([
""")
fp.write(b"# -*- coding: utf-8 -*-\n")
fp.write(b"########################################################\n")
fp.write(b"# NOTICE: This file is autogenerated from regexes.yaml #\n")
fp.write(b"########################################################\n")
fp.write(b"\n")
fp.write(b"from .user_agent_parser import (\n")
fp.write(b" UserAgentParser, DeviceParser, OSParser,\n")
fp.write(b")\n")
fp.write(b"\n")
fp.write(b"__all__ = ('USER_AGENT_PARSERS', 'DEVICE_PARSERS', 'OS_PARSERS')\n")
fp.write(b"\n")
fp.write(b"USER_AGENT_PARSERS = [\n")
for device_parser in regexes["user_agent_parsers"]:
write_matcher(f, "UserAgentMatcher", [
device_parser["regex"],
device_parser.get("family_replacement"),
device_parser.get("v1_replacement"),
device_parser.get("v2_replacement"),
])

fp.write(b" UserAgentParser(\n")
write_params([
device_parser["regex"],
device_parser.get("family_replacement"),
device_parser.get("v1_replacement"),
device_parser.get("v2_replacement"),
])
fp.write(b" ),\n")
f.write(b" ], [\n")
fp.write(b"]\n\n")

fp.write(b"OS_PARSERS = [\n")
for device_parser in regexes["os_parsers"]:
write_matcher(f, "OSMatcher", [
device_parser["regex"],
device_parser.get("os_replacement"),
device_parser.get("os_v1_replacement"),
device_parser.get("os_v2_replacement"),
device_parser.get("os_v3_replacement"),
device_parser.get("os_v4_replacement"),
])

fp.write(b" OSParser(\n")
write_params([
device_parser["regex"],
device_parser.get("os_replacement"),
device_parser.get("os_v1_replacement"),
device_parser.get("os_v2_replacement"),
device_parser.get("os_v3_replacement"),
device_parser.get("os_v4_replacement"),
])
fp.write(b" ),\n")
f.write(b" ], [\n")
fp.write(b"]\n\n")

fp.write(b"DEVICE_PARSERS = [\n")
for device_parser in regexes["device_parsers"]:
write_matcher(f, "DeviceMatcher", [
device_parser["regex"],
device_parser.get("regex_flag"),
device_parser.get("device_replacement"),
device_parser.get("brand_replacement"),
device_parser.get("model_replacement"),
])

fp.write(b" DeviceParser(\n")
write_params([
device_parser["regex"],
device_parser.get("regex_flag"),
device_parser.get("device_replacement"),
device_parser.get("brand_replacement"),
device_parser.get("model_replacement"),
])
fp.write(b" ),\n")
f.write(b"])\n")
fp.write(b"]\n")
# fmt: on
MATCHERS: Tuple[List[UserAgentMatcher], List[OSMatcher], List[DeviceMatcher]] = ([
"""


setup(
Expand Down
5 changes: 3 additions & 2 deletions src/ua_parser/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
"UserAgent",
"UserAgentMatcher",
"load_builtins",
"load_lazy_builtins",
"load_data",
"load_yaml",
"parse",
Expand Down Expand Up @@ -65,7 +66,7 @@
)
from .basic import Parser as BasicParser
from .caching import CachingParser, Clearing, LRU, Locking
from .loaders import load_builtins, load_data, load_yaml
from .loaders import load_builtins, load_lazy_builtins, load_data, load_yaml

Re2Parser: Optional[Callable[[Matchers], Parser]] = None
with contextlib.suppress(ImportError):
Expand All @@ -79,7 +80,7 @@ def __getattr__(name: str) -> Parser:
global parser
if name == "parser":
if Re2Parser is not None:
parser = Re2Parser(load_builtins())
parser = Re2Parser(load_lazy_builtins())
else:
parser = CachingParser(
BasicParser(load_builtins()),
Expand Down
10 changes: 10 additions & 0 deletions src/ua_parser/_lazy.pyi
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
__all__ = ["MATCHERS"]

from typing import Tuple, List
from .lazy import UserAgentMatcher, OSMatcher, DeviceMatcher

# Declared type of MATCHERS: a 3-tuple of lists holding, in order, the
# user agent, OS and device matchers (the classes imported from .lazy).
MATCHERS: Tuple[
List[UserAgentMatcher],
List[OSMatcher],
List[DeviceMatcher],
]
11 changes: 9 additions & 2 deletions src/ua_parser/_matchers.pyi
Original file line number Diff line number Diff line change
@@ -1,3 +1,10 @@
from .core import Matchers
__all__ = ["MATCHERS"]

MATCHERS: Matchers
from typing import Tuple, List
from .core import UserAgentMatcher, OSMatcher, DeviceMatcher

# Declared type of MATCHERS: a 3-tuple of lists holding, in order, the
# user agent, OS and device matchers (the classes imported from .core).
MATCHERS: Tuple[
List[UserAgentMatcher],
List[OSMatcher],
List[DeviceMatcher],
]
7 changes: 4 additions & 3 deletions src/ua_parser/basic.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
Device,
DeviceMatcher,
Domain,
Matcher,
Matchers,
OS,
OSMatcher,
Expand All @@ -23,9 +24,9 @@ class Parser(AbstractParser):
when one matches.
"""

user_agent_matchers: List[UserAgentMatcher]
os_matchers: List[OSMatcher]
device_matchers: List[DeviceMatcher]
user_agent_matchers: List[Matcher[UserAgent]]
os_matchers: List[Matcher[OS]]
device_matchers: List[Matcher[Device]]

def __init__(
self,
Expand Down
Loading
Loading