Skip to content

Commit

Permalink
Merge pull request #1189 from lark-parser/pr1179
Browse files Browse the repository at this point in the history
Minor adjustments to PR #1179
  • Loading branch information
erezsh committed Sep 11, 2022
2 parents ed53bfd + c02d912 commit 8ab4c3b
Show file tree
Hide file tree
Showing 2 changed files with 44 additions and 27 deletions.
66 changes: 40 additions & 26 deletions lark/lark.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from abc import ABC, abstractmethod
import getpass
import sys, os, pickle, hashlib
import tempfile
import types
Expand Down Expand Up @@ -308,28 +309,38 @@ def __init__(self, grammar: 'Union[Grammar, str, IO[str]]', **options) -> None:
if self.options.cache is not True:
raise ConfigurationError("cache argument must be bool or str")

cache_fn = tempfile.gettempdir() + '/.lark_cache_%s_%s_%s.tmp' % (cache_md5, *sys.version_info[:2])

if FS.exists(cache_fn):
logger.debug('Loading grammar from cache: %s', cache_fn)
# Remove options that aren't relevant for loading from cache
for name in (set(options) - _LOAD_ALLOWED_OPTIONS):
del options[name]
try:
username = getpass.getuser()
except Exception:
# The exception raised may be ImportError or OSError in
# the future. For the cache, we don't care about the
# specific reason - we just want a username.
username = "unknown"

cache_fn = tempfile.gettempdir() + "/.lark_cache_%s_%s_%s_%s.tmp" % (username, cache_md5, *sys.version_info[:2])

old_options = self.options
try:
with FS.open(cache_fn, 'rb') as f:
old_options = self.options
try:
file_md5 = f.readline().rstrip(b'\n')
cached_used_files = pickle.load(f)
if file_md5 == cache_md5.encode('utf8') and verify_used_files(cached_used_files):
cached_parser_data = pickle.load(f)
self._load(cached_parser_data, **options)
return
except Exception: # We should probably narrow down which errors we catch here.
logger.exception("Failed to load Lark from cache: %r. We will try to carry on." % cache_fn)

# In theory, the Lark instance might have been messed up by the call to `_load`.
# In practice the only relevant thing that might have been overridden should be `options`
self.options = old_options
logger.debug('Loading grammar from cache: %s', cache_fn)
# Remove options that aren't relevant for loading from cache
for name in (set(options) - _LOAD_ALLOWED_OPTIONS):
del options[name]
file_md5 = f.readline().rstrip(b'\n')
cached_used_files = pickle.load(f)
if file_md5 == cache_md5.encode('utf8') and verify_used_files(cached_used_files):
cached_parser_data = pickle.load(f)
self._load(cached_parser_data, **options)
return
except FileNotFoundError:
# The cache file doesn't exist; parse and compose the grammar as normal
pass
except Exception: # We should probably narrow down which errors we catch here.
logger.exception("Failed to load Lark from cache: %r. We will try to carry on.", cache_fn)

# In theory, the Lark instance might have been messed up by the call to `_load`.
# In practice the only relevant thing that might have been overwritten should be `options`
self.options = old_options


# Parse the grammar file and compose the grammars
Expand Down Expand Up @@ -421,11 +432,14 @@ def __init__(self, grammar: 'Union[Grammar, str, IO[str]]', **options) -> None:

if cache_fn:
logger.debug('Saving grammar to cache: %s', cache_fn)
with FS.open(cache_fn, 'wb') as f:
assert cache_md5 is not None
f.write(cache_md5.encode('utf8') + b'\n')
pickle.dump(used_files, f)
self.save(f, _LOAD_ALLOWED_OPTIONS)
try:
with FS.open(cache_fn, 'wb') as f:
assert cache_md5 is not None
f.write(cache_md5.encode('utf8') + b'\n')
pickle.dump(used_files, f)
self.save(f, _LOAD_ALLOWED_OPTIONS)
except IOError as e:
logger.exception("Failed to save Lark to cache: %r.", cache_fn, e)

if __doc__:
__doc__ += "\n\n" + LarkOptions.OPTIONS_DOC
Expand Down
5 changes: 4 additions & 1 deletion tests/test_cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,11 @@ class MockFS:
def __init__(self):
self.files = {}

def open(self, name, mode=None):
def open(self, name, mode="r", **kwargs):
if name not in self.files:
if "r" in mode:
# If we are reading from a file, it should already exist
raise FileNotFoundError(name)
f = self.files[name] = MockFile()
else:
f = self.files[name]
Expand Down

0 comments on commit 8ab4c3b

Please sign in to comment.