Skip to content

Commit

Permalink
Merge pull request #1251 from wilrodriguez/master
Browse files: browse the repository at this point in the history
Replace md5 hashing with sha256.
  • Loading branch information
erezsh committed Mar 9, 2023
2 parents 514b3b2 + 71e175b commit b94cbc1
Show file tree
Hide file tree
Showing 2 changed files with 14 additions and 14 deletions.
16 changes: 8 additions & 8 deletions lark/lark.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -20,7 +20,7 @@

from .exceptions import ConfigurationError, assert_config, UnexpectedInput
from .utils import Serialize, SerializeMemoizer, FS, isascii, logger
from .load_grammar import load_grammar, FromPackageLoader, Grammar, verify_used_files, PackageResource, md5_digest
from .load_grammar import load_grammar, FromPackageLoader, Grammar, verify_used_files, PackageResource, sha256_digest
from .tree import Tree
from .common import LexerConf, ParserConf, _ParserArgType, _LexerArgType

Expand Down Expand Up @@ -292,7 +292,7 @@ def __init__(self, grammar: 'Union[Grammar, str, IO[str]]', **options) -> None:
grammar = read()

cache_fn = None
cache_md5 = None
cache_sha256 = None
if isinstance(grammar, str):
self.source_grammar = grammar
if self.options.use_bytes:
Expand All @@ -307,7 +307,7 @@ def __init__(self, grammar: 'Union[Grammar, str, IO[str]]', **options) -> None:
options_str = ''.join(k+str(v) for k, v in options.items() if k not in unhashable)
from . import __version__
s = grammar + options_str + __version__ + str(sys.version_info[:2])
cache_md5 = md5_digest(s)
cache_sha256 = sha256_digest(s)

if isinstance(self.options.cache, str):
cache_fn = self.options.cache
Expand All @@ -323,7 +323,7 @@ def __init__(self, grammar: 'Union[Grammar, str, IO[str]]', **options) -> None:
# specific reason - we just want a username.
username = "unknown"

cache_fn = tempfile.gettempdir() + "/.lark_cache_%s_%s_%s_%s.tmp" % (username, cache_md5, *sys.version_info[:2])
cache_fn = tempfile.gettempdir() + "/.lark_cache_%s_%s_%s_%s.tmp" % (username, cache_sha256, *sys.version_info[:2])

old_options = self.options
try:
Expand All @@ -332,9 +332,9 @@ def __init__(self, grammar: 'Union[Grammar, str, IO[str]]', **options) -> None:
# Remove options that aren't relevant for loading from cache
for name in (set(options) - _LOAD_ALLOWED_OPTIONS):
del options[name]
file_md5 = f.readline().rstrip(b'\n')
file_sha256 = f.readline().rstrip(b'\n')
cached_used_files = pickle.load(f)
if file_md5 == cache_md5.encode('utf8') and verify_used_files(cached_used_files):
if file_sha256 == cache_sha256.encode('utf8') and verify_used_files(cached_used_files):
cached_parser_data = pickle.load(f)
self._load(cached_parser_data, **options)
return
Expand Down Expand Up @@ -440,8 +440,8 @@ def __init__(self, grammar: 'Union[Grammar, str, IO[str]]', **options) -> None:
logger.debug('Saving grammar to cache: %s', cache_fn)
try:
with FS.open(cache_fn, 'wb') as f:
assert cache_md5 is not None
f.write(cache_md5.encode('utf8') + b'\n')
assert cache_sha256 is not None
f.write(cache_sha256.encode('utf8') + b'\n')
pickle.dump(used_files, f)
self.save(f, _LOAD_ALLOWED_OPTIONS)
except IOError as e:
Expand Down
12 changes: 6 additions & 6 deletions lark/load_grammar.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -1314,7 +1314,7 @@ def do_import(self, dotted_path: Tuple[str, ...], base_path: Optional[str], alia
except IOError:
continue
else:
h = md5_digest(text)
h = sha256_digest(text)
if self.used_files.get(joined_path, h) != h:
raise RuntimeError("Grammar file was changed during importing")
self.used_files[joined_path] = h
Expand Down Expand Up @@ -1393,7 +1393,7 @@ def verify_used_files(file_hashes):
if text is None: # We don't know how to load the path. ignore it.
continue

current = md5_digest(text)
current = sha256_digest(text)
if old != current:
logger.info("File %r changed, rebuilding Parser" % path)
return False
Expand All @@ -1411,13 +1411,13 @@ def load_grammar(grammar, source, import_paths, global_keep_all_tokens):
return builder.build(), builder.used_files


def md5_digest(s: str) -> str:
"""Get the md5 digest of a string
def sha256_digest(s: str) -> str:
"""Get the sha256 digest of a string
Supports the `usedforsecurity` argument for Python 3.9+ to allow running on
a FIPS-enabled system.
"""
if sys.version_info >= (3, 9):
return hashlib.md5(s.encode('utf8'), usedforsecurity=False).hexdigest()
return hashlib.sha256(s.encode('utf8'), usedforsecurity=False).hexdigest()
else:
return hashlib.md5(s.encode('utf8')).hexdigest()
return hashlib.sha256(s.encode('utf8')).hexdigest()

0 comments on commit b94cbc1

Please sign in to comment.