Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Normalize path #86

Merged
merged 5 commits into from
Jun 24, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions CHANGES.rst
Original file line number Diff line number Diff line change
@@ -1,6 +1,11 @@
CHANGES
=======

0.10.3 (2017-06-13)
-------------------

* Prevent double URL args unquoting #83

0.10.2 (2017-05-05)
-------------------

Expand Down
21 changes: 21 additions & 0 deletions tests/test_normalize_path.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
from yarl import _normalize_path as np


def test_no_dots():
    """A path without dot segments is returned as given."""
    normalized = np('path/to')
    assert normalized == 'path/to'


def test_skip_dots():
    """A '.' segment in the middle of the path is dropped."""
    normalized = np('path/./to')
    assert normalized == 'path/to'


def test_dot_at_end():
    """A trailing '.' segment is replaced by a trailing slash."""
    normalized = np('path/to/.')
    assert normalized == 'path/to/'


def test_double_dots():
    """A '..' segment removes the segment that precedes it."""
    normalized = np('path/../to')
    assert normalized == 'to'


def test_extra_double_dots():
    """A '..' segment with nothing left to remove is ignored."""
    normalized = np('path/../../to')
    assert normalized == 'to'
50 changes: 50 additions & 0 deletions tests/test_url.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,11 @@ def test_origin_no_scheme():
url.origin()


def test_drop_dots():
    """Constructing an absolute URL resolves '..' in its path."""
    rendered = str(URL('http://example.com/path/../to'))
    assert rendered == 'http://example.com/to'


def test_abs_cmp():
assert URL('http://example.com:8888') == URL('http://example.com:8888')
assert URL('http://example.com:8888/') == URL('http://example.com:8888/')
Expand Down Expand Up @@ -246,6 +251,11 @@ def test_query_dont_unqoute_twice():
assert url.query['url'] == sample_url


def test_query_nonascii():
    """Non-ASCII query keys and values are exposed decoded via ``query``."""
    parsed = URL('http://example.com?ключ=знач').query
    assert parsed == MultiDict({'ключ': 'знач'})


def test_raw_fragment_empty():
url = URL('http://example.com')
assert '' == url.raw_fragment
Expand Down Expand Up @@ -490,6 +500,11 @@ def test_div_with_colon_and_at():
assert url.raw_path == '/base/path:abc@123'


def test_div_with_dots():
    """Joining with ``/`` resolves '.' and '..' segments in the result."""
    base = URL('http://example.com/base')
    joined = base / '../path/./to'
    assert joined.raw_path == '/path/to'


# comparison and hashing

def test_ne_str():
Expand Down Expand Up @@ -728,6 +743,8 @@ def test_with_port_invalid_type():
with pytest.raises(TypeError):
URL('http://example.com').with_port('123')

# with_path


def test_with_path():
url = URL('http://example.com')
Expand All @@ -741,6 +758,18 @@ def test_with_path_encoded():
) == 'http://example.com/test'


def test_with_path_dots():
    """``with_path`` on an absolute URL turns a trailing '.' into a slash."""
    replaced = URL('http://example.com').with_path('/test/.')
    assert str(replaced) == 'http://example.com/test/'


def test_with_path_relative():
    """``with_path`` replaces the path of a URL that has no host."""
    replaced = URL('/path').with_path('/new')
    assert str(replaced) == '/new'


# with_query

def test_with_query():
url = URL('http://example.com')
assert str(url.with_query({'a': '1'})) == 'http://example.com/?a=1'
Expand Down Expand Up @@ -910,6 +939,8 @@ def test_with_fragment_bad_type():
with pytest.raises(TypeError):
url.with_fragment(123)

# with_name


def test_with_name():
url = URL('http://example.com/a/b')
Expand Down Expand Up @@ -975,6 +1006,16 @@ def test_with_name_within_colon_and_at():
url = URL('http://example.com/oldpath').with_name('path:abc@123')
assert url.raw_path == '/path:abc@123'


def test_with_name_dot():
    """``with_name('.')`` is rejected with ValueError."""
    with pytest.raises(ValueError):
        url = URL('http://example.com')
        url.with_name('.')


def test_with_name_double_dot():
    """``with_name('..')`` is rejected with ValueError."""
    with pytest.raises(ValueError):
        url = URL('http://example.com')
        url.with_name('..')

# is_absolute


Expand Down Expand Up @@ -1544,3 +1585,12 @@ def test_build_query_quoting():
assert u == URL('http://127.0.0.1/файл.jpg?arg=Привет')
assert str(u) == ('http://127.0.0.1/%D1%84%D0%B0%D0%B9%D0%BB.jpg?'
'arg=%D0%9F%D1%80%D0%B8%D0%B2%D0%B5%D1%82')


def test_build_drop_dots():
    """``URL.build`` with a host resolves '..' in the supplied path."""
    built = URL.build(scheme='http', host='example.com', path='/path/../to')
    assert str(built) == 'http://example.com/to'
63 changes: 52 additions & 11 deletions yarl/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

from .quoting import quote, unquote

__version__ = '0.10.2'
__version__ = '0.10.3'

__all__ = ['URL']

Expand Down Expand Up @@ -181,13 +181,14 @@ def __init__(self, val='', *, encoded=False, strict=False):
user += ':' + _quote(val.password)
netloc = user + '@' + netloc

val = SplitResult(
val[0], # scheme
netloc,
_quote(val[2], safe='+@:', protected='/+', strict=strict),
query=_quote(val[3], safe='=+&?/:@',
protected=PROTECT_CHARS, qs=True, strict=strict),
fragment=_quote(val[4], safe='?/:@', strict=strict))
path = _quote(val[2], safe='+@:', protected='/+', strict=strict)
if netloc:
path = _normalize_path(path)

query = _quote(val[3], safe='=+&?/:@',
protected=PROTECT_CHARS, qs=True, strict=strict)
fragment = _quote(val[4], safe='?/:@', strict=strict)
val = SplitResult(val[0], netloc, path, query, fragment)

self._val = val
self._cache = {}
Expand All @@ -207,11 +208,16 @@ def build(cls, *, scheme='', user='', password='', host='', port=None,
raise ValueError(
"Only one of \"query\" or \"query_string\" should be passed")

netloc = cls._make_netloc(user, password, host, port)
path = _quote(path, safe='@:', protected='/')
if netloc:
path = _normalize_path(path)

url = cls(
SplitResult(
scheme,
cls._make_netloc(user, password, host, port),
_quote(path, safe='@:', protected='/'),
netloc,
path,
_quote(query_string),
fragment
),
Expand Down Expand Up @@ -290,6 +296,8 @@ def __truediv__(self, name):
parts = path.rstrip('/').split('/')
parts.append(name)
new_path = '/'.join(parts)
if self.is_absolute():
new_path = _normalize_path(new_path)
return URL(self._val._replace(path=new_path, query='', fragment=''),
encoded=True)

Expand Down Expand Up @@ -464,7 +472,8 @@ def query(self):
Empty value if URL has no query part.

"""
ret = MultiDict(parse_qsl(self.raw_query_string, keep_blank_values=True))
ret = MultiDict(parse_qsl(self.raw_query_string,
keep_blank_values=True))
return MultiDictProxy(ret)

@property
Expand Down Expand Up @@ -708,6 +717,8 @@ def with_path(self, path, encoded=False):
"""Return a new URL with path replaced."""
if not encoded:
path = _quote(path, safe='@:', protected='/', strict=self._strict)
if self.is_absolute():
path = _normalize_path(path)
return URL(self._val._replace(path=path), encoded=True)

def with_query(self, *args, **kwargs):
Expand Down Expand Up @@ -830,6 +841,8 @@ def with_name(self, name):
if '/' in name:
raise ValueError("Slash in name is not allowed")
name = _quote(name, safe='@:', protected='/')
if name in ('.', '..'):
raise ValueError(". and .. values are forbidden")
parts = list(self.raw_parts)
if self.is_absolute():
if len(parts) == 1:
Expand Down Expand Up @@ -873,3 +886,31 @@ def human_repr(self):
self.path,
self.query_string,
self.fragment))


def _normalize_path(path):
# Drop '.' and '..' from path

segments = path.split('/')
resolved_path = []

for seg in segments:
if seg == '..':
try:
resolved_path.pop()
except IndexError:
# ignore any .. segments that would otherwise cause an
# IndexError when popped from resolved_path if
# resolving for rfc3986
pass
elif seg == '.':
continue
else:
resolved_path.append(seg)

if segments[-1] in ('.', '..'):
# do some post-processing here. if the last segment was a relative dir,
# then we need to append the trailing '/'
resolved_path.append('')

return '/'.join(resolved_path)