Skip to content

Commit

Permalink
Fix a bug that invalid notation declaration may be accepted
Browse files Browse the repository at this point in the history
HackerOne: HO-1104077

It's caused by quote character.

Reported by Juho Nurminen. Thanks!!!
  • Loading branch information
kou authored and mame committed Apr 5, 2021
1 parent a659c63 commit 2fe62e2
Show file tree
Hide file tree
Showing 2 changed files with 234 additions and 6 deletions.
59 changes: 53 additions & 6 deletions lib/rexml/parsers/baseparser.rb
Original file line number Diff line number Diff line change
Expand Up @@ -83,9 +83,6 @@ class BaseParser
ATTDEF_RE = /#{ATTDEF}/
ATTLISTDECL_START = /\A\s*<!ATTLIST/um
ATTLISTDECL_PATTERN = /\A\s*<!ATTLIST\s+#{NAME}(?:#{ATTDEF})*\s*>/um
NOTATIONDECL_START = /\A\s*<!NOTATION/um

This comment has been minimized.

Copy link
@MJjames43

MJjames43 May 1, 2021

ATTDEF_RE = /#{ATTDEF}/
ATTLISTDECL_START = /\A\s*<!ATTLIST/um
ATTLISTDECL_PATTERN = /\A\s*<!ATTLIST\s+#{NAME}(?:#{ATTDEF})\s>/um
NOTATIONDECL_START = /\A\s*<!NOTATION/um

PUBLIC = /\A\s*<!NOTATION\s+(\w[\-\w]*)\s+(PUBLIC)\s+(["'])(.*?)\3(?:\s+(["'])(.*?)\5)?\s*>/um
SYSTEM = /\A\s*<!NOTATION\s+(\w[\-\w]*)\s+(SYSTEM)\s+(["'])(.*?)\3\s*>/um

TEXT_PATTERN = /\A([^<]*)/um

Expand All @@ -103,6 +100,10 @@ class BaseParser
GEDECL = "<!ENTITY\\s+#{NAME}\\s+#{ENTITYDEF}\\s*>"
ENTITYDECL = /\s*(?:#{GEDECL})|(?:#{PEDECL})/um

NOTATIONDECL_START = /\A\s*<!NOTATION/um
PUBLIC = /\A\s*<!NOTATION\s+#{NAME}\s+(PUBLIC)\s+#{PUBIDLITERAL}(?:\s+#{SYSTEMLITERAL})?\s*>/um
SYSTEM = /\A\s*<!NOTATION\s+#{NAME}\s+(SYSTEM)\s+#{SYSTEMLITERAL}\s*>/um

EREFERENCE = /&(?!#{NAME};)/

DEFAULT_ENTITIES = {
Expand Down Expand Up @@ -315,12 +316,22 @@ def pull_event
md = nil
if @source.match( PUBLIC )
md = @source.match( PUBLIC, true )
vals = [md[1],md[2],md[4],md[6]]
pubid = system = nil
pubid_literal = md[3]
pubid = pubid_literal[1..-2] if pubid_literal # Remove quote
system_literal = md[4]
system = system_literal[1..-2] if system_literal # Remove quote
vals = [md[1], md[2], pubid, system]
elsif @source.match( SYSTEM )
md = @source.match( SYSTEM, true )
vals = [md[1],md[2],nil,md[4]]
system = nil
system_literal = md[3]
system = system_literal[1..-2] if system_literal # Remove quote
vals = [md[1], md[2], nil, system]
else
raise REXML::ParseException.new( "error parsing notation: no matching pattern", @source )
details = notation_decl_invalid_details
message = "Malformed notation declaration: #{details}"
raise REXML::ParseException.new(message, @source)
end
return [ :notationdecl, *vals ]
when DOCTYPE_END
Expand Down Expand Up @@ -569,6 +580,42 @@ def parse_attributes(prefixes, curr_ns)
end
return attributes, closed
end

def notation_decl_invalid_details
name = /#{NOTATIONDECL_START}\s+#{NAME}/um
public = /#{name}\s+PUBLIC/um
system = /#{name}\s+SYSTEM/um
if @source.match(/#{NOTATIONDECL_START}\s*>/um)
return "name is missing"
elsif not @source.match(/#{name}[\s>]/um)
return "invalid name"
elsif @source.match(/#{name}\s*>/um)
return "ID type is missing"
elsif not @source.match(/#{name}\s+(?:PUBLIC|SYSTEM)[\s>]/um)
return "invalid ID type"
elsif @source.match(/#{public}/um)
if @source.match(/#{public}\s*>/um)
return "public ID literal is missing"
elsif not @source.match(/#{public}\s+#{PUBIDLITERAL}/um)
return "invalid public ID literal"
elsif @source.match(/#{public}\s+#{PUBIDLITERAL}[^\s>]/um)
return "garbage after public ID literal"
elsif not @source.match(/#{public}\s+#{PUBIDLITERAL}\s+#{SYSTEMLITERAL}/um)
return "invalid system literal"
elsif not @source.match(/#{public}\s+#{PUBIDLITERAL}\s+#{SYSTEMLITERAL}\s*>/um)
return "garbage after system literal"
end
elsif @source.match(/#{system}/um)
if @source.match(/#{system}\s*>/um)
return "system literal is missing"
elsif not @source.match(/#{system}\s+#{SYSTEMLITERAL}/um)
return "invalid system literal"
elsif not @source.match(/#{system}\s+#{SYSTEMLITERAL}\s*>/um)
return "garbage after system literal"
end
end
"end > is missing"
end
end
end
end
Expand Down
181 changes: 181 additions & 0 deletions test/parse/test_notation_declaration.rb
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,100 @@ def test_name
doctype = parse("<!NOTATION name PUBLIC 'urn:public-id'>")
assert_equal("name", doctype.notation("name").name)
end

def test_no_name
exception = assert_raise(REXML::ParseException) do
parse(<<-INTERNAL_SUBSET)
<!NOTATION>
INTERNAL_SUBSET
end
assert_equal(<<-DETAIL.chomp, exception.to_s)
Malformed notation declaration: name is missing
Line: 5
Position: 72
Last 80 unconsumed characters:
<!NOTATION> ]> <r/>
DETAIL
end

def test_invalid_name
exception = assert_raise(REXML::ParseException) do
parse(<<-INTERNAL_SUBSET)
<!NOTATION '>
INTERNAL_SUBSET
end
assert_equal(<<-DETAIL.chomp, exception.to_s)
Malformed notation declaration: invalid name
Line: 5
Position: 74
Last 80 unconsumed characters:
<!NOTATION '> ]> <r/>
DETAIL
end

def test_no_id_type
exception = assert_raise(REXML::ParseException) do
parse(<<-INTERNAL_SUBSET)
<!NOTATION name>
INTERNAL_SUBSET
end
assert_equal(<<-DETAIL.chomp, exception.to_s)
Malformed notation declaration: ID type is missing
Line: 5
Position: 77
Last 80 unconsumed characters:
<!NOTATION name> ]> <r/>
DETAIL
end

def test_invalid_id_type
exception = assert_raise(REXML::ParseException) do
parse(<<-INTERNAL_SUBSET)
<!NOTATION name INVALID>
INTERNAL_SUBSET
end
assert_equal(<<-DETAIL.chomp, exception.to_s)
Malformed notation declaration: invalid ID type
Line: 5
Position: 85
Last 80 unconsumed characters:
<!NOTATION name INVALID> ]> <r/>
DETAIL
end
end

class TestExternalID < self
class TestSystem < self
def test_no_literal
exception = assert_raise(REXML::ParseException) do
parse(<<-INTERNAL_SUBSET)
<!NOTATION name SYSTEM>
INTERNAL_SUBSET
end
assert_equal(<<-DETAIL.chomp, exception.to_s)
Malformed notation declaration: system literal is missing
Line: 5
Position: 84
Last 80 unconsumed characters:
<!NOTATION name SYSTEM> ]> <r/>
DETAIL
end

def test_garbage_after_literal
exception = assert_raise(REXML::ParseException) do
parse(<<-INTERNAL_SUBSET)
<!NOTATION name SYSTEM 'system-literal'x'>
INTERNAL_SUBSET
end
assert_equal(<<-DETAIL.chomp, exception.to_s)
Malformed notation declaration: garbage after system literal
Line: 5
Position: 103
Last 80 unconsumed characters:
<!NOTATION name SYSTEM 'system-literal'x'> ]> <r/>
DETAIL
end

def test_single_quote
doctype = parse(<<-INTERNAL_SUBSET)
<!NOTATION name SYSTEM 'system-literal'>
Expand All @@ -44,6 +134,21 @@ def test_double_quote

class TestPublic < self
class TestPublicIDLiteral < self
def test_content_double_quote
exception = assert_raise(REXML::ParseException) do
parse(<<-INTERNAL_SUBSET)
<!NOTATION name PUBLIC 'double quote " is invalid' "system-literal">
INTERNAL_SUBSET
end
assert_equal(<<-DETAIL.chomp, exception.to_s)
Malformed notation declaration: invalid public ID literal
Line: 5
Position: 129
Last 80 unconsumed characters:
<!NOTATION name PUBLIC 'double quote " is invalid' "system-literal"> ]> <r/>
DETAIL
end

def test_single_quote
doctype = parse(<<-INTERNAL_SUBSET)
<!NOTATION name PUBLIC 'public-id-literal' "system-literal">
Expand All @@ -60,6 +165,21 @@ def test_double_quote
end

class TestSystemLiteral < self
def test_garbage_after_literal
exception = assert_raise(REXML::ParseException) do
parse(<<-INTERNAL_SUBSET)
<!NOTATION name PUBLIC 'public-id-literal' 'system-literal'x'>
INTERNAL_SUBSET
end
assert_equal(<<-DETAIL.chomp, exception.to_s)
Malformed notation declaration: garbage after system literal
Line: 5
Position: 123
Last 80 unconsumed characters:
<!NOTATION name PUBLIC 'public-id-literal' 'system-literal'x'> ]> <r/>
DETAIL
end

def test_single_quote
doctype = parse(<<-INTERNAL_SUBSET)
<!NOTATION name PUBLIC "public-id-literal" 'system-literal'>
Expand Down Expand Up @@ -96,5 +216,66 @@ def test_public_system
end
end
end

class TestPublicID < self
def test_no_literal
exception = assert_raise(REXML::ParseException) do
parse(<<-INTERNAL_SUBSET)
<!NOTATION name PUBLIC>
INTERNAL_SUBSET
end
assert_equal(<<-DETAIL.chomp, exception.to_s)
Malformed notation declaration: public ID literal is missing
Line: 5
Position: 84
Last 80 unconsumed characters:
<!NOTATION name PUBLIC> ]> <r/>
DETAIL
end

def test_literal_content_double_quote
exception = assert_raise(REXML::ParseException) do
parse(<<-INTERNAL_SUBSET)
<!NOTATION name PUBLIC 'double quote " is invalid in PubidLiteral'>
INTERNAL_SUBSET
end
assert_equal(<<-DETAIL.chomp, exception.to_s)
Malformed notation declaration: invalid public ID literal
Line: 5
Position: 128
Last 80 unconsumed characters:
<!NOTATION name PUBLIC 'double quote \" is invalid in PubidLiteral'> ]> <r/>
DETAIL
end

def test_garbage_after_literal
exception = assert_raise(REXML::ParseException) do
parse(<<-INTERNAL_SUBSET)
<!NOTATION name PUBLIC 'public-id-literal'x'>
INTERNAL_SUBSET
end
assert_equal(<<-DETAIL.chomp, exception.to_s)
Malformed notation declaration: garbage after public ID literal
Line: 5
Position: 106
Last 80 unconsumed characters:
<!NOTATION name PUBLIC 'public-id-literal'x'> ]> <r/>
DETAIL
end

def test_literal_single_quote
doctype = parse(<<-INTERNAL_SUBSET)
<!NOTATION name PUBLIC 'public-id-literal'>
INTERNAL_SUBSET
assert_equal("public-id-literal", doctype.notation("name").public)
end

def test_literal_double_quote
doctype = parse(<<-INTERNAL_SUBSET)
<!NOTATION name PUBLIC "public-id-literal">
INTERNAL_SUBSET
assert_equal("public-id-literal", doctype.notation("name").public)
end
end
end
end

0 comments on commit 2fe62e2

Please sign in to comment.