Skip to content

Commit

Permalink
Fixes issue#1261. JRuby's sax parser problem with square brackets in …
Browse files Browse the repository at this point in the history
…a text
  • Loading branch information
yokolet committed Mar 21, 2015
1 parent 0322104 commit 3b121ca
Show file tree
Hide file tree
Showing 5 changed files with 37 additions and 12 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.ja.rdoc
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

* [JRuby] Document#parse should support IO objects that respond to #read. (#1124) (Thanks, Jake Byman!)
* [MRI] Duplicate-id errors when setting the `id` attribute on HTML documents are now silenced. (#1262)
* [JRuby] 角括弧([, ])がテキスト内に存在すると、SAXパーザがテキストを分割してしまう。(#1261)


=== 1.6.6.2 / 2015年01月23日
Expand Down
1 change: 1 addition & 0 deletions CHANGELOG.rdoc
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

* [JRuby] Document#parse should support IO objects that respond to #read. (#1124) (Thanks, Jake Byman!)
* [MRI] Duplicate-id errors when setting the `id` attribute on HTML documents are now silenced. (#1262)
* [JRuby] SAX parser cuts text into pieces when square brackets exist. (#1261)


=== 1.6.6.2 / 2015-01-23
Expand Down
8 changes: 6 additions & 2 deletions ext/java/nokogiri/XmlSaxPushParser.java
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@
import org.jruby.exceptions.RaiseException;
import org.jruby.runtime.ThreadContext;
import org.jruby.runtime.builtin.IRubyObject;
import org.xml.sax.SAXException;

/**
* Class for Nokogiri::XML::SAX::PushParser
Expand Down Expand Up @@ -136,8 +137,11 @@ public IRubyObject native_write(ThreadContext context, IRubyObject chunk,


if (isLast.isTrue()) {
IRubyObject document = invoke(context, this, "document");
invoke(context, document, "end_document");
try {
parserTask.parser.getNokogiriHandler().endDocument();
} catch (SAXException e) {
throw context.getRuntime().newRuntimeError(e.getMessage());
}
terminateTask(context);
} else {
try {
Expand Down
28 changes: 18 additions & 10 deletions ext/java/nokogiri/internals/NokogiriHandler.java
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,8 @@
import static nokogiri.internals.NokogiriHelpers.isNamespace;
import static nokogiri.internals.NokogiriHelpers.stringOrNil;

import java.util.ArrayDeque;
import java.util.LinkedList;
import java.util.Stack;

import nokogiri.XmlSyntaxError;

Expand All @@ -62,7 +62,7 @@
* @author Yoko Harada <yokolet@gmail.com>
*/
public class NokogiriHandler extends DefaultHandler2 implements XmlDeclHandler {
private StringBuffer buffer;
Stack<StringBuffer> characterStack;
private final Ruby ruby;
private final RubyClass attrClass;
private final IRubyObject object;
Expand Down Expand Up @@ -100,6 +100,7 @@ public void setDocumentLocator(Locator locator) {
/**
 * Reports the start of the document to the Ruby-side handler and installs a
 * fresh, empty stack of character buffers for the document being parsed.
 *
 * @throws SAXException declared by the SAX handler contract
 */
@Override
public void startDocument() throws SAXException {
    call("start_document");
    // Parameterized to match the field's declared Stack<StringBuffer> type;
    // a raw Stack here compiles only with an unchecked-conversion warning.
    characterStack = new Stack<StringBuffer>();
}

@Override
Expand All @@ -111,6 +112,13 @@ public void xmlDecl(String version, String encoding, String standalone) {

/**
 * Flushes any text still held on the character stack to the Ruby-side
 * handler as "characters" events (oldest buffer first), then reports
 * "end_document".
 *
 * @throws SAXException declared by the SAX handler contract
 */
@Override
public void endDocument() throws SAXException {
    // The stack is only created in startDocument(); tolerate its absence so
    // an endDocument() without a preceding startDocument() cannot NPE.
    if (characterStack != null) {
        for (StringBuffer pending : characterStack) {
            call("characters", ruby.newString(pending.toString()));
        }
        // Drop the flushed buffers so that a repeated endDocument() call
        // does not emit the same text a second time.
        characterStack.clear();
    }
    call("end_document");
}

Expand Down Expand Up @@ -187,6 +195,7 @@ public void startElement(String uri, String localName, String qName, Attributes
stringOrNil(ruby, getPrefix(qName)),
stringOrNil(ruby, uri),
rubyNSAttr);
characterStack.push(new StringBuffer());
}

private static String[] emptyAttrs =
Expand Down Expand Up @@ -224,6 +233,8 @@ private boolean isFromFragmentHandler() {

@Override
public void endElement(String uri, String localName, String qName) throws SAXException {
StringBuffer sb = characterStack.pop();
call("characters", ruby.newString(sb.toString()));
call("end_element_namespace",
stringOrNil(ruby, localName),
stringOrNil(ruby, getPrefix(qName)),
Expand All @@ -232,11 +243,8 @@ public void endElement(String uri, String localName, String qName) throws SAXExc

@Override
public void characters(char[] ch, int start, int length) throws SAXException {
if (buffer != null) {
buffer.append(new String(ch, start, length));
} else {
call("characters", ruby.newString(new String(ch, start, length)));
}
StringBuffer sb = characterStack.peek();
sb.append(new String(ch, start, length));
}

@Override
Expand All @@ -246,13 +254,13 @@ public void comment(char[] ch, int start, int length) throws SAXException {

@Override
public void startCDATA() throws SAXException {
buffer = new StringBuffer();
characterStack.push(new StringBuffer());
}

@Override
public void endCDATA() throws SAXException {
call("cdata_block", ruby.newString(buffer.toString()));
buffer = null;
StringBuffer sb = characterStack.pop();
call("cdata_block", ruby.newString(sb.toString()));
}

@Override
Expand Down
11 changes: 11 additions & 0 deletions test/xml/sax/test_parser.rb
Original file line number Diff line number Diff line change
Expand Up @@ -376,6 +376,17 @@ def test_recovery_from_incorrect_xml

assert_equal [['Root', []], ['Data', []], ['Item', []], ['Data', []], ['Item', []]], @parser.document.start_elements
end

# Regression test for issue 1261: a text node containing square brackets
# must show up in the collected character data as one intact string.
def test_square_bracket_in_text # issue 1261
  xml = <<-eoxml
<tu tuid="87dea04cf60af103ff09d1dba36ae820" segtype="block">
<prop type="x-smartling-string-variant">en:#:home_page:#:stories:#:[6]:#:name</prop>
<tuv xml:lang="en-US"><seg>Sandy S.</seg></tuv>
</tu>
  eoxml
  @parser.parse(xml)
  # Single assertion in the same assert_* style used by the other tests,
  # instead of wrapping a spec-style expectation inside assert.
  assert_includes @parser.document.data, "en:#:home_page:#:stories:#:[6]:#:name"
end
end
end
end
Expand Down

0 comments on commit 3b121ca

Please sign in to comment.