diff --git a/src/main/java/ezvcard/io/html/HCardParser.java b/src/main/java/ezvcard/io/html/HCardParser.java index 51099ff7..10a4988c 100644 --- a/src/main/java/ezvcard/io/html/HCardParser.java +++ b/src/main/java/ezvcard/io/html/HCardParser.java @@ -13,6 +13,7 @@ import java.util.Iterator; import java.util.List; import java.util.Set; +import java.util.stream.Collectors; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; @@ -291,135 +292,198 @@ private void parseVCardElement(Element vcardElement) { } private void visit(Element element) { - boolean visitChildren = true; - Set classNames = element.classNames(); - for (String className : classNames) { - className = className.toLowerCase(); + Set classNames = adjustClassNames(element); - //give special treatment to certain URLs + for (String className : classNames) { if (urlPropertyName.equals(className)) { - String href = element.attr("href"); - if (href.length() > 0) { - if (!classNames.contains(emailName) && href.matches("(?i)mailto:.*")) { - className = emailName; - } else if (!classNames.contains(telName) && href.matches("(?i)tel:.*")) { - className = telName; - } else { - //try parsing as IMPP - VCardPropertyScribe scribe = index.getPropertyScribe(Impp.class); - - context.getWarnings().clear(); - context.setPropertyName(scribe.getPropertyName()); - try { - VCardProperty property = scribe.parseHtml(new HCardElement(element), context); - vcard.addProperty(property); - warnings.addAll(context.getWarnings()); - continue; - } catch (SkipMeException e) { - //URL is not an instant messenger URL - } catch (CannotParseException e) { - //URL is not an instant messenger URL - } - } + if (tryToParseAsImpp(element)) { + continue; } } - //hCard uses a different name for the CATEGORIES property - if ("category".equals(className)) { - className = categoriesName; - } + parseProperty(element, className); + } - VCardPropertyScribe scribe = index.getPropertyScribe(className); - if (scribe == null) { - //if no scribe is found, and the class name doesn't start with "x-", then it must be an arbitrary CSS class that has nothing to do with vCard - if (!className.startsWith("x-")) { - continue; - } - scribe = new RawPropertyScribe(className); + if (embeddedVCards.isEmpty()) { + //do not visit children if there are any embedded vCards + element.children().forEach(this::visit); + } + } + + private Set adjustClassNames(Element element) { + //@formatter:off + Set classNamesToLower = element.classNames().stream() + .map(String::toLowerCase) + .collect(Collectors.toSet()); + + return classNamesToLower.stream() + .map(className -> adjustClassName(className, element, classNamesToLower)) + .collect(Collectors.toSet()); + //@formatter:on + } + + private String adjustClassName(String className, Element element, Set origClassNames) { + /* + * hCard uses a different name for the CATEGORIES property. + */ + if ("category".equals(className)) { + return categoriesName; + } + + /* + * Give special treatment to certain URLs. + */ + if (urlPropertyName.equals(className)) { + String href = element.attr("href"); + if (!origClassNames.contains(emailName) && href.matches("(?i)mailto:.*")) { + return emailName; } + if (!origClassNames.contains(telName) && href.matches("(?i)tel:.*")) { + return telName; + } + } - context.getWarnings().clear(); - context.setPropertyName(scribe.getPropertyName()); + return className; + } - VCardProperty property; - try { - property = scribe.parseHtml(new HCardElement(element), context); - warnings.addAll(context.getWarnings()); + private boolean tryToParseAsImpp(Element element) { + String href = element.attr("href"); + if (href.isEmpty()) { + return false; + } - //LABELs must be treated specially so they can be matched up with their ADRs - if (property instanceof Label) { - labels.add((Label) property); - continue; - } + VCardPropertyScribe scribe = index.getPropertyScribe(Impp.class); - //add all NICKNAMEs to the same type object - if (property instanceof Nickname) { - Nickname nn = (Nickname) property; - if (nickname == null) { - nickname = nn; - vcard.addProperty(nickname); - } else { - nickname.getValues().addAll(nn.getValues()); - } - continue; - } + context.getWarnings().clear(); + context.setPropertyName(scribe.getPropertyName()); + try { + VCardProperty property = scribe.parseHtml(new HCardElement(element), context); + vcard.addProperty(property); + warnings.addAll(context.getWarnings()); + return true; + } catch (SkipMeException | CannotParseException e) { + //URL is not an instant messenger URL + return false; + } + } - //add all CATEGORIES to the same type object - if (property instanceof Categories) { - Categories c = (Categories) property; - if (categories == null) { - categories = c; - vcard.addProperty(categories); - } else { - categories.getValues().addAll(c.getValues()); - } - continue; - } - } catch (SkipMeException e) { - //@formatter:off - warnings.add(new ParseWarning.Builder(context) - .message(22, e.getMessage()) - .build() - ); - //@formatter:on - continue; - } catch (CannotParseException e) { - //@formatter:off - warnings.add(new ParseWarning.Builder(context) - .message(e) - .build() - ); - //@formatter:on - - property = new RawProperty(className, element.outerHtml()); - } catch (EmbeddedVCardException e) { - if (isChildOf(element, embeddedVCards)) { - //prevents multiple-nested embedded elements from overwriting each other - continue; - } + private VCardPropertyScribe getPropertyScribe(String className) { + VCardPropertyScribe scribe = index.getPropertyScribe(className); + + if (scribe == null) { + /* + * If no scribe is found, and the class name doesn't start with + * "x-", then it must be an arbitrary CSS class that has nothing to + * do with vCard + */ + if (!className.startsWith("x-")) { + return null; + } - property = e.getProperty(); + scribe = new RawPropertyScribe(className); + } - embeddedVCards.add(element); - HCardParser embeddedReader = new HCardParser(element, pageUrl); - try { - VCard embeddedVCard = embeddedReader.readNext(); - e.injectVCard(embeddedVCard); - } finally { - warnings.addAll(embeddedReader.getWarnings()); - IOUtils.closeQuietly(embeddedReader); - } - visitChildren = false; + return scribe; + } + + private VCard parseEmbeddedVCard(Element element) { + embeddedVCards.add(element); + HCardParser embeddedReader = new HCardParser(element, pageUrl); + try { + return embeddedReader.readNext(); + } finally { + warnings.addAll(embeddedReader.getWarnings()); + IOUtils.closeQuietly(embeddedReader); + } + } + + private void parseProperty(Element element, String className) { + VCardPropertyScribe scribe = getPropertyScribe(className); + if (scribe == null) { + //it's a CSS class that's unrelated to hCard + return; + } + + context.getWarnings().clear(); + context.setPropertyName(scribe.getPropertyName()); + + VCardProperty property; + try { + property = scribe.parseHtml(new HCardElement(element), context); + } catch (SkipMeException e) { + //@formatter:off + warnings.add(new ParseWarning.Builder(context) + .message(22, e.getMessage()) + .build() + ); + //@formatter:on + + return; + } catch (CannotParseException e) { + //@formatter:off + warnings.add(new ParseWarning.Builder(context) + .message(e) + .build() + ); + //@formatter:on + + property = new RawProperty(className, element.outerHtml()); + vcard.addProperty(property); + return; + } catch (EmbeddedVCardException e) { + if (isChildOf(element, embeddedVCards)) { + //prevents multiple-nested embedded elements from overwriting each other + return; } + property = e.getProperty(); + + VCard embeddedVCard = parseEmbeddedVCard(element); + e.injectVCard(embeddedVCard); vcard.addProperty(property); + return; + } + + warnings.addAll(context.getWarnings()); + + /* + * LABELs must be treated specially so they can be matched up with their + * ADRs. + */ + if (property instanceof Label) { + labels.add((Label) property); + return; + } + + /* + * Add all NICKNAMEs to the same type object. + */ + if (property instanceof Nickname) { + Nickname nn = (Nickname) property; + if (nickname == null) { + nickname = nn; + vcard.addProperty(nickname); + } else { + nickname.getValues().addAll(nn.getValues()); + } + return; } - if (visitChildren) { - for (Element child : element.children()) { - visit(child); + /* + * Add all CATEGORIES to the same type object. + */ + if (property instanceof Categories) { + Categories c = (Categories) property; + if (categories == null) { + categories = c; + vcard.addProperty(categories); + } else { + categories.getValues().addAll(c.getValues()); } + return; } + + vcard.addProperty(property); } public void close() {