Skip to content

Commit

Permalink
Fix attribute deduplication in form and empty elements (#1950)
Browse files Browse the repository at this point in the history
Add test-case and fixes for attribute deduplication in form and empty elements

Fixes #1949
---------

Co-authored-by: Jonathan Hedley <jonathan@hedley.net>
  • Loading branch information
perlan and jhy authored May 8, 2023
1 parent f284d35 commit 401c8b0
Show file tree
Hide file tree
Showing 2 changed files with 35 additions and 14 deletions.
25 changes: 17 additions & 8 deletions src/main/java/org/jsoup/parser/HtmlTreeBuilder.java
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import org.jsoup.nodes.FormElement;
import org.jsoup.nodes.Node;
import org.jsoup.nodes.TextNode;
import org.jsoup.parser.Token.StartTag;

import javax.annotation.Nullable;
import javax.annotation.ParametersAreNonnullByDefault;
Expand Down Expand Up @@ -227,13 +228,7 @@ void error(HtmlTreeBuilderState state) {
}

Element insert(final Token.StartTag startTag) {
// cleanup duplicate attributes:
if (startTag.hasAttributes() && !startTag.attributes.isEmpty()) {
int dupes = startTag.attributes.deduplicate(settings);
if (dupes > 0) {
error("Dropped duplicate attribute(s) in tag [%s]", startTag.normalName);
}
}
dedupeAttributes(startTag);

// handle empty unknown tags
// when the spec expects an empty tag, will directly hit insertEmpty, so won't generate this fake end tag.
Expand All @@ -250,7 +245,7 @@ Element insert(final Token.StartTag startTag) {
return el;
}

Element insertStartTag(String startTagName) {
Element insertStartTag(String startTagName) {
Element el = new Element(tagFor(startTagName, settings), null);
insert(el);
return el;
Expand All @@ -267,6 +262,8 @@ private void insert(Element el, @Nullable Token token) {
}

Element insertEmpty(Token.StartTag startTag) {
dedupeAttributes(startTag);

Tag tag = tagFor(startTag.name(), settings);
Element el = new Element(tag, null, settings.normalizeAttributes(startTag.attributes));
insertNode(el, startTag);
Expand All @@ -282,6 +279,8 @@ Element insertEmpty(Token.StartTag startTag) {
}

FormElement insertForm(Token.StartTag startTag, boolean onStack, boolean checkTemplateStack) {
dedupeAttributes(startTag);

Tag tag = tagFor(startTag.name(), settings);
FormElement el = new FormElement(tag, null, settings.normalizeAttributes(startTag.attributes));
if (checkTemplateStack) {
Expand Down Expand Up @@ -340,6 +339,16 @@ else if (isFosterInserts() && StringUtil.inSorted(currentElement().normalName(),
onNodeInserted(node, token);
}

/**
 * Cleans up duplicate attributes on the given start tag. If the tag carries a non-empty attribute set,
 * it is deduplicated using the current parse settings; when at least one attribute was dropped, the
 * event is reported through {@code error(...)} together with the tag's normal name.
 *
 * @param startTag the start tag token whose attributes are to be deduplicated in place
 */
private void dedupeAttributes(StartTag startTag) {
    // nothing to clean up when the tag has no attribute set, or an empty one
    if (!startTag.hasAttributes() || startTag.attributes.isEmpty()) {
        return;
    }

    final int dropped = startTag.attributes.deduplicate(settings);
    if (dropped > 0) {
        error("Dropped duplicate attribute(s) in tag [%s]", startTag.normalName);
    }
}

Element pop() {
int size = stack.size();
return stack.remove(size-1);
Expand Down
24 changes: 18 additions & 6 deletions src/test/java/org/jsoup/parser/HtmlParserTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,16 @@
import org.jsoup.nodes.*;
import org.jsoup.safety.Safelist;
import org.jsoup.select.Elements;
import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.Arguments;
import org.junit.jupiter.params.provider.MethodSource;

import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.IOException;
import java.util.List;
import java.util.stream.Stream;

import static org.jsoup.parser.ParseSettings.preserveCase;
import static org.junit.jupiter.api.Assertions.*;
Expand Down Expand Up @@ -46,16 +49,25 @@ public class HtmlParserTest {
assertEquals("foo > bar", p.attr("class"));
}

@Test public void dropsDuplicateAttributes() {
String html = "<p One=One ONE=Two Two=two one=Three One=Four two=Five>Text</p>";
@ParameterizedTest @MethodSource("dupeAttributeData")
public void dropsDuplicateAttributes(String html, String expected) {
Parser parser = Parser.htmlParser().setTrackErrors(10);
Document doc = parser.parseInput(html, "");

Element p = doc.selectFirst("p");
assertEquals("<p one=\"One\" two=\"two\">Text</p>", p.outerHtml()); // normalized names due to lower casing
Element el = doc.expectFirst("body > *");
assertEquals(expected, el.outerHtml()); // normalized names due to lower casing
String tag = el.normalName();

assertEquals(1, parser.getErrors().size());
assertEquals("Dropped duplicate attribute(s) in tag [p]", parser.getErrors().get(0).getErrorMessage());
assertEquals("Dropped duplicate attribute(s) in tag [" + tag + "]", parser.getErrors().get(0).getErrorMessage());
}

/**
 * Argument source for the parameterized {@code dropsDuplicateAttributes} test. Each entry pairs an input
 * HTML snippet that repeats attribute names in varying case with the outer HTML expected after parsing.
 * The three cases cover a {@code p}, an {@code img}, and a {@code form} element.
 */
private static Stream<Arguments> dupeAttributeData() {
    final Arguments paragraphCase = Arguments.of(
        "<p One=One ONE=Two Two=two one=Three One=Four two=Five>Text</p>",
        "<p one=\"One\" two=\"two\">Text</p>");

    final Arguments imageCase = Arguments.of(
        "<img One=One ONE=Two Two=two one=Three One=Four two=Five>",
        "<img one=\"One\" two=\"two\">");

    final Arguments formCase = Arguments.of(
        "<form One=One ONE=Two Two=two one=Three One=Four two=Five></form>",
        "<form one=\"One\" two=\"two\"></form>");

    return Stream.of(paragraphCase, imageCase, formCase);
}

@Test public void retainsAttributesOfDifferentCaseIfSensitive() {
Expand Down

0 comments on commit 401c8b0

Please sign in to comment.