Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add testcase and fix for attributes deduplication in form and empty elements #1950

Merged
merged 5 commits into from
May 8, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 17 additions & 8 deletions src/main/java/org/jsoup/parser/HtmlTreeBuilder.java
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import org.jsoup.nodes.FormElement;
import org.jsoup.nodes.Node;
import org.jsoup.nodes.TextNode;
import org.jsoup.parser.Token.StartTag;

import javax.annotation.Nullable;
import javax.annotation.ParametersAreNonnullByDefault;
Expand Down Expand Up @@ -227,13 +228,7 @@ void error(HtmlTreeBuilderState state) {
}

Element insert(final Token.StartTag startTag) {
// cleanup duplicate attributes:
if (startTag.hasAttributes() && !startTag.attributes.isEmpty()) {
int dupes = startTag.attributes.deduplicate(settings);
if (dupes > 0) {
error("Dropped duplicate attribute(s) in tag [%s]", startTag.normalName);
}
}
dedupeAttributes(startTag);

// handle empty unknown tags
// when the spec expects an empty tag, will directly hit insertEmpty, so won't generate this fake end tag.
Expand All @@ -250,7 +245,7 @@ Element insert(final Token.StartTag startTag) {
return el;
}

Element insertStartTag(String startTagName) {
/**
 * Builds a new element for the given tag name (resolved via {@code tagFor} with the current settings),
 * inserts it, and returns it.
 */
Element insertStartTag(String startTagName) {
    final Tag tag = tagFor(startTagName, settings);
    final Element element = new Element(tag, null);
    insert(element);
    return element;
Expand All @@ -267,6 +262,8 @@ private void insert(Element el, @Nullable Token token) {
}

Element insertEmpty(Token.StartTag startTag) {
dedupeAttributes(startTag);

Tag tag = tagFor(startTag.name(), settings);
Element el = new Element(tag, null, settings.normalizeAttributes(startTag.attributes));
insertNode(el, startTag);
Expand All @@ -282,6 +279,8 @@ Element insertEmpty(Token.StartTag startTag) {
}

FormElement insertForm(Token.StartTag startTag, boolean onStack, boolean checkTemplateStack) {
dedupeAttributes(startTag);

Tag tag = tagFor(startTag.name(), settings);
FormElement el = new FormElement(tag, null, settings.normalizeAttributes(startTag.attributes));
if (checkTemplateStack) {
Expand Down Expand Up @@ -340,6 +339,16 @@ else if (isFosterInserts() && StringUtil.inSorted(currentElement().normalName(),
onNodeInserted(node, token);
}

/**
 * Deduplicates the attributes held by the supplied start tag, using the current parse settings, and reports
 * a parse error when one or more attributes were dropped. Tags without attributes are left untouched.
 */
private void dedupeAttributes(StartTag startTag) {
    // nothing to clean up when the tag carries no attributes at all
    if (!startTag.hasAttributes() || startTag.attributes.isEmpty()) {
        return;
    }

    final int dropped = startTag.attributes.deduplicate(settings);
    if (dropped > 0) {
        error("Dropped duplicate attribute(s) in tag [%s]", startTag.normalName);
    }
}

Element pop() {
int size = stack.size();
return stack.remove(size-1);
Expand Down
24 changes: 18 additions & 6 deletions src/test/java/org/jsoup/parser/HtmlParserTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,16 @@
import org.jsoup.nodes.*;
import org.jsoup.safety.Safelist;
import org.jsoup.select.Elements;
import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.Arguments;
import org.junit.jupiter.params.provider.MethodSource;

import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.IOException;
import java.util.List;
import java.util.stream.Stream;

import static org.jsoup.parser.ParseSettings.preserveCase;
import static org.junit.jupiter.api.Assertions.*;
Expand Down Expand Up @@ -46,16 +49,25 @@ public class HtmlParserTest {
assertEquals("foo > bar", p.attr("class"));
}

@Test public void dropsDuplicateAttributes() {
String html = "<p One=One ONE=Two Two=two one=Three One=Four two=Five>Text</p>";
@ParameterizedTest @MethodSource("dupeAttributeData")
public void dropsDuplicateAttributes(String html, String expected) {
Parser parser = Parser.htmlParser().setTrackErrors(10);
Document doc = parser.parseInput(html, "");

Element p = doc.selectFirst("p");
assertEquals("<p one=\"One\" two=\"two\">Text</p>", p.outerHtml()); // normalized names due to lower casing
Element el = doc.expectFirst("body > *");
assertEquals(expected, el.outerHtml()); // normalized names due to lower casing
String tag = el.normalName();

assertEquals(1, parser.getErrors().size());
assertEquals("Dropped duplicate attribute(s) in tag [p]", parser.getErrors().get(0).getErrorMessage());
assertEquals("Dropped duplicate attribute(s) in tag [" + tag + "]", parser.getErrors().get(0).getErrorMessage());
}

/**
 * Supplies (input HTML, expected outer HTML) pairs for the duplicate-attribute test: one case each for a
 * {@code p}, an {@code img}, and a {@code form} tag.
 */
private static Stream<Arguments> dupeAttributeData() {
    final Arguments paragraphCase = Arguments.of(
        "<p One=One ONE=Two Two=two one=Three One=Four two=Five>Text</p>",
        "<p one=\"One\" two=\"two\">Text</p>");
    final Arguments imageCase = Arguments.of(
        "<img One=One ONE=Two Two=two one=Three One=Four two=Five>",
        "<img one=\"One\" two=\"two\">");
    final Arguments formCase = Arguments.of(
        "<form One=One ONE=Two Two=two one=Three One=Four two=Five></form>",
        "<form one=\"One\" two=\"two\"></form>");

    return Stream.of(paragraphCase, imageCase, formCase);
}

@Test public void retainsAttributesOfDifferentCaseIfSensitive() {
Expand Down