Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add testcase and fix for attributes deduplication in form and empty elements #1950

Merged
merged 5 commits into from
May 8, 2023
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Simplified dupe tests and method name
  • Loading branch information
jhy committed May 8, 2023
commit 5d5a410e3fd42c614883fdbc7f982b93e8c89477
8 changes: 4 additions & 4 deletions src/main/java/org/jsoup/parser/HtmlTreeBuilder.java
Original file line number Diff line number Diff line change
Expand Up @@ -228,7 +228,7 @@ void error(HtmlTreeBuilderState state) {
}

Element insert(final Token.StartTag startTag) {
dedupliateTagAttributes(startTag);
dedupeAttributes(startTag);

// handle empty unknown tags
// when the spec expects an empty tag, will directly hit insertEmpty, so won't generate this fake end tag.
Expand Down Expand Up @@ -262,7 +262,7 @@ private void insert(Element el, @Nullable Token token) {
}

Element insertEmpty(Token.StartTag startTag) {
dedupliateTagAttributes(startTag);
dedupeAttributes(startTag);

Tag tag = tagFor(startTag.name(), settings);
Element el = new Element(tag, null, settings.normalizeAttributes(startTag.attributes));
Expand All @@ -279,7 +279,7 @@ Element insertEmpty(Token.StartTag startTag) {
}

FormElement insertForm(Token.StartTag startTag, boolean onStack, boolean checkTemplateStack) {
dedupliateTagAttributes(startTag);
dedupeAttributes(startTag);

Tag tag = tagFor(startTag.name(), settings);
FormElement el = new FormElement(tag, null, settings.normalizeAttributes(startTag.attributes));
Expand Down Expand Up @@ -340,7 +340,7 @@ else if (isFosterInserts() && StringUtil.inSorted(currentElement().normalName(),
}

/** Cleanup duplicate attributes. **/
private void dedupliateTagAttributes(StartTag startTag) {
private void dedupeAttributes(StartTag startTag) {
if (startTag.hasAttributes() && !startTag.attributes.isEmpty()) {
int dupes = startTag.attributes.deduplicate(settings);
if (dupes > 0) {
Expand Down
44 changes: 16 additions & 28 deletions src/test/java/org/jsoup/parser/HtmlParserTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,16 @@
import org.jsoup.nodes.*;
import org.jsoup.safety.Safelist;
import org.jsoup.select.Elements;
import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.Arguments;
import org.junit.jupiter.params.provider.MethodSource;

import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.IOException;
import java.util.List;
import java.util.stream.Stream;

import static org.jsoup.parser.ParseSettings.preserveCase;
import static org.junit.jupiter.api.Assertions.*;
Expand Down Expand Up @@ -46,40 +49,25 @@ public class HtmlParserTest {
assertEquals("foo > bar", p.attr("class"));
}

@Test public void dropsDuplicateAttributes() {
String html = "<p One=One ONE=Two Two=two one=Three One=Four two=Five>Text</p>";
@ParameterizedTest @MethodSource("dupeAttributeData")
public void dropsDuplicateAttributes(String html, String expected) {
Parser parser = Parser.htmlParser().setTrackErrors(10);
Document doc = parser.parseInput(html, "");

Element p = doc.selectFirst("p");
assertEquals("<p one=\"One\" two=\"two\">Text</p>", p.outerHtml()); // normalized names due to lower casing
Element el = doc.expectFirst("body > *");
assertEquals(expected, el.outerHtml()); // normalized names due to lower casing
String tag = el.normalName();

assertEquals(1, parser.getErrors().size());
assertEquals("Dropped duplicate attribute(s) in tag [p]", parser.getErrors().get(0).getErrorMessage());
assertEquals("Dropped duplicate attribute(s) in tag [" + tag + "]", parser.getErrors().get(0).getErrorMessage());
}

/**
 * Parses an {@code <img>} tag whose attributes repeat under different letter cases, then checks
 * that only one {@code one} and one {@code two} attribute are serialized and that exactly one
 * parse error is recorded for the dropped duplicates.
 */
@Test public void dropsDuplicateAttributesInEmptyElement() {
    final Parser trackingParser = Parser.htmlParser().setTrackErrors(10);
    final Document parsed = trackingParser.parseInput(
        "<img One=One ONE=Two Two=two one=Three One=Four two=Five>", "");

    // attribute names are lower-cased on output, so the mixed-case repeats collapse together
    final Element img = parsed.selectFirst("img");
    final String serialized = img.outerHtml();
    assertEquals("<img one=\"One\" two=\"two\">", serialized);

    // all the dropped duplicates of one tag are reported through a single error entry
    assertEquals(1, trackingParser.getErrors().size());
    final String message = trackingParser.getErrors().get(0).getErrorMessage();
    assertEquals("Dropped duplicate attribute(s) in tag [img]", message);
}

@Test public void dropsDuplicateAttributesInFormElement() {
String html = "<form One=One ONE=Two Two=two one=Three One=Four two=Five></form>";
Parser parser = Parser.htmlParser().setTrackErrors(10);
Document doc = parser.parseInput(html, "");

Element p = doc.selectFirst("form");
assertEquals("<form one=\"One\" two=\"two\"></form>", p.outerHtml()); // normalized names due to lower casing

assertEquals(1, parser.getErrors().size());
assertEquals("Dropped duplicate attribute(s) in tag [form]", parser.getErrors().get(0).getErrorMessage());
/**
 * Argument source for the parameterized {@code dropsDuplicateAttributes} test.
 * Each entry pairs an input HTML snippet, whose tag repeats the {@code one} and {@code two}
 * attributes in varying letter case, with the outer HTML expected after parsing.
 * The three entries cover a {@code <p>}, an {@code <img>} and a {@code <form>} tag.
 *
 * @return a stream of (input html, expected outer html) argument pairs
 */
private static Stream<Arguments> dupeAttributeData() {
    final Stream.Builder<Arguments> cases = Stream.builder();

    cases.add(Arguments.of(
        "<p One=One ONE=Two Two=two one=Three One=Four two=Five>Text</p>",
        "<p one=\"One\" two=\"two\">Text</p>"));

    cases.add(Arguments.of(
        "<img One=One ONE=Two Two=two one=Three One=Four two=Five>",
        "<img one=\"One\" two=\"two\">"));

    cases.add(Arguments.of(
        "<form One=One ONE=Two Two=two one=Three One=Four two=Five></form>",
        "<form one=\"One\" two=\"two\"></form>"));

    return cases.build();
}

@Test public void retainsAttributesOfDifferentCaseIfSensitive() {
Expand Down