Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Do not ignore XML CDATA sections when parsing: #558

Merged
merged 1 commit into from
Sep 9, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Do not ignore XML CDATA sections when parsing:
- handle startCDATA/endCDATA lexical events;
- keep track of the `inCDATA` state;
- capture text into appropriate Node subtype depending on that state;
- add createPCData()/makePCData() helpers;
- add unit tests.
  • Loading branch information
dubinsky committed Sep 1, 2021
commit 5e8aba5d0820c8aa854b98b65ad966feed57c894
12 changes: 5 additions & 7 deletions build.sbt
Original file line number Diff line number Diff line change
Expand Up @@ -69,14 +69,12 @@ lazy val xml = crossProject(JSPlatform, JVMPlatform, NativePlatform)
// to previous-version artifacts that were built on 8. see scala/scala-xml#501
exclude[DirectMissingMethodProblem]("scala.xml.include.sax.XIncluder.declaration"),

// caused by the switch from DefaultHandler to DefaultHandler2:
exclude[MissingTypesProblem]("scala.xml.parsing.FactoryAdapter"),
exclude[MissingTypesProblem]("scala.xml.parsing.NoBindingFactoryAdapter"),
// necessitated by the switch from DefaultHandler to DefaultHandler2 in FactoryAdapter:
exclude[MissingTypesProblem]("scala.xml.parsing.FactoryAdapter"), // see #549

exclude[DirectMissingMethodProblem]("scala.xml.parsing.FactoryAdapter.comment"),
exclude[ReversedMissingMethodProblem]("scala.xml.parsing.FactoryAdapter.createComment"),
exclude[DirectMissingMethodProblem]("scala.xml.parsing.FactoryAdapter.createComment"),
exclude[DirectMissingMethodProblem]("scala.xml.parsing.NoBindingFactoryAdapter.createComment")
// necessitated by the introduction of new abstract methods in FactoryAdapter:
exclude[ReversedMissingMethodProblem]("scala.xml.parsing.FactoryAdapter.createComment"), // see #549
exclude[ReversedMissingMethodProblem]("scala.xml.parsing.FactoryAdapter.createPCData") // see #558
)
},

Expand Down
31 changes: 26 additions & 5 deletions jvm/src/test/scala/scala/xml/XMLTest.scala
Original file line number Diff line number Diff line change
Expand Up @@ -580,13 +580,23 @@ class XMLTestJVM {
XML.loadString(broken)
}

def roundtrip(xml: String): Unit = assertEquals(xml, XML.loadString(xml).toString)

@UnitTest
def issue508: Unit = {
def check(xml: String): Unit = assertEquals(xml, XML.loadString(xml).toString)
def issue508commentParsing: Unit = {
// confirm that comments are processed correctly now
roundtrip("<a><!-- comment --> suffix</a>")
roundtrip("<a>prefix <!-- comment --> suffix</a>")
roundtrip("<a>prefix <b><!-- comment --></b> suffix</a>")
roundtrip("<a>prefix <b><!-- multi-\nline\n comment --></b> suffix</a>")
roundtrip("""<a>prefix <b><!-- multi-
|line
| comment --></b> suffix</a>""".stripMargin)

check("<a><!-- comment --> suffix</a>")
check("<a>prefix <!-- comment --> suffix</a>")
check("<a>prefix <b><!-- comment --></b> suffix</a>")
// confirm that processing instructions were always processed correctly
roundtrip("<a><?target content ?> suffix</a>")
roundtrip("<a>prefix <?target content ?> suffix</a>")
roundtrip("<a>prefix <b><?target content?></b> suffix</a>")

// TODO since XMLLoader retrieves FactoryAdapter.rootNode,
// capturing comments before and after the root element is not currently possible
Expand All @@ -595,6 +605,17 @@ class XMLTestJVM {
//check("<a>text</a><!-- epilogue -->")
}

@UnitTest
def cdataParsing: Unit = {
roundtrip("<a><![CDATA[ cdata ]]> suffix</a>")
roundtrip("<a>prefix <![CDATA[ cdata ]]> suffix</a>")
roundtrip("<a>prefix <b><![CDATA[ cdata section]]></b> suffix</a>")
roundtrip("""<a>prefix <b><![CDATA[
| multi-
| line cdata
| section]]></b> suffix</a>""".stripMargin)
}

@UnitTest
def nodeSeqNs: Unit = {
val x = {
Expand Down
2 changes: 2 additions & 0 deletions shared/src/main/scala/scala/xml/factory/NodeFactory.scala
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,8 @@ trait NodeFactory[A <: Node] {
}

def makeText(s: String): Text = Text(s)
def makePCData(s: String): PCData =
PCData(s)
def makeComment(s: String): Seq[Comment] =
if (ignoreComments) Nil else List(Comment(s))
def makeProcInstr(t: String, s: String): Seq[ProcInstr] =
Expand Down
39 changes: 34 additions & 5 deletions shared/src/main/scala/scala/xml/parsing/FactoryAdapter.scala
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,8 @@ abstract class FactoryAdapter extends DefaultHandler2 with factory.XMLLoader[Nod
var rootElem: Node = _

val buffer = new StringBuilder()
private var inCDATA: Boolean = false

/** List of attributes
*
* Previously was a mutable [[scala.collection.mutable.Stack Stack]], but is now a mutable reference to an immutable [[scala.collection.immutable.List List]].
Expand Down Expand Up @@ -100,6 +102,13 @@ abstract class FactoryAdapter extends DefaultHandler2 with factory.XMLLoader[Nod
*/
def createText(text: String): Text // abstract

/**
* creates a PCData node.
* @param text
* @return a new PCData node.
*/
def createPCData(text: String): PCData // abstract

/**
* creates a new processing instruction node.
*/
Expand All @@ -117,7 +126,7 @@ abstract class FactoryAdapter extends DefaultHandler2 with factory.XMLLoader[Nod
val normalizeWhitespace = false

/**
* Characters.
* Capture characters, possibly normalizing whitespace.
* @param ch
* @param offset
* @param length
Expand All @@ -139,7 +148,20 @@ abstract class FactoryAdapter extends DefaultHandler2 with factory.XMLLoader[Nod
}
}

private def splitName(s: String) = {
/**
* Start of a CDATA section.
*/
override def startCDATA(): Unit = {
captureText()
inCDATA = true
}

/**
* End of a CDATA section.
*/
override def endCDATA(): Unit = captureText()

private def splitName(s: String): (String, String) = {
val idx = s indexOf ':'
if (idx < 0) (null, s)
else (s take idx, s drop (idx + 1))
Expand Down Expand Up @@ -185,13 +207,17 @@ abstract class FactoryAdapter extends DefaultHandler2 with factory.XMLLoader[Nod
}

/**
* captures text, possibly normalizing whitespace
* Captures text or cdata.
*/
def captureText(): Unit = {
if (capture && buffer.nonEmpty)
hStack = createText(buffer.toString) :: hStack
if (capture && buffer.nonEmpty) {
val text: String = buffer.toString
val newNode: Node = if (inCDATA) createPCData(text) else createText(text)
hStack = newNode :: hStack
}

buffer.clear()
inCDATA = false
}

/**
Expand Down Expand Up @@ -232,6 +258,9 @@ abstract class FactoryAdapter extends DefaultHandler2 with factory.XMLLoader[Nod
hStack = hStack.reverse_:::(createProcInstr(target, data).toList)
}

/**
* Comment.
*/
override def comment(ch: Array[Char], start: Int, length: Int): Unit = {
captureText()
val commentText: String = String.valueOf(ch.slice(start, start + length))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,4 +42,7 @@ class NoBindingFactoryAdapter extends FactoryAdapter with NodeFactory[Elem] {

/** Creates a comment. */
override def createComment(characters: String): Seq[Comment] = makeComment(characters)

/** Creates a cdata. */
override def createPCData(characters: String): PCData = makePCData(characters)
}
1 change: 0 additions & 1 deletion shared/src/test/scala/scala/xml/CommentTest.scala
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
package scala.xml

import org.junit.Assert.assertEquals
import org.junit.Assert.assertTrue
import org.junit.Test

final class CommentTest {
Expand Down