diff --git a/.eslintrc.json b/.eslintrc.json
index 70fd13e7d..71ec49955 100644
--- a/.eslintrc.json
+++ b/.eslintrc.json
@@ -1,27 +1,62 @@
{
- "extends": [
- "eslint:recommended",
- "plugin:@typescript-eslint/eslint-recommended",
- "plugin:@typescript-eslint/recommended",
- "prettier",
- "prettier/@typescript-eslint"
- ],
+ "extends": ["eslint:recommended", "prettier"],
"env": {
"node": true,
"es6": true
},
"rules": {
- "eqeqeq": 2,
+ "eqeqeq": [2, "smart"],
"no-caller": 2,
"dot-notation": 2,
"no-var": 2,
"prefer-const": 2,
- "prefer-arrow-callback": 2,
+ "prefer-arrow-callback": [2, { "allowNamedFunctions": true }],
"arrow-body-style": [2, "as-needed"],
"object-shorthand": 2,
-
- "@typescript-eslint/explicit-function-return-type": 0,
- "@typescript-eslint/explicit-member-accessibility": 0,
- "@typescript-eslint/no-use-before-define": [2, { "functions": false }]
- }
+ "prefer-template": 2,
+ "one-var": [2, "never"],
+ "prefer-destructuring": [2, { "object": true }],
+ "capitalized-comments": 2,
+ "multiline-comment-style": [2, "starred-block"],
+ "spaced-comment": 2,
+ "yoda": [2, "never"],
+ "curly": [2, "multi-line"],
+ "no-else-return": 2
+ },
+ "overrides": [
+ {
+ "files": "*.ts",
+ "extends": [
+ "plugin:@typescript-eslint/eslint-recommended",
+ "plugin:@typescript-eslint/recommended",
+ "prettier/@typescript-eslint"
+ ],
+ "parserOptions": {
+ "sourceType": "module",
+ "project": "./tsconfig.eslint.json"
+ },
+ "rules": {
+ "@typescript-eslint/prefer-for-of": 0,
+ "@typescript-eslint/member-ordering": 0,
+ "@typescript-eslint/explicit-function-return-type": 0,
+ "@typescript-eslint/no-unused-vars": 0,
+ "@typescript-eslint/no-use-before-define": [
+ 2,
+ { "functions": false }
+ ],
+ "@typescript-eslint/consistent-type-definitions": [
+ 2,
+ "interface"
+ ],
+ "@typescript-eslint/prefer-function-type": 2,
+ "@typescript-eslint/no-unnecessary-type-arguments": 2,
+ "@typescript-eslint/prefer-string-starts-ends-with": 2,
+ "@typescript-eslint/prefer-readonly": 2,
+ "@typescript-eslint/prefer-includes": 2,
+ "@typescript-eslint/no-unnecessary-condition": 2,
+ "@typescript-eslint/switch-exhaustiveness-check": 2,
+ "@typescript-eslint/prefer-nullish-coalescing": 2
+ }
+ }
+ ]
}
diff --git a/.github/FUNDING.yml b/.github/FUNDING.yml
index e68c90cab..b440c7477 100644
--- a/.github/FUNDING.yml
+++ b/.github/FUNDING.yml
@@ -1,12 +1,2 @@
-# These are supported funding model platforms
-
-github: # Replace with up to 4 GitHub Sponsors-enabled usernames e.g., [user1, user2]
-patreon: # Replace with a single Patreon username
-open_collective: # Replace with a single Open Collective username
-ko_fi: # Replace with a single Ko-fi username
-tidelift: "npm/htmlparser2"
-community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry
-liberapay: # Replace with a single Liberapay username
-issuehunt: # Replace with a single IssueHunt username
-otechie: # Replace with a single Otechie username
-custom: # Replace with a single custom sponsorship URL
+github: [fb55]
+tidelift: npm/htmlparser2
diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml
new file mode 100644
index 000000000..646e1e1d3
--- /dev/null
+++ b/.github/workflows/codeql-analysis.yml
@@ -0,0 +1,52 @@
+name: "Code scanning - action"
+
+on:
+ push:
+ branches: [master]
+ pull_request:
+ # The branches below must be a subset of the branches above
+ branches: [master]
+ schedule:
+ - cron: "0 7 * * 0"
+
+jobs:
+ CodeQL-Build:
+ runs-on: ubuntu-latest
+
+ steps:
+ - name: Checkout repository
+ uses: actions/checkout@v2
+ with:
+ # We must fetch at least the immediate parents so that if this is
+ # a pull request then we can checkout the head.
+ fetch-depth: 2
+
+ # If this run was triggered by a pull request event, then checkout
+ # the head of the pull request instead of the merge commit.
+ - run: git checkout HEAD^2
+ if: ${{ github.event_name == 'pull_request' }}
+
+ # Initializes the CodeQL tools for scanning.
+ - name: Initialize CodeQL
+ uses: github/codeql-action/init@v1
+ # Override language selection by uncommenting this and choosing your languages
+ # with:
+ # languages: go, javascript, csharp, python, cpp, java
+ # Autobuild attempts to build any compiled languages (C/C++, C#, or Java).
+ # If this step fails, then you should remove it and run the build manually (see below)
+ - name: Autobuild
+ uses: github/codeql-action/autobuild@v1
+
+ # ℹ️ Command-line programs to run using the OS shell.
+ # 📚 https://git.io/JvXDl
+
+ # ✏️ If the Autobuild fails above, remove it and uncomment the following three lines
+ # and modify them (or add more) to build your code if your project
+ # uses a compiled language
+
+ #- run: |
+ # make bootstrap
+ # make release
+
+ - name: Perform CodeQL Analysis
+ uses: github/codeql-action/analyze@v1
diff --git a/.github/workflows/nodejs-lint.yml b/.github/workflows/nodejs-lint.yml
new file mode 100644
index 000000000..e4e25ceca
--- /dev/null
+++ b/.github/workflows/nodejs-lint.yml
@@ -0,0 +1,16 @@
+name: Node.js Lint
+
+on: [push, pull_request]
+
+jobs:
+ lint:
+ runs-on: ubuntu-latest
+ steps:
+ - uses: actions/checkout@v2
+ - uses: actions/setup-node@v1
+ with:
+ node-version: 14.x
+ - run: npm ci
+ - run: npm run lint
+ env:
+ CI: true
diff --git a/.github/workflows/nodejs-test.yml b/.github/workflows/nodejs-test.yml
new file mode 100644
index 000000000..4445e1130
--- /dev/null
+++ b/.github/workflows/nodejs-test.yml
@@ -0,0 +1,39 @@
+name: Node.js Test
+
+on: [push, pull_request]
+
+jobs:
+ test:
+ runs-on: ubuntu-latest
+
+ strategy:
+ matrix:
+ node-version: [10.x, 12.x, 14.x]
+
+ steps:
+ - uses: actions/checkout@v2
+ - name: Use Node.js ${{ matrix.node-version }}
+ uses: actions/setup-node@v1
+ with:
+ node-version: ${{ matrix.node-version }}
+ - run: npm ci
+ - run: npm run build --if-present
+ - run: npm test
+ env:
+ CI: true
+ - name: Coveralls Parallel
+ uses: coverallsapp/github-action@v1.1.1
+ with:
+ github-token: ${{ secrets.github_token }}
+ flag-name: run-${{ matrix.node-version }}
+ parallel: true
+
+ finish:
+ needs: test
+ runs-on: ubuntu-latest
+ steps:
+ - name: Coveralls Finished
+ uses: coverallsapp/github-action@v1.1.1
+ with:
+ github-token: ${{ secrets.github_token }}
+ parallel-finished: true
diff --git a/.prettierignore b/.prettierignore
new file mode 100644
index 000000000..f41745234
--- /dev/null
+++ b/.prettierignore
@@ -0,0 +1,3 @@
+node_modules/
+coverage/
+lib/
diff --git a/.travis.yml b/.travis.yml
deleted file mode 100644
index 02026fc8b..000000000
--- a/.travis.yml
+++ /dev/null
@@ -1,4 +0,0 @@
-language: node_js
-node_js:
- - lts/*
-after_success: npm run coverage
diff --git a/README.md b/README.md
index 39d327adb..e0b594007 100644
--- a/README.md
+++ b/README.md
@@ -2,7 +2,7 @@
[](https://npmjs.org/package/htmlparser2)
[](https://npmjs.org/package/htmlparser2)
-[](http://travis-ci.org/fb55/htmlparser2)
+[](https://github.com/fb55/htmlparser2/actions?query=workflow%3A%22Node.js+Test%22)
[](https://coveralls.io/r/fb55/htmlparser2)
A forgiving HTML/XML/RSS parser.
@@ -10,7 +10,7 @@ The parser can handle streams and provides a callback interface.
## Installation
- npm install htmlparser2
+ npm install --save htmlparser2
A live demo of htmlparser2 is available [here](https://astexplorer.net/#/2AmVrGuGVJ).
@@ -18,24 +18,21 @@ A live demo of htmlparser2 is available [here](https://astexplorer.net/#/2AmVrGu
```javascript
const htmlparser2 = require("htmlparser2");
-const parser = new htmlparser2.Parser(
- {
- onopentag(name, attribs) {
- if (name === "script" && attribs.type === "text/javascript") {
- console.log("JS! Hooray!");
- }
- },
- ontext(text) {
- console.log("-->", text);
- },
- onclosetag(tagname) {
- if (tagname === "script") {
- console.log("That's it?!");
- }
+const parser = new htmlparser2.Parser({
+ onopentag(name, attribs) {
+ if (name === "script" && attribs.type === "text/javascript") {
+ console.log("JS! Hooray!");
}
},
- { decodeEntities: true }
-);
+ ontext(text) {
+ console.log("-->", text);
+ },
+ onclosetag(tagname) {
+ if (tagname === "script") {
+ console.log("That's it?!");
+ }
+ },
+});
parser.write(
"Xyz ";
+ const normalScriptOutput = [
+ "onopentagname: 'script'",
+ "onopentagend",
+ "onclosetag: 'script'",
+ "onopentagname: 'div'",
+ "onopentagend",
+ "onclosetag: 'div'",
+ "onend",
+ ];
+
+ tokenizer.write(normalScriptInput);
+ tokenizer.end();
+ expect(logger.log).toEqual(normalScriptOutput);
+ tokenizer.reset();
+ logger.log = [];
+
+ const normalStyleInput = "";
+ const normalStyleOutput = [
+ "onopentagname: 'style'",
+ "onopentagend",
+ "onclosetag: 'style'",
+ "onopentagname: 'div'",
+ "onopentagend",
+ "onclosetag: 'div'",
+ "onend",
+ ];
+
+ tokenizer.write(normalStyleInput);
+ tokenizer.end();
+ expect(logger.log).toEqual(normalStyleOutput);
+ tokenizer.reset();
+ logger.log = [];
+
+ const normalTitleInput = "";
+ const normalTitleOutput = [
+ "onopentagname: 'title'",
+ "onopentagend",
+ "onclosetag: 'title'",
+ "onopentagname: 'div'",
+ "onopentagend",
+ "onclosetag: 'div'",
+ "onend",
+ ];
+
+ tokenizer.write(normalTitleInput);
+ tokenizer.end();
+ expect(logger.log).toEqual(normalTitleOutput);
+ tokenizer.reset();
+ logger.log = [];
+ });
+});
diff --git a/src/Tokenizer.ts b/src/Tokenizer.ts
index 3a5761898..f4c4acb01 100644
--- a/src/Tokenizer.ts
+++ b/src/Tokenizer.ts
@@ -6,14 +6,14 @@ import xmlMap from "entities/lib/maps/xml.json";
/** All the states the tokenizer can be in. */
const enum State {
Text = 1,
- BeforeTagName, //after <
+ BeforeTagName, // After <
InTagName,
InSelfClosingTag,
BeforeClosingTagName,
InClosingTagName,
AfterClosingTagName,
- //attributes
+ // Attributes
BeforeAttributeName,
InAttributeName,
AfterAttributeName,
@@ -22,20 +22,21 @@ const enum State {
InAttributeValueSq, // '
InAttributeValueNq,
- //declarations
+ // Declarations
BeforeDeclaration, // !
InDeclaration,
- //processing instructions
+ // Processing instructions
InProcessingInstruction, // ?
- //comments
+ // Comments
BeforeComment,
InComment,
+ InSpecialComment,
AfterComment1,
AfterComment2,
- //cdata
+ // Cdata
BeforeCdata1, // [
BeforeCdata2, // C
BeforeCdata3, // D
@@ -46,50 +47,66 @@ const enum State {
AfterCdata1, // ]
AfterCdata2, // ]
- //special tags
- BeforeSpecial, //S
- BeforeSpecialEnd, //S
-
- BeforeScript1, //C
- BeforeScript2, //R
- BeforeScript3, //I
- BeforeScript4, //P
- BeforeScript5, //T
- AfterScript1, //C
- AfterScript2, //R
- AfterScript3, //I
- AfterScript4, //P
- AfterScript5, //T
-
- BeforeStyle1, //T
- BeforeStyle2, //Y
- BeforeStyle3, //L
- BeforeStyle4, //E
- AfterStyle1, //T
- AfterStyle2, //Y
- AfterStyle3, //L
- AfterStyle4, //E
-
- BeforeEntity, //&
- BeforeNumericEntity, //#
+ // Special tags
+ BeforeSpecialS, // S
+ BeforeSpecialSEnd, // S
+
+ BeforeScript1, // C
+ BeforeScript2, // R
+ BeforeScript3, // I
+ BeforeScript4, // P
+ BeforeScript5, // T
+ AfterScript1, // C
+ AfterScript2, // R
+ AfterScript3, // I
+ AfterScript4, // P
+ AfterScript5, // T
+
+ BeforeStyle1, // T
+ BeforeStyle2, // Y
+ BeforeStyle3, // L
+ BeforeStyle4, // E
+ AfterStyle1, // T
+ AfterStyle2, // Y
+ AfterStyle3, // L
+ AfterStyle4, // E
+
+ BeforeSpecialT, // T
+ BeforeSpecialTEnd, // T
+ BeforeTitle1, // I
+ BeforeTitle2, // T
+ BeforeTitle3, // L
+ BeforeTitle4, // E
+ AfterTitle1, // I
+ AfterTitle2, // T
+ AfterTitle3, // L
+ AfterTitle4, // E
+
+ BeforeEntity, // &
+ BeforeNumericEntity, // #
InNamedEntity,
InNumericEntity,
- InHexEntity //X
+ InHexEntity, // X
}
const enum Special {
None = 1,
Script,
- Style
+ Style,
+ Title,
}
function whitespace(c: string): boolean {
return c === " " || c === "\n" || c === "\t" || c === "\f" || c === "\r";
}
+function isASCIIAlpha(c: string): boolean {
+ return (c >= "a" && c <= "z") || (c >= "A" && c <= "Z");
+}
+
interface Callbacks {
- onattribdata(value: string): void; //TODO implement the new event
- onattribend(): void;
+ onattribdata(value: string): void;
+ onattribend(quote: string | undefined | null): void;
onattribname(name: string): void;
oncdata(data: string): void;
onclosetag(name: string): void;
@@ -116,16 +133,15 @@ function ifElseState(upper: string, SUCCESS: State, FAILURE: State) {
t._index--;
}
};
- } else {
- return (t: Tokenizer, c: string) => {
- if (c === lower || c === upper) {
- t._state = SUCCESS;
- } else {
- t._state = FAILURE;
- t._index--;
- }
- };
}
+ return (t: Tokenizer, c: string) => {
+ if (c === lower || c === upper) {
+ t._state = SUCCESS;
+ } else {
+ t._state = FAILURE;
+ t._index--;
+ }
+ };
}
function consumeSpecialNameChar(upper: string, NEXT_STATE: State) {
@@ -136,7 +152,7 @@ function consumeSpecialNameChar(upper: string, NEXT_STATE: State) {
t._state = NEXT_STATE;
} else {
t._state = State.InTagName;
- t._index--; //consume the token again
+ t._index--; // Consume the token again
}
};
}
@@ -185,6 +201,16 @@ const stateAfterStyle1 = ifElseState("Y", State.AfterStyle2, State.Text);
const stateAfterStyle2 = ifElseState("L", State.AfterStyle3, State.Text);
const stateAfterStyle3 = ifElseState("E", State.AfterStyle4, State.Text);
+const stateBeforeSpecialT = consumeSpecialNameChar("I", State.BeforeTitle1);
+const stateBeforeTitle1 = consumeSpecialNameChar("T", State.BeforeTitle2);
+const stateBeforeTitle2 = consumeSpecialNameChar("L", State.BeforeTitle3);
+const stateBeforeTitle3 = consumeSpecialNameChar("E", State.BeforeTitle4);
+
+const stateAfterSpecialTEnd = ifElseState("I", State.AfterTitle1, State.Text);
+const stateAfterTitle1 = ifElseState("T", State.AfterTitle2, State.Text);
+const stateAfterTitle2 = ifElseState("L", State.AfterTitle3, State.Text);
+const stateAfterTitle3 = ifElseState("E", State.AfterTitle4, State.Text);
+
const stateBeforeEntity = ifElseState(
"#",
State.BeforeNumericEntity,
@@ -200,228 +226,266 @@ export default class Tokenizer {
/** The current state the tokenizer is in. */
_state = State.Text;
/** The read buffer. */
- _buffer = "";
+ private buffer = "";
/** The beginning of the section that is currently being read. */
- _sectionStart = 0;
+ public sectionStart = 0;
/** The index within the buffer that we are currently looking at. */
_index = 0;
/**
* Data that has already been processed will be removed from the buffer occasionally.
* `_bufferOffset` keeps track of how many characters have been removed, to make sure position information is accurate.
*/
- _bufferOffset = 0;
+ private bufferOffset = 0;
/** Some behavior, eg. when decoding entities, is done while we are in another state. This keeps track of the other state type. */
- _baseState = State.Text;
+ private baseState = State.Text;
/** For special parsing behavior inside of script and style tags. */
- _special = Special.None;
+ private special = Special.None;
/** Indicates whether the tokenizer has been paused. */
- _running = true;
+ private running = true;
/** Indicates whether the tokenizer has finished running / `.end` has been called. */
- _ended = false;
+ private ended = false;
- _cbs: Callbacks;
- _xmlMode: boolean;
- _decodeEntities: boolean;
+ private readonly cbs: Callbacks;
+ private readonly xmlMode: boolean;
+ private readonly decodeEntities: boolean;
constructor(
options: { xmlMode?: boolean; decodeEntities?: boolean } | null,
cbs: Callbacks
) {
- this._cbs = cbs;
- this._xmlMode = !!(options && options.xmlMode);
- this._decodeEntities = !!(options && options.decodeEntities);
+ this.cbs = cbs;
+ this.xmlMode = !!options?.xmlMode;
+ this.decodeEntities = options?.decodeEntities ?? true;
}
- reset() {
+ public reset(): void {
this._state = State.Text;
- this._buffer = "";
- this._sectionStart = 0;
+ this.buffer = "";
+ this.sectionStart = 0;
this._index = 0;
- this._bufferOffset = 0;
- this._baseState = State.Text;
- this._special = Special.None;
- this._running = true;
- this._ended = false;
+ this.bufferOffset = 0;
+ this.baseState = State.Text;
+ this.special = Special.None;
+ this.running = true;
+ this.ended = false;
}
- _stateText(c: string) {
+ public write(chunk: string): void {
+ if (this.ended) this.cbs.onerror(Error(".write() after done!"));
+ this.buffer += chunk;
+ this.parse();
+ }
+
+ public end(chunk?: string): void {
+ if (this.ended) this.cbs.onerror(Error(".end() after done!"));
+ if (chunk) this.write(chunk);
+ this.ended = true;
+ if (this.running) this.finish();
+ }
+
+ public pause(): void {
+ this.running = false;
+ }
+
+ public resume(): void {
+ this.running = true;
+ if (this._index < this.buffer.length) {
+ this.parse();
+ }
+ if (this.ended) {
+ this.finish();
+ }
+ }
+
+ /**
+ * The current index within all of the written data.
+ */
+ public getAbsoluteIndex(): number {
+ return this.bufferOffset + this._index;
+ }
+
+ private stateText(c: string) {
if (c === "<") {
- if (this._index > this._sectionStart) {
- this._cbs.ontext(this._getSection());
+ if (this._index > this.sectionStart) {
+ this.cbs.ontext(this.getSection());
}
this._state = State.BeforeTagName;
- this._sectionStart = this._index;
+ this.sectionStart = this._index;
} else if (
- this._decodeEntities &&
- this._special === Special.None &&
+ this.decodeEntities &&
+ this.special === Special.None &&
c === "&"
) {
- if (this._index > this._sectionStart) {
- this._cbs.ontext(this._getSection());
+ if (this._index > this.sectionStart) {
+ this.cbs.ontext(this.getSection());
}
- this._baseState = State.Text;
+ this.baseState = State.Text;
this._state = State.BeforeEntity;
- this._sectionStart = this._index;
+ this.sectionStart = this._index;
}
}
- _stateBeforeTagName(c: string) {
+ private stateBeforeTagName(c: string) {
if (c === "/") {
this._state = State.BeforeClosingTagName;
} else if (c === "<") {
- this._cbs.ontext(this._getSection());
- this._sectionStart = this._index;
+ this.cbs.ontext(this.getSection());
+ this.sectionStart = this._index;
} else if (
c === ">" ||
- this._special !== Special.None ||
+ this.special !== Special.None ||
whitespace(c)
) {
this._state = State.Text;
} else if (c === "!") {
this._state = State.BeforeDeclaration;
- this._sectionStart = this._index + 1;
+ this.sectionStart = this._index + 1;
} else if (c === "?") {
this._state = State.InProcessingInstruction;
- this._sectionStart = this._index + 1;
+ this.sectionStart = this._index + 1;
+ } else if (!isASCIIAlpha(c)) {
+ this._state = State.Text;
} else {
this._state =
- !this._xmlMode && (c === "s" || c === "S")
- ? State.BeforeSpecial
+ !this.xmlMode && (c === "s" || c === "S")
+ ? State.BeforeSpecialS
+ : !this.xmlMode && (c === "t" || c === "T")
+ ? State.BeforeSpecialT
: State.InTagName;
- this._sectionStart = this._index;
+ this.sectionStart = this._index;
}
}
- _stateInTagName(c: string) {
+ private stateInTagName(c: string) {
if (c === "/" || c === ">" || whitespace(c)) {
- this._emitToken("onopentagname");
+ this.emitToken("onopentagname");
this._state = State.BeforeAttributeName;
this._index--;
}
}
- _stateBeforeClosingTagName(c: string) {
+ private stateBeforeClosingTagName(c: string) {
if (whitespace(c)) {
- // ignore
+ // Ignore
} else if (c === ">") {
this._state = State.Text;
- } else if (this._special !== Special.None) {
+ } else if (this.special !== Special.None) {
if (c === "s" || c === "S") {
- this._state = State.BeforeSpecialEnd;
+ this._state = State.BeforeSpecialSEnd;
+ } else if (c === "t" || c === "T") {
+ this._state = State.BeforeSpecialTEnd;
} else {
this._state = State.Text;
this._index--;
}
+ } else if (!isASCIIAlpha(c)) {
+ this._state = State.InSpecialComment;
+ this.sectionStart = this._index;
} else {
this._state = State.InClosingTagName;
- this._sectionStart = this._index;
+ this.sectionStart = this._index;
}
}
- _stateInClosingTagName(c: string) {
+ private stateInClosingTagName(c: string) {
if (c === ">" || whitespace(c)) {
- this._emitToken("onclosetag");
+ this.emitToken("onclosetag");
this._state = State.AfterClosingTagName;
this._index--;
}
}
- _stateAfterClosingTagName(c: string) {
- //skip everything until ">"
+ private stateAfterClosingTagName(c: string) {
+ // Skip everything until ">"
if (c === ">") {
this._state = State.Text;
- this._sectionStart = this._index + 1;
+ this.sectionStart = this._index + 1;
}
}
- _stateBeforeAttributeName(c: string) {
+ private stateBeforeAttributeName(c: string) {
if (c === ">") {
- this._cbs.onopentagend();
+ this.cbs.onopentagend();
this._state = State.Text;
- this._sectionStart = this._index + 1;
+ this.sectionStart = this._index + 1;
} else if (c === "/") {
this._state = State.InSelfClosingTag;
} else if (!whitespace(c)) {
this._state = State.InAttributeName;
- this._sectionStart = this._index;
+ this.sectionStart = this._index;
}
}
- _stateInSelfClosingTag(c: string) {
+ private stateInSelfClosingTag(c: string) {
if (c === ">") {
- this._cbs.onselfclosingtag();
+ this.cbs.onselfclosingtag();
this._state = State.Text;
- this._sectionStart = this._index + 1;
+ this.sectionStart = this._index + 1;
+ this.special = Special.None; // Reset special state, in case of self-closing special tags
} else if (!whitespace(c)) {
this._state = State.BeforeAttributeName;
this._index--;
}
}
- _stateInAttributeName(c: string) {
+ private stateInAttributeName(c: string) {
if (c === "=" || c === "/" || c === ">" || whitespace(c)) {
- this._cbs.onattribname(this._getSection());
- this._sectionStart = -1;
+ this.cbs.onattribname(this.getSection());
+ this.sectionStart = -1;
this._state = State.AfterAttributeName;
this._index--;
}
}
- _stateAfterAttributeName(c: string) {
+ private stateAfterAttributeName(c: string) {
if (c === "=") {
this._state = State.BeforeAttributeValue;
} else if (c === "/" || c === ">") {
- this._cbs.onattribend();
+ this.cbs.onattribend(undefined);
this._state = State.BeforeAttributeName;
this._index--;
} else if (!whitespace(c)) {
- this._cbs.onattribend();
+ this.cbs.onattribend(undefined);
this._state = State.InAttributeName;
- this._sectionStart = this._index;
+ this.sectionStart = this._index;
}
}
- _stateBeforeAttributeValue(c: string) {
+ private stateBeforeAttributeValue(c: string) {
if (c === '"') {
this._state = State.InAttributeValueDq;
- this._sectionStart = this._index + 1;
+ this.sectionStart = this._index + 1;
} else if (c === "'") {
this._state = State.InAttributeValueSq;
- this._sectionStart = this._index + 1;
+ this.sectionStart = this._index + 1;
} else if (!whitespace(c)) {
this._state = State.InAttributeValueNq;
- this._sectionStart = this._index;
- this._index--; //reconsume token
+ this.sectionStart = this._index;
+ this._index--; // Reconsume token
}
}
- _stateInAttributeValueDoubleQuotes(c: string) {
- if (c === '"') {
- this._emitToken("onattribdata");
- this._cbs.onattribend();
+ private handleInAttributeValue(c: string, quote: string) {
+ if (c === quote) {
+ this.emitToken("onattribdata");
+ this.cbs.onattribend(quote);
this._state = State.BeforeAttributeName;
- } else if (this._decodeEntities && c === "&") {
- this._emitToken("onattribdata");
- this._baseState = this._state;
+ } else if (this.decodeEntities && c === "&") {
+ this.emitToken("onattribdata");
+ this.baseState = this._state;
this._state = State.BeforeEntity;
- this._sectionStart = this._index;
+ this.sectionStart = this._index;
}
}
- _stateInAttributeValueSingleQuotes(c: string) {
- if (c === "'") {
- this._emitToken("onattribdata");
- this._cbs.onattribend();
- this._state = State.BeforeAttributeName;
- } else if (this._decodeEntities && c === "&") {
- this._emitToken("onattribdata");
- this._baseState = this._state;
- this._state = State.BeforeEntity;
- this._sectionStart = this._index;
- }
+ private stateInAttributeValueDoubleQuotes(c: string) {
+ this.handleInAttributeValue(c, '"');
+ }
+ private stateInAttributeValueSingleQuotes(c: string) {
+ this.handleInAttributeValue(c, "'");
}
- _stateInAttributeValueNoQuotes(c: string) {
+ private stateInAttributeValueNoQuotes(c: string) {
if (whitespace(c) || c === ">") {
- this._emitToken("onattribdata");
- this._cbs.onattribend();
+ this.emitToken("onattribdata");
+ this.cbs.onattribend(null);
this._state = State.BeforeAttributeName;
this._index--;
- } else if (this._decodeEntities && c === "&") {
- this._emitToken("onattribdata");
- this._baseState = this._state;
+ } else if (this.decodeEntities && c === "&") {
+ this.emitToken("onattribdata");
+ this.baseState = this._state;
this._state = State.BeforeEntity;
- this._sectionStart = this._index;
+ this.sectionStart = this._index;
}
}
- _stateBeforeDeclaration(c: string) {
+ private stateBeforeDeclaration(c: string) {
this._state =
c === "["
? State.BeforeCdata1
@@ -429,317 +493,304 @@ export default class Tokenizer {
? State.BeforeComment
: State.InDeclaration;
}
- _stateInDeclaration(c: string) {
+ private stateInDeclaration(c: string) {
if (c === ">") {
- this._cbs.ondeclaration(this._getSection());
+ this.cbs.ondeclaration(this.getSection());
this._state = State.Text;
- this._sectionStart = this._index + 1;
+ this.sectionStart = this._index + 1;
}
}
- _stateInProcessingInstruction(c: string) {
+ private stateInProcessingInstruction(c: string) {
if (c === ">") {
- this._cbs.onprocessinginstruction(this._getSection());
+ this.cbs.onprocessinginstruction(this.getSection());
this._state = State.Text;
- this._sectionStart = this._index + 1;
+ this.sectionStart = this._index + 1;
}
}
- _stateBeforeComment(c: string) {
+ private stateBeforeComment(c: string) {
if (c === "-") {
this._state = State.InComment;
- this._sectionStart = this._index + 1;
+ this.sectionStart = this._index + 1;
} else {
this._state = State.InDeclaration;
}
}
- _stateInComment(c: string) {
+ private stateInComment(c: string) {
if (c === "-") this._state = State.AfterComment1;
}
- _stateAfterComment1(c: string) {
+ private stateInSpecialComment(c: string) {
+ if (c === ">") {
+ this.cbs.oncomment(
+ this.buffer.substring(this.sectionStart, this._index)
+ );
+ this._state = State.Text;
+ this.sectionStart = this._index + 1;
+ }
+ }
+ private stateAfterComment1(c: string) {
if (c === "-") {
this._state = State.AfterComment2;
} else {
this._state = State.InComment;
}
}
- _stateAfterComment2(c: string) {
+ private stateAfterComment2(c: string) {
if (c === ">") {
- //remove 2 trailing chars
- this._cbs.oncomment(
- this._buffer.substring(this._sectionStart, this._index - 2)
+ // Remove 2 trailing chars
+ this.cbs.oncomment(
+ this.buffer.substring(this.sectionStart, this._index - 2)
);
this._state = State.Text;
- this._sectionStart = this._index + 1;
+ this.sectionStart = this._index + 1;
} else if (c !== "-") {
this._state = State.InComment;
}
- // else: stay in AFTER_COMMENT_2 (`--->`)
+ // Else: stay in AFTER_COMMENT_2 (`--->`)
}
- _stateBeforeCdata6(c: string) {
+ private stateBeforeCdata6(c: string) {
if (c === "[") {
this._state = State.InCdata;
- this._sectionStart = this._index + 1;
+ this.sectionStart = this._index + 1;
} else {
this._state = State.InDeclaration;
this._index--;
}
}
- _stateInCdata(c: string) {
+ private stateInCdata(c: string) {
if (c === "]") this._state = State.AfterCdata1;
}
- _stateAfterCdata1(c: string) {
+ private stateAfterCdata1(c: string) {
if (c === "]") this._state = State.AfterCdata2;
else this._state = State.InCdata;
}
- _stateAfterCdata2(c: string) {
+ private stateAfterCdata2(c: string) {
if (c === ">") {
- //remove 2 trailing chars
- this._cbs.oncdata(
- this._buffer.substring(this._sectionStart, this._index - 2)
+ // Remove 2 trailing chars
+ this.cbs.oncdata(
+ this.buffer.substring(this.sectionStart, this._index - 2)
);
this._state = State.Text;
- this._sectionStart = this._index + 1;
+ this.sectionStart = this._index + 1;
} else if (c !== "]") {
this._state = State.InCdata;
}
- //else: stay in AFTER_CDATA_2 (`]]]>`)
+ // Else: stay in AFTER_CDATA_2 (`]]]>`)
}
- _stateBeforeSpecial(c: string) {
+ private stateBeforeSpecialS(c: string) {
if (c === "c" || c === "C") {
this._state = State.BeforeScript1;
} else if (c === "t" || c === "T") {
this._state = State.BeforeStyle1;
} else {
this._state = State.InTagName;
- this._index--; //consume the token again
+ this._index--; // Consume the token again
}
}
- _stateBeforeSpecialEnd(c: string) {
- if (this._special === Special.Script && (c === "c" || c === "C")) {
+ private stateBeforeSpecialSEnd(c: string) {
+ if (this.special === Special.Script && (c === "c" || c === "C")) {
this._state = State.AfterScript1;
- } else if (
- this._special === Special.Style &&
- (c === "t" || c === "T")
- ) {
+ } else if (this.special === Special.Style && (c === "t" || c === "T")) {
this._state = State.AfterStyle1;
} else this._state = State.Text;
}
- _stateBeforeScript5(c: string) {
+ private stateBeforeSpecialLast(c: string, special: Special) {
if (c === "/" || c === ">" || whitespace(c)) {
- this._special = Special.Script;
+ this.special = special;
}
this._state = State.InTagName;
- this._index--; //consume the token again
+ this._index--; // Consume the token again
}
- _stateAfterScript5(c: string) {
+ private stateAfterSpecialLast(c: string, sectionStartOffset: number) {
if (c === ">" || whitespace(c)) {
- this._special = Special.None;
+ this.special = Special.None;
this._state = State.InClosingTagName;
- this._sectionStart = this._index - 6;
- this._index--; //reconsume the token
+ this.sectionStart = this._index - sectionStartOffset;
+ this._index--; // Reconsume the token
} else this._state = State.Text;
}
- _stateBeforeStyle4(c: string) {
- if (c === "/" || c === ">" || whitespace(c)) {
- this._special = Special.Style;
- }
- this._state = State.InTagName;
- this._index--; //consume the token again
- }
- _stateAfterStyle4(c: string) {
- if (c === ">" || whitespace(c)) {
- this._special = Special.None;
- this._state = State.InClosingTagName;
- this._sectionStart = this._index - 5;
- this._index--; //reconsume the token
- } else this._state = State.Text;
- }
- //for entities terminated with a semicolon
- _parseNamedEntityStrict() {
- //offset = 1
- if (this._sectionStart + 1 < this._index) {
- const entity = this._buffer.substring(
- this._sectionStart + 1,
- this._index
- ),
- map = this._xmlMode ? xmlMap : entityMap;
+ // For entities terminated with a semicolon
+ private parseFixedEntity(
+ map: Record = this.xmlMode ? xmlMap : entityMap
+ ) {
+ // Offset = 1
+ if (this.sectionStart + 1 < this._index) {
+ const entity = this.buffer.substring(
+ this.sectionStart + 1,
+ this._index
+ );
if (Object.prototype.hasOwnProperty.call(map, entity)) {
- // @ts-ignore
- this._emitPartial(map[entity]);
- this._sectionStart = this._index + 1;
+ this.emitPartial(map[entity]);
+ this.sectionStart = this._index + 1;
}
}
}
- //parses legacy entities (without trailing semicolon)
- _parseLegacyEntity() {
- const start = this._sectionStart + 1;
- let limit = this._index - start;
- if (limit > 6) limit = 6; // The max length of legacy entities is 6
+ // Parses legacy entities (without trailing semicolon)
+ private parseLegacyEntity() {
+ const start = this.sectionStart + 1;
+ // The max length of legacy entities is 6
+ let limit = Math.min(this._index - start, 6);
while (limit >= 2) {
// The min length of legacy entities is 2
- const entity = this._buffer.substr(start, limit);
+ const entity = this.buffer.substr(start, limit);
if (Object.prototype.hasOwnProperty.call(legacyMap, entity)) {
- // @ts-ignore
- this._emitPartial(legacyMap[entity]);
- this._sectionStart += limit + 1;
+ this.emitPartial((legacyMap as Record)[entity]);
+ this.sectionStart += limit + 1;
return;
- } else {
- limit--;
}
+ limit--;
}
}
- _stateInNamedEntity(c: string) {
+ private stateInNamedEntity(c: string) {
if (c === ";") {
- this._parseNamedEntityStrict();
- if (this._sectionStart + 1 < this._index && !this._xmlMode) {
- this._parseLegacyEntity();
+ this.parseFixedEntity();
+ // Retry as legacy entity if entity wasn't parsed
+ if (
+ this.baseState === State.Text &&
+ this.sectionStart + 1 < this._index &&
+ !this.xmlMode
+ ) {
+ this.parseLegacyEntity();
}
- this._state = this._baseState;
- } else if (
- (c < "a" || c > "z") &&
- (c < "A" || c > "Z") &&
- (c < "0" || c > "9")
- ) {
- if (this._xmlMode || this._sectionStart + 1 === this._index) {
- // ignore
- } else if (this._baseState !== State.Text) {
+ this._state = this.baseState;
+ } else if ((c < "0" || c > "9") && !isASCIIAlpha(c)) {
+ if (this.xmlMode || this.sectionStart + 1 === this._index) {
+ // Ignore
+ } else if (this.baseState !== State.Text) {
if (c !== "=") {
- this._parseNamedEntityStrict();
+ // Parse as legacy entity, without allowing additional characters.
+ this.parseFixedEntity(legacyMap);
}
} else {
- this._parseLegacyEntity();
+ this.parseLegacyEntity();
}
- this._state = this._baseState;
+ this._state = this.baseState;
this._index--;
}
}
- _decodeNumericEntity(offset: number, base: number) {
- const sectionStart = this._sectionStart + offset;
+ private decodeNumericEntity(offset: number, base: number, strict: boolean) {
+ const sectionStart = this.sectionStart + offset;
if (sectionStart !== this._index) {
- //parse entity
- const entity = this._buffer.substring(sectionStart, this._index);
+ // Parse entity
+ const entity = this.buffer.substring(sectionStart, this._index);
const parsed = parseInt(entity, base);
- this._emitPartial(decodeCodePoint(parsed));
- this._sectionStart = this._index;
- } else {
- this._sectionStart--;
+ this.emitPartial(decodeCodePoint(parsed));
+ this.sectionStart = strict ? this._index + 1 : this._index;
}
- this._state = this._baseState;
+ this._state = this.baseState;
}
- _stateInNumericEntity(c: string) {
+ private stateInNumericEntity(c: string) {
if (c === ";") {
- this._decodeNumericEntity(2, 10);
- this._sectionStart++;
+ this.decodeNumericEntity(2, 10, true);
} else if (c < "0" || c > "9") {
- if (!this._xmlMode) {
- this._decodeNumericEntity(2, 10);
+ if (!this.xmlMode) {
+ this.decodeNumericEntity(2, 10, false);
} else {
- this._state = this._baseState;
+ this._state = this.baseState;
}
this._index--;
}
}
- _stateInHexEntity(c: string) {
+ private stateInHexEntity(c: string) {
if (c === ";") {
- this._decodeNumericEntity(3, 16);
- this._sectionStart++;
+ this.decodeNumericEntity(3, 16, true);
} else if (
(c < "a" || c > "f") &&
(c < "A" || c > "F") &&
(c < "0" || c > "9")
) {
- if (!this._xmlMode) {
- this._decodeNumericEntity(3, 16);
+ if (!this.xmlMode) {
+ this.decodeNumericEntity(3, 16, false);
} else {
- this._state = this._baseState;
+ this._state = this.baseState;
}
this._index--;
}
}
- _cleanup() {
- if (this._sectionStart < 0) {
- this._buffer = "";
- this._bufferOffset += this._index;
+ private cleanup() {
+ if (this.sectionStart < 0) {
+ this.buffer = "";
+ this.bufferOffset += this._index;
this._index = 0;
- } else if (this._running) {
+ } else if (this.running) {
if (this._state === State.Text) {
- if (this._sectionStart !== this._index) {
- this._cbs.ontext(this._buffer.substr(this._sectionStart));
+ if (this.sectionStart !== this._index) {
+ this.cbs.ontext(this.buffer.substr(this.sectionStart));
}
- this._buffer = "";
- this._bufferOffset += this._index;
+ this.buffer = "";
+ this.bufferOffset += this._index;
this._index = 0;
- } else if (this._sectionStart === this._index) {
- //the section just started
- this._buffer = "";
- this._bufferOffset += this._index;
+ } else if (this.sectionStart === this._index) {
+ // The section just started
+ this.buffer = "";
+ this.bufferOffset += this._index;
this._index = 0;
} else {
- //remove everything unnecessary
- this._buffer = this._buffer.substr(this._sectionStart);
- this._index -= this._sectionStart;
- this._bufferOffset += this._sectionStart;
+ // Remove everything unnecessary
+ this.buffer = this.buffer.substr(this.sectionStart);
+ this._index -= this.sectionStart;
+ this.bufferOffset += this.sectionStart;
}
- this._sectionStart = 0;
+ this.sectionStart = 0;
}
}
- //TODO make events conditional
- write(chunk: string) {
- if (this._ended) this._cbs.onerror(Error(".write() after done!"));
- this._buffer += chunk;
- this._parse();
- }
-
- // Iterates through the buffer, calling the function corresponding to the current state.
- // States that are more likely to be hit are higher up, as a performance improvement.
- _parse() {
- while (this._index < this._buffer.length && this._running) {
- const c = this._buffer.charAt(this._index);
+ /**
+ * Iterates through the buffer, calling the function corresponding to the current state.
+ *
+ * States that are more likely to be hit are higher up, as a performance improvement.
+ */
+ private parse() {
+ while (this._index < this.buffer.length && this.running) {
+ const c = this.buffer.charAt(this._index);
if (this._state === State.Text) {
- this._stateText(c);
+ this.stateText(c);
} else if (this._state === State.InAttributeValueDq) {
- this._stateInAttributeValueDoubleQuotes(c);
+ this.stateInAttributeValueDoubleQuotes(c);
} else if (this._state === State.InAttributeName) {
- this._stateInAttributeName(c);
+ this.stateInAttributeName(c);
} else if (this._state === State.InComment) {
- this._stateInComment(c);
+ this.stateInComment(c);
+ } else if (this._state === State.InSpecialComment) {
+ this.stateInSpecialComment(c);
} else if (this._state === State.BeforeAttributeName) {
- this._stateBeforeAttributeName(c);
+ this.stateBeforeAttributeName(c);
} else if (this._state === State.InTagName) {
- this._stateInTagName(c);
+ this.stateInTagName(c);
} else if (this._state === State.InClosingTagName) {
- this._stateInClosingTagName(c);
+ this.stateInClosingTagName(c);
} else if (this._state === State.BeforeTagName) {
- this._stateBeforeTagName(c);
+ this.stateBeforeTagName(c);
} else if (this._state === State.AfterAttributeName) {
- this._stateAfterAttributeName(c);
+ this.stateAfterAttributeName(c);
} else if (this._state === State.InAttributeValueSq) {
- this._stateInAttributeValueSingleQuotes(c);
+ this.stateInAttributeValueSingleQuotes(c);
} else if (this._state === State.BeforeAttributeValue) {
- this._stateBeforeAttributeValue(c);
+ this.stateBeforeAttributeValue(c);
} else if (this._state === State.BeforeClosingTagName) {
- this._stateBeforeClosingTagName(c);
+ this.stateBeforeClosingTagName(c);
} else if (this._state === State.AfterClosingTagName) {
- this._stateAfterClosingTagName(c);
- } else if (this._state === State.BeforeSpecial) {
- this._stateBeforeSpecial(c);
+ this.stateAfterClosingTagName(c);
+ } else if (this._state === State.BeforeSpecialS) {
+ this.stateBeforeSpecialS(c);
} else if (this._state === State.AfterComment1) {
- this._stateAfterComment1(c);
+ this.stateAfterComment1(c);
} else if (this._state === State.InAttributeValueNq) {
- this._stateInAttributeValueNoQuotes(c);
+ this.stateInAttributeValueNoQuotes(c);
} else if (this._state === State.InSelfClosingTag) {
- this._stateInSelfClosingTag(c);
+ this.stateInSelfClosingTag(c);
} else if (this._state === State.InDeclaration) {
- this._stateInDeclaration(c);
+ this.stateInDeclaration(c);
} else if (this._state === State.BeforeDeclaration) {
- this._stateBeforeDeclaration(c);
+ this.stateBeforeDeclaration(c);
} else if (this._state === State.AfterComment2) {
- this._stateAfterComment2(c);
+ this.stateAfterComment2(c);
} else if (this._state === State.BeforeComment) {
- this._stateBeforeComment(c);
- } else if (this._state === State.BeforeSpecialEnd) {
- this._stateBeforeSpecialEnd(c);
+ this.stateBeforeComment(c);
+ } else if (this._state === State.BeforeSpecialSEnd) {
+ this.stateBeforeSpecialSEnd(c);
+ } else if (this._state === State.BeforeSpecialTEnd) {
+ stateAfterSpecialTEnd(this, c);
} else if (this._state === State.AfterScript1) {
stateAfterScript1(this, c);
} else if (this._state === State.AfterScript2) {
@@ -755,21 +806,21 @@ export default class Tokenizer {
} else if (this._state === State.BeforeScript4) {
stateBeforeScript4(this, c);
} else if (this._state === State.BeforeScript5) {
- this._stateBeforeScript5(c);
+ this.stateBeforeSpecialLast(c, Special.Script);
} else if (this._state === State.AfterScript4) {
stateAfterScript4(this, c);
} else if (this._state === State.AfterScript5) {
- this._stateAfterScript5(c);
+ this.stateAfterSpecialLast(c, 6);
} else if (this._state === State.BeforeStyle1) {
stateBeforeStyle1(this, c);
} else if (this._state === State.InCdata) {
- this._stateInCdata(c);
+ this.stateInCdata(c);
} else if (this._state === State.BeforeStyle2) {
stateBeforeStyle2(this, c);
} else if (this._state === State.BeforeStyle3) {
stateBeforeStyle3(this, c);
} else if (this._state === State.BeforeStyle4) {
- this._stateBeforeStyle4(c);
+ this.stateBeforeSpecialLast(c, Special.Style);
} else if (this._state === State.AfterStyle1) {
stateAfterStyle1(this, c);
} else if (this._state === State.AfterStyle2) {
@@ -777,11 +828,29 @@ export default class Tokenizer {
} else if (this._state === State.AfterStyle3) {
stateAfterStyle3(this, c);
} else if (this._state === State.AfterStyle4) {
- this._stateAfterStyle4(c);
+ this.stateAfterSpecialLast(c, 5);
+ } else if (this._state === State.BeforeSpecialT) {
+ stateBeforeSpecialT(this, c);
+ } else if (this._state === State.BeforeTitle1) {
+ stateBeforeTitle1(this, c);
+ } else if (this._state === State.BeforeTitle2) {
+ stateBeforeTitle2(this, c);
+ } else if (this._state === State.BeforeTitle3) {
+ stateBeforeTitle3(this, c);
+ } else if (this._state === State.BeforeTitle4) {
+ this.stateBeforeSpecialLast(c, Special.Title);
+ } else if (this._state === State.AfterTitle1) {
+ stateAfterTitle1(this, c);
+ } else if (this._state === State.AfterTitle2) {
+ stateAfterTitle2(this, c);
+ } else if (this._state === State.AfterTitle3) {
+ stateAfterTitle3(this, c);
+ } else if (this._state === State.AfterTitle4) {
+ this.stateAfterSpecialLast(c, 5);
} else if (this._state === State.InProcessingInstruction) {
- this._stateInProcessingInstruction(c);
+ this.stateInProcessingInstruction(c);
} else if (this._state === State.InNamedEntity) {
- this._stateInNamedEntity(c);
+ this.stateInNamedEntity(c);
} else if (this._state === State.BeforeCdata1) {
stateBeforeCdata1(this, c);
} else if (this._state === State.BeforeEntity) {
@@ -791,84 +860,69 @@ export default class Tokenizer {
} else if (this._state === State.BeforeCdata3) {
stateBeforeCdata3(this, c);
} else if (this._state === State.AfterCdata1) {
- this._stateAfterCdata1(c);
+ this.stateAfterCdata1(c);
} else if (this._state === State.AfterCdata2) {
- this._stateAfterCdata2(c);
+ this.stateAfterCdata2(c);
} else if (this._state === State.BeforeCdata4) {
stateBeforeCdata4(this, c);
} else if (this._state === State.BeforeCdata5) {
stateBeforeCdata5(this, c);
} else if (this._state === State.BeforeCdata6) {
- this._stateBeforeCdata6(c);
+ this.stateBeforeCdata6(c);
} else if (this._state === State.InHexEntity) {
- this._stateInHexEntity(c);
+ this.stateInHexEntity(c);
} else if (this._state === State.InNumericEntity) {
- this._stateInNumericEntity(c);
+ this.stateInNumericEntity(c);
+ // eslint-disable-next-line @typescript-eslint/no-unnecessary-condition
} else if (this._state === State.BeforeNumericEntity) {
stateBeforeNumericEntity(this, c);
} else {
- this._cbs.onerror(Error("unknown _state"), this._state);
+ this.cbs.onerror(Error("unknown _state"), this._state);
}
this._index++;
}
- this._cleanup();
- }
- pause() {
- this._running = false;
+ this.cleanup();
}
- resume() {
- this._running = true;
- if (this._index < this._buffer.length) {
- this._parse();
- }
- if (this._ended) {
- this._finish();
- }
- }
- end(chunk?: string) {
- if (this._ended) this._cbs.onerror(Error(".end() after done!"));
- if (chunk) this.write(chunk);
- this._ended = true;
- if (this._running) this._finish();
- }
- _finish() {
- //if there is remaining data, emit it in a reasonable way
- if (this._sectionStart < this._index) {
- this._handleTrailingData();
+
+ private finish() {
+ // If there is remaining data, emit it in a reasonable way
+ if (this.sectionStart < this._index) {
+ this.handleTrailingData();
}
- this._cbs.onend();
+ this.cbs.onend();
}
- _handleTrailingData() {
- const data = this._buffer.substr(this._sectionStart);
+
+ private handleTrailingData() {
+ const data = this.buffer.substr(this.sectionStart);
if (
this._state === State.InCdata ||
this._state === State.AfterCdata1 ||
this._state === State.AfterCdata2
) {
- this._cbs.oncdata(data);
+ this.cbs.oncdata(data);
} else if (
this._state === State.InComment ||
this._state === State.AfterComment1 ||
this._state === State.AfterComment2
) {
- this._cbs.oncomment(data);
- } else if (this._state === State.InNamedEntity && !this._xmlMode) {
- this._parseLegacyEntity();
- if (this._sectionStart < this._index) {
- this._state = this._baseState;
- this._handleTrailingData();
+ this.cbs.oncomment(data);
+ } else if (this._state === State.InNamedEntity && !this.xmlMode) {
+ this.parseLegacyEntity();
+ if (this.sectionStart < this._index) {
+ this._state = this.baseState;
+ this.handleTrailingData();
}
- } else if (this._state === State.InNumericEntity && !this._xmlMode) {
- this._decodeNumericEntity(2, 10);
- if (this._sectionStart < this._index) {
- this._state = this._baseState;
- this._handleTrailingData();
+ } else if (this._state === State.InNumericEntity && !this.xmlMode) {
+ this.decodeNumericEntity(2, 10, false);
+ if (this.sectionStart < this._index) {
+ this._state = this.baseState;
+ this.handleTrailingData();
}
- } else if (this._state === State.InHexEntity && !this._xmlMode) {
- this._decodeNumericEntity(3, 16);
- if (this._sectionStart < this._index) {
- this._state = this._baseState;
- this._handleTrailingData();
+ } else if (this._state === State.InHexEntity && !this.xmlMode) {
+ this.decodeNumericEntity(3, 16, false);
+ if (this.sectionStart < this._index) {
+ this._state = this.baseState;
+ this.handleTrailingData();
}
} else if (
this._state !== State.InTagName &&
@@ -881,26 +935,26 @@ export default class Tokenizer {
this._state !== State.InAttributeValueNq &&
this._state !== State.InClosingTagName
) {
- this._cbs.ontext(data);
+ this.cbs.ontext(data);
}
- //else, ignore remaining data
- //TODO add a way to remove current tag
+ /*
+ * Else, ignore remaining data
+ * TODO add a way to remove current tag
+ */
}
- getAbsoluteIndex(): number {
- return this._bufferOffset + this._index;
- }
- _getSection(): string {
- return this._buffer.substring(this._sectionStart, this._index);
+
+ private getSection(): string {
+ return this.buffer.substring(this.sectionStart, this._index);
}
- _emitToken(name: "onopentagname" | "onclosetag" | "onattribdata") {
- this._cbs[name](this._getSection());
- this._sectionStart = -1;
+ private emitToken(name: "onopentagname" | "onclosetag" | "onattribdata") {
+ this.cbs[name](this.getSection());
+ this.sectionStart = -1;
}
- _emitPartial(value: string) {
- if (this._baseState !== State.Text) {
- this._cbs.onattribdata(value); //TODO implement the new event
+ private emitPartial(value: string) {
+ if (this.baseState !== State.Text) {
+ this.cbs.onattribdata(value); // TODO implement the new event
} else {
- this._cbs.ontext(value);
+ this.cbs.ontext(value);
}
}
}
diff --git a/src/WritableStream.spec.ts b/src/WritableStream.spec.ts
index d0d31f643..e0f6fa7ec 100644
--- a/src/WritableStream.spec.ts
+++ b/src/WritableStream.spec.ts
@@ -7,6 +7,7 @@ describe("WritableStream", () => {
stream.write(Buffer.from([0xe2, 0x82]));
stream.write(Buffer.from([0xac]));
+ stream.write("");
stream.end();
expect(ontext).toBeCalledWith("€");
diff --git a/src/WritableStream.ts b/src/WritableStream.ts
index 670020cd1..baa204e38 100644
--- a/src/WritableStream.ts
+++ b/src/WritableStream.ts
@@ -13,21 +13,22 @@ function isBuffer(_chunk: string | Buffer, encoding: string): _chunk is Buffer {
* @see Parser
*/
export class WritableStream extends Writable {
- _parser: Parser;
- _decoder = new StringDecoder();
+ private readonly _parser: Parser;
+ private readonly _decoder = new StringDecoder();
constructor(cbs: Partial, options?: ParserOptions) {
super({ decodeStrings: false });
this._parser = new Parser(cbs, options);
}
- _write(chunk: string | Buffer, encoding: string, cb: () => void) {
- if (isBuffer(chunk, encoding)) chunk = this._decoder.write(chunk);
- this._parser.write(chunk);
+ _write(chunk: string | Buffer, encoding: string, cb: () => void): void {
+ this._parser.write(
+ isBuffer(chunk, encoding) ? this._decoder.write(chunk) : chunk
+ );
cb();
}
- _final(cb: () => void) {
+ _final(cb: () => void): void {
this._parser.end(this._decoder.end());
cb();
}
diff --git a/src/__fixtures__/Documents/Atom_Example.xml b/src/__fixtures__/Documents/Atom_Example.xml
index f83638030..c19b0d36d 100644
--- a/src/__fixtures__/Documents/Atom_Example.xml
+++ b/src/__fixtures__/Documents/Atom_Example.xml
@@ -22,4 +22,6 @@
Some content.
+
+
diff --git a/src/__fixtures__/Documents/RSS_Example.xml b/src/__fixtures__/Documents/RSS_Example.xml
index 0d1fde875..18563449e 100644
--- a/src/__fixtures__/Documents/RSS_Example.xml
+++ b/src/__fixtures__/Documents/RSS_Example.xml
@@ -43,6 +43,7 @@
Tue, 20 May 2003 08:56:02 GMT
http://liftoff.msfc.nasa.gov/2003/05/20.html#item570
+
\ No newline at end of file
diff --git a/src/__fixtures__/Events/01-simple.json b/src/__fixtures__/Events/01-simple.json
index 1efe6a4f4..7f5fd154a 100644
--- a/src/__fixtures__/Events/01-simple.json
+++ b/src/__fixtures__/Events/01-simple.json
@@ -1,9 +1,5 @@
{
"name": "simple",
- "options": {
- "handler": {},
- "parser": {}
- },
"html": "adsf
",
"expected": [
{
@@ -12,7 +8,7 @@
},
{
"event": "attribute",
- "data": ["class", "test"]
+ "data": ["class", "test", null]
},
{
"event": "opentag",
diff --git a/src/__fixtures__/Events/02-template.json b/src/__fixtures__/Events/02-template.json
index 76447a4e7..1b77db1b6 100644
--- a/src/__fixtures__/Events/02-template.json
+++ b/src/__fixtures__/Events/02-template.json
@@ -1,9 +1,5 @@
{
"name": "Template script tags",
- "options": {
- "handler": {},
- "parser": {}
- },
"html": "",
"expected": [
{
@@ -20,7 +16,7 @@
},
{
"event": "attribute",
- "data": ["type", "text/template"]
+ "data": ["type", "text/template", "\""]
},
{
"event": "opentag",
diff --git a/src/__fixtures__/Events/03-lowercase_tags.json b/src/__fixtures__/Events/03-lowercase_tags.json
index dafa4eedc..c65665a3b 100644
--- a/src/__fixtures__/Events/03-lowercase_tags.json
+++ b/src/__fixtures__/Events/03-lowercase_tags.json
@@ -1,7 +1,6 @@
{
"name": "Lowercase tags",
"options": {
- "handler": {},
"parser": {
"lowerCaseTags": true
}
@@ -14,7 +13,7 @@
},
{
"event": "attribute",
- "data": ["class", "test"]
+ "data": ["class", "test", null]
},
{
"event": "opentag",
diff --git a/src/__fixtures__/Events/04-cdata.json b/src/__fixtures__/Events/04-cdata.json
index c4c655535..2e3d1ac3c 100644
--- a/src/__fixtures__/Events/04-cdata.json
+++ b/src/__fixtures__/Events/04-cdata.json
@@ -1,7 +1,6 @@
{
"name": "CDATA",
"options": {
- "handler": {},
"parser": { "xmlMode": true }
},
"html": "<> fo]]>",
diff --git a/src/__fixtures__/Events/05-cdata-special.json b/src/__fixtures__/Events/05-cdata-special.json
index d23adf415..977b7bd92 100644
--- a/src/__fixtures__/Events/05-cdata-special.json
+++ b/src/__fixtures__/Events/05-cdata-special.json
@@ -1,9 +1,5 @@
{
"name": "CDATA (inside special)",
- "options": {
- "handler": {},
- "parser": {}
- },
"html": "",
"expected": [
{
diff --git a/src/__fixtures__/Events/06-leading-lt.json b/src/__fixtures__/Events/06-leading-lt.json
index f99044f09..101800b59 100644
--- a/src/__fixtures__/Events/06-leading-lt.json
+++ b/src/__fixtures__/Events/06-leading-lt.json
@@ -1,9 +1,5 @@
{
"name": "leading lt",
- "options": {
- "handler": {},
- "parser": {}
- },
"html": ">a>",
"expected": [
{
diff --git a/src/__fixtures__/Events/07-self-closing.json b/src/__fixtures__/Events/07-self-closing.json
index 6cbabbfbb..b5ba22591 100644
--- a/src/__fixtures__/Events/07-self-closing.json
+++ b/src/__fixtures__/Events/07-self-closing.json
@@ -1,9 +1,5 @@
{
"name": "Self-closing tags",
- "options": {
- "handler": {},
- "parser": {}
- },
"html": "Foo
",
"expected": [
{
@@ -12,7 +8,7 @@
},
{
"event": "attribute",
- "data": ["href", "http://test.com/"]
+ "data": ["href", "http://test.com/", null]
},
{
"event": "opentag",
diff --git a/src/__fixtures__/Events/08-implicit-close-tags.json b/src/__fixtures__/Events/08-implicit-close-tags.json
index f3b56ca21..01d8af217 100644
--- a/src/__fixtures__/Events/08-implicit-close-tags.json
+++ b/src/__fixtures__/Events/08-implicit-close-tags.json
@@ -1,17 +1,16 @@
{
"name": "Implicit close tags",
- "options": {},
"html": "Heading 2
Para
Heading 4
",
"expected": [
{ "event": "opentagname", "data": ["ol"] },
{ "event": "opentag", "data": ["ol", {}] },
{ "event": "opentagname", "data": ["li"] },
- { "event": "attribute", "data": ["class", "test"] },
+ { "event": "attribute", "data": ["class", "test", null] },
{ "event": "opentag", "data": ["li", { "class": "test" }] },
{ "event": "opentagname", "data": ["div"] },
{ "event": "opentag", "data": ["div", {}] },
{ "event": "opentagname", "data": ["table"] },
- { "event": "attribute", "data": ["style", "width:100%"] },
+ { "event": "attribute", "data": ["style", "width:100%", null] },
{ "event": "opentag", "data": ["table", { "style": "width:100%" }] },
{ "event": "opentagname", "data": ["tr"] },
{ "event": "opentag", "data": ["tr", {}] },
@@ -20,7 +19,7 @@
{ "event": "text", "data": ["TH"] },
{ "event": "closetag", "data": ["th"] },
{ "event": "opentagname", "data": ["td"] },
- { "event": "attribute", "data": ["colspan", "2"] },
+ { "event": "attribute", "data": ["colspan", "2", null] },
{ "event": "opentag", "data": ["td", { "colspan": "2" }] },
{ "event": "opentagname", "data": ["h3"] },
{ "event": "opentag", "data": ["h3", {}] },
diff --git a/src/__fixtures__/Events/09-attributes.json b/src/__fixtures__/Events/09-attributes.json
index b5aac14c5..c1f72716a 100644
--- a/src/__fixtures__/Events/09-attributes.json
+++ b/src/__fixtures__/Events/09-attributes.json
@@ -1,9 +1,5 @@
{
"name": "attributes (no white space, no value, no quotes)",
- "options": {
- "handler": {},
- "parser": {}
- },
"html": "",
"expected": [
{
@@ -12,11 +8,11 @@
},
{
"event": "attribute",
- "data": ["class", "test0"]
+ "data": ["class", "test0", "\""]
},
{
"event": "attribute",
- "data": ["title", "test1"]
+ "data": ["title", "test1", "\""]
},
{
"event": "attribute",
@@ -24,7 +20,7 @@
},
{
"event": "attribute",
- "data": ["value", "test2"]
+ "data": ["value", "test2", null]
},
{
"event": "opentag",
diff --git a/src/__fixtures__/Events/10-crazy-attrib.json b/src/__fixtures__/Events/10-crazy-attrib.json
index a76ec161c..23b607739 100644
--- a/src/__fixtures__/Events/10-crazy-attrib.json
+++ b/src/__fixtures__/Events/10-crazy-attrib.json
@@ -1,9 +1,5 @@
{
"name": "crazy attribute",
- "options": {
- "handler": {},
- "parser": {}
- },
"html": "stuff
",
"expected": [
{
diff --git a/src/__fixtures__/Events/12-long-comment-end.json b/src/__fixtures__/Events/12-long-comment-end.json
index 65963e248..a6344b270 100644
--- a/src/__fixtures__/Events/12-long-comment-end.json
+++ b/src/__fixtures__/Events/12-long-comment-end.json
@@ -1,19 +1,15 @@
{
"name": "Long comment ending",
- "options": {
- "handler": {},
- "parser": {}
- },
"html": "",
"expected": [
{ "event": "opentagname", "data": ["meta"] },
- { "event": "attribute", "data": ["id", "before"] },
+ { "event": "attribute", "data": ["id", "before", "'"] },
{ "event": "opentag", "data": ["meta", { "id": "before" }] },
{ "event": "closetag", "data": ["meta"] },
{ "event": "comment", "data": [" text -"] },
{ "event": "commentend", "data": [] },
{ "event": "opentagname", "data": ["meta"] },
- { "event": "attribute", "data": ["id", "after"] },
+ { "event": "attribute", "data": ["id", "after", "'"] },
{ "event": "opentag", "data": ["meta", { "id": "after" }] },
{ "event": "closetag", "data": ["meta"] }
]
diff --git a/src/__fixtures__/Events/13-long-cdata-end.json b/src/__fixtures__/Events/13-long-cdata-end.json
index b000ad7b0..9e8d4f93c 100644
--- a/src/__fixtures__/Events/13-long-cdata-end.json
+++ b/src/__fixtures__/Events/13-long-cdata-end.json
@@ -1,7 +1,6 @@
{
"name": "Long CDATA ending",
"options": {
- "handler": {},
"parser": { "xmlMode": true }
},
"html": "",
diff --git a/src/__fixtures__/Events/14-implicit-open-tags.json b/src/__fixtures__/Events/14-implicit-open-tags.json
index fdcd647e9..274d10a76 100644
--- a/src/__fixtures__/Events/14-implicit-open-tags.json
+++ b/src/__fixtures__/Events/14-implicit-open-tags.json
@@ -1,9 +1,5 @@
{
"name": "Implicit open p and br tags",
- "options": {
- "handler": {},
- "parser": {}
- },
"html": "
",
"expected": [
{ "event": "opentagname", "data": ["div"] },
diff --git a/src/__fixtures__/Events/15-lt-whitespace.json b/src/__fixtures__/Events/15-lt-whitespace.json
index 6c6ef6455..47b26abe9 100644
--- a/src/__fixtures__/Events/15-lt-whitespace.json
+++ b/src/__fixtures__/Events/15-lt-whitespace.json
@@ -1,9 +1,5 @@
{
"name": "lt followed by whitespace",
- "options": {
- "handler": {},
- "parser": {}
- },
"html": "a < b",
"expected": [
{
diff --git a/src/__fixtures__/Events/16-double_attribs.json b/src/__fixtures__/Events/16-double_attribs.json
index d21d57f50..1b811cbb3 100644
--- a/src/__fixtures__/Events/16-double_attribs.json
+++ b/src/__fixtures__/Events/16-double_attribs.json
@@ -1,9 +1,5 @@
{
"name": "double attribute",
- "options": {
- "handler": {},
- "parser": {}
- },
"html": "",
"expected": [
{
@@ -12,11 +8,11 @@
},
{
"event": "attribute",
- "data": ["class", "test"]
+ "data": ["class", "test", null]
},
{
"event": "attribute",
- "data": ["class", "boo"]
+ "data": ["class", "boo", null]
},
{
"event": "opentag",
diff --git a/src/__fixtures__/Events/17-numeric_entities.json b/src/__fixtures__/Events/17-numeric_entities.json
index 02bfb3fdd..a869954b5 100644
--- a/src/__fixtures__/Events/17-numeric_entities.json
+++ b/src/__fixtures__/Events/17-numeric_entities.json
@@ -1,9 +1,5 @@
{
"name": "numeric entities",
- "options": {
- "handler": {},
- "parser": { "decodeEntities": true }
- },
"html": "abcdfg&#x;h",
"expected": [
{
diff --git a/src/__fixtures__/Events/18-legacy_entities.json b/src/__fixtures__/Events/18-legacy_entities.json
index 9ee83d7f9..3c3281e5e 100644
--- a/src/__fixtures__/Events/18-legacy_entities.json
+++ b/src/__fixtures__/Events/18-legacy_entities.json
@@ -1,9 +1,5 @@
{
"name": "legacy entities",
- "options": {
- "handler": {},
- "parser": { "decodeEntities": true }
- },
"html": "&elíe&eer;s<er",
"expected": [
{
diff --git a/src/__fixtures__/Events/19-named_entities.json b/src/__fixtures__/Events/19-named_entities.json
index d71a4f80d..25a941273 100644
--- a/src/__fixtures__/Events/19-named_entities.json
+++ b/src/__fixtures__/Events/19-named_entities.json
@@ -1,9 +1,5 @@
{
"name": "named entities",
- "options": {
- "handler": {},
- "parser": { "decodeEntities": true }
- },
"html": "&el<er∳foo&bar",
"expected": [
{
diff --git a/src/__fixtures__/Events/20-xml_entities.json b/src/__fixtures__/Events/20-xml_entities.json
index 0e636ba56..96d3ea393 100644
--- a/src/__fixtures__/Events/20-xml_entities.json
+++ b/src/__fixtures__/Events/20-xml_entities.json
@@ -1,8 +1,7 @@
{
"name": "xml entities",
"options": {
- "handler": {},
- "parser": { "decodeEntities": true, "xmlMode": true }
+ "parser": { "xmlMode": true }
},
"html": "&>&<üabcde",
"expected": [
diff --git a/src/__fixtures__/Events/21-entity_in_attribute.json b/src/__fixtures__/Events/21-entity_in_attribute.json
index 65b67c072..b41a90c1a 100644
--- a/src/__fixtures__/Events/21-entity_in_attribute.json
+++ b/src/__fixtures__/Events/21-entity_in_attribute.json
@@ -1,9 +1,5 @@
{
"name": "entity in attribute",
- "options": {
- "handler": {},
- "parser": { "decodeEntities": true }
- },
"html": "",
"expected": [
{
@@ -14,7 +10,8 @@
"event": "attribute",
"data": [
"href",
- "http://example.com/page?param=value¶m2¶m3=>testing",
"expected": [
{
diff --git a/src/__fixtures__/Events/23-legacy_entity_fail.json b/src/__fixtures__/Events/23-legacy_entity_fail.json
index b7bf5afc1..be7d3cb26 100644
--- a/src/__fixtures__/Events/23-legacy_entity_fail.json
+++ b/src/__fixtures__/Events/23-legacy_entity_fail.json
@@ -1,9 +1,5 @@
{
"name": "legacy entities",
- "options": {
- "handler": {},
- "parser": { "decodeEntities": true }
- },
"html": "M&M",
"expected": [
{
diff --git a/src/__fixtures__/Events/24-special_special.json b/src/__fixtures__/Events/24-special_special.json
index f81a62f58..b347a0ff1 100644
--- a/src/__fixtures__/Events/24-special_special.json
+++ b/src/__fixtures__/Events/24-special_special.json
@@ -1,8 +1,71 @@
{
"name": "Special special tags",
- "options": {},
- "html": "