-
-
Notifications
You must be signed in to change notification settings - Fork 16
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
🐛 fix valid Unicode Properties (fixes #6)
1 parent
00abeab
commit 2bd358f
Showing
6 changed files
with
418 additions
and
478 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,213 @@ | ||
import fs from "fs" | ||
import { JSDOM, DOMWindow } from "jsdom" | ||
import { CLIEngine } from "eslint" | ||
|
||
/**
 * The ECMAScript specification editions to read.
 *
 * Each entry carries the page URL, the edition's version number, and the
 * selectors of the three tables whose first-column `<code>` cells are
 * collected (binary properties, General_Category values, Script values).
 * All editions listed here use the same three table selectors.
 */
const DataSources = [
    { url: "https://www.ecma-international.org/ecma-262/9.0/", version: 2018 },
    { url: "https://www.ecma-international.org/ecma-262/10.0/", version: 2019 },
    { url: "https://tc39.es/ecma262/", version: 2020 },
].map(({ url, version }) => ({
    url,
    version,
    binProperties: "#table-binary-unicode-properties",
    gcValues: "#table-unicode-general-category-values",
    scValues: "#table-unicode-script-values",
}))

/** Path of the generated file that this script writes. */
const FILE_PATH = "src/unicode/properties.ts"

/** Sink for progress and error messages. */
const logger = console
|
||
/**
 * The values collected for one ECMAScript edition: one list of names per
 * table (binary properties, General_Category values, Script values).
 */
type Datum = Record<"binProperties" | "gcValues" | "scValues", string[]>
|
||
// Main
// Fetches every edition listed in DataSources, collects the values each one
// introduces, renders the source of the properties module, formats it with
// ESLint's fixer and writes it to FILE_PATH.
;(async () => {
    // Values already attributed to an earlier edition. The sets are shared by
    // every iteration, so each edition records only values not seen before.
    const seen = {
        binProperties: new Set<string>(),
        gcValues: new Set<string>(),
        scValues: new Set<string>(),
    }
    const data: Record<number, Datum> = Object.create(null)

    for (const source of DataSources) {
        logger.log("---- ECMAScript %d ----", source.version)
        const datum: Datum = { binProperties: [], gcValues: [], scValues: [] }
        data[source.version] = datum

        // Keep requesting the page until it loads. Only a failure whose
        // message is exactly "Error: socket hang up" is retried (after a
        // two second pause); every other failure aborts the script.
        let window: DOMWindow | null = null
        while (window == null) {
            try {
                logger.log("Fetching data from %o", source.url)
                window = (await JSDOM.fromURL(source.url)).window
            } catch (error) {
                const isSocketHangUp =
                    Boolean(error) && error.message === "Error: socket hang up"
                if (!isSocketHangUp) {
                    throw error
                }
                logger.log("Failed: %s", error)
                await new Promise(resolve => setTimeout(resolve, 2000))
            }
        }

        logger.log("Parsing tables")
        datum.binProperties = collectValues(
            window,
            source.binProperties,
            seen.binProperties,
        )
        datum.gcValues = collectValues(window, source.gcValues, seen.gcValues)
        datum.scValues = collectValues(window, source.scValues, seen.scValues)

        logger.log("Done")
    }

    logger.log("Generating code...")
    const versions = Object.keys(data)
    const entries = Object.entries(data)
    // One lazily-filled slot per edition; the same text is used in all three
    // pattern tables of the generated module.
    const patternSlots = versions
        .map(version => `es${version}: null as RegExp | null,`)
        .join("\n")
    const gcChecks = entries
        .map(([version, { gcValues }]) =>
            makeVerificationCode(version, "gcValuePatterns", gcValues, 52),
        )
        .join("\n")
    const scChecks = entries
        .map(([version, { scValues }]) =>
            makeVerificationCode(version, "scValuePatterns", scValues, 52),
        )
        .join("\n")
    const binChecks = entries
        .map(([version, { binProperties }]) =>
            makeVerificationCode(
                version,
                "binPropertyPatterns",
                binProperties,
                56,
            ),
        )
        .join("\n")

    const code = `/* This file was generated with ECMAScript specifications. */
const gcNamePattern = /^(?:General_Category|gc)$/u
const scNamePattern = /^(?:Script(?:_Extensions)?|scx?)$/u
const gcValuePatterns = {
${patternSlots}
}
const scValuePatterns = {
${patternSlots}
}
const binPropertyPatterns = {
${patternSlots}
}
export function isValidUnicodeProperty(version: number, name: string, value: string): boolean {
if (gcNamePattern.test(name)) {
${gcChecks}
}
if (scNamePattern.test(name)) {
${scChecks}
}
return false
}
export function isValidLoneUnicodeProperty(version: number, value: string): boolean {
${binChecks}
return false
}
`

    logger.log("Formatting code...")
    const engine = new CLIEngine({ fix: true })
    const [report] = engine.executeOnText(code, "properties.ts").results
    // `output` is falsy when the fixer produced no replacement text; in that
    // case the unformatted code is written as-is.
    const formatted = report.output || code

    logger.log("Writing '%s'...", FILE_PATH)
    await save(formatted)

    logger.log("Completed!")
})().catch(error => {
    // Report the failure and signal it through the process exit code.
    logger.error(error.stack)
    process.exitCode = 1
})
|
||
/**
 * Reads the `<code>` text of every first-column cell under the element
 * matched by `id` and returns the values not yet present in `existingSet`.
 *
 * A cell without text content contributes the empty string.
 *
 * @param window The window whose document is queried.
 * @param id Selector of the table to read.
 * @param existingSet Values already collected; every value returned here is
 * also added to this set.
 * @returns The newly seen values, sorted with the default string ordering.
 */
function collectValues(
    window: Window,
    id: string,
    existingSet: Set<string>,
): string[] {
    const cells = window.document.querySelectorAll(
        `${id} td:nth-child(1) code`,
    )
    const fresh: string[] = []

    for (const cell of Array.from(cells)) {
        const text = cell.textContent || ""
        if (!existingSet.has(text)) {
            existingSet.add(text)
            fresh.push(text)
        }
    }

    fresh.sort()
    return fresh
}
|
||
/**
 * Renders the source of one version-guarded check for the generated module.
 *
 * The emitted code runs when `version >= <version>`, creates the RegExp in
 * `<patternVar>.es<version>` on first use, and returns `true` when `value`
 * matches it.
 *
 * @param version The edition the values belong to.
 * @param patternVar Name of the pattern table in the generated module.
 * @param values The names to accept; an empty list yields an empty string.
 * @param maxLen Chunk length limit forwarded to `makeRegExpPatternCode`.
 * @returns The source text of the check, or `""` when there are no values.
 */
function makeVerificationCode(
    version: string,
    patternVar: string,
    values: string[],
    maxLen: number,
): string {
    if (values.length === 0) {
        return ""
    }

    const slot = `${patternVar}.es${version}`
    // The leading and trailing empty entries reproduce the newline that
    // opens and closes the emitted snippet.
    return [
        "",
        `if (version >= ${version}) {`,
        `if (!${slot}) {`,
        `${slot} = new RegExp(`,
        `${makeRegExpPatternCode(values, maxLen)},`,
        '"u"',
        ")",
        "}",
        `if (${slot}.test(value)) {`,
        "return true",
        "}",
        "}",
        "",
    ].join("\n")
}
|
||
/**
 * Builds the source text of a string expression whose value is the anchored
 * alternation `^(?:name1|name2|...)$`.
 *
 * The pattern is split into several double-quoted chunks joined with `+`.
 * A name is appended to the current chunk unless that would make the chunk
 * longer than `maxLen`, in which case the name starts a new chunk.
 *
 * @param names The alternatives, inserted verbatim (no escaping is applied).
 * @param maxLen Maximum chunk length before a new chunk is started.
 * @returns The source text of the concatenation expression.
 */
function makeRegExpPatternCode(names: string[], maxLen: number): string {
    const chunks: string[] = ["^(?:"]

    names.forEach(name => {
        const piece = `${name}|`
        const current = chunks.length - 1

        if (chunks[current].length + piece.length <= maxLen) {
            chunks[current] += piece
        } else {
            chunks.push(piece)
        }
    })

    // Drop the separator left after the final name, then close the group.
    const tail = chunks.length - 1
    const closed = chunks[tail].replace(/\|$/u, "")
    chunks[tail] = `${closed})$`

    return chunks.map(chunk => `"${chunk}"`).join("+")
}
|
||
/**
 * Writes `content` to `FILE_PATH`.
 *
 * @param content The text to write.
 * @returns A promise that resolves once the write has finished and rejects
 * with the error reported by `fs.writeFile`.
 */
function save(content: string): Promise<void> {
    return new Promise<void>((resolve, reject) => {
        fs.writeFile(FILE_PATH, content, error => {
            if (error) {
                reject(error)
                return
            }
            resolve()
        })
    })
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.