Skip to content

Commit

Permalink
🎨 Convert RegExp objects to Sets (#8)
Browse files Browse the repository at this point in the history
  • Loading branch information
Validark authored Apr 3, 2020
1 parent d180814 commit 21bd514
Showing 4 changed files with 1,619 additions and 438 deletions.
66 changes: 33 additions & 33 deletions scripts/update-unicode-ids.ts
Original file line number Diff line number Diff line change
@@ -51,10 +51,21 @@ const logger = console
normalizeRanges(idContinueLarge)

logger.log("Generating code...")
const { set: setStart, ranges: rangesStart } = makeLargePattern(
idStartLarge,
)

const { set: setContinue, ranges: rangesContinue } = makeLargePattern(
idContinueLarge,
)

let code = `${banner}
let largeIdStartPattern: RegExp | null = null;
let largeIdContinuePattern: RegExp | null = null;
let largeIdStartPatternSymbols: Set<number> | null = null
let largeIdStartPatternRanges: [number, number][] | null = null
let largeIdContinuePatternSymbols: Set<number> | null = null
let largeIdContinuePatternRanges: [number, number][] | null = null
export function isIdStart(cp: number): boolean {
${makeSmallCondtion(idStartSmall)}
@@ -65,18 +76,18 @@ export function isIdContinue(cp: number): boolean {
return isLargeIdStart(cp) || isLargeIdContinue(cp)
}
function isLargeIdStart(cp: number): boolean {
if (!largeIdStartPattern) {
largeIdStartPattern = new RegExp(${makeLargePattern(idStartLarge)}, "u")
if (largeIdStartPatternSymbols === null) {
largeIdStartPatternSymbols = ${setStart};
largeIdStartPatternRanges = ${rangesStart};
}
return largeIdStartPattern.test(String.fromCodePoint(cp))
return largeIdStartPatternSymbols.has(cp) || largeIdStartPatternRanges!.some(([r1, r2]) => r1 <= cp && cp <= r2);
}
function isLargeIdContinue(cp: number): boolean {
if (!largeIdContinuePattern) {
largeIdContinuePattern = new RegExp(${makeLargePattern(
idContinueLarge,
)}, "u")
if (largeIdContinuePatternSymbols === null) {
largeIdContinuePatternSymbols = ${setContinue};
largeIdContinuePatternRanges = ${rangesContinue};
}
return largeIdContinuePattern.test(String.fromCodePoint(cp))
return largeIdContinuePatternSymbols.has(cp) || largeIdContinuePatternRanges!.some(([r1, r2]) => r1 <= cp && cp <= r2);
}`

logger.log("Formatting code...")
@@ -147,35 +158,24 @@ function makeSmallCondtion(ranges: [number, number][]): string {
return conditions.join("\n")
}

function makeLargePattern(ranges: [number, number][]): string {
const lines = ["^["]
function makeLargePattern(ranges: [number, number][]) {
const symbols: string[] = []
const symbolRanges: string[] = []

for (const [min, max] of ranges) {
const line = lines[lines.length - 1]
const part =
min === max
? esc(min)
: min + 1 === max
? `${esc(min)}${esc(max)}`
: `${esc(min)}-${esc(max)}`

if (line.length + part.length > 60) {
lines.push(part)
if (min === max) {
symbols.push(`0x${min.toString(16)}`)
} else if (min + 1 === max) {
symbols.push(`0x${min.toString(16)}`, `0x${max.toString(16)}`)
} else {
lines[lines.length - 1] += part
symbolRanges.push(`[0x${min.toString(16)}, 0x${max.toString(16)}]`)
}
}
lines[lines.length - 1] += "]$"
return lines.map(line => `"${line}"`).join("+")
}

function esc(cp: number): string {
if (cp <= 0xff) {
return `\\x${cp.toString(16).padStart(2, "0")}`
}
if (cp <= 0xffff) {
return `\\u${cp.toString(16).padStart(4, "0")}`
return {
set: `new Set([${symbols.join()}])`,
ranges: `[${symbolRanges.join()}]`,
}
return `\\u{${cp.toString(16)}}`
}

function save(content: string): Promise<void> {
41 changes: 12 additions & 29 deletions scripts/update-unicode-properties.ts
Original file line number Diff line number Diff line change
@@ -87,34 +87,34 @@ type Datum = {
logger.log("Generating code...")
let code = `/* This file was generated with ECMAScript specifications. */
const gcNamePattern = /^(?:General_Category|gc)$/u
const scNamePattern = /^(?:Script(?:_Extensions)?|scx?)$/u
const gcNamePattern = new Set(["General_Category", "gc"])
const scNamePattern = new Set(["Script", "Script_Extensions", "sc", "scx"])
const gcValuePatterns = {
${Array.from(
Object.keys(data),
version => `es${version}: null as RegExp | null,`,
version => `es${version}: null as Set<string> | null,`,
).join("\n")}
}
const scValuePatterns = {
${Array.from(
Object.keys(data),
version => `es${version}: null as RegExp | null,`,
version => `es${version}: null as Set<string> | null,`,
).join("\n")}
}
const binPropertyPatterns = {
${Array.from(
Object.keys(data),
version => `es${version}: null as RegExp | null,`,
version => `es${version}: null as Set<string> | null,`,
).join("\n")}
}
export function isValidUnicodeProperty(version: number, name: string, value: string): boolean {
if (gcNamePattern.test(name)) {
if (gcNamePattern.has(name)) {
${Array.from(Object.entries(data), ([version, { gcValues }]) =>
makeVerificationCode(version, "gcValuePatterns", gcValues, 52),
).join("\n")}
}
if (scNamePattern.test(name)) {
if (scNamePattern.has(name)) {
${Array.from(Object.entries(data), ([version, { scValues }]) =>
makeVerificationCode(version, "scValuePatterns", scValues, 52),
).join("\n")}
@@ -175,35 +175,18 @@ function makeVerificationCode(

return `
if (version >= ${version}) {
if (!${patternVar}.es${version}) {
${patternVar}.es${version} = new RegExp(
${makeRegExpPatternCode(values, maxLen)},
"u"
)
if (${patternVar}.es${version} === null) {
${patternVar}.es${version} = new Set([${values
.map(v => `"${v}"`)
.join()}])
}
if (${patternVar}.es${version}.test(value)) {
if (${patternVar}.es${version}.has(value)) {
return true
}
}
`
}

/**
 * Builds the source text of a string-concatenation expression whose value is
 * an anchored alternation pattern of the form `^(?:name1|name2|...)$`.
 *
 * The pattern is emitted as several double-quoted string literals joined by
 * `+`. A new literal is started whenever appending the next `name|` piece
 * would push the current literal past `maxLen` characters; a single piece
 * longer than `maxLen` still gets a literal of its own.
 *
 * Names are inserted verbatim: no RegExp or string-literal escaping is applied.
 *
 * @param names Alternatives to place inside the non-capturing group.
 * @param maxLen Maximum length of one literal's content before wrapping.
 * @returns Code text such as `"^(?:a|b)$"` or `"^(?:"+"abc|"+"def)$"`.
 */
function makeRegExpPatternCode(names: string[], maxLen: number): string {
    const chunks: string[] = ["^(?:"]

    for (let i = 0; i < names.length; i++) {
        const piece = names[i] + "|"
        const last = chunks.length - 1

        if (chunks[last].length + piece.length <= maxLen) {
            chunks[last] += piece
        } else {
            chunks.push(piece)
        }
    }

    // Drop the trailing separator (if any) and close the group.
    const tail = chunks.length - 1
    chunks[tail] = chunks[tail].replace(/\|$/u, "") + ")$"

    return chunks.map(chunk => '"' + chunk + '"').join("+")
}

function save(content: string): Promise<void> {
return new Promise((resolve, reject) => {
fs.writeFile(FILE_PATH, content, error =>
Loading

0 comments on commit 21bd514

Please sign in to comment.