Skip to content

Commit

Permalink
fix(tools): update exclusions in search index
Browse files: browse the repository at this point in the history
  • Loading branch information
postspectacular committed Sep 24, 2022
1 parent 7fb366c commit 027fd92
Showing 1 changed file with 75 additions and 75 deletions.
150 changes: 75 additions & 75 deletions tools/src/build-search-index.ts
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
import { ArraySet, MultiTrie } from "@thi.ng/associative";
import {
files,
readJSON,
readText,
writeFile,
writeJSON,
files,
readJSON,
readText,
writeFile,
writeJSON,
} from "@thi.ng/file-io";
// @ts-ignore
import msgpack from "@ygoe/msgpack";
Expand All @@ -23,85 +23,85 @@ type IndexValue = number;
// Lookup tables assigning a numeric ID to each indexed source file name and
// to each package name. Both are later handed to `build()`.
const fileIDs = new Map<string, number>();
const pkgIDs = new Map<string, number>();
// The search index itself: every word maps to a set of encoded
// (package, file, line) locations produced by `encode()`.
const index = new MultiTrie<string, IndexValue>(null, {
    vals: () => new ArraySet<IndexValue>(),
});
// Words listed in this JSON file are never added to the index.
const ignore = new Set(readJSON("./tools/ignore-words.json", LOGGER));

// Field layout shared by the encoder and by `build()`: one entry per value
// passed to `encode(pkgId, fileId, ln)`, in that order.
// NOTE(review): entries look like [bit shift, bit mask] pairs, i.e. 8 bits for
// the package ID and 12 bits each for file ID and line number — confirm
// against `defEncoder`. If so, line numbers above 4095 would not fit.
const encodeConfig = [
    [0, 0xff],
    [8, 0xfff],
    [20, 0xfff],
];
const encode = defEncoder(encodeConfig);

// Number of source files which contributed at least one word to the index.
let numFiles = 0;

// Walk all TypeScript files under `packages/` and index (a) declared symbols
// matched by RE_SYM and (b) words found in doc comments (outside of fenced
// code examples), recording the package, file and line of each occurrence.
for (let f of files("packages", ".ts")) {
    // Only index package sources; skip dev/export/fixtures sub-directories.
    if (f.indexOf("/src/") < 0 || /\/(dev|export|fixtures)\//.test(f)) continue;
    console.log(f);
    const path = /packages\/([a-z0-9-]+)\/src\/(.+)/.exec(f)!;
    let [, pkg, fname] = path;
    // Strip the trailing ".ts" extension.
    fname = fname.slice(0, -3);
    // Look up (or assign) the package ID. An explicit `undefined` check is
    // required here: the first package receives ID 0, which is falsy, so a
    // `get(pkg) || ...` fallback would re-register it with a different ID on
    // its second file and produce colliding package IDs.
    let pkgId = pkgIDs.get(pkg);
    if (pkgId === undefined) {
        pkgId = pkgIDs.size;
        pkgIDs.set(pkg, pkgId);
    }
    // File IDs are keyed by the path relative to the package's `src` dir and
    // are only registered below once the file actually contributed a word.
    const fileId = fileIDs.has(fname) ? fileIDs.get(fname)! : fileIDs.size;
    const src = readText(f, LOGGER);
    let indexed = false;
    let isComment = false;
    let isCode = false;
    let ln = 0;
    // NOTE: per-file de-duplication is currently disabled (the `add()` calls
    // below are commented out), so every occurrence of a word is indexed.
    const knownWords = new Set<string>();
    for (let line of src.split("\n")) {
        ln++;
        if (RE_DOC_START.test(line)) {
            isComment = true;
            continue;
        }
        if (RE_DOC_END.test(line)) {
            isComment = false;
            continue;
        }
        // Each code fence line toggles the "inside example code" state.
        if (RE_DOC_CODE.test(line)) {
            isCode = !isCode;
            continue;
        }
        const sym = RE_SYM.exec(line);
        if (sym) {
            const word = sym[2].toLowerCase();
            // Skip very short, ignored, or underscore-prefixed symbols.
            if (word.length < 3 || ignore.has(word) || word.startsWith("_"))
                continue;
            if (!knownWords.has(word)) {
                // knownWords.add(word);
                index.add(word, encode(pkgId, fileId, ln));
                indexed = true;
            }
        } else if (isComment && !isCode) {
            // Fresh regex per line, so the stateful `lastIndex` of the
            // global flag always starts at 0.
            const re = /[@a-z][@a-z0-9_-]{2,}/gi;
            let match;
            while ((match = re.exec(line))) {
                const word = match[0].toLowerCase();
                if (ignore.has(word)) continue;
                if (!knownWords.has(word)) {
                    // knownWords.add(word);
                    index.add(word, encode(pkgId, fileId, ln));
                    indexed = true;
                }
            }
        }
    }
    if (indexed) {
        fileIDs.set(fname, fileId);
        numFiles++;
    }
}

// Assemble the final index payload from the encoder layout, the ID tables,
// the file count, the number of keys and values in the trie, and the trie
// itself.
const packed = build(
    encodeConfig,
    pkgIDs,
    fileIDs,
    numFiles,
    [...index.keys()].length,
    [...index.values()].length,
    index
);

// Write the payload as JSON via the file-io helper.
writeJSON("assets/search.json", packed, null, 0, LOGGER);
Expand All @@ -111,7 +111,7 @@ execSync("gzip -9 -f assets/search.bin");

// Upload the gzipped binary index to S3 via the AWS CLI and print the
// command's output.
console.log("uploading...");
console.log(
    execSync(
        `aws s3 cp assets/search.bin.gz s3://docs.thi.ng/umbrella/search-index-latest.bin --content-encoding gzip --acl public-read --profile thing-umbrella`
    ).toString()
);

0 comments on commit 027fd92

Please sign in to comment.