feat: Support forbidden words in dictionaries (#1516)

## Making Words Forbidden

There are several ways to mark a word as forbidden:

1. In a custom word list, with words beginning with `!`.

   ```
   !forbiddenWord
   ```

2. In the `words` section of the `cspell` configuration:

   ```
   "words": [
       "!forbiddenWord",
       "configstore"
   ],
   ```

3. In the `flagWords` section of the `cspell` configuration:

   ```
   "flagWords": ["forbiddenWord"]
   ```

## Overriding Forbidden Words

Sometimes it is necessary to allow a word even if it is forbidden.

### In a comment

```js
/**
 * Do not mark `forbiddenWord` as incorrect.
 * cspell:ignore forbiddenWord
 */
```

### In the `cspell` configuration

```jsonc
{
    "ignoreWords": ["forbiddenWord"]
}
```
streetsidesoftware · Aug 14, 2021 · 8d7596b · 8d7596b
1 parent 9f19c81
commit 8d7596b
Show file tree

Hide file tree

Showing 20 changed files with 307 additions and 84 deletions.
diff --git a/packages/cspell-lib/src/SpellingDictionary/SpellingDictionary.ts b/packages/cspell-lib/src/SpellingDictionary/SpellingDictionary.ts
@@ -37,6 +37,7 @@ export interface SpellingDictionary {
     has(word: string, useCompounds: boolean): boolean;
     has(word: string, options: HasOptions): boolean;
     has(word: string, options?: HasOptions): boolean;
+    isForbidden(word: string): boolean;
     suggest(
         word: string,
         numSuggestions?: number,

diff --git a/packages/cspell-lib/src/SpellingDictionary/SpellingDictionaryCollection.ts b/packages/cspell-lib/src/SpellingDictionary/SpellingDictionaryCollection.ts
@@ -18,6 +18,8 @@ import {
 import { CASE_INSENSITIVE_PREFIX } from 'cspell-trie-lib';
 import { genSequence } from 'gensequence';
 import { getDefaultSettings } from '../Settings';
+import { memorizer } from '../util/Memorizer';
+import { SpellingDictionaryFromTrie } from './SpellingDictionaryFromTrie';
 
 function identityString(w: string): string {
     return w;
@@ -40,7 +42,11 @@ export class SpellingDictionaryCollection implements SpellingDictionary {
 
     public has(word: string, hasOptions?: HasOptions): boolean {
         const options = hasOptionToSearchOption(hasOptions);
-        return !this.wordsToFlag.has(word.toLowerCase()) && isWordInAnyDictionary(this.dictionaries, word, options);
+        return !this.wordsToFlag.has(word.toLowerCase()) && !!isWordInAnyDictionary(this.dictionaries, word, options);
+    }
+
+    public isForbidden(word: string): boolean {
+        return this.wordsToFlag.has(word.toLowerCase()) || !!this._isForbiddenInDict(word);
     }
 
     public suggest(
@@ -95,6 +101,11 @@ export class SpellingDictionaryCollection implements SpellingDictionary {
     public getErrors(): Error[] {
         return this.dictionaries.reduce((errors, dict) => errors.concat(dict.getErrors?.() || []), [] as Error[]);
     }
+
+    private _isForbiddenInDict = memorizer(
+        (word: string) => isWordForbiddenInAnyDictionary(this.dictionaries, word),
+        SpellingDictionaryFromTrie.cachedWordsLimit
+    );
 }
 
 export function createCollection(
@@ -105,8 +116,16 @@ export function createCollection(
     return new SpellingDictionaryCollection(dictionaries, name, wordsToFlag);
 }
 
-export function isWordInAnyDictionary(dicts: SpellingDictionary[], word: string, options: SearchOptions): boolean {
-    return !!genSequence(dicts).first((dict) => dict.has(word, options));
+function isWordInAnyDictionary(
+    dicts: SpellingDictionary[],
+    word: string,
+    options: SearchOptions
+): SpellingDictionary | undefined {
+    return genSequence(dicts).first((dict) => dict.has(word, options));
+}
+
+function isWordForbiddenInAnyDictionary(dicts: SpellingDictionary[], word: string): SpellingDictionary | undefined {
+    return genSequence(dicts).first((dict) => dict.isForbidden(word));
 }
 
 export function createCollectionP(
@@ -116,3 +135,8 @@ export function createCollectionP(
 ): Promise<SpellingDictionaryCollection> {
     return Promise.all(dicts).then((dicts) => new SpellingDictionaryCollection(dicts, name, wordsToFlag));
 }
+
+export const __testing__ = {
+    isWordInAnyDictionary,
+    isWordForbiddenInAnyDictionary,
+};
diff --git a/packages/cspell-lib/src/SpellingDictionary/SpellingDictionaryFromTrie.ts b/packages/cspell-lib/src/SpellingDictionary/SpellingDictionaryFromTrie.ts
@@ -85,6 +85,11 @@ export class SpellingDictionaryFromTrie implements SpellingDictionary {
         }
         return false;
     }
+
+    public isForbidden(word: string): boolean {
+        return this.trie.isForbiddenWord(word);
+    }
+
     public suggest(
         word: string,
         numSuggestions?: number,

diff --git a/packages/cspell-lib/src/SpellingDictionary/createSpellingDictionary.ts b/packages/cspell-lib/src/SpellingDictionary/createSpellingDictionary.ts
@@ -24,6 +24,7 @@ export function createFailedToLoadDictionary(error: SpellingDictionaryLoadError)
         source,
         type: 'error',
         has: () => false,
+        isForbidden: () => false,
         suggest: () => [],
         mapWord: (a) => a,
         genSuggestions: () => {

diff --git a/packages/cspell-lib/src/textValidator.test.ts b/packages/cspell-lib/src/textValidator.test.ts
@@ -191,18 +191,24 @@ describe('Validate textValidator functions', () => {
         const words = results.sort((a, b) => a.offset - b.offset).map((r) => r.text);
         expect(words.join(' ')).toBe('Test the line breaks from begin to end eol');
     });
+
+    test.each`
+        text        | ignoreWords   | expected
+        ${'red'}    | ${[]}         | ${[]}
+        ${'color'}  | ${[]}         | ${[ov({ text: 'color', isFound: false })]}
+        ${'colour'} | ${[]}         | ${[ov({ text: 'colour', isFlagged: true })]}
+        ${'colour'} | ${['colour']} | ${[]}
+    `('Validate forbidden words', ({ text, ignoreWords, expected }) => {
+        const dict = getSpellingDictionaryCollectionSync();
+        const result = [
+            ...validateText(text, dict, { ignoreWords, ignoreCase: false, ignoreWordsAreCaseSensitive: false }),
+        ];
+        expect(result).toEqual(expected);
+    });
 });
 
 async function getSpellingDictionaryCollection() {
-    const dicts = await Promise.all([
-        createSpellingDictionary(colors, 'colors', 'test'),
-        createSpellingDictionary(fruit, 'fruit', 'test'),
-        createSpellingDictionary(animals, 'animals', 'test'),
-        createSpellingDictionary(insects, 'insects', 'test'),
-        createSpellingDictionary(words, 'words', 'test', { repMap: [['’', "'"]] }),
-    ]);
-
-    return createCollection(dicts, 'collection');
+    return getSpellingDictionaryCollectionSync();
 }
 
 const colors = [
@@ -262,6 +268,8 @@ const words = [
     "should've",
 ];
 
+const forbiddenWords = ['!colour', '!favour'];
+
 const specialWords = ['Range8', '4wheel', 'db2Admin', 'Amsterdam', 'Berlin', 'Paris'];
 
 const sampleText = `
@@ -270,3 +278,20 @@ const sampleText = `
     The little ant ate the big purple grape.
     The orange tiger ate the whiteberry and the redberry.
 `;
+
+function getSpellingDictionaryCollectionSync() {
+    const dicts = [
+        createSpellingDictionary(colors, 'colors', 'test'),
+        createSpellingDictionary(fruit, 'fruit', 'test'),
+        createSpellingDictionary(animals, 'animals', 'test'),
+        createSpellingDictionary(insects, 'insects', 'test'),
+        createSpellingDictionary(words, 'words', 'test', { repMap: [['’', "'"]] }),
+        createSpellingDictionary(forbiddenWords, 'forbidden-words', 'test'),
+    ];
+
+    return createCollection(dicts, 'collection');
+}
+
+function ov<T>(t: Partial<T>, ...rest: Partial<T>[]): T {
+    return expect.objectContaining(Object.assign({}, t, ...rest));
+}
diff --git a/packages/cspell-lib/src/textValidator.ts b/packages/cspell-lib/src/textValidator.ts
@@ -53,7 +53,7 @@ export function validateText(
     text: string,
     dict: SpellingDictionary,
     options: ValidationOptions
-): Sequence<Text.TextOffset> {
+): Sequence<ValidationResult> {
     const { maxNumberOfProblems = defaultMaxNumberOfProblems, maxDuplicateProblems = defaultMaxDuplicateProblems } =
         options;
 
@@ -109,7 +109,7 @@ function lineValidator(dict: SpellingDictionary, options: ValidationOptions): Li
         caseSensitive,
     });
 
-    function isIgnored(word: string) {
+    function isWordIgnored(word: string) {
         return ignoreDict.has(word, { ignoreCase });
     }
 
@@ -129,18 +129,21 @@ function lineValidator(dict: SpellingDictionary, options: ValidationOptions): Li
     };
 
     function testForFlaggedWord(wo: TextOffset): boolean {
-        return setOfFlagWords.has(wo.text) || setOfFlagWords.has(wo.text.toLowerCase());
+        const text = wo.text;
+        return setOfFlagWords.has(text) || setOfFlagWords.has(text.toLowerCase()) || dict.isForbidden(text);
     }
 
     function checkFlagWords(word: ValidationResult): ValidationResult {
-        const isFlagged = testForFlaggedWord(word);
+        const isIgnored = isWordIgnored(word.text);
+        const isFlagged = !isIgnored && testForFlaggedWord(word);
         word.isFlagged = isFlagged;
         return word;
     }
 
     function checkWord(word: ValidationResult, options: HasWordOptions): ValidationResult {
-        const isFlagged = testForFlaggedWord(word);
-        const isFound = isFlagged ? undefined : isWordValid(dict, word, word.line, options);
+        const isIgnored = isWordIgnored(word.text);
+        const { isFlagged = !isIgnored && testForFlaggedWord(word) } = word;
+        const isFound = isFlagged ? undefined : isIgnored || isWordValid(dict, word, word.line, options);
         return { ...word, isFlagged, isFound };
     }
 
@@ -167,7 +170,6 @@ function lineValidator(dict: SpellingDictionary, options: ValidationOptions): Li
                 })
                 .map((wo) => (wo.isFlagged ? wo : checkWord(wo, hasWordOptions)))
                 .filter(rememberFilter((wo) => wo.isFlagged || !wo.isFound))
-                .filter(rememberFilter((wo) => !isIgnored(wo.text)))
                 .filter(rememberFilter((wo) => !RxPat.regExRepeatedChar.test(wo.text))) // Filter out any repeated characters like xxxxxxxxxx
                 // get back the original text.
                 .map((wo) => ({
@@ -176,7 +178,7 @@ function lineValidator(dict: SpellingDictionary, options: ValidationOptions): Li
                 }))
                 .toArray();
 
-            if (!codeWordResults.length || isIgnored(vr.text) || checkWord(vr, hasWordOptions).isFound) {
+            if (!codeWordResults.length || isWordIgnored(vr.text) || checkWord(vr, hasWordOptions).isFound) {
                 rememberFilter((_) => false)(vr);
                 return [];
             }

diff --git a/packages/cspell-lib/src/trace.ts b/packages/cspell-lib/src/trace.ts
@@ -7,6 +7,7 @@ import { genSequence } from 'gensequence';
 export interface TraceResult {
     word: string;
     found: boolean;
+    forbidden: boolean;
     dictName: string;
     dictSource: string;
     configSource: string;
@@ -50,6 +51,7 @@ export async function traceWords(words: string[], settings: CSpellSettings): Pro
             return dicts.dictionaries.map((dict) => ({
                 word,
                 found: dict.has(word),
+                forbidden: dict.isForbidden(word),
                 dictName: dict.name,
                 dictSource: dict.source,
                 configSource: config.name || '',

diff --git a/packages/cspell-lib/src/util/Memorizer.test.ts b/packages/cspell-lib/src/util/Memorizer.test.ts
@@ -20,25 +20,49 @@ describe('Validate Memorizer', () => {
         fnTest(0, 1, 5);
     });
 
-    test('cache reset', () => {
+    test('cache reset dual cache', () => {
         const counts = new Map<number, number>();
         const fn = (a: number) => {
-            counts.set(a, (counts.get(a) || 0) + 1);
-            return a;
+            const v = (counts.get(a) || 0) + 1;
+            counts.set(a, v);
+            return v;
         };
         const calc = memorizer(fn, 2);
-        const fnTest = (v: number, expected: number, repeat: number) => {
-            for (; repeat > 0; repeat--) {
-                expect(calc(v)).toBe(v);
-                expect(counts.get(v)).toBe(expected);
-            }
-        };
+        expect(calc(5)).toBe(1);
+        expect(calc(5)).toBe(1);
+        expect(calc(6)).toBe(1);
+        expect(calc(0)).toBe(1);
+        expect(calc(0)).toBe(1);
+        expect(calc(5)).toBe(1);
+        expect(calc(6)).toBe(1);
+        expect(calc(0)).toBe(1);
+    });
+});
 
-        fnTest(5, 1, 5);
-        fnTest(6, 1, 5);
-        fnTest(0, 1, 5);
-        fnTest(5, 2, 5);
-        fnTest(6, 2, 5);
-        fnTest(0, 2, 5);
+describe('Validate Memorizer Dual Cache', () => {
+    const counts = new Map<string, number>();
+    const fn = (a: string) => {
+        const v = (counts.get(a) || 0) + 1;
+        counts.set(a, v);
+        return v;
+    };
+    const calc = memorizer(fn, 2);
+
+    test.each`
+        value  | expected
+        ${'a'} | ${1}
+        ${'b'} | ${1}
+        ${'c'} | ${1}
+        ${'b'} | ${1}
+        ${'a'} | ${1}
+        ${'c'} | ${1}
+        ${'d'} | ${1}
+        ${'e'} | ${1}
+        ${'a'} | ${1}
+        ${'b'} | ${2}
+        ${'c'} | ${2}
+    `('cache reset dual cache $value $expected', ({ value, expected }) => {
+        expect(calc(value)).toBe(expected);
+        expect(calc(value)).toBe(expected);
     });
 });
diff --git a/packages/cspell-lib/src/util/Memorizer.ts b/packages/cspell-lib/src/util/Memorizer.ts
@@ -1,22 +1,71 @@
+/* eslint-disable @typescript-eslint/no-explicit-any */
 const defaultSize = 50000;
 
-export function memorizer<A0, T>(fn: (arg: A0) => T, size?: number): (arg0: A0) => T;
-export function memorizer<A0, A1, T>(fn: (arg: A0, arg1: A1) => T, size?: number): (arg0: A0, arg1: A1) => T;
-export function memorizer<A0, A1, A2, T>(
-    fn: (arg: A0, arg1: A1, arg2: A2) => T,
-    size?: number
-): (arg0: A0, arg1: A1, arg2: A2) => T;
-export function memorizer<A, T>(fn: (...args: A[]) => T, size: number = defaultSize): (...args: A[]) => T {
-    const cache = new Map<string, T>();
-    return (...args: A[]) => {
-        const key = args.join('>!@[');
-        if (!cache.has(key)) {
-            if (cache.size >= size) {
-                cache.clear();
-            }
-            cache.set(key, fn(...args));
+/** Only types that can be easily turned into strings */
+type P0 = string | number | boolean | RegExp | undefined;
+
+type Primitive = P0 | P0[];
+
+/**
+ * Memorize the result of a function call to be returned on later calls with the same parameters.
+ *
+ * Note: The parameters are converted into a string: `key = args.join('>!@[')`
+ *
+ * For speed, it keeps two caches, L0 and L1. Each cache can contain up to `size` values. The actual number
+ * of cached values is between `size + 1` and `size * 2`.
+ *
+ * Caches are NOT sorted. Items are added to L0 until it is full. Once it is full, L1 takes over L0's values and L0 is cleared.
+ *
+ * If an item is not found in L0, L1 is checked before calling `fn`, and the resulting value is stored in L0.
+ *
+ * @param fn - function to be called.
+ * @param size - size of cache
+ */
+export function memorizer<
+    F extends (...args: Primitive[]) => any,
+    Args extends Parameters<F> = Parameters<F>,
+    R extends ReturnType<F> = ReturnType<F>
+>(fn: F, size?: number): (...args: Args) => R {
+    return memorizerKeyBy(fn, (...args: Args) => args.join('>!@['), size);
+}
+
+/**
+ * Memorize the result of a function call to be returned on later calls with the same parameters.
+ *
+ * Note: `keyFn` is used to convert the function parameters into a string to look up in the cache.
+ *
+ * For speed, it keeps two caches, L0 and L1. Each cache can contain up to `size` values. The actual number
+ * of cached values is between `size + 1` and `size * 2`.
+ *
+ * Caches are NOT sorted. Items are added to L0 until it is full. Once it is full, L1 takes over L0's values and L0 is cleared.
+ *
+ * If an item is not found in L0, L1 is checked before calling `fn`, and the resulting value is stored in L0.
+ *
+ * @param fn - function to be memorized
+ * @param keyFn - extracts a `key` value from the arguments to `fn` to be used as the key to the cache
+ * @param size - size of the cache.
+ * @returns A function
+ */
+export function memorizerKeyBy<
+    F extends (...args: any[]) => any,
+    Args extends Parameters<F> = Parameters<F>,
+    R extends ReturnType<F> = ReturnType<F>
+>(fn: F, keyFn: (...args: Args) => string, size: number = defaultSize): (...args: Args) => R {
+    let count = 0;
+    let cacheL0: Record<string, R> = Object.create(null);
+    let cacheL1: Record<string, R> = Object.create(null);
+    return (...args: Args) => {
+        const key = keyFn(...args);
+        if (key in cacheL0) return cacheL0[key];
+
+        const v = key in cacheL1 ? cacheL1[key] : fn(...args);
+        if (count >= size) {
+            cacheL1 = cacheL0;
+            cacheL0 = Object.create(null);
+            count = 0;
         }
-        // eslint-disable-next-line @typescript-eslint/no-non-null-assertion
-        return cache.get(key)!;
+        cacheL0[key] = v;
+        ++count;
+        return v;
     };
 }