Skip to content

Commit

Permalink
feat: Support forbidden words in dictionaries (#1516)
Browse files Browse the repository at this point in the history
## Making Words Forbidden

There are several ways to mark a word as forbidden:

1. In a custom word list, by prefixing the word with `!`:
    ```
    !forbiddenWord
    ```
2. In the `words` section of the `cspell` configuration, by prefixing the word with `!`:
    ```
    "words": [
        "!forbiddenWord",
        "configstore"
    ],
    ```
3. In the `flagWords` section of the `cspell` configuration:
    ```
    "flagWords": ["forbiddenWord"]
    ```

## Overriding Forbidden Words
Sometimes it is necessary to allow a word even if it is forbidden.

### In a comment

```js
/**
 * Do not mark `forbiddenWord` as incorrect.
 * cspell:ignore forbiddenWord
 */
```

### In the `cspell` configuration

```jsonc
{
    "ignoreWords": ["forbiddenWord"]
}
```
  • Loading branch information
Jason3S authored Aug 14, 2021
1 parent 9f19c81 commit 8d7596b
Show file tree
Hide file tree
Showing 20 changed files with 307 additions and 84 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ export interface SpellingDictionary {
has(word: string, useCompounds: boolean): boolean;
has(word: string, options: HasOptions): boolean;
has(word: string, options?: HasOptions): boolean;
isForbidden(word: string): boolean;
suggest(
word: string,
numSuggestions?: number,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ import {
import { CASE_INSENSITIVE_PREFIX } from 'cspell-trie-lib';
import { genSequence } from 'gensequence';
import { getDefaultSettings } from '../Settings';
import { memorizer } from '../util/Memorizer';
import { SpellingDictionaryFromTrie } from './SpellingDictionaryFromTrie';

function identityString(w: string): string {
return w;
Expand All @@ -40,7 +42,11 @@ export class SpellingDictionaryCollection implements SpellingDictionary {

/**
 * Check whether `word` is accepted by this collection.
 *
 * A word listed in `wordsToFlag` (compared in lower case) is never accepted;
 * otherwise it is accepted when at least one dictionary contains it.
 *
 * @param word - the word to look up.
 * @param hasOptions - optional lookup options; converted with `hasOptionToSearchOption` before use.
 * @returns `true` if the word is not flagged and is found in a dictionary.
 */
public has(word: string, hasOptions?: HasOptions): boolean {
    const options = hasOptionToSearchOption(hasOptions);
    // `isWordInAnyDictionary` yields the matching dictionary (or `undefined`), so coerce it to a boolean.
    return !this.wordsToFlag.has(word.toLowerCase()) && !!isWordInAnyDictionary(this.dictionaries, word, options);
}

/**
 * Report whether `word` is forbidden.
 *
 * A word is forbidden when its lower-cased form is in `wordsToFlag`, or when
 * the memoized dictionary lookup finds a dictionary that forbids it.
 *
 * @param word - the word to test.
 * @returns `true` if the word is forbidden.
 */
public isForbidden(word: string): boolean {
    const flagged = this.wordsToFlag.has(word.toLowerCase());
    return flagged || Boolean(this._isForbiddenInDict(word));
}

public suggest(
Expand Down Expand Up @@ -95,6 +101,11 @@ export class SpellingDictionaryCollection implements SpellingDictionary {
/**
 * Gather the errors reported by every dictionary in the collection.
 *
 * Dictionaries that do not implement `getErrors` contribute nothing.
 *
 * @returns a new array with all reported errors, in dictionary order.
 */
public getErrors(): Error[] {
    const collected: Error[] = [];
    for (const dict of this.dictionaries) {
        const reported = dict.getErrors?.() || [];
        for (const err of reported) {
            collected.push(err);
        }
    }
    return collected;
}

/**
 * Memoized lookup of the first dictionary that forbids a given word.
 *
 * The cache size passed to `memorizer` is `SpellingDictionaryFromTrie.cachedWordsLimit`.
 */
private _isForbiddenInDict = memorizer(
    (candidate: string) => isWordForbiddenInAnyDictionary(this.dictionaries, candidate),
    SpellingDictionaryFromTrie.cachedWordsLimit
);
}

export function createCollection(
Expand All @@ -105,8 +116,16 @@ export function createCollection(
return new SpellingDictionaryCollection(dictionaries, name, wordsToFlag);
}

export function isWordInAnyDictionary(dicts: SpellingDictionary[], word: string, options: SearchOptions): boolean {
return !!genSequence(dicts).first((dict) => dict.has(word, options));
/**
 * Find the first dictionary in `dicts` that contains `word`.
 *
 * @param dicts - dictionaries to search, in order.
 * @param word - the word to look up.
 * @param options - search options passed to each dictionary's `has`.
 * @returns the first dictionary whose `has` accepts the word, or `undefined` if none does.
 */
function isWordInAnyDictionary(
    dicts: SpellingDictionary[],
    word: string,
    options: SearchOptions
): SpellingDictionary | undefined {
    const containsWord = (candidate: SpellingDictionary) => candidate.has(word, options);
    return genSequence(dicts).first(containsWord);
}

/**
 * Find the first dictionary in `dicts` that marks `word` as forbidden.
 *
 * @param dicts - dictionaries to search, in order.
 * @param word - the word to test.
 * @returns the first dictionary whose `isForbidden` accepts the word, or `undefined` if none does.
 */
function isWordForbiddenInAnyDictionary(dicts: SpellingDictionary[], word: string): SpellingDictionary | undefined {
    const forbidsWord = (candidate: SpellingDictionary) => candidate.isForbidden(word);
    return genSequence(dicts).first(forbidsWord);
}

export function createCollectionP(
Expand All @@ -116,3 +135,8 @@ export function createCollectionP(
): Promise<SpellingDictionaryCollection> {
return Promise.all(dicts).then((dicts) => new SpellingDictionaryCollection(dicts, name, wordsToFlag));
}

/**
 * Module-private helpers gathered into one exported object.
 * NOTE(review): presumably intended for unit tests only, going by the name — confirm.
 */
export const __testing__ = {
isWordInAnyDictionary,
isWordForbiddenInAnyDictionary,
};
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,11 @@ export class SpellingDictionaryFromTrie implements SpellingDictionary {
}
return false;
}

/**
 * Report whether `word` is forbidden, as answered by the underlying trie.
 *
 * @param word - the word to test.
 * @returns the result of the trie's `isForbiddenWord` for `word`.
 */
public isForbidden(word: string): boolean {
    const { trie } = this;
    return trie.isForbiddenWord(word);
}

public suggest(
word: string,
numSuggestions?: number,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ export function createFailedToLoadDictionary(error: SpellingDictionaryLoadError)
source,
type: 'error',
has: () => false,
isForbidden: () => false,
suggest: () => [],
mapWord: (a) => a,
genSuggestions: () => {
Expand Down
43 changes: 34 additions & 9 deletions packages/cspell-lib/src/textValidator.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -191,18 +191,24 @@ describe('Validate textValidator functions', () => {
const words = results.sort((a, b) => a.offset - b.offset).map((r) => r.text);
expect(words.join(' ')).toBe('Test the line breaks from begin to end eol');
});

test.each`
text | ignoreWords | expected
${'red'} | ${[]} | ${[]}
${'color'} | ${[]} | ${[ov({ text: 'color', isFound: false })]}
${'colour'} | ${[]} | ${[ov({ text: 'colour', isFlagged: true })]}
${'colour'} | ${['colour']} | ${[]}
`('Validate forbidden words', ({ text, ignoreWords, expected }) => {
const dict = getSpellingDictionaryCollectionSync();
const result = [
...validateText(text, dict, { ignoreWords, ignoreCase: false, ignoreWordsAreCaseSensitive: false }),
];
expect(result).toEqual(expected);
});
});

/**
 * Async variant of `getSpellingDictionaryCollectionSync`.
 *
 * It delegates to the synchronous builder so both helpers share one dictionary list
 * instead of keeping a second copy here.
 *
 * @returns a promise for the collection built by `getSpellingDictionaryCollectionSync`.
 */
async function getSpellingDictionaryCollection() {
    return getSpellingDictionaryCollectionSync();
}

const colors = [
Expand Down Expand Up @@ -262,6 +268,8 @@ const words = [
"should've",
];

const forbiddenWords = ['!colour', '!favour'];

const specialWords = ['Range8', '4wheel', 'db2Admin', 'Amsterdam', 'Berlin', 'Paris'];

const sampleText = `
Expand All @@ -270,3 +278,20 @@ const sampleText = `
The little ant ate the big purple grape.
The orange tiger ate the whiteberry and the redberry.
`;

/**
 * Build the dictionary collection used by the tests in this file.
 *
 * One dictionary is created per word list (colors, fruit, animals, insects, words,
 * forbidden words), all with the source `'test'`, and they are combined into a
 * collection named `'collection'`.
 */
function getSpellingDictionaryCollectionSync() {
    const colorsDict = createSpellingDictionary(colors, 'colors', 'test');
    const fruitDict = createSpellingDictionary(fruit, 'fruit', 'test');
    const animalsDict = createSpellingDictionary(animals, 'animals', 'test');
    const insectsDict = createSpellingDictionary(insects, 'insects', 'test');
    // The `words` dictionary maps the typographic apostrophe to a plain one.
    const wordsDict = createSpellingDictionary(words, 'words', 'test', { repMap: [['’', "'"]] });
    const forbiddenDict = createSpellingDictionary(forbiddenWords, 'forbidden-words', 'test');

    return createCollection(
        [colorsDict, fruitDict, animalsDict, insectsDict, wordsDict, forbiddenDict],
        'collection'
    );
}

/**
 * Build an `expect.objectContaining` matcher from one or more partial objects.
 *
 * The partials are merged left to right into a fresh object, so later values win.
 *
 * @param t - the first partial object.
 * @param rest - further partial objects merged on top of `t`.
 * @returns the matcher, typed as `T` so it can stand in for an expected value.
 */
function ov<T>(t: Partial<T>, ...rest: Partial<T>[]): T {
    const merged = [t, ...rest].reduce<Partial<T>>((acc, part) => Object.assign(acc, part), {});
    return expect.objectContaining(merged);
}
18 changes: 10 additions & 8 deletions packages/cspell-lib/src/textValidator.ts
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ export function validateText(
text: string,
dict: SpellingDictionary,
options: ValidationOptions
): Sequence<Text.TextOffset> {
): Sequence<ValidationResult> {
const { maxNumberOfProblems = defaultMaxNumberOfProblems, maxDuplicateProblems = defaultMaxDuplicateProblems } =
options;

Expand Down Expand Up @@ -109,7 +109,7 @@ function lineValidator(dict: SpellingDictionary, options: ValidationOptions): Li
caseSensitive,
});

function isIgnored(word: string) {
function isWordIgnored(word: string) {
return ignoreDict.has(word, { ignoreCase });
}

Expand All @@ -129,18 +129,21 @@ function lineValidator(dict: SpellingDictionary, options: ValidationOptions): Li
};

/**
 * Decide whether the text of `wo` is a flagged word.
 *
 * The text is flagged when it is in `setOfFlagWords` (as written or lower-cased)
 * or when the dictionary reports it as forbidden.
 *
 * @param wo - the text and offset being checked.
 * @returns `true` if the text is flagged.
 */
function testForFlaggedWord(wo: TextOffset): boolean {
    const text = wo.text;
    return setOfFlagWords.has(text) || setOfFlagWords.has(text.toLowerCase()) || dict.isForbidden(text);
}

/**
 * Set `isFlagged` on `word`, letting the ignore list override flagging.
 *
 * Mutates and returns the same object.
 *
 * @param word - the validation result to annotate.
 * @returns `word`, with `isFlagged` set.
 */
function checkFlagWords(word: ValidationResult): ValidationResult {
    const isIgnored = isWordIgnored(word.text);
    // An ignored word is never flagged, even when it is forbidden.
    const isFlagged = !isIgnored && testForFlaggedWord(word);
    word.isFlagged = isFlagged;
    return word;
}

/**
 * Compute the flagged/found state for `word` and return it on a copy.
 *
 * - `isFlagged` keeps the value already on `word` when defined; otherwise the word
 *   is flagged only if it is not ignored and `testForFlaggedWord` accepts it.
 * - `isFound` is `undefined` for flagged words; otherwise it is `true` when the word
 *   is ignored or `isWordValid` accepts it.
 *
 * @param word - the validation result to check.
 * @param options - options passed to `isWordValid`.
 * @returns a shallow copy of `word` with `isFlagged` and `isFound` set.
 */
function checkWord(word: ValidationResult, options: HasWordOptions): ValidationResult {
    const isIgnored = isWordIgnored(word.text);
    const { isFlagged = !isIgnored && testForFlaggedWord(word) } = word;
    const isFound = isFlagged ? undefined : isIgnored || isWordValid(dict, word, word.line, options);
    return { ...word, isFlagged, isFound };
}

Expand All @@ -167,7 +170,6 @@ function lineValidator(dict: SpellingDictionary, options: ValidationOptions): Li
})
.map((wo) => (wo.isFlagged ? wo : checkWord(wo, hasWordOptions)))
.filter(rememberFilter((wo) => wo.isFlagged || !wo.isFound))
.filter(rememberFilter((wo) => !isIgnored(wo.text)))
.filter(rememberFilter((wo) => !RxPat.regExRepeatedChar.test(wo.text))) // Filter out any repeated characters like xxxxxxxxxx
// get back the original text.
.map((wo) => ({
Expand All @@ -176,7 +178,7 @@ function lineValidator(dict: SpellingDictionary, options: ValidationOptions): Li
}))
.toArray();

if (!codeWordResults.length || isIgnored(vr.text) || checkWord(vr, hasWordOptions).isFound) {
if (!codeWordResults.length || isWordIgnored(vr.text) || checkWord(vr, hasWordOptions).isFound) {
rememberFilter((_) => false)(vr);
return [];
}
Expand Down
2 changes: 2 additions & 0 deletions packages/cspell-lib/src/trace.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import { genSequence } from 'gensequence';
export interface TraceResult {
word: string;
found: boolean;
forbidden: boolean;
dictName: string;
dictSource: string;
configSource: string;
Expand Down Expand Up @@ -50,6 +51,7 @@ export async function traceWords(words: string[], settings: CSpellSettings): Pro
return dicts.dictionaries.map((dict) => ({
word,
found: dict.has(word),
forbidden: dict.isForbidden(word),
dictName: dict.name,
dictSource: dict.source,
configSource: config.name || '',
Expand Down
54 changes: 39 additions & 15 deletions packages/cspell-lib/src/util/Memorizer.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -20,25 +20,49 @@ describe('Validate Memorizer', () => {
fnTest(0, 1, 5);
});

test('cache reset', () => {
test('cache reset dual cache', () => {
const counts = new Map<number, number>();
const fn = (a: number) => {
counts.set(a, (counts.get(a) || 0) + 1);
return a;
const v = (counts.get(a) || 0) + 1;
counts.set(a, v);
return v;
};
const calc = memorizer(fn, 2);
const fnTest = (v: number, expected: number, repeat: number) => {
for (; repeat > 0; repeat--) {
expect(calc(v)).toBe(v);
expect(counts.get(v)).toBe(expected);
}
};
expect(calc(5)).toBe(1);
expect(calc(5)).toBe(1);
expect(calc(6)).toBe(1);
expect(calc(0)).toBe(1);
expect(calc(0)).toBe(1);
expect(calc(5)).toBe(1);
expect(calc(6)).toBe(1);
expect(calc(0)).toBe(1);
});
});

fnTest(5, 1, 5);
fnTest(6, 1, 5);
fnTest(0, 1, 5);
fnTest(5, 2, 5);
fnTest(6, 2, 5);
fnTest(0, 2, 5);
describe('Validate Memorizer Dual Cache', () => {
const counts = new Map<string, number>();
const fn = (a: string) => {
const v = (counts.get(a) || 0) + 1;
counts.set(a, v);
return v;
};
const calc = memorizer(fn, 2);

test.each`
value | expected
${'a'} | ${1}
${'b'} | ${1}
${'c'} | ${1}
${'b'} | ${1}
${'a'} | ${1}
${'c'} | ${1}
${'d'} | ${1}
${'e'} | ${1}
${'a'} | ${1}
${'b'} | ${2}
${'c'} | ${2}
`('cache reset dual cache $value $expected', ({ value, expected }) => {
expect(calc(value)).toBe(expected);
expect(calc(value)).toBe(expected);
});
});
83 changes: 66 additions & 17 deletions packages/cspell-lib/src/util/Memorizer.ts
Original file line number Diff line number Diff line change
@@ -1,22 +1,71 @@
/* eslint-disable @typescript-eslint/no-explicit-any */
const defaultSize = 50000;

export function memorizer<A0, T>(fn: (arg: A0) => T, size?: number): (arg0: A0) => T;
export function memorizer<A0, A1, T>(fn: (arg: A0, arg1: A1) => T, size?: number): (arg0: A0, arg1: A1) => T;
export function memorizer<A0, A1, A2, T>(
fn: (arg: A0, arg1: A1, arg2: A2) => T,
size?: number
): (arg0: A0, arg1: A1, arg2: A2) => T;
export function memorizer<A, T>(fn: (...args: A[]) => T, size: number = defaultSize): (...args: A[]) => T {
const cache = new Map<string, T>();
return (...args: A[]) => {
const key = args.join('>!@[');
if (!cache.has(key)) {
if (cache.size >= size) {
cache.clear();
}
cache.set(key, fn(...args));
/** Only types that can be easily turned into strings */
type P0 = string | number | boolean | RegExp | undefined;

type Primitive = P0 | P0[];

/**
 * Wrap `fn` so that repeated calls with the same arguments reuse the earlier result.
 *
 * The cache key is the argument list joined into one string with the separator `'>!@['`,
 * so arguments are told apart only by their string form.
 *
 * Caching itself is delegated to `memorizerKeyBy`; `size` is forwarded to it unchanged.
 *
 * @param fn - function to be called.
 * @param size - size of cache
 * @returns a function with the same parameters as `fn` that goes through the cache.
 */
export function memorizer<
    F extends (...args: Primitive[]) => any,
    Args extends Parameters<F> = Parameters<F>,
    R extends ReturnType<F> = ReturnType<F>
>(fn: F, size?: number): (...args: Args) => R {
    const joinArgs = (...params: Args): string => params.join('>!@[');
    return memorizerKeyBy(fn, joinArgs, size);
}

/**
 * Memorize the result of a function call to be returned on later calls with the same parameters.
 *
 * Note: `keyFn` is used to convert the function parameters into a string to look up in the cache.
 *
 * For speed, it keeps two caches, L0 and L1. New entries always go into L0. Once L0 has
 * accepted `size` entries, the next insertion first moves L0 into L1 (dropping the old L1)
 * and starts a fresh L0.
 *
 * Caches are NOT sorted. If a key is not found in L0, L1 is checked before calling `fn`,
 * and the resulting value is stored in L0.
 *
 * @param fn - function to be memorized
 * @param keyFn - extracts a `key` value from the arguments to `fn` to be used as the key to the cache
 * @param size - number of entries L0 accepts before the caches are rotated.
 * @returns A function with the same parameters as `fn` that returns the cached result when one exists.
 */
export function memorizerKeyBy<
    F extends (...args: any[]) => any,
    Args extends Parameters<F> = Parameters<F>,
    R extends ReturnType<F> = ReturnType<F>
>(fn: F, keyFn: (...args: Args) => string, size: number = defaultSize): (...args: Args) => R {
    let count = 0;
    // Null-prototype objects, so `key in cache` never matches an inherited property.
    let cacheL0: Record<string, R> = Object.create(null);
    let cacheL1: Record<string, R> = Object.create(null);
    return (...args: Args) => {
        const key = keyFn(...args);
        if (key in cacheL0) return cacheL0[key];

        // `in` rather than a truthiness check, so cached `undefined`/falsy results are honoured.
        const v = key in cacheL1 ? cacheL1[key] : fn(...args);
        if (count >= size) {
            // L0 is full: demote it to L1 and start a fresh L0.
            cacheL1 = cacheL0;
            cacheL0 = Object.create(null);
            count = 0;
        }
        cacheL0[key] = v;
        ++count;
        return v;
    };
}
Loading

0 comments on commit 8d7596b

Please sign in to comment.