Skip to content

Commit

Permalink
Fixed a bug where generation would mutate the start variable in some …
Browse files Browse the repository at this point in the history
…circumstances. Added Analyze static and instance methods to Markov Chain and related unit tests. Removed unreachable code paths in Markov functions.
  • Loading branch information
abrisene committed Sep 8, 2021
1 parent 426bc5b commit f8ca96d
Show file tree
Hide file tree
Showing 2 changed files with 206 additions and 9 deletions.
74 changes: 74 additions & 0 deletions src/__tests__/markov.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -569,6 +569,42 @@ describe('Markov Chain', () => {
expect(genF0).toEqual(optF0.start);
expect(genF1).toEqual([]);
});
it('can analyze the sources and sinks of a sequence', () => {
const eng = engine.clone();

// Default
const a1 = MarkovChain.analyze({ model: dtoC2, engine: eng });
expect(a1).toHaveProperty('sequence');
expect(a1).toHaveProperty('sources');
expect(a1).toHaveProperty('sinks');
expect(a1.sequence).toEqual([dtoC2.startDelimiter]);
expect(a1.sources).toEqual({ undefined: 1 });
Object.values(a1.sinks).forEach(v => {
expect(v).toBeCloseTo(0.5, 1);
});

// Samples & Un-Normalized
const a2 = MarkovChain.analyze({ model: dtoC2, engine: eng, samples: 500, normalize: false });
expect(a2.sequence).toEqual([dtoC2.startDelimiter]);
expect(a2.sources).toEqual({ undefined: 500 });
expect(Object.values(a2.sinks).reduce((a, b) => a + b)).toEqual(500);

// Starting Values
const a3 = MarkovChain.analyze({ model: dtoC2, engine: eng, start: ['+'] });
expect(a3.sequence).toEqual(['+']);
Object.values(a3.sources).forEach(v => {
expect(v).toBeCloseTo(0.5, 1);
});
Object.values(a3.sinks).forEach(v => {
expect(v).toBeCloseTo(0.5, 1);
});

// These are covered in Generation since they're just passed through.
const a4 = MarkovChain.analyze({ model: dtoC2, engine: eng, min: 1, max: 1, order: 2, strict: true });
expect(a4).toHaveProperty('sequence');
expect(a4).toHaveProperty('sources');
expect(a4).toHaveProperty('sinks');
});
it('are immutable', () => {
const mOriginal = MarkovChain.clone(dtoA3);
const mClone = MarkovChain.clone(mOriginal);
Expand Down Expand Up @@ -756,9 +792,11 @@ describe('Markov Chain', () => {
expect([gC1[2], gC2[2]]).toContain(pickNext2);

// Last
const pickDLast = mB1.pick(undefined, false);
const pickSLast = mB1.pick([gB1[1]], false);
const pickLast = mB1.last([gB1[1]]);
const pickLast2 = mC2.last(['+']);
expect(pickDLast).toEqual(gB1[2]);
expect(pickSLast).toEqual(gB1[0]);
expect(pickLast).toEqual(pickSLast);
expect([gC1[0], gC2[0]]).toContain(pickLast2);
Expand Down Expand Up @@ -839,5 +877,41 @@ describe('Markov Chain', () => {
expect(genF0).toEqual(optF0.start);
expect(genF1).toEqual([]);
});
it('can analyze the sources and sinks of a sequence', () => {
const mc = new MarkovChain({ ...dtoC2, seed: engine.seed });

// Default
const a1 = mc.analyze({});
expect(a1).toHaveProperty('sequence');
expect(a1).toHaveProperty('sources');
expect(a1).toHaveProperty('sinks');
expect(a1.sequence).toEqual([mc.startDelimiter]);
expect(a1.sources).toEqual({ undefined: 1 });
Object.values(a1.sinks).forEach(v => {
expect(v).toBeCloseTo(0.5, 1);
});

// Samples & Un-Normalized
const a2 = mc.analyze({ samples: 500, normalize: false });
expect(a2.sequence).toEqual([dtoC2.startDelimiter]);
expect(a2.sources).toEqual({ undefined: 500 });
expect(Object.values(a2.sinks).reduce((a, b) => a + b)).toEqual(500);

// Starting Values
const a3 = mc.analyze({ start: ['+'] });
expect(a3.sequence).toEqual(['+']);
Object.values(a3.sources).forEach(v => {
expect(v).toBeCloseTo(0.5, 1);
});
Object.values(a3.sinks).forEach(v => {
expect(v).toBeCloseTo(0.5, 1);
});

// These are covered in Generation since they're just passed through.
const a4 = mc.analyze({ min: 1, max: 1, order: 2, strict: true });
expect(a4).toHaveProperty('sequence');
expect(a4).toHaveProperty('sources');
expect(a4).toHaveProperty('sinks');
});
});
});
141 changes: 132 additions & 9 deletions src/structures/markov.ts
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ TODO:
# Module Dependencies
*/

import { normalizeObject } from 'scalr';
import { Random, RandomDTO } from '../services';
import { Distribution, DistributionSourceDTO } from './distribution';
import { CONSTANTS } from '..';
Expand Down Expand Up @@ -99,6 +100,19 @@ export interface MCGeneratorStaticOptions extends MCGeneratorOptions {
engine?: Random;
}

export interface MCAnalyzeOptions extends MCGeneratorOptions {
samples?: number;
normalize?: boolean;
}

export interface MCAnalyzeStaticOptions extends MCAnalyzeOptions, MCGeneratorStaticOptions {}

export interface MCAnalysis {
sequence: string[];
sources: { [key: string]: number };
sinks: { [key: string]: number };
}

/**
# Constants
*/
Expand All @@ -124,6 +138,12 @@ const defaultGenOptions = {
trim: true,
};

const defaultAnalyzeOptions = {
...defaultGenOptions,
samples: 1000,
normalize: true,
};

/**
# Utility Functions
*/
Expand Down Expand Up @@ -541,6 +561,42 @@ export class MarkovChain {
});
}

/**
* Analyze's a sequences sources and sinks. Generates a number of samples from a given gram sequence, and gives the resulting
* distribution of where the generated sequences terminated both backwards (sources) and forwards (sinks).
* @param start The sequence to start with. If this is not defined, the sequence will start from the beginning or end (as appropriate to the direction).
* @param order The desired order (gram length) for the picks. Higher values will reduce randomness. If this is not defined it will default to the model's max order.
* @param samples The desired number of samples to collect.
* @param min The minimum length of the sequence. This will not prevent early termination if suitable grams or states cannot be found.
* @param max The maximum length of the sequence.
* @param mask A mask containing keys in the chain that should be ignored.
* @param strict If true, order will not be dynamically adjusted to find suitable grams.
* Order will still be adjusted if the starting sequence provided is less than the max order to get up to the preferred order.
*/
public analyze({
start,
order,
samples = defaultAnalyzeOptions.samples,
normalize = true,
min = defaultAnalyzeOptions.min,
max = defaultAnalyzeOptions.max,
mask,
strict = defaultAnalyzeOptions.strict,
}: MCAnalyzeOptions) {
return MarkovChain.analyze({
model: this.dto,
start,
order,
samples,
normalize,
min,
max,
mask,
strict,
engine: this._engine,
});
}

/**
* Serializes a Markov Chain instance into a DTO.
* @param stripSequences If true this will strip out the sequences, removing the chain's source data.
Expand Down Expand Up @@ -701,14 +757,8 @@ export class MarkovChain {
* @param engine A Random engine.
*/
static pickGram(gram: Gram, next = true, mask?: string[], engine?: Random) {
let result;
if (gram !== undefined) {
const distribution = next ? gram.next : gram.last;
if (distribution !== undefined) {
result = Distribution.pickOne(distribution, mask, engine);
}
}
return result;
const distribution = next ? gram.next : gram.last;
return Distribution.pickOne(distribution, mask, engine);
}

/**
Expand Down Expand Up @@ -780,7 +830,7 @@ export class MarkovChain {
// SETUP
// Set the starting sequence and the terminating character.
const dirForward = direction === 'next';
const picks = start || (dirForward ? [model.startDelimiter] : [model.endDelimiter]);
const picks = start !== undefined ? [...start] : dirForward ? [model.startDelimiter] : [model.endDelimiter];
const terminator = dirForward ? model.endDelimiter : model.startDelimiter;

// Determine the order
Expand Down Expand Up @@ -845,6 +895,79 @@ export class MarkovChain {
return trim ? picks.filter(v => ![model.startDelimiter, model.endDelimiter].includes(v)) : picks;
}

/**
* Analyze's a sequences sources and sinks. Generates a number of samples from a given gram sequence, and gives the resulting
* distribution of where the generated sequences terminated both backwards (sources) and forwards (sinks).
* @param model A Markov Chain data transfer object.
* @param start The sequence to start with. If this is not defined, the sequence will start from the beginning or end (as appropriate to the direction).
* @param order The desired order (gram length) for the picks. Higher values will reduce randomness. If this is not defined it will default to the model's max order.
* @param samples The desired number of samples to collect.
* @param min The minimum length of the sequence. This will not prevent early termination if suitable grams or states cannot be found.
* @param max The maximum length of the sequence.
* @param mask A mask containing keys in the chain that should be ignored.
* @param strict If true, order will not be dynamically adjusted to find suitable grams.
* Order will still be adjusted if the starting sequence provided is less than the max order to get up to the preferred order.
* @param engine A Random engine. If one is not provided, a new one will be created for the generation.
*/
static analyze({
model,
start,
order,
samples = defaultAnalyzeOptions.samples,
normalize = true,
min = defaultAnalyzeOptions.min,
max = defaultAnalyzeOptions.max,
mask,
strict = defaultAnalyzeOptions.strict,
engine,
}: MCAnalyzeStaticOptions) {
// Get the starting sequence.
const s = start || [model.startDelimiter];
const results: MCAnalysis = { sequence: s, sources: {}, sinks: {} };

// Sample the sequences.
for (let i = 0; i < samples; i += 1) {
const sink = MarkovChain.generate({
model,
start: s,
order,
min,
max,
direction: 'next',
mask,
strict,
trim: true,
engine,
});

const source = MarkovChain.generate({
model,
start: s,
order,
min,
max,
direction: 'last',
mask,
strict,
trim: true,
engine,
});

const sinkState = sink[sink.length - 1];
const sourceState = source[0];

if (sink !== undefined && results.sinks[sinkState] === undefined) results.sinks[sinkState] = 0;
if (source !== undefined && results.sources[sourceState] === undefined) results.sources[sourceState] = 0;

results.sinks[sinkState] += 1;
results.sources[sourceState] += 1;
}

return normalize
? { sequence: s, sources: normalizeObject(results.sources), sinks: normalizeObject(results.sinks) }
: results;
}

/**
* Creates a new Markov Chain data transfer object.
* @param sequences An optional array of sequences to generate the grams from.
Expand Down

0 comments on commit f8ca96d

Please sign in to comment.