Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -661,12 +661,16 @@ export async function *getBestMatches<

let lowestCostSource = spaceQueue.dequeue();
const newResult = lowestCostSource.handleNextNode();
spaceQueue.enqueue(lowestCostSource);
spaceQueue = new PriorityQueue(CORRECTION_QUEUE_COMPARATOR, spaceQueue.toArray());

if(newResult.type == 'none') {
// Do not re-add the source if its searchspace is exhausted.
return null;
} else if(newResult.type == 'complete') {
} else {
spaceQueue.enqueue(lowestCostSource);
spaceQueue = new PriorityQueue(CORRECTION_QUEUE_COMPARATOR, spaceQueue.toArray());
}

if(newResult.type == 'complete') {
const mapping = newResult.mapping;
return filter(mapping) ? mapping : null;
}
Expand All @@ -683,7 +687,7 @@ export async function *getBestMatches<
if(timer.timeSinceLastDefer > STANDARD_TIME_BETWEEN_DEFERS) {
await timer.defer();
}
} while(!timer.elapsed && spaceQueue.peek().currentCost < Number.POSITIVE_INFINITY);
} while(!timer.elapsed && spaceQueue.count > 0 && spaceQueue.peek().currentCost < Number.POSITIVE_INFINITY);

return null;
}
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,9 @@ import { ContextState, determineContextSlideTransform } from './correction/conte
import { ContextTransition } from './correction/context-transition.js';
import { ExecutionTimer } from './correction/execution-timer.js';
import ModelCompositor from './model-compositor.js';
import { EDIT_DISTANCE_COST_SCALE, getBestTokenMatches } from './correction/distance-modeler.js';
import { EDIT_DISTANCE_COST_SCALE, getBestMatches } from './correction/distance-modeler.js';
import { TokenizationCorrector } from './correction/tokenization-corrector.js';
import { TokenizationResultMapping } from './correction/tokenization-result-mapping.js';
import { TokenizationResult, TokenizationResultMapping } from './correction/tokenization-result-mapping.js';

const searchForProperty = defaultWordbreaker.searchForProperty;

Expand Down Expand Up @@ -671,30 +671,34 @@ export async function correctAndEnumerate(
const tokenizations = transition.final.tokenizations;
const searchModules = tokenizations.map(t => t.tail.searchModule);

const preppedTokenizationSearch = prepareTokenizationSearch(transition, tokenizations);

// Only run the correction search when corrections are enabled.
let rawPredictions: IntermediateTokenizedPrediction[] = [];
let bestCorrectionCost: number;
for await(const match of getBestTokenMatches(searchModules, timer)) {
// Corrections obtained: now to predict from them!
const tokenization = tokenizations.find(t => t.spaceId == match.spaceId);

// If our 'match' results in fully deleting the new token, reject it and try again.
if(match.matchSequence.length == 0 && match.inputSequence.length != 0) {
for await(const match of getBestMatches<TokenizationResult, TokenizationResultMapping, TokenizationCorrector>(preppedTokenizationSearch, timer)) {
const { totalEditCount, totalEditableCodepoints } = match.matchedResult;
// If our 'match' fully replaces the tokens, reject it and try again.
//
// If the known edit count matches the total length of editable text, reject
// the suggestion source. Q: for any token, or across ALL tokens? If "for
// any", we need to distinguish between penalization where corrections
// couldn't be found and where there's actual edit cost. (2.5 edits does
// stand out a bit, but we should do something more robust.)
if(totalEditCount != 0 && totalEditableCodepoints == totalEditCount) { // TODO: double-check approach!
continue;
}

// If our 'match' fully replaces the token, reject it and try again.
if(match.matchSequence.length != 0 && match.matchSequence.length == match.knownCost) {
// Perhaps make a return object, add a cumulative 'editCount' property?
// Or, we could just sum it up here.
if(totalEditCount > 0 && !searchModules.find(s => s.correctionsEnabled)) {
continue;
}

if(match.editCount > 0 && !searchModules.find(s => s.correctionsEnabled)) {
continue;
}
// Worth considering: extend Traversal to allow direct prediction lookups?
// let traversal = match.finalTraversal;

const suggestionRange = determineSuggestionRange(transition.base.displayTokenization, tokenization)
const corrector = new TokenizationCorrector(tokenization, suggestionRange.tokensToPredict.length, () => true);
const predictions = buildAndMapPredictions(transition, new TokenizationResultMapping([match], corrector));
const predictions = buildAndMapPredictions(transition, match);

// Only set 'best correction' cost when a correction ACTUALLY YIELDS predictions.
if(predictions.length > 0 && bestCorrectionCost === undefined) {
Expand Down
Loading