diff --git a/web/src/engine/predictive-text/worker-thread/src/main/correction/distance-modeler.ts b/web/src/engine/predictive-text/worker-thread/src/main/correction/distance-modeler.ts index 8eef460ee0f..a848e58098b 100644 --- a/web/src/engine/predictive-text/worker-thread/src/main/correction/distance-modeler.ts +++ b/web/src/engine/predictive-text/worker-thread/src/main/correction/distance-modeler.ts @@ -661,12 +661,16 @@ export async function *getBestMatches< let lowestCostSource = spaceQueue.dequeue(); const newResult = lowestCostSource.handleNextNode(); - spaceQueue.enqueue(lowestCostSource); - spaceQueue = new PriorityQueue(CORRECTION_QUEUE_COMPARATOR, spaceQueue.toArray()); if(newResult.type == 'none') { + // Do not re-add the source if its searchspace is exhausted. return null; - } else if(newResult.type == 'complete') { + } else { + spaceQueue.enqueue(lowestCostSource); + spaceQueue = new PriorityQueue(CORRECTION_QUEUE_COMPARATOR, spaceQueue.toArray()); + } + + if(newResult.type == 'complete') { const mapping = newResult.mapping; return filter(mapping) ? mapping : null; } @@ -683,7 +687,7 @@ export async function *getBestMatches< if(timer.timeSinceLastDefer > STANDARD_TIME_BETWEEN_DEFERS) { await timer.defer(); } - } while(!timer.elapsed && spaceQueue.peek().currentCost < Number.POSITIVE_INFINITY); + } while(!timer.elapsed && spaceQueue.count > 0 && spaceQueue.peek().currentCost < Number.POSITIVE_INFINITY); return null; } \ No newline at end of file diff --git a/web/src/engine/predictive-text/worker-thread/src/main/predict-helpers.ts b/web/src/engine/predictive-text/worker-thread/src/main/predict-helpers.ts index 171c3d675ff..8fcb2fa49ea 100644 --- a/web/src/engine/predictive-text/worker-thread/src/main/predict-helpers.ts +++ b/web/src/engine/predictive-text/worker-thread/src/main/predict-helpers.ts @@ -12,9 +12,9 @@ import { ContextState, determineContextSlideTransform } from './correction/conte import { ContextTransition } from './correction/context-transition.js'; import { ExecutionTimer } from './correction/execution-timer.js'; import ModelCompositor from './model-compositor.js'; -import { EDIT_DISTANCE_COST_SCALE, getBestTokenMatches } from './correction/distance-modeler.js'; +import { EDIT_DISTANCE_COST_SCALE, getBestMatches } from './correction/distance-modeler.js'; import { TokenizationCorrector } from './correction/tokenization-corrector.js'; -import { TokenizationResultMapping } from './correction/tokenization-result-mapping.js'; +import { TokenizationResult, TokenizationResultMapping } from './correction/tokenization-result-mapping.js'; const searchForProperty = defaultWordbreaker.searchForProperty; @@ -671,30 +671,34 @@ export async function correctAndEnumerate( const tokenizations = transition.final.tokenizations; const searchModules = tokenizations.map(t => t.tail.searchModule); + const preppedTokenizationSearch = prepareTokenizationSearch(transition, tokenizations); + // Only run the correction search when corrections are enabled. let rawPredictions: IntermediateTokenizedPrediction[] = []; let bestCorrectionCost: number; - for await(const match of getBestTokenMatches(searchModules, timer)) { - // Corrections obtained: now to predict from them! - const tokenization = tokenizations.find(t => t.spaceId == match.spaceId); - - // If our 'match' results in fully deleting the new token, reject it and try again. - if(match.matchSequence.length == 0 && match.inputSequence.length != 0) { + for await(const match of getBestMatches(preppedTokenizationSearch, timer)) { + const { totalEditCount, totalEditableCodepoints } = match.matchedResult; + // If our 'match' fully replaces the tokens, reject it and try again. + // + // If the known edit count matches the total length of editable text, reject + // the suggestion source. Q: for any token, or across ALL tokens? If "for + // any", we need to distinguish between penalization where corrections + // couldn't be found and where there's actual edit cost. (2.5 edits does + // stand out a bit, but we should do something more robust.) + if(totalEditCount != 0 && totalEditableCodepoints == totalEditCount) { // TODO: double-check approach! continue; } - // If our 'match' fully replaces the token, reject it and try again. - if(match.matchSequence.length != 0 && match.matchSequence.length == match.knownCost) { + // Perhaps make a return object, add a cumulative 'editCount' property? + // Or, we could just sum it up here. + if(totalEditCount > 0 && !searchModules.find(s => s.correctionsEnabled)) { continue; } - if(match.editCount > 0 && !searchModules.find(s => s.correctionsEnabled)) { - continue; - } + // Worth considering: extend Traversal to allow direct prediction lookups? + // let traversal = match.finalTraversal; - const suggestionRange = determineSuggestionRange(transition.base.displayTokenization, tokenization) - const corrector = new TokenizationCorrector(tokenization, suggestionRange.tokensToPredict.length, () => true); - const predictions = buildAndMapPredictions(transition, new TokenizationResultMapping([match], corrector)); + const predictions = buildAndMapPredictions(transition, match); // Only set 'best correction' cost when a correction ACTUALLY YIELDS predictions. if(predictions.length > 0 && bestCorrectionCost === undefined) {