import { tokenizeDistinct } from './tokenize.mjs';
/**
 * Scores how well a chunk's tokens match a query's tokens.
 *
 * The result is the sum of three parts, capped at 1.0:
 *  - exact: the number of chunk tokens that are also query tokens, divided
 *    by the number of distinct query tokens;
 *  - partial: chunk tokens that are not exact matches but contain, or are
 *    contained in, a query token (both longer than 3 characters), weighted
 *    0.3 each and divided by the number of distinct query tokens;
 *  - intent: +0.1 for every intent keyword that occurs (case-insensitive
 *    substring) in the space-joined chunk tokens.
 *
 * @param {string[]} queryTokens - Tokens of the query; duplicates are ignored.
 * @param {string[]} chunkTokens - Tokens of the chunk being scored.
 * @param {{intentKeywords?: string[]}} [options] - Optional scoring hints.
 * @returns {number} Score between 0 and 1; 0 when the query has no tokens.
 */
export function scoreChunk(queryTokens, chunkTokens, options = {}) {
  if (queryTokens.length === 0) return 0;

  const queryTokenSet = new Set(queryTokens);
  // Only query tokens longer than 3 characters may take part in partial
  // matching; this is loop-invariant, so it is computed once up front.
  const partialCandidates = [...queryTokenSet].filter((qt) => qt.length > 3);

  let exactMatches = 0;
  let partialMatches = 0;
  for (const token of chunkTokens) {
    if (queryTokenSet.has(token)) {
      exactMatches++;
    } else if (
      token.length > 3 &&
      partialCandidates.some((qt) => token.includes(qt) || qt.includes(token))
    ) {
      // Each chunk token counts as at most one partial match.
      partialMatches++;
    }
  }

  const baseScore = exactMatches / queryTokenSet.size;
  const partialBoost = (partialMatches * 0.3) / queryTokenSet.size;

  let intentBoost = 0;
  // The default parameter only covers `undefined`; tolerate an explicit null.
  const intentKeywords = options && options.intentKeywords;
  if (intentKeywords) {
    const chunkText = chunkTokens.join(' ').toLowerCase();
    for (const keyword of intentKeywords) {
      // An empty string is a substring of every text and would boost every
      // chunk; non-string entries cannot be matched at all. Skip both.
      if (typeof keyword !== 'string' || keyword.length === 0) continue;
      if (chunkText.includes(keyword.toLowerCase())) {
        intentBoost += 0.1;
      }
    }
  }

  return Math.min(1.0, baseScore + partialBoost + intentBoost);
}
/**
 * Scores every chunk against the query and returns the results best-first.
 *
 * Each chunk's base score comes from `scoreChunk` over the distinct tokens of
 * the query and of `chunk.text`. It is then multiplied by:
 *  - 1.1 when `chunk.metadata.updated` is less than 30 days old (skipped in
 *    fast mode);
 *  - 1.15 when `chunk.heading` contains any intent keyword
 *    (case-insensitive substring).
 *
 * In fast mode, chunks whose base score is below 0.3 are dropped, and the
 * scan stops once at least 20 results have been collected and the result
 * just added scores above 0.8.
 *
 * @param {string} query - Query text; tokenized with `tokenizeDistinct`.
 * @param {Array<object>} chunks - Chunks with a `text` property and optional
 *   `heading`, `headingPath` ('>'-separated) and `metadata.updated`.
 * @param {{intentKeywords?: string[], fastMode?: boolean}} [options]
 * @returns {Array<object>} One new object per kept chunk: the chunk's own
 *   properties plus `index` (position in `chunks`), `score` and `depth`
 *   (number of '>'-separated segments in `headingPath`). Sorted by score
 *   descending; scores within 0.01 of each other are treated as ties and
 *   ordered by ascending depth.
 */
export function rank(query, chunks, options = {}) {
  const MS_PER_DAY = 1000 * 60 * 60 * 24;
  const RECENT_DAYS = 30;
  const FAST_MODE_MIN_SCORE = 0.3;
  const FAST_MODE_STOP_SCORE = 0.8;
  const FAST_MODE_MIN_RESULTS = 20;

  // The default parameter only covers `undefined`; tolerate an explicit null.
  const opts = options || {};
  const fastMode = Boolean(opts.fastMode);

  // An empty keyword is a substring of every heading/text and would boost
  // every chunk; non-string entries cannot be matched. Drop both up front.
  const intentKeywords = Array.from(opts.intentKeywords || []).filter(
    (k) => typeof k === 'string' && k.length > 0,
  );
  // Lower-cased once here rather than once per chunk.
  const loweredKeywords = intentKeywords.map((k) => k.toLowerCase());

  const qTokens = tokenizeDistinct(query);
  // One reference time for the whole ranking pass.
  const now = Date.now();

  const results = [];
  for (let i = 0; i < chunks.length; i++) {
    const chunk = chunks[i];
    const score = scoreChunk(qTokens, tokenizeDistinct(chunk.text), {
      intentKeywords,
    });

    // Fast mode drops weak matches before any further work.
    if (fastMode && score < FAST_MODE_MIN_SCORE) {
      continue;
    }

    let finalScore = score;

    // Recency boost. An unparseable date yields NaN, which fails the
    // comparison and therefore gets no boost.
    if (!fastMode && chunk.metadata && chunk.metadata.updated) {
      const daysSinceUpdate =
        (now - new Date(chunk.metadata.updated).getTime()) / MS_PER_DAY;
      if (daysSinceUpdate < RECENT_DAYS) {
        finalScore *= 1.1;
      }
    }

    // Heading boost: a keyword in the heading counts for more than body text.
    if (typeof chunk.heading === 'string') {
      const heading = chunk.heading.toLowerCase();
      if (loweredKeywords.some((k) => heading.includes(k))) {
        finalScore *= 1.15;
      }
    }

    // A chunk without a heading path is treated like an empty path (depth 1)
    // instead of throwing.
    const depth =
      typeof chunk.headingPath === 'string'
        ? chunk.headingPath.split('>').length
        : 1;

    // Spread the chunk first so the computed fields always win, even when the
    // chunk already carries its own `index`, `score` or `depth` property.
    results.push({
      ...chunk,
      index: i,
      score: finalScore,
      depth,
    });

    if (
      fastMode &&
      results.length >= FAST_MODE_MIN_RESULTS &&
      finalScore > FAST_MODE_STOP_SCORE
    ) {
      break;
    }
  }

  return results.sort((a, b) => {
    // Primary: score, descending. Near-ties fall through to depth so that
    // shallower headings are preferred.
    if (Math.abs(b.score - a.score) > 0.01) {
      return b.score - a.score;
    }
    return a.depth - b.depth;
  });
}