import type { Scored, ScoreMetadata } from '@portfolio/chat-contract';
const DEFAULT_RECENCY_LAMBDA = 0.2; const DEFAULT_TEXT_SCORE_WEIGHT = 0.3; const DEFAULT_SEMANTIC_SCORE_WEIGHT = 0.5; const NEUTRAL_RECENCY_SCORE = 0.5; const MONTH_IN_MS = 1000 * 60 * 60 * 24 * 30; const MAX_RECENCY_MONTHS = 60;
export type SearchWeights = { textWeight?: number; semanticWeight?: number; recencyLambda?: number; };
const clampMonths = (value: number): number => { if (!Number.isFinite(value) || value <= 0) { return 0; } if (value > MAX_RECENCY_MONTHS) { return MAX_RECENCY_MONTHS; } return value; };
const normalizeWeight = (value?: number): number | undefined => { if (typeof value !== 'number' || !Number.isFinite(value)) { return undefined; } if (value < 0) { return undefined; } return value; };
const computeRecencyScore = (timestamp: number | null | undefined, now: number): number => { if (typeof timestamp !== 'number' || !Number.isFinite(timestamp)) { return NEUTRAL_RECENCY_SCORE; } const delta = now - timestamp; if (!Number.isFinite(delta)) { return NEUTRAL_RECENCY_SCORE; } if (delta <= 0) { return 1; } const monthsOld = clampMonths(delta / MONTH_IN_MS); if (monthsOld >= MAX_RECENCY_MONTHS) { return 0; } return Math.max(0, 1 - monthsOld / MAX_RECENCY_MONTHS); };
export type SearchContext = { filters: TFilters; hasStructuredFilters: boolean; combinedTextQuery: string; textScoreMap: Map<string, number>; semanticScoreMap: Map<string, number>; };
export type SearchSpec<TRecord, TInput, TFilters, TResult> = { normalizeInput(input: TInput): TFilters; hasStructuredFilters(filters: TFilters): boolean; recordMatches(record: TRecord, filters: TFilters): boolean; computeStructuredScore(record: TRecord, filters: TFilters): number; buildCombinedTextQuery(filters: TFilters): string; buildSemanticQuery(input: TInput, filters: TFilters): string; getId(record: TRecord): string; buildResult(record: TRecord): TResult; hasQueryTerms?(filters: TFilters, context: SearchContext): boolean; describeFilters?(filters: TFilters): Record<string, unknown>; getRecencyTimestamp?(record: TRecord, context: { now: number }): number | null; recencyLambda?: number; };
type SearchIndexEntry = { record: TRecord; score: number; };
type SemanticScorer = (records: readonly TRecord[], query: string) => Promise<Map<string, number>>;
export type SearchLogPayload = { filters: TFilters; filterDescription?: Record<string, unknown>; limit: number; structuredCandidates: number; matchedCount: number; expandedCandidates: number; usedSemantic: boolean; topScore?: number; topRawScore?: number; normalizationFactor?: number; recencyLambda?: number; freshestTimestamp?: number | null; topRecencyScore?: number; rawTextMatches?: number; scoredCandidates?: number; candidateCount?: number; };
export type SearcherOptions<TRecord, TInput, TFilters> = { searchIndex?: (query: string) => Promise<SearchIndexEntry[]>; semanticRanker?: SemanticScorer | null; logger?: (payload: SearchLogPayload) => void; defaultLimit?: number; minLimit?: number; maxLimit?: number; getLimit?: (input: TInput) => number | null | undefined; getNow?: () => number; weights?: SearchWeights; };
export function createSearcher<TRecord, TInput, TFilters, TResult>(config: { records: readonly TRecord[]; spec: SearchSpec<TRecord, TInput, TFilters, TResult>; options?: SearcherOptions<TRecord, TInput, TFilters>; }) { const { records, spec } = config; const options = config.options ?? {}; const defaultLimit = options.defaultLimit ?? 5; const minLimit = options.minLimit ?? 1; const maxLimit = options.maxLimit ?? 10; const searchIndex = options.searchIndex; const semanticRanker = options.semanticRanker; const logger = options.logger; const getLimit = options.getLimit; const getNow = options.getNow ?? Date.now; const textWeightOverride = normalizeWeight(options.weights?.textWeight); const semanticWeightOverride = normalizeWeight(options.weights?.semanticWeight); const recencyLambdaOverride = normalizeWeight(options.weights?.recencyLambda); const textScoreWeight = textWeightOverride ?? DEFAULT_TEXT_SCORE_WEIGHT; const semanticScoreWeight = semanticWeightOverride ?? DEFAULT_SEMANTIC_SCORE_WEIGHT; const resolvedRecencyLambda = recencyLambdaOverride ?? (typeof spec.recencyLambda === 'number' ? spec.recencyLambda : DEFAULT_RECENCY_LAMBDA);
const recordById = new Map<string, TRecord>(); const recordOrder = new Map<string, number>(); for (const record of records) { recordById.set(spec.getId(record), record); recordOrder.set(spec.getId(record), recordOrder.size); }
const resolveRecord = (record: TRecord): TRecord => { const id = spec.getId(record); const existing = recordById.get(id); if (existing) { return existing; } recordById.set(id, record); return record; };
const clampLimit = (value: number): number => { if (!Number.isFinite(value)) { return defaultLimit; } const normalized = Math.floor(value); if (normalized < minLimit) { return minLimit; } if (normalized > maxLimit) { return maxLimit; } return normalized; };
const emitLog = (filters: TFilters, payload: Omit<SearchLogPayload, 'filters' | 'filterDescription'>) => { if (!logger) { return; } const filterDescription = spec.describeFilters?.(filters); logger({ ...payload, filters, filterDescription, }); };
async function search(input: TInput): Promise<Scored[]> { const filters = spec.normalizeInput(input); const requestedLimit = getLimit?.(input) ?? undefined; const limit = clampLimit(requestedLimit ?? defaultLimit);
const hasStructuredFilters = spec.hasStructuredFilters(filters);
const recordMatchesFilters = (record: TRecord) => !hasStructuredFilters || spec.recordMatches(record, filters);
const structuredMatches = records.filter(recordMatchesFilters);
const candidateMap = new Map<string, TRecord>();
for (const record of structuredMatches) {
candidateMap.set(spec.getId(record), record);
}
const combinedTextQuery = spec.buildCombinedTextQuery(filters).trim();
const textMatches: SearchIndexEntry<TRecord>[] =
combinedTextQuery && searchIndex ? await searchIndex(combinedTextQuery) : [];
const textMatchCount = textMatches.length;
const textScoreMap = new Map<string, number>();
textMatches.forEach(({ record, score }, index) => {
const resolvedRecord = resolveRecord(record);
const id = spec.getId(resolvedRecord);
const tieBreaker = textMatches.length ? (textMatches.length - index) * 0.01 : 0;
textScoreMap.set(id, score + tieBreaker);
});
const needsCandidateExpansion = hasStructuredFilters && candidateMap.size < limit;
const semanticQuery = spec.buildSemanticQuery(input, filters).trim();
let semanticScoreMap = new Map<string, number>();
if (semanticRanker && semanticQuery) {
const targets = needsCandidateExpansion ? records : structuredMatches;
if (targets.length) {
semanticScoreMap = await semanticRanker(targets, semanticQuery);
}
}
if (needsCandidateExpansion) {
const maxExpanded = Math.max(limit * 3, 10);
for (const { record } of textMatches) {
const resolvedRecord = resolveRecord(record);
if (!spec.recordMatches(resolvedRecord, filters)) {
continue;
}
const id = spec.getId(resolvedRecord);
if (!candidateMap.has(id)) {
candidateMap.set(id, resolvedRecord);
}
if (candidateMap.size >= maxExpanded) {
break;
}
}
if (semanticScoreMap.size > 0) {
const rankedBySemantic = Array.from(semanticScoreMap.entries()).sort((a, b) => b[1] - a[1]);
for (const [recordId] of rankedBySemantic) {
const record = recordById.get(recordId);
if (!record) {
continue;
}
if (!spec.recordMatches(record, filters)) {
continue;
}
if (!candidateMap.has(recordId)) {
candidateMap.set(recordId, record);
}
if (candidateMap.size >= maxExpanded) {
break;
}
}
}
}
const candidateRecords = Array.from(candidateMap.values());
if (!candidateRecords.length) {
emitLog(filters, {
limit,
structuredCandidates: structuredMatches.length,
matchedCount: 0,
expandedCandidates: candidateRecords.length,
usedSemantic: semanticScoreMap.size > 0,
rawTextMatches: textMatchCount,
candidateCount: candidateRecords.length,
scoredCandidates: 0,
});
return [];
}
const queryContext: SearchContext<TFilters> = {
filters,
hasStructuredFilters,
combinedTextQuery,
textScoreMap,
semanticScoreMap,
};
const hasQueryTerms =
spec.hasQueryTerms?.(filters, queryContext) ?? (hasStructuredFilters || Boolean(combinedTextQuery));
const requireSignals =
Boolean(combinedTextQuery) && (textScoreMap.size > 0 || semanticScoreMap.size > 0 || hasStructuredFilters);
const now = getNow();
const recencyAccessor = spec.getRecencyTimestamp;
const recencyEnabled = typeof recencyAccessor === 'function' && resolvedRecencyLambda > 0;
let freshestTimestamp: number | null = null;
let topRecencyContribution = 0;
const scoredAll = candidateRecords
.map((record) => {
const id = spec.getId(record);
const structuredScore = spec.computeStructuredScore(record, filters);
const textScore = textScoreMap.get(id) ?? 0;
const semanticScore = semanticScoreMap.get(id) ?? 0;
const order = recordOrder.get(id) ?? Number.MAX_SAFE_INTEGER;
let recencyContribution = 0;
let recencyTimestamp: number | null = null;
if (recencyEnabled && recencyAccessor) {
const timestamp = recencyAccessor(record, { now });
recencyTimestamp = typeof timestamp === 'number' && Number.isFinite(timestamp) ? timestamp : null;
const recencyScore = computeRecencyScore(recencyTimestamp, now);
recencyContribution = recencyScore * resolvedRecencyLambda;
if (recencyTimestamp !== null) {
if (freshestTimestamp === null || recencyTimestamp > freshestTimestamp) {
freshestTimestamp = recencyTimestamp;
}
}
if (recencyContribution > topRecencyContribution) {
topRecencyContribution = recencyContribution;
}
}
const weightedTextScore = textScore * textScoreWeight;
const weightedSemanticScore = semanticScore * semanticScoreWeight;
const baseScore = structuredScore + weightedTextScore + weightedSemanticScore;
const sortScore = baseScore + recencyContribution;
return {
record,
baseScore,
structuredScore,
textScore: weightedTextScore,
semanticScore: weightedSemanticScore,
recencyContribution,
sortScore,
order,
};
})
.filter((entry) => {
if (!hasQueryTerms) {
return true;
}
if (!requireSignals) {
return entry.structuredScore > 0 || entry.textScore > 0 || entry.semanticScore > 0;
}
if (entry.structuredScore > 0) {
return true;
}
return entry.textScore > 0 || entry.semanticScore > 0;
})
.sort((a, b) => {
if (b.sortScore !== a.sortScore) {
return b.sortScore - a.sortScore;
}
if (b.baseScore !== a.baseScore) {
return b.baseScore - a.baseScore;
}
return a.order - b.order;
});
const maxSortScore = scoredAll.reduce((max, entry) => Math.max(max, entry.sortScore), 0);
const normalizationFactor = maxSortScore > 0 ? maxSortScore : 1;
const scored = scoredAll
.map((entry) => ({
...entry,
normalizedScore: entry.sortScore > 0 ? entry.sortScore / normalizationFactor : 0,
}))
.slice(0, limit);
const results = scored.map(
({ record, structuredScore, textScore, semanticScore, recencyContribution, normalizedScore }) => {
const baseResult = spec.buildResult(record);
const metadata: ScoreMetadata = {
_score: normalizedScore,
_signals: {
structured: structuredScore || undefined,
text: textScore || undefined,
semantic: semanticScore || undefined,
recency: recencyContribution || undefined,
},
};
return { ...baseResult, ...metadata } as Scored<TResult>;
}
);
emitLog(filters, {
limit,
structuredCandidates: structuredMatches.length,
matchedCount: results.length,
expandedCandidates: candidateRecords.length,
usedSemantic: semanticScoreMap.size > 0,
topScore: scored.length ? scored[0]?.normalizedScore ?? 0 : 0,
topRawScore: scoredAll.length ? maxSortScore : undefined,
normalizationFactor: normalizationFactor || undefined,
recencyLambda: recencyEnabled ? resolvedRecencyLambda : undefined,
freshestTimestamp: recencyEnabled ? freshestTimestamp : undefined,
topRecencyScore: recencyEnabled ? topRecencyContribution : undefined,
rawTextMatches: textMatchCount,
candidateCount: candidateRecords.length,
scoredCandidates: scoredAll.length,
});
return results;
}
return { search }; }
