import { randomUUID } from 'crypto'; import { NextRequest } from 'next/server'; import type { LlmClient } from '@portfolio/chat-llm'; import { createChatSseStream, SSE_HEADERS } from './stream'; import { validateChatPostBody, resolveReasoningEnabled, type ChatPostBody } from './validation'; import { moderateChatMessagesForProvider } from './moderation'; import { logChatDebug, resetChatDebugLogs, runWithChatLogContext, type ChatServerLogger } from './server'; import type { ChatApi } from './index'; import type { ChatRuntimeOptions } from '@portfolio/chat-orchestrator'; import type { RuntimeCostClients, RuntimeCostState } from './runtimeCost'; import type { RateLimitResult } from './rateLimit';
// Produces the canned response served in fixture mode (e.g. e2e tests)
// instead of invoking the real LLM. May be sync or async.
// NOTE: the original bare `Promise` (no type argument) does not compile;
// restored to `Promise<Response>` to match Response-returning callers.
type FixtureResponder = (options: {
  answerModel: string;
  headers?: HeadersInit;
}) => Promise<Response> | Response;
type RuntimeCostHooks = { getClients: () => Promise<RuntimeCostClients | null>; shouldThrottleForBudget: (clients: RuntimeCostClients, logger?: ChatServerLogger) => Promise; recordRuntimeCost: ( clients: RuntimeCostClients, costUsd: number, logger?: ChatServerLogger ) => Promise; budgetExceededMessage?: string; };
export type NextChatHandlerOptions = { chatApi: ChatApi; chatLogger: ChatServerLogger; chatRuntimeOptions?: ChatRuntimeOptions; getLlmClient: () => Promise; enforceRateLimit?: (request: NextRequest) => Promise; buildRateLimitHeaders?: (result: RateLimitResult) => HeadersInit; shouldServeFixtures?: (headers: Headers) => boolean; buildFixtureResponse?: FixtureResponder; inputModeration?: { enabled?: boolean; model?: string; }; outputModeration?: { enabled?: boolean; model?: string; refusalMessage?: string; refusalBanner?: string; }; runtimeCost?: RuntimeCostHooks; onErrorLog?: (event: string, payload: Record<string, unknown>) => void; };
// Default user-facing copy; each value can be overridden via NextChatHandlerOptions.
// - REFUSAL_MESSAGE: returned when moderation refuses to answer.
// - REFUSAL_BANNER: passed to the stream's output-moderation config.
// - BUDGET_EXCEEDED: streamed as an SSE error when the cost budget is exhausted.
const DEFAULT_MODERATION_REFUSAL_MESSAGE = 'I can only answer questions about my portfolio and professional background.'; const DEFAULT_MODERATION_REFUSAL_BANNER = 'That request was blocked by my safety filters.'; const DEFAULT_BUDGET_EXCEEDED_MESSAGE = 'Experiencing technical issues, try again later.';
/**
 * Builds a Response whose body is a single server-sent `error` event.
 *
 * Used for failures that must still reach the client over the SSE channel
 * (rate limiting, budget exhaustion) so the UI can render them inline.
 *
 * Fix: an SSE event is only dispatched once the stream emits a blank line;
 * the original frame ended with a single "\n", so standards-compliant SSE
 * parsers would never deliver the event. The frame now ends with "\n\n".
 *
 * @param code        Machine-readable error code (e.g. 'rate_limited').
 * @param message     Human-readable message shown to the user.
 * @param retryable   Whether the client may retry; also picks the default status.
 * @param retryAfterMs Optional delay hint before retrying.
 * @param headers     Extra headers merged over SSE_HEADERS (e.g. rate-limit headers).
 * @param status      Explicit HTTP status; defaults to 429 (retryable) or 503.
 * @param anchorId    Client-side anchor; mirrored as itemId in the payload.
 */
function buildErrorSseResponse({
  code,
  message,
  retryable,
  retryAfterMs,
  headers,
  status,
  anchorId,
}: {
  code: string;
  message: string;
  retryable: boolean;
  retryAfterMs?: number;
  headers?: HeadersInit;
  status?: number;
  anchorId?: string;
}): Response {
  const payload = JSON.stringify({
    type: 'error',
    anchorId,
    itemId: anchorId,
    code,
    message,
    retryable,
    retryAfterMs,
  });
  const encoder = new TextEncoder();
  const stream = new ReadableStream({
    start(controller) {
      // Trailing blank line ("\n\n") terminates the SSE event so clients dispatch it.
      controller.enqueue(encoder.encode(`event: error\ndata: ${payload}\n\n`));
      controller.close();
    },
  });
  return new Response(stream, {
    status: status ?? (retryable ? 429 : 503),
    headers: { ...SSE_HEADERS, ...(headers ?? {}) },
  });
}
/**
 * Builds the Next.js route handlers for the chat API.
 *
 * Returns `{ POST }`, where POST:
 *  1. parses and validates the request body,
 *  2. applies rate limiting and runtime-cost budget checks,
 *  3. optionally serves fixture responses (for tests),
 *  4. moderates input, then streams the LLM answer over SSE.
 *
 * Fixes: `POST`'s return type was a bare `Promise` (compile error) — restored
 * to `Promise<Response>`; malformed JSON bodies now yield a 400 instead of an
 * unhandled SyntaxError (the body parse ran before the outer try/catch).
 *
 * @throws Error at construction time when config lacks an answer model, so
 *         misconfiguration fails fast at startup rather than per-request.
 */
export function createNextChatHandler(options: NextChatHandlerOptions) {
  const answerModel = options.chatRuntimeOptions?.modelConfig?.answerModel;
  if (!answerModel) {
    throw new Error('Missing modelConfig.answerModel in chat config.');
  }
  // Resolve moderation/budget settings once; per-request code reads these copies.
  const outputModeration = {
    enabled: options.outputModeration?.enabled ?? false,
    model: options.outputModeration?.model,
    refusalMessage: options.outputModeration?.refusalMessage ?? DEFAULT_MODERATION_REFUSAL_MESSAGE,
    refusalBanner: options.outputModeration?.refusalBanner ?? DEFAULT_MODERATION_REFUSAL_BANNER,
  };
  const inputModeration = {
    enabled: options.inputModeration?.enabled ?? false,
    model: options.inputModeration?.model,
  };
  const budgetExceededMessage =
    options.runtimeCost?.budgetExceededMessage ?? DEFAULT_BUDGET_EXCEEDED_MESSAGE;

  return {
    async POST(request: NextRequest): Promise<Response> {
      const correlationId = randomUUID();

      // request.json() throws on malformed JSON; report it as a client error
      // instead of letting it escape as a 500.
      let body: ChatPostBody;
      try {
        body = ((await request.json()) as ChatPostBody) ?? {};
      } catch {
        return new Response('Invalid JSON body', { status: 400 });
      }

      const validation = validateChatPostBody(body);
      if (!validation.ok) {
        return new Response(validation.error, { status: validation.status });
      }
      const {
        messages,
        responseAnchorId,
        reasoningEnabled: requestedReasoningEnabled,
        conversationId,
      } = validation.value;

      // Everything below logs within the correlation/conversation context.
      return runWithChatLogContext({ correlationId, conversationId }, async () => {
        const reasoningEnabled = resolveReasoningEnabled({
          requested: requestedReasoningEnabled,
          environment: process.env.NODE_ENV,
        });
        // First message of a conversation in dev: start from a clean debug log.
        if (process.env.NODE_ENV !== 'production' && messages.length === 1) {
          resetChatDebugLogs();
        }

        // --- Rate limiting ------------------------------------------------
        const rateLimit = options.enforceRateLimit ? await options.enforceRateLimit(request) : null;
        const rateLimitHeaders = rateLimit
          ? options.buildRateLimitHeaders
            ? options.buildRateLimitHeaders(rateLimit)
            : (rateLimit.headers ?? {})
          : {};
        if (rateLimit && !rateLimit.success) {
          // Map limiter failure reasons to the closest HTTP status when the
          // limiter did not pin one explicitly.
          const status =
            rateLimit.status ??
            (rateLimit.reason === 'Rate limiter unavailable'
              ? 503
              : rateLimit.reason === 'Unable to identify client IP'
                ? 400
                : 429);
          // `reset` appears to be a unix timestamp in seconds (multiplied by
          // 1000 against Date.now()) — converted to a relative delay hint.
          const retryAfterMs =
            typeof rateLimit.reset === 'number' && Number.isFinite(rateLimit.reset)
              ? Math.max(0, rateLimit.reset * 1000 - Date.now())
              : undefined;
          return buildErrorSseResponse({
            code: 'rate_limited',
            message: rateLimit.reason ?? 'Rate limit exceeded',
            retryable: true,
            retryAfterMs,
            headers: rateLimitHeaders,
            status,
            anchorId: responseAnchorId,
          });
        }

        // --- Runtime cost budget -------------------------------------------
        const runtimeCostClients = options.runtimeCost
          ? await options.runtimeCost.getClients()
          : null;
        if (runtimeCostClients && options.runtimeCost) {
          try {
            const costState = await options.runtimeCost.shouldThrottleForBudget(
              runtimeCostClients,
              options.chatLogger
            );
            if (costState.level === 'exceeded') {
              return buildErrorSseResponse({
                code: 'budget_exceeded',
                message: budgetExceededMessage,
                retryable: false,
                status: 503,
                headers: rateLimitHeaders,
                anchorId: responseAnchorId,
              });
            }
          } catch (error) {
            // Budget check is best-effort: a broken cost backend must not block chat.
            logChatDebug('api.chat.cost_check_error', { error: String(error), correlationId });
          }
        }

        // --- Fixture mode (tests/e2e) --------------------------------------
        if (options.shouldServeFixtures && options.buildFixtureResponse) {
          const shouldServe = options.shouldServeFixtures(request.headers);
          if (shouldServe) {
            return options.buildFixtureResponse({ answerModel, headers: rateLimitHeaders });
          }
        }

        try {
          const client = await options.getLlmClient();

          // --- Input moderation --------------------------------------------
          if (inputModeration.enabled) {
            const moderation = await moderateChatMessagesForProvider(client, messages, {
              model: inputModeration.model,
            });
            if (moderation.flagged) {
              logChatDebug('api.chat.moderation_blocked', {
                categories: moderation.categories ?? [],
                correlationId,
              });
              // NOTE(review): intentionally a 200 JSON payload (not SSE, not
              // 4xx), presumably so the client renders the refusal inline —
              // confirm against the client-side contract before changing.
              return Response.json(
                { error: { code: 'input_moderated', message: outputModeration.refusalMessage, retryable: false } },
                { status: 200, headers: rateLimitHeaders }
              );
            }
          }

          // --- Stream the answer -------------------------------------------
          const stream = createChatSseStream(options.chatApi, client, messages, {
            anchorId: responseAnchorId,
            runOptions: { reasoningEnabled },
            outputModeration: outputModeration.enabled
              ? {
                  enabled: outputModeration.enabled,
                  model: outputModeration.model,
                  refusalMessage: outputModeration.refusalMessage,
                  refusalBanner: outputModeration.refusalBanner,
                }
              : undefined,
            runtimeCost:
              runtimeCostClients && options.runtimeCost
                ? {
                    budgetExceededMessage,
                    onResult: async (result) => {
                      // Prefer the aggregate cost; fall back to summing per-entry usage.
                      const fromUsage = Array.isArray(result.usage)
                        ? result.usage.reduce((acc, entry) => acc + (entry?.costUsd ?? 0), 0)
                        : 0;
                      const costUsd =
                        typeof result.totalCostUsd === 'number' ? result.totalCostUsd : fromUsage;
                      try {
                        return await options.runtimeCost!.recordRuntimeCost(
                          runtimeCostClients,
                          costUsd,
                          options.chatLogger
                        );
                      } catch (error) {
                        // Cost recording is best-effort; never fail the stream over it.
                        logChatDebug('api.chat.cost_record_error', {
                          error: String(error),
                          correlationId,
                        });
                        return undefined;
                      }
                    },
                  }
                : undefined,
            onError: (error: unknown) =>
              logChatDebug('api.chat.pipeline_error', { error: String(error), correlationId }),
          });
          return new Response(stream, { headers: { ...SSE_HEADERS, ...rateLimitHeaders } });
        } catch (error) {
          logChatDebug('api.chat.error', { error: String(error), correlationId });
          options.onErrorLog?.('api.chat.error', { error: String(error), correlationId });
          return new Response('Chat unavailable', { status: 500 });
        }
      });
    },
  };
}
