diff --git a/packages/ai/src/apply/client.ts b/packages/ai/src/apply/client.ts index de613bb784..04f1811084 100644 --- a/packages/ai/src/apply/client.ts +++ b/packages/ai/src/apply/client.ts @@ -1,11 +1,31 @@ import OpenAI from 'openai'; +import type { ApplyCodeChangeAssessment, ApplyCodeChangeGateOptions } from './quality'; +import { assessCodeChange, shouldBlockApply } from './quality'; + export interface ApplyCodeChangeMetadata { userId?: string; projectId?: string; conversationId?: string; } +export interface ApplyCodeChangeQualityOptions { + gate?: ApplyCodeChangeGateOptions; + blockBeforeProvider?: boolean; +} + +export interface ApplyCodeChangeWithQualityResult { + code: string | null; + preflightAssessment: ApplyCodeChangeAssessment; + resultAssessment?: ApplyCodeChangeAssessment; + blocked: boolean; + blockReason?: string; +} + +interface RelaceApplyResponse { + mergedCode?: string | null; +} + const createPrompt = (originalCode: string, updateSnippet: string, instruction: string) => `${instruction}\n${originalCode}\n${updateSnippet}`; @@ -37,7 +57,7 @@ export async function applyCodeChangeWithMorph( }, ], }); - return response.choices[0]?.message.content || null; + return response.choices[0]?.message.content ?? null; } export async function applyCodeChangeWithRelace( @@ -77,8 +97,8 @@ export async function applyCodeChangeWithRelace( if (!response.ok) { throw new Error(`Failed to apply code change: ${response.status}`); } - const result = await response.json(); - return result.mergedCode; + const result = (await response.json()) as RelaceApplyResponse; + return result.mergedCode ?? null; } export async function applyCodeChange( @@ -108,6 +128,8 @@ export async function applyCodeChange( }, ]; + let lastError: unknown = null; + // Run provider attempts in order of preference for (const { provider, applyFn } of providerAttempts) { try { @@ -118,18 +140,52 @@ export async function applyCodeChange( updateSnippet, instruction, ) - : await (applyFn as typeof applyCodeChangeWithRelace)( - originalCode, - updateSnippet, - instruction, - metadata, - ); + : await applyFn(originalCode, updateSnippet, instruction, metadata); if (result) return result; } catch (error) { console.warn(`Code application failed with provider ${provider}:`, error); - throw error; + lastError = error; } } + if (lastError instanceof Error) throw lastError; + if (lastError) throw new Error('Code application failed with a non-error value.'); return null; } + +export async function applyCodeChangeWithQuality( + originalCode: string, + updateSnippet: string, + instruction: string, + metadata?: ApplyCodeChangeMetadata, + preferredProvider: FastApplyProvider = FastApplyProvider.MORPH, + options: ApplyCodeChangeQualityOptions = {}, +): Promise { + const preflightAssessment = assessCodeChange(originalCode, updateSnippet, instruction); + if (options.blockBeforeProvider && shouldBlockApply(preflightAssessment, options.gate)) { + return { + code: null, + preflightAssessment, + blocked: true, + blockReason: + preflightAssessment.blockingConcerns[0] ?? + `Preflight score ${preflightAssessment.score} is below the configured gate.`, + }; + } + + const code = await applyCodeChange( + originalCode, + updateSnippet, + instruction, + metadata, + preferredProvider, + ); + return { + code, + preflightAssessment, + resultAssessment: code + ? assessCodeChange(originalCode, updateSnippet, instruction, code) + : undefined, + blocked: false, + }; +} diff --git a/packages/ai/src/apply/index.ts b/packages/ai/src/apply/index.ts index 4f1cce44fa..807b1e4bf4 100644 --- a/packages/ai/src/apply/index.ts +++ b/packages/ai/src/apply/index.ts @@ -1 +1,2 @@ export * from './client'; +export * from './quality'; diff --git a/packages/ai/src/apply/quality.ts b/packages/ai/src/apply/quality.ts new file mode 100644 index 0000000000..267a5eeae1 --- /dev/null +++ b/packages/ai/src/apply/quality.ts @@ -0,0 +1,449 @@ +export enum ApplyCodeChangeRisk { + LOW = 'low', + MEDIUM = 'medium', + HIGH = 'high', +} + +export interface ApplyCodeChangeSignal { + key: string; + label: string; + score: number; + weight: number; + details?: string; +} + +export interface ApplyCodeChangeStats { + originalLines: number; + updateLines: number; + appliedLines?: number; + editDensity: number; + instructionCoverage: number; + missingOriginalSymbols: string[]; + placeholders: string[]; +} + +export interface ApplyCodeChangeAssessment { + score: number; + confidence: number; + risk: ApplyCodeChangeRisk; + summary: string; + signals: ApplyCodeChangeSignal[]; + blockingConcerns: string[]; + stats: ApplyCodeChangeStats; +} + +export interface ApplyCodeChangeGateOptions { + minimumScore?: number; + blockHighRisk?: boolean; + blockOnMissingSymbols?: boolean; +} + +const DEFAULT_GATE_OPTIONS: Required = { + minimumScore: 55, + blockHighRisk: true, + blockOnMissingSymbols: true, +}; + +const PLACEHOLDER_PATTERNS = [ + /\.\.\.\s*(?:existing|new)?\s*code/gi, + /\b(?:TODO|FIXME|XXX)\b/gi, + /\b(?:your|insert)\s+(?:code|implementation)\s+here\b/gi, + /\/\/\s*other fields\b/gi, +]; + +const RISKY_PATTERNS = [ + { pattern: /\beval\s*\(/g, label: 'uses eval' }, + { pattern: /\bnew\s+Function\s*\(/g, label: 'constructs a Function dynamically' }, + { pattern: /\bdangerouslySetInnerHTML\b/g, label: 'uses dangerouslySetInnerHTML' }, + { pattern: /\bdocument\.write\s*\(/g, label: 'writes directly to document' }, + { pattern: /\blocalStorage\.clear\s*\(/g, label: 'clears local storage' }, +]; + +const STOP_WORDS = new Set([ + 'a', + 'an', + 'and', + 'are', + 'as', + 'be', + 'by', + 'for', + 'from', + 'i', + 'in', + 'is', + 'it', + 'of', + 'on', + 'or', + 'that', + 'the', + 'this', + 'to', + 'with', +]); + +export function assessCodeChange( + originalCode: string, + updateSnippet: string, + instruction: string, + appliedCode?: string | null, +): ApplyCodeChangeAssessment { + const codeToAssess = appliedCode ?? updateSnippet; + const originalLines = countLines(originalCode); + const updateLines = countLines(updateSnippet); + const appliedLines = appliedCode ? countLines(appliedCode) : undefined; + const editDensity = calculateEditDensity(originalCode, codeToAssess); + const instructionCoverage = calculateInstructionCoverage(instruction, codeToAssess); + const missingOriginalSymbols = appliedCode + ? findMissingOriginalSymbols(originalCode, appliedCode) + : []; + const placeholders = newlyIntroduced( + findPlaceholders(originalCode), + findPlaceholders(codeToAssess), + ); + const riskyPatterns = newlyIntroduced( + findRiskyPatterns(originalCode), + findRiskyPatterns(codeToAssess), + ); + const syntaxBalanceScore = scoreSyntaxBalanceRelative(originalCode, codeToAssess); + + const signals: ApplyCodeChangeSignal[] = [ + { + key: 'syntax-balance', + label: 'Syntax balance', + score: syntaxBalanceScore, + weight: 0.24, + details: + syntaxBalanceScore === 100 + ? 'Brackets, braces, and string delimiters look balanced.' + : 'Potentially unbalanced delimiters.', + }, + { + key: 'placeholder-free', + label: 'Placeholder-free output', + score: clampScore(100 - placeholders.length * (appliedCode ? 35 : 18)), + weight: 0.18, + details: placeholders.length + ? `Found ${placeholders.length} placeholder marker(s).` + : 'No obvious placeholders detected.', + }, + { + key: 'symbol-preservation', + label: 'Original symbol preservation', + score: missingOriginalSymbols.length + ? clampScore(100 - missingOriginalSymbols.length * 24) + : 100, + weight: appliedCode ? 0.2 : 0.08, + details: missingOriginalSymbols.length + ? `Missing exported or declared symbol(s): ${missingOriginalSymbols.slice(0, 6).join(', ')}` + : 'No obvious exported or declared symbols were dropped.', + }, + { + key: 'instruction-coverage', + label: 'Instruction coverage', + score: clampScore(Math.round(instructionCoverage * 100)), + weight: 0.14, + details: `${Math.round(instructionCoverage * 100)}% of meaningful instruction tokens appear in the assessed code.`, + }, + { + key: 'edit-density', + label: 'Edit density', + score: scoreEditDensity(editDensity, appliedCode), + weight: 0.14, + details: `${Math.round(editDensity * 100)}% approximate line-level change density.`, + }, + { + key: 'risky-patterns', + label: 'Risky pattern scan', + score: clampScore(100 - riskyPatterns.length * 28), + weight: 0.1, + details: riskyPatterns.length + ? riskyPatterns.join(', ') + : 'No obvious high-risk browser/runtime patterns detected.', + }, + ]; + + const score = weightedScore(signals); + const blockingConcerns = [ + ...placeholders.map((placeholder) => `Placeholder left in generated code: ${placeholder}`), + ...missingOriginalSymbols.map((symbol) => `Possible dropped original symbol: ${symbol}`), + ...riskyPatterns.map((risk) => `Risky generated code pattern: ${risk}`), + ]; + if (syntaxBalanceScore < 70) { + blockingConcerns.unshift('Generated code appears to have unbalanced syntax delimiters.'); + } + + const risk = getRisk(score, blockingConcerns); + return { + score, + confidence: getConfidence( + Boolean(appliedCode), + signals, + originalCode, + updateSnippet, + instruction, + ), + risk, + summary: summarizeAssessment(score, risk, blockingConcerns, Boolean(appliedCode)), + signals, + blockingConcerns, + stats: { + originalLines, + updateLines, + appliedLines, + editDensity, + instructionCoverage, + missingOriginalSymbols, + placeholders, + }, + }; +} + +export function shouldBlockApply( + assessment: ApplyCodeChangeAssessment, + options: ApplyCodeChangeGateOptions = {}, +): boolean { + const gate = { ...DEFAULT_GATE_OPTIONS, ...options }; + const blockingConcerns = gate.blockOnMissingSymbols + ? assessment.blockingConcerns + : assessment.blockingConcerns.filter( + (concern) => !concern.startsWith('Possible dropped original symbol:'), + ); + if (assessment.score < gate.minimumScore) { + return true; + } + if (gate.blockHighRisk && assessment.risk === ApplyCodeChangeRisk.HIGH) { + return assessment.score < 60 || blockingConcerns.length > 0; + } + if (gate.blockHighRisk && blockingConcerns.length > 0) { + return true; + } + if (gate.blockOnMissingSymbols && assessment.stats.missingOriginalSymbols.length > 0) { + return true; + } + return false; +} + +function countLines(value: string): number { + if (!value.trim()) { + return 0; + } + return value.split(/\r?\n/).length; +} + +function calculateEditDensity(originalCode: string, assessedCode: string): number { + const originalLines = normalizeLines(originalCode); + const assessedLines = normalizeLines(assessedCode); + if (!originalLines.length && !assessedLines.length) { + return 0; + } + + const maxLength = Math.max(originalLines.length, assessedLines.length); + let changed = Math.abs(originalLines.length - assessedLines.length); + const sharedLength = Math.min(originalLines.length, assessedLines.length); + for (let i = 0; i < sharedLength; i += 1) { + if (originalLines[i] !== assessedLines[i]) { + changed += 1; + } + } + return roundRatio(changed / maxLength); +} + +function normalizeLines(value: string): string[] { + return value + .split(/\r?\n/) + .map((line) => line.trim()) + .filter(Boolean); +} + +function calculateInstructionCoverage(instruction: string, assessedCode: string): number { + const instructionTokens = tokenize(instruction); + if (!instructionTokens.length) { + return 1; + } + const codeTokens = new Set(tokenize(assessedCode)); + const matched = instructionTokens.filter((token) => codeTokens.has(token)); + return roundRatio(matched.length / instructionTokens.length); +} + +function tokenize(value: string): string[] { + const tokens = value.toLowerCase().match(/[a-z][a-z0-9_]{2,}/g) ?? []; + return [...new Set(tokens.filter((token) => !STOP_WORDS.has(token)))]; +} + +function findMissingOriginalSymbols(originalCode: string, appliedCode: string): string[] { + const originalSymbols = extractDeclaredSymbols(originalCode); + const appliedSymbols = extractDeclaredSymbols(appliedCode); + return [...originalSymbols].filter((symbol) => !appliedSymbols.has(symbol)).sort(); +} + +function extractDeclaredSymbols(code: string): Set { + const symbols = new Set(); + if (/^export\s+default\b/gm.test(code)) { + symbols.add('default'); + } + const patterns = [ + /^export\s+(?!default\b)(?:async\s+)?(?:function|class|interface|type|enum|const|let|var)\s+([A-Za-z_$][\w$]*)/gm, + /^export\s*\{\s*([^}]+)\s*\}/gm, + ]; + for (const pattern of patterns) { + for (const match of code.matchAll(pattern)) { + if (!match[1]) continue; + if (pattern.source.includes('\\{')) { + for (const exportName of match[1].split(',')) { + const [localName, aliasName] = exportName + .split(/\s+as\s+/i) + .map((part) => part.trim()); + const symbolName = aliasName ?? localName; + if (symbolName) { + symbols.add(symbolName); + } + } + } else { + symbols.add(match[1]); + } + } + } + return symbols; +} + +function newlyIntroduced(before: string[], after: string[]): string[] { + const beforeSet = new Set(before); + return after.filter((item) => !beforeSet.has(item)); +} + +function findPlaceholders(code: string): string[] { + const placeholders = new Set(); + for (const pattern of PLACEHOLDER_PATTERNS) { + for (const match of code.matchAll(pattern)) { + placeholders.add(match[0].trim()); + } + } + return [...placeholders]; +} + +function findRiskyPatterns(code: string): string[] { + const riskyPatterns = new Set(); + for (const { pattern, label } of RISKY_PATTERNS) { + if (pattern.test(code)) { + riskyPatterns.add(label); + } + pattern.lastIndex = 0; + } + return [...riskyPatterns]; +} + +function scoreSyntaxBalance(code: string): number { + const stripped = stripStringsAndComments(code); + const pairs: Record = { + '(': ')', + '[': ']', + '{': '}', + }; + const closers = new Set(Object.values(pairs)); + const stack: string[] = []; + let mismatches = 0; + + for (const char of stripped) { + if (pairs[char]) { + stack.push(pairs[char]); + } else if (closers.has(char) && stack.pop() !== char) { + mismatches += 1; + } + } + + return clampScore(100 - (stack.length + mismatches) * 18); +} + +function scoreSyntaxBalanceRelative(originalCode: string, assessedCode: string): number { + const originalScore = scoreSyntaxBalance(originalCode); + const assessedScore = scoreSyntaxBalance(assessedCode); + if (assessedScore >= originalScore) { + return 100; + } + return assessedScore; +} + +function stripStringsAndComments(code: string): string { + return code + .replace(/\/\*[\s\S]*?\*\//g, '') + .replace(/\/\/.*$/gm, '') + .replace(/(['"`])(?:\\.|(?!\1)[\s\S])*\1/g, ''); +} + +function scoreEditDensity( + editDensity: number, + hasAppliedCode: string | null | undefined | boolean, +): number { + if (!hasAppliedCode) { + return editDensity > 0.85 ? 55 : 85; + } + if (editDensity <= 0.45) { + return 100; + } + if (editDensity <= 0.7) { + return 78; + } + if (editDensity <= 0.9) { + return 58; + } + return 38; +} + +function weightedScore(signals: ApplyCodeChangeSignal[]): number { + const totalWeight = signals.reduce((sum, signal) => sum + signal.weight, 0); + const score = + signals.reduce((sum, signal) => sum + signal.score * signal.weight, 0) / totalWeight; + return clampScore(Math.round(score)); +} + +function getRisk(score: number, blockingConcerns: string[]): ApplyCodeChangeRisk { + if (score < 60 || blockingConcerns.length >= 3) { + return ApplyCodeChangeRisk.HIGH; + } + if (score < 80 || blockingConcerns.length > 0) { + return ApplyCodeChangeRisk.MEDIUM; + } + return ApplyCodeChangeRisk.LOW; +} + +function getConfidence( + hasAppliedCode: boolean, + signals: ApplyCodeChangeSignal[], + originalCode: string, + updateSnippet: string, + instruction: string, +): number { + const nonEmptyInputs = [originalCode, updateSnippet, instruction].filter( + (value) => value.trim().length > 0, + ).length; + const base = hasAppliedCode ? 0.78 : 0.58; + const inputBonus = nonEmptyInputs * 0.05; + const signalBonus = Math.min(signals.length, 6) * 0.015; + return roundRatio(Math.min(base + inputBonus + signalBonus, 0.92)); +} + +function summarizeAssessment( + score: number, + risk: ApplyCodeChangeRisk, + blockingConcerns: string[], + hasAppliedCode: boolean, +): string { + const stage = hasAppliedCode ? 'Applied code' : 'Preflight snippet'; + if (risk === ApplyCodeChangeRisk.LOW) { + return `${stage} looks clear enough to review with low risk.`; + } + if (blockingConcerns.length) { + return `${stage} needs review before apply. Main concern: ${blockingConcerns[0]}`; + } + return `${stage} scored ${score}/100 and should get a closer look before apply.`; +} + +function clampScore(value: number): number { + return Math.max(0, Math.min(100, value)); +} + +function roundRatio(value: number): number { + return Math.round(value * 1000) / 1000; +} diff --git a/packages/ai/test/apply-quality.test.ts b/packages/ai/test/apply-quality.test.ts new file mode 100644 index 0000000000..f0eb351088 --- /dev/null +++ b/packages/ai/test/apply-quality.test.ts @@ -0,0 +1,198 @@ +import { describe, expect, it } from 'bun:test'; + +import { ApplyCodeChangeRisk, assessCodeChange, shouldBlockApply } from '../src/apply'; + +const originalCode = `export interface User { + id: string; + name: string; +} + +export async function fetchUserData(userId: string): Promise { + const response = await fetch('/api/users/' + userId); + return response.json(); +}`; + +describe('apply code quality assessment', () => { + it('scores a focused applied change as low risk', () => { + const appliedCode = `export interface User { + id: string; + name: string; + email?: string; +} + +export async function fetchUserData(userId: string): Promise { + const response = await fetch('/api/users/' + userId); + if (!response.ok) { + throw new Error('Failed to fetch user: ' + response.status); + } + return response.json(); +}`; + + const assessment = assessCodeChange( + originalCode, + `Add email?: string to User and add fetch response error handling.`, + 'Add email to User and return a typed fetch response with error handling', + appliedCode, + ); + + expect(assessment.score).toBeGreaterThanOrEqual(80); + expect(assessment.risk).toBe(ApplyCodeChangeRisk.LOW); + expect(assessment.blockingConcerns).toEqual([]); + expect(shouldBlockApply(assessment)).toBe(false); + }); + + it('blocks applied code that leaves fast-apply placeholders behind', () => { + const appliedCode = `export interface User { + id: string; + name: string; + // other fields +} + +export async function fetchUserData(userId: string): Promise { + // ... existing code +}`; + + const assessment = assessCodeChange( + originalCode, + 'Add email and error handling', + 'Add email to User and return a typed fetch response with error handling', + appliedCode, + ); + + expect(assessment.risk).not.toBe(ApplyCodeChangeRisk.LOW); + expect(assessment.stats.placeholders).toContain('// other fields'); + expect(assessment.blockingConcerns.some((concern) => concern.includes('Placeholder'))).toBe( + true, + ); + expect(shouldBlockApply(assessment)).toBe(true); + }); + + it('surfaces exported symbols that disappear from the applied result', () => { + const appliedCode = `export interface User { + id: string; + name: string; + email?: string; +}`; + + const assessment = assessCodeChange( + originalCode, + 'Add email to User only.', + 'Add email to User without changing fetchUserData', + appliedCode, + ); + + expect(assessment.stats.missingOriginalSymbols).toContain('fetchUserData'); + expect(assessment.blockingConcerns).toContain( + 'Possible dropped original symbol: fetchUserData', + ); + expect(shouldBlockApply(assessment)).toBe(true); + }); + + it('does not treat default export internal names as stable exported symbols', () => { + const originalDefaultExport = `export default function UserCard() { + return null; +}`; + const appliedDefaultExport = `export default function ProfileCard() { + return null; +}`; + + const assessment = assessCodeChange( + originalDefaultExport, + 'Rename the default component implementation.', + 'Rename the component used by the default export', + appliedDefaultExport, + ); + + expect(assessment.stats.missingOriginalSymbols).toEqual([]); + expect(assessment.blockingConcerns).not.toContain( + 'Possible dropped original symbol: UserCard', + ); + }); + + it('respects disabled missing-symbol blocking when filtering concerns', () => { + const appliedCode = `export interface User { + id: string; + name: string; +}`; + + const assessment = assessCodeChange( + originalCode, + 'Keep only the User interface.', + 'Remove fetchUserData for an exploratory edit', + appliedCode, + ); + + expect(assessment.stats.missingOriginalSymbols).toContain('fetchUserData'); + expect( + shouldBlockApply(assessment, { + minimumScore: 0, + blockHighRisk: true, + blockOnMissingSymbols: false, + }), + ).toBe(false); + }); + + it('ignores local symbols when checking exported API preservation', () => { + const codeWithLocal = `export function renderUser(id: string) { + const temporaryLabel = id.toUpperCase(); + return temporaryLabel; +}`; + const appliedCode = `export function renderUser(id: string) { + return id.toLowerCase(); +}`; + + const assessment = assessCodeChange( + codeWithLocal, + 'Simplify the label formatting', + 'Simplify renderUser', + appliedCode, + ); + + expect(assessment.stats.missingOriginalSymbols).toEqual([]); + expect(assessment.blockingConcerns).not.toContain( + 'Possible dropped original symbol: temporaryLabel', + ); + }); + + it('only reports placeholders newly introduced by the applied code', () => { + const codeWithExistingTodo = `export function renderUser(id: string) { + // TODO: preserve legacy fallback + return id; +}`; + const appliedCode = `export function renderUser(id: string) { + // TODO: preserve legacy fallback + return id.toLowerCase(); +}`; + + const assessment = assessCodeChange( + codeWithExistingTodo, + 'Lowercase the rendered user id', + 'Lowercase renderUser output', + appliedCode, + ); + + expect(assessment.stats.placeholders).toEqual([]); + expect(assessment.blockingConcerns.some((concern) => concern.includes('Placeholder'))).toBe( + false, + ); + }); + + it('lets callers use a softer review gate for exploratory edits', () => { + const preflight = assessCodeChange( + originalCode, + `export interface User { + id: string; + email?: string; +}`, + 'Experiment with an email-only user type', + ); + + expect( + shouldBlockApply(preflight, { + minimumScore: 30, + blockHighRisk: false, + blockOnMissingSymbols: false, + }), + ).toBe(false); + }); +});