From 7a62a7969396639d4c08aefad07fbeaadeec556b Mon Sep 17 00:00:00 2001 From: Kapil Date: Tue, 26 May 2026 23:20:56 -0400 Subject: [PATCH 1/6] feat: switch category inference from OpenAI to Claude Haiku Replace gpt-4o-mini with claude-haiku-4-5 in the LLM fallback for category classification. Uses ANTHROPIC_API_KEY instead of OPENAI_API_KEY. Co-Authored-By: Claude Opus 4.7 (1M context) --- lib/categorize.ts | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/lib/categorize.ts b/lib/categorize.ts index 95b6a69..066a30a 100644 --- a/lib/categorize.ts +++ b/lib/categorize.ts @@ -109,14 +109,14 @@ function matchPatterns(docsUrl: string, name?: string): Category | undefined { /** * Ask Claude to classify the company when pattern-matching returns nothing. - * Requires OPENAI_API_KEY in the environment. + * Requires ANTHROPIC_API_KEY in the environment. * Returns null on any failure so the caller can fall back gracefully. */ async function inferCategoryWithLLM( docsUrl: string, name?: string, ): Promise { - const apiKey = process.env.OPENAI_API_KEY; + const apiKey = process.env.ANTHROPIC_API_KEY; if (!apiKey) return null; const prompt = `You are classifying a company's API/developer documentation site into exactly one category. 
@@ -132,14 +132,15 @@ Rules: - Reply with ONLY the category name, nothing else.`; try { - const res = await fetch('https://api.openai.com/v1/chat/completions', { + const res = await fetch('https://api.anthropic.com/v1/messages', { method: 'POST', headers: { - 'Authorization': `Bearer ${apiKey}`, + 'x-api-key': apiKey, + 'anthropic-version': '2023-06-01', 'content-type': 'application/json', }, body: JSON.stringify({ - model: 'gpt-4o-mini', + model: 'claude-haiku-4-5', max_tokens: 16, messages: [{ role: 'user', content: prompt }], }), @@ -147,12 +148,12 @@ Rules: }); if (!res.ok) { - console.warn('[categorize] OpenAI API error:', res.status); + console.warn('[categorize] Anthropic API error:', res.status); return null; } const data = await res.json(); - const raw = (data?.choices?.[0]?.message?.content ?? '').trim(); + const raw = (data?.content?.[0]?.text ?? '').trim(); const match = CATEGORIES.find( (c) => c.toLowerCase() === raw.toLowerCase(), ); @@ -166,7 +167,7 @@ Rules: /** * Infer a category for a docs URL + company name. * 1. Fast pattern match — if it hits, return immediately (no API call). - * 2. Ask GPT-4o mini if patterns don't match. + * 2. Ask Claude Haiku if patterns don't match. * 3. Fall back to 'Other' if the API call fails or is unavailable. */ export async function inferCategory( From c71e7f7b8e5b39bf9866362b00008f3d83a85605 Mon Sep 17 00:00:00 2001 From: Kapil Date: Thu, 4 Jun 2026 16:35:38 -0400 Subject: [PATCH 2/6] feat: redirect known duplicate slugs to canonical leaderboard entry Add a slug-alias map so typing a domain like monday.com (which resolves to the weaker "monday" slug) lands on the curated leaderboard entry "developer-monday-com-apps". Applied in the score API and the company page so both search and direct navigation redirect to the canonical slug. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- app/api/score/route.ts | 7 +++++-- app/company/[slug]/page.tsx | 7 ++++++- lib/slug-aliases.ts | 10 ++++++++++ 3 files changed, 21 insertions(+), 3 deletions(-) create mode 100644 lib/slug-aliases.ts diff --git a/app/api/score/route.ts b/app/api/score/route.ts index 0708056..e845d0b 100644 --- a/app/api/score/route.ts +++ b/app/api/score/route.ts @@ -7,6 +7,7 @@ import { computeScore } from "afdocs"; import { AFDOCS_VERSION } from "@/lib/scoring"; import { inferCategory } from "@/lib/categorize"; import { isBlockedDomain } from "@/lib/blocked-domains"; +import { resolveSlugAlias } from "@/lib/slug-aliases"; export const runtime = "nodejs"; export const maxDuration = 300; @@ -417,8 +418,10 @@ export async function POST(request: Request) { // When the URL has a meaningful path (e.g. docs.nvidia.com/dynamo vs docs.nvidia.com/heavyai), // use the full URL slug so path-scoped sites don't collide on the domain-derived name slug. const urlPath = (() => { try { return new URL(url).pathname.replace(/^\/|\/$/g, ''); } catch { return ''; } })(); - const effectiveSlug = slugParam || (effectiveName && !urlPath ? nameToSlug(effectiveName) : urlToSlug(url)); - console.log("[score] resolved slug:", effectiveSlug, "name:", effectiveName); + const rawSlug = slugParam || (effectiveName && !urlPath ? nameToSlug(effectiveName) : urlToSlug(url)); + // Redirect known duplicate slugs to the canonical leaderboard entry (e.g. "monday" → "developer-monday-com-apps"). + const effectiveSlug = resolveSlugAlias(rawSlug); + console.log("[score] resolved slug:", effectiveSlug, "name:", effectiveName, rawSlug !== effectiveSlug ? 
`(aliased from ${rawSlug})` : ''); // Return cached result if company already exists (skip when force=true or in development) if (!force && process.env.NODE_ENV !== 'development') { diff --git a/app/company/[slug]/page.tsx b/app/company/[slug]/page.tsx index 85a5f74..8210875 100644 --- a/app/company/[slug]/page.tsx +++ b/app/company/[slug]/page.tsx @@ -1,6 +1,7 @@ import { getCompanyWithFallback } from '@/lib/scores'; import type { CheckResult } from '@/lib/scores'; -import { notFound } from 'next/navigation'; +import { notFound, redirect } from 'next/navigation'; +import { resolveSlugAlias } from '@/lib/slug-aliases'; import Link from 'next/link'; import type { Metadata } from 'next'; import ScoreRing from './ScoreRing'; @@ -125,6 +126,10 @@ function buildSummary(company: { name: string; score: number; grade: string; che } export default async function CompanyPage({ params }: { params: { slug: string } }) { + // Redirect known duplicate slugs (e.g. /company/monday) to the canonical leaderboard entry. + const canonical = resolveSlugAlias(params.slug); + if (canonical !== params.slug) redirect(`/agent-score/company/${canonical}`); + const company = await getCompanyWithFallback(params.slug); if (!company) notFound(); diff --git a/lib/slug-aliases.ts b/lib/slug-aliases.ts new file mode 100644 index 0000000..032dce1 --- /dev/null +++ b/lib/slug-aliases.ts @@ -0,0 +1,10 @@ +// Maps an auto-generated/typed slug → the canonical leaderboard slug we want to show. +// Use this when a domain a user is likely to type (e.g. monday.com → "monday") resolves +// to a weaker entry than the curated leaderboard entry we'd rather surface. +export const SLUG_ALIASES: Record = { + monday: 'developer-monday-com-apps', +}; + +export function resolveSlugAlias(slug: string): string { + return SLUG_ALIASES[slug] ?? 
slug; +} From 5870f120456ad53cbf528b849c3976c7253ae618 Mon Sep 17 00:00:00 2001 From: Kapil Gowru Date: Thu, 4 Jun 2026 17:06:43 -0400 Subject: [PATCH 3/6] fix: keep *.ferndocs.com sites distinct from canonical company slug (#23) Fern preview/staging hosts (*.ferndocs.com) now always slug by URL instead of the derived company name, so e.g. docusign.ferndocs.com no longer collapses onto the "docusign" slug and returns the cached live entry. These sites are now graded as distinct entries. Co-authored-by: Claude Opus 4.8 (1M context) --- app/api/score/route.ts | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/app/api/score/route.ts b/app/api/score/route.ts index e845d0b..fe912d8 100644 --- a/app/api/score/route.ts +++ b/app/api/score/route.ts @@ -418,7 +418,10 @@ export async function POST(request: Request) { // When the URL has a meaningful path (e.g. docs.nvidia.com/dynamo vs docs.nvidia.com/heavyai), // use the full URL slug so path-scoped sites don't collide on the domain-derived name slug. const urlPath = (() => { try { return new URL(url).pathname.replace(/^\/|\/$/g, ''); } catch { return ''; } })(); - const rawSlug = slugParam || (effectiveName && !urlPath ? nameToSlug(effectiveName) : urlToSlug(url)); + // Fern preview/staging hosts (*.ferndocs.com) always slug by URL so they stay distinct from the + // canonical live company entry — otherwise e.g. docusign.ferndocs.com collapses onto the "docusign" slug. + const isFernHost = (() => { try { return /(^|\.)ferndocs\.com$/i.test(new URL(url).hostname); } catch { return false; } })(); + const rawSlug = slugParam || (effectiveName && !urlPath && !isFernHost ? nameToSlug(effectiveName) : urlToSlug(url)); // Redirect known duplicate slugs to the canonical leaderboard entry (e.g. "monday" → "developer-monday-com-apps"). const effectiveSlug = resolveSlugAlias(rawSlug); console.log("[score] resolved slug:", effectiveSlug, "name:", effectiveName, rawSlug !== effectiveSlug ? 
`(aliased from ${rawSlug})` : ''); From b7d8370d878d81e9d7df1e510f83a69dc1ac0828 Mon Sep 17 00:00:00 2001 From: Kapil Gowru Date: Thu, 4 Jun 2026 19:19:31 -0400 Subject: [PATCH 4/6] fix: route monday.com to canonical api-reference leaderboard entry (#25) * fix: route monday.com to canonical api-reference leaderboard entry The "monday" alias pointed at developer-monday-com-apps, which has since been hidden (score 68/D). Repoint to developer-monday-com-api-reference, the visible canonical entry (score 91/A). Co-Authored-By: Claude Opus 4.8 (1M context) * fix: make slug alias a lookup redirect, never a scoring target Previously a forced rerun (or dev run) of monday.com would score https://monday.com and upsert it under the aliased slug developer-monday-com-api-reference, clobbering the curated A-grade entry. Now the alias only redirects cache lookups to the existing canonical entry; scoring always stores under the raw slug. Co-Authored-By: Claude Opus 4.8 (1M context) --------- Co-authored-by: Claude Opus 4.8 (1M context) --- app/api/score/route.ts | 29 +++++++++++++++++------------ lib/slug-aliases.ts | 2 +- 2 files changed, 18 insertions(+), 13 deletions(-) diff --git a/app/api/score/route.ts b/app/api/score/route.ts index fe912d8..f2024fc 100644 --- a/app/api/score/route.ts +++ b/app/api/score/route.ts @@ -422,22 +422,27 @@ export async function POST(request: Request) { // canonical live company entry — otherwise e.g. docusign.ferndocs.com collapses onto the "docusign" slug. const isFernHost = (() => { try { return /(^|\.)ferndocs\.com$/i.test(new URL(url).hostname); } catch { return false; } })(); const rawSlug = slugParam || (effectiveName && !urlPath && !isFernHost ? nameToSlug(effectiveName) : urlToSlug(url)); - // Redirect known duplicate slugs to the canonical leaderboard entry (e.g. "monday" → "developer-monday-com-apps"). 
- const effectiveSlug = resolveSlugAlias(rawSlug); - console.log("[score] resolved slug:", effectiveSlug, "name:", effectiveName, rawSlug !== effectiveSlug ? `(aliased from ${rawSlug})` : ''); - - // Return cached result if company already exists (skip when force=true or in development) + // Alias a likely-typed domain (e.g. "monday" → "developer-monday-com-api-reference") to a curated + // leaderboard entry. This is a *redirect for lookups only*: we surface the existing canonical entry + // but never score/overwrite it. Actual scoring always stores under the raw slug (see runJob below). + const aliasSlug = resolveSlugAlias(rawSlug); + console.log("[score] resolved slug:", rawSlug, "name:", effectiveName, rawSlug !== aliasSlug ? `(alias → ${aliasSlug})` : ''); + + // Return cached result if company already exists (skip when force=true or in development). + // Prefer the alias target so a typed domain points at the curated entry. if (!force && process.env.NODE_ENV !== 'development') { try { - const existing = await getScoreBySlug(effectiveSlug); + const existing = + (await getScoreBySlug(aliasSlug)) ?? + (aliasSlug !== rawSlug ? 
await getScoreBySlug(rawSlug) : null); if (existing) { - console.log("[score] company already exists, returning cached result:", effectiveSlug); + console.log("[score] company already exists, returning cached result:", existing.slug); const jobId = crypto.randomUUID(); writeJob(jobId, { status: "complete", score: existing.score, grade: existing.grade, - slug: effectiveSlug, + slug: existing.slug, summary: { total: existing.checks.total, pass: existing.checks.pass, @@ -446,7 +451,7 @@ export async function POST(request: Request) { }, results: existing.results, }); - return NextResponse.json({ jobId, slug: effectiveSlug, cached: true }); + return NextResponse.json({ jobId, slug: existing.slug, cached: true }); } } catch { /* Supabase check failed — proceed with scoring */ } } @@ -474,13 +479,13 @@ export async function POST(request: Request) { console.log("[score] job created:", jobId); if (process.env.NODE_ENV === 'development') { - runJob(jobId, url, effectiveSlug, effectiveName ?? undefined, hidden).catch(console.error); + runJob(jobId, url, rawSlug, effectiveName ?? undefined, hidden).catch(console.error); } else { - waitUntil(runJob(jobId, url, effectiveSlug, effectiveName ?? undefined, hidden)); + waitUntil(runJob(jobId, url, rawSlug, effectiveName ?? undefined, hidden)); } // Set updated rate limit cookie - const response = NextResponse.json({ jobId, slug: effectiveSlug }); + const response = NextResponse.json({ jobId, slug: rawSlug }); response.headers.set('Set-Cookie', buildRateLimitCookie(rlTimestamps)); return response; } catch (error) { diff --git a/lib/slug-aliases.ts b/lib/slug-aliases.ts index 032dce1..6fa2ceb 100644 --- a/lib/slug-aliases.ts +++ b/lib/slug-aliases.ts @@ -2,7 +2,7 @@ // Use this when a domain a user is likely to type (e.g. monday.com → "monday") resolves // to a weaker entry than the curated leaderboard entry we'd rather surface. 
export const SLUG_ALIASES: Record = { - monday: 'developer-monday-com-apps', + monday: 'developer-monday-com-api-reference', }; export function resolveSlugAlias(slug: string): string { From 5a85a4cff33e47411f23ac597c04117875d4d34b Mon Sep 17 00:00:00 2001 From: Kapil Gowru Date: Thu, 4 Jun 2026 19:48:12 -0400 Subject: [PATCH 5/6] fix: reject apex marketing sites that ship an llms.txt (e.g. monday.com) (#27) An llms.txt returning 200 short-circuited docs detection, so marketing homepages that now ship one (monday.com) were graded as docs sites. At a bare apex/root, llms.txt is no longer sufficient on its own; we defer to the homepage content check (only a visible marketing page is rejected, and deeper paths / docs subdomains still pass). Also drop the now-unneeded monday slug alias. Co-authored-by: Claude Opus 4.8 (1M context) --- app/api/score/route.ts | 12 +++++++++++- lib/slug-aliases.ts | 2 +- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/app/api/score/route.ts b/app/api/score/route.ts index f2024fc..328ffb5 100644 --- a/app/api/score/route.ts +++ b/app/api/score/route.ts @@ -135,13 +135,20 @@ async function detectDocsUrl(url: string): Promise<{ isLikely: boolean; warning? if (DOCS_PATHS.test(pathStr)) return { isLikely: true }; if (DOCS_PLATFORMS.test(host + parsed.pathname)) return { isLikely: true }; + // An llms.txt is a strong docs signal — but marketing sites increasingly ship one + // too (e.g. monday.com serves /llms.txt from its product homepage). For a bare apex + // root it's not sufficient on its own; defer to the homepage content check below so + // a marketing landing page can still be rejected. For any deeper path it stands. 
+ const isRoot = parsed.pathname === "/" || parsed.pathname === ""; + let hasLlms = false; try { const r = await fetch(`${parsed.origin}/llms.txt`, { signal: AbortSignal.timeout(5000), headers: { "User-Agent": "Mozilla/5.0 (compatible; AgentScore/1.0)" }, }); - if (r.ok) return { isLikely: true }; + hasLlms = r.ok; } catch { /* ignore */ } + if (hasLlms && !isRoot) return { isLikely: true }; try { const r = await fetch(url, { @@ -163,6 +170,9 @@ async function detectDocsUrl(url: string): Promise<{ isLikely: boolean; warning? suggestion: `docs.${baseDomain}, ${parsed.origin}/docs, or ${parsed.origin}/api`, }; } catch { + // Couldn't analyze the page — if it advertised an llms.txt, trust that rather + // than reject on a fetch failure (only a *visible* marketing page is rejected). + if (hasLlms) return { isLikely: true }; return { isLikely: false, warning: `Could not fetch the URL — it may be protected by bot-detection.`, diff --git a/lib/slug-aliases.ts b/lib/slug-aliases.ts index 6fa2ceb..5a6757b 100644 --- a/lib/slug-aliases.ts +++ b/lib/slug-aliases.ts @@ -2,7 +2,7 @@ // Use this when a domain a user is likely to type (e.g. monday.com → "monday") resolves // to a weaker entry than the curated leaderboard entry we'd rather surface. export const SLUG_ALIASES: Record = { - monday: 'developer-monday-com-api-reference', + // e.g. monday: 'developer-monday-com-api-reference', }; export function resolveSlugAlias(slug: string): string { From 18aee75677102bf2c1eaad63954de8b584baa42a Mon Sep 17 00:00:00 2001 From: Kapil Gowru Date: Thu, 4 Jun 2026 21:17:18 -0400 Subject: [PATCH 6/6] fix: hard-block monday.com apex so it can never be graded as docs (#28) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The detection layer already rejects monday.com, but the cache lookup runs before detection — a stale cached row would still be served. 
Add an apex-only block (exact host + www, subdomains excluded) that runs before the cache and scoring, guaranteeing the marketing apex is rejected while developers.monday.com and other docs subdomains stay gradeable. Co-authored-by: Claude Opus 4.8 (1M context) --- lib/blocked-domains.ts | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/lib/blocked-domains.ts b/lib/blocked-domains.ts index d1713cc..bc09fa0 100644 --- a/lib/blocked-domains.ts +++ b/lib/blocked-domains.ts @@ -9,12 +9,21 @@ const BLOCKED_DOMAINS = new Set([ 'porntrex.com', 'anysex.com', 'fuq.com', 'ixxx.com', 'rulertube.com', ]); +// Marketing/landing sites that should never be graded as docs themselves, but whose +// docs subdomains (e.g. developers.monday.com) ARE eligible. Matched on the exact apex +// host only (plus www) — subdomains are NOT blocked. This is the pre-cache, pre-scoring +// guard: it guarantees rejection even if a stale cached row exists or detection flakes. +const BLOCKED_APEX_ONLY = new Set([ + 'monday.com', +]); + export function isBlockedDomain(url: string): boolean { try { const normalized = /^https?:\/\//i.test(url) ? url : `https://${url}`; const { hostname } = new URL(normalized); const host = hostname.replace(/^www\./, '').toLowerCase(); if (BLOCKED_TLDS.has('.' + host.split('.').pop())) return true; + if (BLOCKED_APEX_ONLY.has(host)) return true; if (BLOCKED_DOMAINS.has(host)) return true; for (const d of Array.from(BLOCKED_DOMAINS)) { if (host === d || host.endsWith('.' + d)) return true;