Skip to content
45 changes: 33 additions & 12 deletions app/api/score/route.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import { computeScore } from "afdocs";
import { AFDOCS_VERSION } from "@/lib/scoring";
import { inferCategory } from "@/lib/categorize";
import { isBlockedDomain } from "@/lib/blocked-domains";
import { resolveSlugAlias } from "@/lib/slug-aliases";

// Route-level config exports. NOTE(review): these look like Next.js route segment
// config options for this API route — confirm against the Next.js version in use.
// `runtime` selects the Node.js runtime for this handler.
export const runtime = "nodejs";
// `maxDuration` is the execution time limit for this route (seconds, per the Next.js
// docs); presumably sized for the scoring job that POST kicks off — verify.
export const maxDuration = 300;
Expand Down Expand Up @@ -134,13 +135,20 @@ async function detectDocsUrl(url: string): Promise<{ isLikely: boolean; warning?
if (DOCS_PATHS.test(pathStr)) return { isLikely: true };
if (DOCS_PLATFORMS.test(host + parsed.pathname)) return { isLikely: true };

// An llms.txt is a strong docs signal — but marketing sites increasingly ship one
// too (e.g. monday.com serves /llms.txt from its product homepage). For a bare apex
// root it's not sufficient on its own; defer to the homepage content check below so
// a marketing landing page can still be rejected. For any deeper path it stands.
const isRoot = parsed.pathname === "/" || parsed.pathname === "";
let hasLlms = false;
try {
const r = await fetch(`${parsed.origin}/llms.txt`, {
signal: AbortSignal.timeout(5000),
headers: { "User-Agent": "Mozilla/5.0 (compatible; AgentScore/1.0)" },
});
if (r.ok) return { isLikely: true };
hasLlms = r.ok;
} catch { /* ignore */ }
if (hasLlms && !isRoot) return { isLikely: true };

try {
const r = await fetch(url, {
Expand All @@ -162,6 +170,9 @@ async function detectDocsUrl(url: string): Promise<{ isLikely: boolean; warning?
suggestion: `docs.${baseDomain}, ${parsed.origin}/docs, or ${parsed.origin}/api`,
};
} catch {
// Couldn't analyze the page — if it advertised an llms.txt, trust that rather
// than reject on a fetch failure (only a *visible* marketing page is rejected).
if (hasLlms) return { isLikely: true };
return {
isLikely: false,
warning: `Could not fetch the URL — it may be protected by bot-detection.`,
Expand Down Expand Up @@ -417,21 +428,31 @@ export async function POST(request: Request) {
// When the URL has a meaningful path (e.g. docs.nvidia.com/dynamo vs docs.nvidia.com/heavyai),
// use the full URL slug so path-scoped sites don't collide on the domain-derived name slug.
const urlPath = (() => { try { return new URL(url).pathname.replace(/^\/|\/$/g, ''); } catch { return ''; } })();
const effectiveSlug = slugParam || (effectiveName && !urlPath ? nameToSlug(effectiveName) : urlToSlug(url));
console.log("[score] resolved slug:", effectiveSlug, "name:", effectiveName);

// Return cached result if company already exists (skip when force=true or in development)
// Fern preview/staging hosts (*.ferndocs.com) always slug by URL so they stay distinct from the
// canonical live company entry — otherwise e.g. docusign.ferndocs.com collapses onto the "docusign" slug.
const isFernHost = (() => { try { return /(^|\.)ferndocs\.com$/i.test(new URL(url).hostname); } catch { return false; } })();
const rawSlug = slugParam || (effectiveName && !urlPath && !isFernHost ? nameToSlug(effectiveName) : urlToSlug(url));
// Alias a likely-typed domain (e.g. "monday" → "developer-monday-com-api-reference") to a curated
// leaderboard entry. This is a *redirect for lookups only*: we surface the existing canonical entry
// but never score/overwrite it. Actual scoring always stores under the raw slug (see runJob below).
const aliasSlug = resolveSlugAlias(rawSlug);
console.log("[score] resolved slug:", rawSlug, "name:", effectiveName, rawSlug !== aliasSlug ? `(alias → ${aliasSlug})` : '');

// Return cached result if company already exists (skip when force=true or in development).
// Prefer the alias target so a typed domain points at the curated entry.
if (!force && process.env.NODE_ENV !== 'development') {
try {
const existing = await getScoreBySlug(effectiveSlug);
const existing =
(await getScoreBySlug(aliasSlug)) ??
(aliasSlug !== rawSlug ? await getScoreBySlug(rawSlug) : null);
if (existing) {
console.log("[score] company already exists, returning cached result:", effectiveSlug);
console.log("[score] company already exists, returning cached result:", existing.slug);
const jobId = crypto.randomUUID();
writeJob(jobId, {
status: "complete",
score: existing.score,
grade: existing.grade,
slug: effectiveSlug,
slug: existing.slug,
summary: {
total: existing.checks.total,
pass: existing.checks.pass,
Expand All @@ -440,7 +461,7 @@ export async function POST(request: Request) {
},
results: existing.results,
});
return NextResponse.json({ jobId, slug: effectiveSlug, cached: true });
return NextResponse.json({ jobId, slug: existing.slug, cached: true });
}
} catch { /* Supabase check failed — proceed with scoring */ }
}
Expand Down Expand Up @@ -468,13 +489,13 @@ export async function POST(request: Request) {
console.log("[score] job created:", jobId);

if (process.env.NODE_ENV === 'development') {
runJob(jobId, url, effectiveSlug, effectiveName ?? undefined, hidden).catch(console.error);
runJob(jobId, url, rawSlug, effectiveName ?? undefined, hidden).catch(console.error);
} else {
waitUntil(runJob(jobId, url, effectiveSlug, effectiveName ?? undefined, hidden));
waitUntil(runJob(jobId, url, rawSlug, effectiveName ?? undefined, hidden));
}

// Set updated rate limit cookie
const response = NextResponse.json({ jobId, slug: effectiveSlug });
const response = NextResponse.json({ jobId, slug: rawSlug });
response.headers.set('Set-Cookie', buildRateLimitCookie(rlTimestamps));
return response;
} catch (error) {
Expand Down
7 changes: 6 additions & 1 deletion app/company/[slug]/page.tsx
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import { getCompanyWithFallback } from '@/lib/scores';
import type { CheckResult } from '@/lib/scores';
import { notFound } from 'next/navigation';
import { notFound, redirect } from 'next/navigation';
import { resolveSlugAlias } from '@/lib/slug-aliases';
import Link from 'next/link';
import type { Metadata } from 'next';
import ScoreRing from './ScoreRing';
Expand Down Expand Up @@ -125,6 +126,10 @@ function buildSummary(company: { name: string; score: number; grade: string; che
}

export default async function CompanyPage({ params }: { params: { slug: string } }) {
// Redirect known duplicate slugs (e.g. /company/monday) to the canonical leaderboard entry.
const canonical = resolveSlugAlias(params.slug);
if (canonical !== params.slug) redirect(`/agent-score/company/${canonical}`);

const company = await getCompanyWithFallback(params.slug);
if (!company) notFound();

Expand Down
9 changes: 9 additions & 0 deletions lib/blocked-domains.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,21 @@ const BLOCKED_DOMAINS = new Set([
'porntrex.com', 'anysex.com', 'fuq.com', 'ixxx.com', 'rulertube.com',
]);

// Marketing/landing sites that should never be graded as docs themselves, but whose
// docs subdomains (e.g. developer.monday.com) ARE eligible. Entries are bare apex hosts:
// isBlockedDomain strips a leading "www." and then does an exact Set lookup against this
// list, so "monday.com" and "www.monday.com" match while any other subdomain does not.
// This is the pre-cache, pre-scoring guard: it guarantees rejection even if a stale
// cached row exists or detection flakes.
const BLOCKED_APEX_ONLY = new Set([
'monday.com',
]);

export function isBlockedDomain(url: string): boolean {
try {
const normalized = /^https?:\/\//i.test(url) ? url : `https://${url}`;
const { hostname } = new URL(normalized);
const host = hostname.replace(/^www\./, '').toLowerCase();
if (BLOCKED_TLDS.has('.' + host.split('.').pop())) return true;
if (BLOCKED_APEX_ONLY.has(host)) return true;
if (BLOCKED_DOMAINS.has(host)) return true;
for (const d of Array.from(BLOCKED_DOMAINS)) {
if (host === d || host.endsWith('.' + d)) return true;
Expand Down
17 changes: 9 additions & 8 deletions lib/categorize.ts
Original file line number Diff line number Diff line change
Expand Up @@ -109,14 +109,14 @@ function matchPatterns(docsUrl: string, name?: string): Category | undefined {

/**
* Ask Claude to classify the company when pattern-matching returns nothing.
* Requires OPENAI_API_KEY in the environment.
* Requires ANTHROPIC_API_KEY in the environment.
* Returns null on any failure so the caller can fall back gracefully.
*/
async function inferCategoryWithLLM(
docsUrl: string,
name?: string,
): Promise<Category | null> {
const apiKey = process.env.OPENAI_API_KEY;
const apiKey = process.env.ANTHROPIC_API_KEY;
if (!apiKey) return null;

const prompt = `You are classifying a company's API/developer documentation site into exactly one category.
Expand All @@ -132,27 +132,28 @@ Rules:
- Reply with ONLY the category name, nothing else.`;

try {
const res = await fetch('https://api.openai.com/v1/chat/completions', {
const res = await fetch('https://api.anthropic.com/v1/messages', {
method: 'POST',
headers: {
'Authorization': `Bearer ${apiKey}`,
'x-api-key': apiKey,
'anthropic-version': '2023-06-01',
'content-type': 'application/json',
},
body: JSON.stringify({
model: 'gpt-4o-mini',
model: 'claude-haiku-4-5',
max_tokens: 16,
messages: [{ role: 'user', content: prompt }],
}),
signal: AbortSignal.timeout(10_000),
});

if (!res.ok) {
console.warn('[categorize] OpenAI API error:', res.status);
console.warn('[categorize] Anthropic API error:', res.status);
return null;
}

const data = await res.json();
const raw = (data?.choices?.[0]?.message?.content ?? '').trim();
const raw = (data?.content?.[0]?.text ?? '').trim();
const match = CATEGORIES.find(
(c) => c.toLowerCase() === raw.toLowerCase(),
);
Expand All @@ -166,7 +167,7 @@ Rules:
/**
* Infer a category for a docs URL + company name.
* 1. Fast pattern match — if it hits, return immediately (no API call).
* 2. Ask GPT-4o mini if patterns don't match.
* 2. Ask Claude Haiku if patterns don't match.
* 3. Fall back to 'Other' if the API call fails or is unavailable.
*/
export async function inferCategory(
Expand Down
10 changes: 10 additions & 0 deletions lib/slug-aliases.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
// Maps an auto-generated/typed slug → the canonical leaderboard slug we want to show.
// Use this when a domain a user is likely to type (e.g. monday.com → "monday") resolves
// to a weaker entry than the curated leaderboard entry we'd rather surface.
export const SLUG_ALIASES: Record<string, string> = {
  // e.g. monday: 'developer-monday-com-api-reference',
};

/**
 * Resolve a slug to its canonical leaderboard slug.
 *
 * @param slug - The auto-generated or user-typed slug to look up.
 * @returns The alias target when `slug` is a key of {@link SLUG_ALIASES};
 *          otherwise `slug` itself, unchanged.
 */
export function resolveSlugAlias(slug: string): string {
  // Only the table's OWN keys count as aliases. A bare `SLUG_ALIASES[slug]` lookup also
  // finds members inherited from Object.prototype, so slugs such as "constructor",
  // "toString" or "__proto__" would come back as a function/object instead of a string
  // and callers comparing the result with the input would treat them as aliased.
  if (!Object.prototype.hasOwnProperty.call(SLUG_ALIASES, slug)) return slug;
  return SLUG_ALIASES[slug] ?? slug;
}