diff --git a/.env.example b/.env.example index 727672a7..4c13b535 100644 --- a/.env.example +++ b/.env.example @@ -10,6 +10,13 @@ LANGFUSE_SECRET_KEY=sk-lf-... LANGFUSE_PUBLIC_KEY=pk-lf-... LANGFUSE_BASE_URL=https://cloud.langfuse.com +# Chat model overrides (optional, one per service). Defaults live in +# services/models.py; set a var to switch that service's live model without +# redeploying. Accepts an alias (claude-opus, claude-sonnet) or a full model ID. +# APOLLO_GLOBAL_CHAT_MODEL= # global_chat planner +# APOLLO_WORKFLOW_CHAT_MODEL= # workflow_chat +# APOLLO_JOB_CHAT_MODEL= # job_chat + # HF_ACCESS_TOKEN=hf_YOUR-API-KEY-HERE # llama2 base # ZILLIZ_URI = https://in01-XXXXXXXXXXXXX.aws-us-west-2.vectordb.zillizcloud.com:XXXXX # ZILLIZ_TOKEN =db_admin:password (or ApiKey) diff --git a/pyproject.toml b/pyproject.toml index 91f181fa..fff1e25b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -49,6 +49,7 @@ testpaths = [ "services/workflow_chat/tests", "services/job_chat/tests", "services/search_docsite/tests", + "services/tests", "services/tools", ] diff --git a/services/doc_agent_chat/agent.py b/services/doc_agent_chat/agent.py index d9e24856..2e06e437 100644 --- a/services/doc_agent_chat/agent.py +++ b/services/doc_agent_chat/agent.py @@ -7,7 +7,7 @@ from doc_agent_chat.prompt import build_system_prompt from doc_agent_chat.tools import TOOL_DEFINITIONS, search_documents, format_search_results_as_documents from doc_agent_chat.config_loader import ConfigLoader -from models import resolve_model +from models import preferred_chat_model logger = create_logger("agent") @@ -24,7 +24,7 @@ def __init__(self, api_key: Optional[str] = None): raise ValueError("API key must be provided") self.client = Anthropic(api_key=self.api_key) - self.model = resolve_model(config.get("model", "claude-fable")) + self.model = preferred_chat_model("doc_agent_chat") self.max_tokens = config.get("max_tokens", 49152) self.max_tool_calls = config.get("max_tool_calls", 10) self.search_top_k = config.get("search_top_k", 5) diff --git a/services/doc_agent_chat/config.yaml b/services/doc_agent_chat/config.yaml index 389bf4b0..85202aec 100644 --- a/services/doc_agent_chat/config.yaml +++ b/services/doc_agent_chat/config.yaml @@ -1,5 +1,6 @@ config_version: 1.0 -model: claude-fable +# The chat model is configured in services/models.py (the default; doc_agent has +# no per-service env override), not here. max_tokens: 49152 max_tool_calls: 10 search_top_k: 5 diff --git a/services/global_chat/config.yaml b/services/global_chat/config.yaml index 376ea662..ef59fc80 100644 --- a/services/global_chat/config.yaml +++ b/services/global_chat/config.yaml @@ -7,7 +7,8 @@ router: temperature: 0.0 # Planner configuration (complex orchestration) +# The planner's chat model is configured in services/models.py (the default plus +# the APOLLO_GLOBAL_CHAT_MODEL env override), not here. planner: - model: "claude-fable" max_tokens: 24576 max_tool_calls: 10 diff --git a/services/global_chat/planner.py b/services/global_chat/planner.py index 4918aae3..9e806a91 100644 --- a/services/global_chat/planner.py +++ b/services/global_chat/planner.py @@ -24,7 +24,7 @@ STATUS_PLANNING, ) from global_chat.config_loader import ConfigLoader -from models import resolve_model +from models import preferred_chat_model from global_chat.tools.tool_definitions import TOOL_DEFINITIONS from global_chat.yaml_utils import stitch_job_code, redact_job_bodies, find_job_in_yaml from tools.search_documentation.search_documentation import search_documentation_tool @@ -60,7 +60,7 @@ def __init__(self, config_loader: ConfigLoader, api_key: Optional[str] = None): self.tools = TOOL_DEFINITIONS planner_config = config_loader.config.get("planner", {}) - self.model = resolve_model(planner_config.get("model", "claude-fable")) + self.model = preferred_chat_model("global_chat") self.max_tokens = planner_config.get("max_tokens", 24576) self.max_tool_calls = planner_config.get("max_tool_calls", 20) diff --git a/services/job_chat/job_chat.py b/services/job_chat/job_chat.py index 7da4d13f..cc569590 100644 --- a/services/job_chat/job_chat.py +++ b/services/job_chat/job_chat.py @@ -1,7 +1,6 @@ import os import json import re -import yaml from typing import List, Optional, Dict, Any from dataclasses import dataclass import httpx @@ -29,13 +28,9 @@ STATUS_WORKING, STATUS_WRITING_CODE, ) -from models import resolve_model +from models import preferred_chat_model -_dir = os.path.dirname(os.path.abspath(__file__)) -with open(os.path.join(_dir, "rag.yaml")) as _f: - _service_config = yaml.safe_load(_f) - -_MODEL = resolve_model(_service_config.get("model", "claude-fable")) +_MODEL = preferred_chat_model("job_chat") logger = create_logger("job_chat") diff --git a/services/job_chat/rag.yaml b/services/job_chat/rag.yaml index 82546592..9d16daed 100644 --- a/services/job_chat/rag.yaml +++ b/services/job_chat/rag.yaml @@ -1,5 +1,7 @@ config_version: 1.0 -model: "claude-fable" +# The main chat model is configured in services/models.py (the default plus the +# APOLLO_JOB_CHAT_MODEL env override), not here. The llm_* keys below are the +# smaller RAG/retrieval models and are separate. llm_search_decision: "claude-sonnet" llm_retrieval: "claude-sonnet" threshold: 0.8 diff --git a/services/models.py b/services/models.py index 10abd40f..e9777598 100644 --- a/services/models.py +++ b/services/models.py @@ -3,18 +3,15 @@ Update values here to change models used across all services. """ +import os + CLAUDE_MODELS: dict[str, str] = { "claude-opus": "claude-opus-4-8", - # Fable rejects temperature/top_p/top_k and any explicit `thinking` - # config other than {"type": "adaptive"}; tokenizer yields ~30% more - # tokens than Sonnet/Opus for the same content. - "claude-fable": "claude-fable-5", "claude-sonnet": "claude-sonnet-4-6", "claude-haiku": "claude-haiku-4-5-20251001", } CLAUDE_OPUS: str = CLAUDE_MODELS["claude-opus"] -CLAUDE_FABLE: str = CLAUDE_MODELS["claude-fable"] CLAUDE_SONNET: str = CLAUDE_MODELS["claude-sonnet"] CLAUDE_HAIKU: str = CLAUDE_MODELS["claude-haiku"] @@ -22,3 +19,49 @@ def resolve_model(alias: str) -> str: """Resolve a model alias to its full ID. Passes through unknown strings unchanged.""" return CLAUDE_MODELS.get(alias, alias) + + +# --- Main chat model selection ---------------------------------------------- +# +# The "main chat model" is the large model that drives user-facing chat +# (job_chat, workflow_chat, doc_agent_chat, and the global_chat planner). It is +# distinct from the smaller models used for RAG/routing (haiku/sonnet), which +# are configured directly and are NOT affected by the helpers below. +# +# The whole per-service model story lives here on purpose, so there is one place +# to read what each service uses and how to override it. Nothing is configured +# in the service yamls. + +# Default chat model for any service without its own entry below. +CHAT_MODEL_DEFAULT = CLAUDE_OPUS + +# Per-service model config. `default` is the built-in choice; `env`, if set at +# runtime, overrides it for that service only (one env var per service, no +# global override). Services not listed (e.g. doc_agent_chat) use +# CHAT_MODEL_DEFAULT and have no runtime override. +CHAT_SERVICE_MODELS: dict[str, dict[str, str]] = { + # workflow_chat forces JSON/YAML output via structured outputs; Sonnet + # handles that better than Opus today, so it defaults to Sonnet. + "workflow_chat": {"default": CLAUDE_SONNET, "env": "APOLLO_WORKFLOW_CHAT_MODEL"}, + "job_chat": {"default": CLAUDE_OPUS, "env": "APOLLO_JOB_CHAT_MODEL"}, + "global_chat": {"default": CLAUDE_OPUS, "env": "APOLLO_GLOBAL_CHAT_MODEL"}, +} + + +def preferred_chat_model(service: str | None = None) -> str: + """Resolve the main chat model for `service`. + + Precedence: the service's env var if set, else its per-service default, else + CHAT_MODEL_DEFAULT. Each service's env var (e.g. APOLLO_WORKFLOW_CHAT_MODEL) + is optional and lets us switch that one service's live model without + redeploying. + """ + cfg = CHAT_SERVICE_MODELS.get(service, {}) + + env_name = cfg.get("env") + if env_name: + override = os.getenv(env_name) + if override: + return resolve_model(override) + + return cfg.get("default", CHAT_MODEL_DEFAULT) diff --git a/services/streaming_util.py b/services/streaming_util.py index 7b6a4e78..045aa102 100644 --- a/services/streaming_util.py +++ b/services/streaming_util.py @@ -96,7 +96,7 @@ class StreamManager: block lifecycle and index tracking. Example usage: - manager = StreamManager(model=resolve_model("claude-fable")) + manager = StreamManager(model=resolve_model("claude-opus")) manager.start_stream() manager.send_thinking("Researching...") manager.send_text("Here's what I found...") diff --git a/services/testing/judge.py b/services/testing/judge.py index 747034d9..4600a76b 100644 --- a/services/testing/judge.py +++ b/services/testing/judge.py @@ -34,11 +34,11 @@ from anthropic import Anthropic -from models import CLAUDE_FABLE +from models import CLAUDE_OPUS from testing.judges import load_judge -DEFAULT_MODEL = CLAUDE_FABLE +DEFAULT_MODEL = CLAUDE_OPUS DEFAULT_JUDGE = "general" @@ -277,7 +277,7 @@ def evaluate( guessing. judge: Name of the judge (file at services/testing/judges/.md). Defaults to "general". - model: Model to use. Defaults to CLAUDE_FABLE from services/models.py. + model: Model to use. Defaults to CLAUDE_OPUS from services/models.py. client: Optional Anthropic client. Constructed from env if not given. Returns: diff --git a/services/tests/__init__.py b/services/tests/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/services/tests/unit/__init__.py b/services/tests/unit/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/services/tests/unit/test_models.py b/services/tests/unit/test_models.py new file mode 100644 index 00000000..b7639a22 --- /dev/null +++ b/services/tests/unit/test_models.py @@ -0,0 +1,42 @@ +"""Unit tests for the central chat-model selection in `services/models.py`. + +No real model calls, pure resolution logic. The repo-root conftest marks +everything under a `unit/` dir as `unit` and blocks real client construction. +""" + +import models as m +import pytest + +_WORKFLOW_ENV = m.CHAT_SERVICE_MODELS["workflow_chat"]["env"] + + +@pytest.fixture(autouse=True) +def _clear_env(monkeypatch): + """Clear all per-service overrides so the real environment can't skew tests.""" + for cfg in m.CHAT_SERVICE_MODELS.values(): + monkeypatch.delenv(cfg["env"], raising=False) + + +def test_unlisted_service_uses_default(): + # A service with no entry (e.g. doc_agent_chat, or none at all) uses the default. + assert m.preferred_chat_model() == m.CHAT_MODEL_DEFAULT + assert m.preferred_chat_model("doc_agent_chat") == m.CHAT_MODEL_DEFAULT + + +def test_per_service_defaults(): + assert m.preferred_chat_model("workflow_chat") == m.CLAUDE_SONNET + assert m.preferred_chat_model("job_chat") == m.CLAUDE_OPUS + assert m.preferred_chat_model("global_chat") == m.CLAUDE_OPUS + + +def test_env_var_overrides_its_service_default(monkeypatch): + # Also proves the env value is alias-resolved ("claude-opus" -> full ID). + monkeypatch.setenv(_WORKFLOW_ENV, "claude-opus") + assert m.preferred_chat_model("workflow_chat") == m.CLAUDE_OPUS + + +def test_env_var_is_scoped_to_one_service(monkeypatch): + # Setting one service's var must not affect another service. + monkeypatch.setenv(_WORKFLOW_ENV, "claude-haiku") + assert m.preferred_chat_model("workflow_chat") == m.CLAUDE_HAIKU + assert m.preferred_chat_model("job_chat") == m.CLAUDE_OPUS # unaffected diff --git a/services/workflow_chat/gen_project_config.yaml b/services/workflow_chat/gen_project_config.yaml index a88d652d..2cead7fd 100644 --- a/services/workflow_chat/gen_project_config.yaml +++ b/services/workflow_chat/gen_project_config.yaml @@ -1,4 +1,5 @@ config_version: 1.0 -model: "claude-fable" +# The chat model is configured in services/models.py (the default plus the +# APOLLO_WORKFLOW_CHAT_MODEL env override), not here. threshold: 0.7 top_k: 5 diff --git a/services/workflow_chat/workflow_chat.py b/services/workflow_chat/workflow_chat.py index 08571bb6..281a2af0 100644 --- a/services/workflow_chat/workflow_chat.py +++ b/services/workflow_chat/workflow_chat.py @@ -6,13 +6,9 @@ from typing import List, Optional, Dict, Any import yaml from dataclasses import dataclass -from models import resolve_model +from models import preferred_chat_model -_dir = os.path.dirname(os.path.abspath(__file__)) -with open(os.path.join(_dir, "gen_project_config.yaml")) as _f: - _service_config = yaml.safe_load(_f) - -_MODEL = resolve_model(_service_config.get("model", "claude-fable")) +_MODEL = preferred_chat_model("workflow_chat") # JSON schema for structured outputs — guarantees valid JSON from the API _OUTPUT_SCHEMA = {