Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions .env.example
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,13 @@ LANGFUSE_SECRET_KEY=sk-lf-...
LANGFUSE_PUBLIC_KEY=pk-lf-...
LANGFUSE_BASE_URL=https://cloud.langfuse.com

# Chat model overrides (optional, one per service). Defaults live in
# services/models.py; set a var to override that service's model without a
# rebuild (restart the service to apply it). Accepts an alias (claude-opus,
# claude-sonnet) or a full model ID.
# APOLLO_GLOBAL_CHAT_MODEL= # global_chat planner
# APOLLO_WORKFLOW_CHAT_MODEL= # workflow_chat
# APOLLO_JOB_CHAT_MODEL= # job_chat

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These sample env vars don't make sense, do they? What does job_chat resolve to?


# HF_ACCESS_TOKEN=hf_YOUR-API-KEY-HERE # llama2 base
# ZILLIZ_URI = https://in01-XXXXXXXXXXXXX.aws-us-west-2.vectordb.zillizcloud.com:XXXXX
# ZILLIZ_TOKEN =db_admin:password (or ApiKey)
Expand Down
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ testpaths = [
"services/workflow_chat/tests",
"services/job_chat/tests",
"services/search_docsite/tests",
"services/tests",
"services/tools",
]

Expand Down
4 changes: 2 additions & 2 deletions services/doc_agent_chat/agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from doc_agent_chat.prompt import build_system_prompt
from doc_agent_chat.tools import TOOL_DEFINITIONS, search_documents, format_search_results_as_documents
from doc_agent_chat.config_loader import ConfigLoader
from models import resolve_model
from models import preferred_chat_model

logger = create_logger("agent")

Expand All @@ -24,7 +24,7 @@ def __init__(self, api_key: Optional[str] = None):
raise ValueError("API key must be provided")

self.client = Anthropic(api_key=self.api_key)
self.model = resolve_model(config.get("model", "claude-fable"))
self.model = preferred_chat_model("doc_agent_chat")
self.max_tokens = config.get("max_tokens", 49152)
self.max_tool_calls = config.get("max_tool_calls", 10)
self.search_top_k = config.get("search_top_k", 5)
Expand Down
3 changes: 2 additions & 1 deletion services/doc_agent_chat/config.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
config_version: 1.0
model: claude-fable
# The chat model is configured in services/models.py (the default; doc_agent has

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this comment doesn't make sense in isolation: it only makes sense if you know that the model used to be set in config. It's confusing and we should remove it. Same for the other models.

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't actually love it 😬 Intuitively it feels like this config should hold the defaults for all values, and env vars could be used to override them (somehow; it's not entirely practical!)

The current split, with some things being env vars and others being config values, feels confusing, rigid and arbitrary.

# no per-service env override), not here.
max_tokens: 49152
max_tool_calls: 10
search_top_k: 5
Expand Down
3 changes: 2 additions & 1 deletion services/global_chat/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@ router:
temperature: 0.0

# Planner configuration (complex orchestration)
# The planner's chat model is configured in services/models.py (the default plus
# the APOLLO_GLOBAL_CHAT_MODEL env override), not here.
planner:
model: "claude-fable"
max_tokens: 24576
max_tool_calls: 10
4 changes: 2 additions & 2 deletions services/global_chat/planner.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
STATUS_PLANNING,
)
from global_chat.config_loader import ConfigLoader
from models import resolve_model
from models import preferred_chat_model
from global_chat.tools.tool_definitions import TOOL_DEFINITIONS
from global_chat.yaml_utils import stitch_job_code, redact_job_bodies, find_job_in_yaml
from tools.search_documentation.search_documentation import search_documentation_tool
Expand Down Expand Up @@ -60,7 +60,7 @@ def __init__(self, config_loader: ConfigLoader, api_key: Optional[str] = None):
self.tools = TOOL_DEFINITIONS

planner_config = config_loader.config.get("planner", {})
self.model = resolve_model(planner_config.get("model", "claude-fable"))
self.model = preferred_chat_model("global_chat")
self.max_tokens = planner_config.get("max_tokens", 24576)
self.max_tool_calls = planner_config.get("max_tool_calls", 20)

Expand Down
9 changes: 2 additions & 7 deletions services/job_chat/job_chat.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import os
import json
import re
import yaml
from typing import List, Optional, Dict, Any
from dataclasses import dataclass
import httpx
Expand Down Expand Up @@ -29,13 +28,9 @@
STATUS_WORKING,
STATUS_WRITING_CODE,
)
from models import resolve_model
from models import preferred_chat_model

_dir = os.path.dirname(os.path.abspath(__file__))
with open(os.path.join(_dir, "rag.yaml")) as _f:
_service_config = yaml.safe_load(_f)

_MODEL = resolve_model(_service_config.get("model", "claude-fable"))
_MODEL = preferred_chat_model("job_chat")

logger = create_logger("job_chat")

Expand Down
4 changes: 3 additions & 1 deletion services/job_chat/rag.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
config_version: 1.0
model: "claude-fable"
# The main chat model is configured in services/models.py (the default plus the
# APOLLO_JOB_CHAT_MODEL env override), not here. The llm_* keys below are the
# smaller RAG/retrieval models and are separate.
llm_search_decision: "claude-sonnet"
llm_retrieval: "claude-sonnet"
threshold: 0.8
Expand Down
53 changes: 48 additions & 5 deletions services/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,22 +3,65 @@
Update values here to change models used across all services.
"""

import os

CLAUDE_MODELS: dict[str, str] = {
"claude-opus": "claude-opus-4-8",
# Fable rejects temperature/top_p/top_k and any explicit `thinking`
# config other than {"type": "adaptive"}; tokenizer yields ~30% more
# tokens than Sonnet/Opus for the same content.
"claude-fable": "claude-fable-5",
"claude-sonnet": "claude-sonnet-4-6",
"claude-haiku": "claude-haiku-4-5-20251001",
}

CLAUDE_OPUS: str = CLAUDE_MODELS["claude-opus"]
CLAUDE_FABLE: str = CLAUDE_MODELS["claude-fable"]
CLAUDE_SONNET: str = CLAUDE_MODELS["claude-sonnet"]
CLAUDE_HAIKU: str = CLAUDE_MODELS["claude-haiku"]


def resolve_model(alias: str) -> str:
    """Translate a model alias into its full model ID.

    Args:
        alias: A key of ``CLAUDE_MODELS`` (e.g. ``"claude-sonnet"``) or any
            other string, such as an already-complete model ID.

    Returns:
        The mapped model ID when ``alias`` is a known key; otherwise ``alias``
        itself, unchanged.
    """
    if alias in CLAUDE_MODELS:
        return CLAUDE_MODELS[alias]
    # Unknown strings are passed through so callers can supply full model IDs.
    return alias


# --- Main chat model selection ----------------------------------------------
#
# The "main chat model" is the large model that drives user-facing chat
# (job_chat, workflow_chat, doc_agent_chat, and the global_chat planner). It is
# distinct from the smaller models used for RAG/routing (haiku/sonnet), which
# are configured directly and are NOT affected by the helpers below.
#
# The whole per-service model story lives here on purpose, so there is one place
# to read what each service uses and how to override it. Nothing is configured
# in the service yamls.

# Default chat model for any service without its own entry below.
CHAT_MODEL_DEFAULT = CLAUDE_OPUS

# Per-service model config. `default` is the built-in choice; `env`, if set at
# runtime, overrides it for that service only (one env var per service, no
# global override). Services not listed (e.g. doc_agent_chat) use
# CHAT_MODEL_DEFAULT and have no runtime override.
CHAT_SERVICE_MODELS: dict[str, dict[str, str]] = {
# workflow_chat forces JSON/YAML output via structured outputs; Sonnet
# handles that better than Opus today, so it defaults to Sonnet.
"workflow_chat": {"default": CLAUDE_SONNET, "env": "APOLLO_WORKFLOW_CHAT_MODEL"},
"job_chat": {"default": CLAUDE_OPUS, "env": "APOLLO_JOB_CHAT_MODEL"},
"global_chat": {"default": CLAUDE_OPUS, "env": "APOLLO_GLOBAL_CHAT_MODEL"},
}


def preferred_chat_model(service: str | None = None) -> str:
"""Resolve the main chat model for `service`.

Precedence: the service's env var if set, else its per-service default, else
CHAT_MODEL_DEFAULT. Each service's env var (e.g. APOLLO_WORKFLOW_CHAT_MODEL)

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

these comments are so so verbose. I think I need to start pushing back on them. The lightning codebase is probably more comment than code now.

Anyway this second sentence I don't like. It's repetitive, plus the "we can switch models without redeploying" thing is misleading. To change an env var you have to configure kubernetes and then restart the service.

It would be more accurate to say you can update it without a rebuild. But I wouldn't even say that at this level.

is optional and overrides that one service's default; changing it requires
restarting the service, not rebuilding it.
"""
cfg = CHAT_SERVICE_MODELS.get(service, {})

env_name = cfg.get("env")
if env_name:
override = os.getenv(env_name)
if override:
return resolve_model(override)

return cfg.get("default", CHAT_MODEL_DEFAULT)
2 changes: 1 addition & 1 deletion services/streaming_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ class StreamManager:
block lifecycle and index tracking.

Example usage:
manager = StreamManager(model=resolve_model("claude-fable"))
manager = StreamManager(model=resolve_model("claude-opus"))
manager.start_stream()
manager.send_thinking("Researching...")
manager.send_text("Here's what I found...")
Expand Down
6 changes: 3 additions & 3 deletions services/testing/judge.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,11 +34,11 @@

from anthropic import Anthropic

from models import CLAUDE_FABLE
from models import CLAUDE_OPUS
from testing.judges import load_judge


DEFAULT_MODEL = CLAUDE_FABLE
DEFAULT_MODEL = CLAUDE_OPUS
DEFAULT_JUDGE = "general"


Expand Down Expand Up @@ -277,7 +277,7 @@ def evaluate(
guessing.
judge: Name of the judge (file at services/testing/judges/<name>.md).
Defaults to "general".
model: Model to use. Defaults to CLAUDE_FABLE from services/models.py.
model: Model to use. Defaults to CLAUDE_OPUS from services/models.py.
client: Optional Anthropic client. Constructed from env if not given.

Returns:
Expand Down
Empty file added services/tests/__init__.py
Empty file.
Empty file added services/tests/unit/__init__.py
Empty file.
42 changes: 42 additions & 0 deletions services/tests/unit/test_models.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
"""Unit tests for the central chat-model selection in `services/models.py`.

No real model calls, pure resolution logic. The repo-root conftest marks
everything under a `unit/` dir as `unit` and blocks real client construction.
"""

import models as m
import pytest

_WORKFLOW_ENV = m.CHAT_SERVICE_MODELS["workflow_chat"]["env"]


@pytest.fixture(autouse=True)
def _clear_env(monkeypatch):
    """Remove every per-service override variable before each test.

    Runs automatically so values present in the real process environment
    cannot influence the resolution results being asserted.
    """
    for entry in m.CHAT_SERVICE_MODELS.values():
        env_name = entry["env"]
        # raising=False: it is fine if the variable was never set.
        monkeypatch.delenv(env_name, raising=False)


def test_unlisted_service_uses_default():
    """The no-argument call and an unlisted service both get the default."""
    fallback = m.CHAT_MODEL_DEFAULT
    # No service name at all.
    assert m.preferred_chat_model() == fallback
    # A service name with no entry in CHAT_SERVICE_MODELS.
    assert m.preferred_chat_model("doc_agent_chat") == fallback


def test_per_service_defaults():
    """Each listed service resolves to its own built-in default model."""
    expected_by_service = {
        "workflow_chat": m.CLAUDE_SONNET,
        "job_chat": m.CLAUDE_OPUS,
        "global_chat": m.CLAUDE_OPUS,
    }
    for service, expected in expected_by_service.items():
        assert m.preferred_chat_model(service) == expected


def test_env_var_overrides_its_service_default(monkeypatch):
    """A service's env var wins over its default and is alias-resolved."""
    # The alias "claude-opus" must come back as the full model ID.
    monkeypatch.setenv(_WORKFLOW_ENV, "claude-opus")
    resolved = m.preferred_chat_model("workflow_chat")
    assert resolved == m.CLAUDE_OPUS


def test_env_var_is_scoped_to_one_service(monkeypatch):
    """Overriding one service's env var leaves other services untouched."""
    monkeypatch.setenv(_WORKFLOW_ENV, "claude-haiku")
    overridden = m.preferred_chat_model("workflow_chat")
    untouched = m.preferred_chat_model("job_chat")
    assert overridden == m.CLAUDE_HAIKU
    # job_chat has its own env var, so it keeps its built-in default.
    assert untouched == m.CLAUDE_OPUS
3 changes: 2 additions & 1 deletion services/workflow_chat/gen_project_config.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
config_version: 1.0
model: "claude-fable"
# The chat model is configured in services/models.py (the default plus the
# APOLLO_WORKFLOW_CHAT_MODEL env override), not here.
threshold: 0.7
top_k: 5
8 changes: 2 additions & 6 deletions services/workflow_chat/workflow_chat.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,9 @@
from typing import List, Optional, Dict, Any
import yaml
from dataclasses import dataclass
from models import resolve_model
from models import preferred_chat_model

_dir = os.path.dirname(os.path.abspath(__file__))
with open(os.path.join(_dir, "gen_project_config.yaml")) as _f:
_service_config = yaml.safe_load(_f)

_MODEL = resolve_model(_service_config.get("model", "claude-fable"))
_MODEL = preferred_chat_model("workflow_chat")

# JSON schema for structured outputs — guarantees valid JSON from the API
_OUTPUT_SCHEMA = {
Expand Down
Loading