-
Notifications
You must be signed in to change notification settings - Fork 5
Use Opus and environment variables for model selection #528
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,5 +1,6 @@ | ||
| config_version: 1.0 | ||
| model: claude-fable | ||
| # The chat model is configured in services/models.py (the default; doc_agent has | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this comment doesn't make sense in isolation: it only makes sense if you know that the model used to be set in config. It's confusing and we should remove it. Same for the other models.
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't actually love it 😬 Intuitively it feels that this config should be defaults for all values, and env vars can be used to override it(somehow, it's not entirely practical!) The split now of some things being envs and some things being config values feels confusing, rigid and arbitrary |
||
| # no per-service env override), not here. | ||
| max_tokens: 49152 | ||
| max_tool_calls: 10 | ||
| search_top_k: 5 | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -3,22 +3,65 @@ | |
| Update values here to change models used across all services. | ||
| """ | ||
|
|
||
| import os | ||
|
|
||
| CLAUDE_MODELS: dict[str, str] = { | ||
| "claude-opus": "claude-opus-4-8", | ||
| # Fable rejects temperature/top_p/top_k and any explicit `thinking` | ||
| # config other than {"type": "adaptive"}; tokenizer yields ~30% more | ||
| # tokens than Sonnet/Opus for the same content. | ||
| "claude-fable": "claude-fable-5", | ||
| "claude-sonnet": "claude-sonnet-4-6", | ||
| "claude-haiku": "claude-haiku-4-5-20251001", | ||
| } | ||
|
|
||
| CLAUDE_OPUS: str = CLAUDE_MODELS["claude-opus"] | ||
| CLAUDE_FABLE: str = CLAUDE_MODELS["claude-fable"] | ||
| CLAUDE_SONNET: str = CLAUDE_MODELS["claude-sonnet"] | ||
| CLAUDE_HAIKU: str = CLAUDE_MODELS["claude-haiku"] | ||
|
|
||
|
|
||
| def resolve_model(alias: str) -> str: | ||
| """Resolve a model alias to its full ID. Passes through unknown strings unchanged.""" | ||
| return CLAUDE_MODELS.get(alias, alias) | ||
|
|
||
|
|
||
| # --- Main chat model selection ---------------------------------------------- | ||
| # | ||
| # The "main chat model" is the large model that drives user-facing chat | ||
| # (job_chat, workflow_chat, doc_agent_chat, and the global_chat planner). It is | ||
| # distinct from the smaller models used for RAG/routing (haiku/sonnet), which | ||
| # are configured directly and are NOT affected by the helpers below. | ||
| # | ||
| # The whole per-service model story lives here on purpose, so there is one place | ||
| # to read what each service uses and how to override it. Nothing is configured | ||
| # in the service yamls. | ||
|
|
||
| # Default chat model for any service without its own entry below. | ||
| CHAT_MODEL_DEFAULT = CLAUDE_OPUS | ||
|
|
||
| # Per-service model config. `default` is the built-in choice; `env`, if set at | ||
| # runtime, overrides it for that service only (one env var per service, no | ||
| # global override). Services not listed (e.g. doc_agent_chat) use | ||
| # CHAT_MODEL_DEFAULT and have no runtime override. | ||
| CHAT_SERVICE_MODELS: dict[str, dict[str, str]] = { | ||
| # workflow_chat forces JSON/YAML output via structured outputs; Sonnet | ||
| # handles that better than Opus today, so it defaults to Sonnet. | ||
| "workflow_chat": {"default": CLAUDE_SONNET, "env": "APOLLO_WORKFLOW_CHAT_MODEL"}, | ||
| "job_chat": {"default": CLAUDE_OPUS, "env": "APOLLO_JOB_CHAT_MODEL"}, | ||
| "global_chat": {"default": CLAUDE_OPUS, "env": "APOLLO_GLOBAL_CHAT_MODEL"}, | ||
| } | ||
|
|
||
|
|
||
| def preferred_chat_model(service: str | None = None) -> str: | ||
| """Resolve the main chat model for `service`. | ||
|
|
||
| Precedence: the service's env var if set, else its per-service default, else | ||
| CHAT_MODEL_DEFAULT. Each service's env var (e.g. APOLLO_WORKFLOW_CHAT_MODEL) | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. these comments are so so verbose. I think I need to start pushing back on them. The lightning codebase is probably more comment than code now. Anyway this second sentence I don't like. It's repetitive, plus the "we can switch models without redeploying" thing is misleading. To change an env var you have to configure kubernetes and then restart the service. It would be more accurate to say you can update it without a rebuild. But I wouldn't even say that at this level. |
||
| is optional and lets us switch that one service's live model without | ||
| redeploying. | ||
| """ | ||
| cfg = CHAT_SERVICE_MODELS.get(service, {}) | ||
|
|
||
| env_name = cfg.get("env") | ||
| if env_name: | ||
| override = os.getenv(env_name) | ||
| if override: | ||
| return resolve_model(override) | ||
|
|
||
| return cfg.get("default", CHAT_MODEL_DEFAULT) | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,42 @@ | ||
| """Unit tests for the central chat-model selection in `services/models.py`. | ||
|
|
||
| No real model calls, pure resolution logic. The repo-root conftest marks | ||
| everything under a `unit/` dir as `unit` and blocks real client construction. | ||
| """ | ||
|
|
||
| import models as m | ||
| import pytest | ||
|
|
||
| _WORKFLOW_ENV = m.CHAT_SERVICE_MODELS["workflow_chat"]["env"] | ||
|
|
||
|
|
||
| @pytest.fixture(autouse=True) | ||
| def _clear_env(monkeypatch): | ||
| """Clear all per-service overrides so the real environment can't skew tests.""" | ||
| for cfg in m.CHAT_SERVICE_MODELS.values(): | ||
| monkeypatch.delenv(cfg["env"], raising=False) | ||
|
|
||
|
|
||
| def test_unlisted_service_uses_default(): | ||
| # A service with no entry (e.g. doc_agent_chat, or none at all) uses the default. | ||
| assert m.preferred_chat_model() == m.CHAT_MODEL_DEFAULT | ||
| assert m.preferred_chat_model("doc_agent_chat") == m.CHAT_MODEL_DEFAULT | ||
|
|
||
|
|
||
| def test_per_service_defaults(): | ||
| assert m.preferred_chat_model("workflow_chat") == m.CLAUDE_SONNET | ||
| assert m.preferred_chat_model("job_chat") == m.CLAUDE_OPUS | ||
| assert m.preferred_chat_model("global_chat") == m.CLAUDE_OPUS | ||
|
|
||
|
|
||
| def test_env_var_overrides_its_service_default(monkeypatch): | ||
| # Also proves the env value is alias-resolved ("claude-opus" -> full ID). | ||
| monkeypatch.setenv(_WORKFLOW_ENV, "claude-opus") | ||
| assert m.preferred_chat_model("workflow_chat") == m.CLAUDE_OPUS | ||
|
|
||
|
|
||
| def test_env_var_is_scoped_to_one_service(monkeypatch): | ||
| # Setting one service's var must not affect another service. | ||
| monkeypatch.setenv(_WORKFLOW_ENV, "claude-haiku") | ||
| assert m.preferred_chat_model("workflow_chat") == m.CLAUDE_HAIKU | ||
| assert m.preferred_chat_model("job_chat") == m.CLAUDE_OPUS # unaffected |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,4 +1,5 @@ | ||
| config_version: 1.0 | ||
| model: "claude-fable" | ||
| # The chat model is configured in services/models.py (the default plus the | ||
| # APOLLO_WORKFLOW_CHAT_MODEL env override), not here. | ||
| threshold: 0.7 | ||
| top_k: 5 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
These sample env vars don't make sense do that? What does
job_chatresolve to?