diff --git a/.env.example b/.env.example
index 727672a7..4c13b535 100644
--- a/.env.example
+++ b/.env.example
@@ -10,6 +10,13 @@ LANGFUSE_SECRET_KEY=sk-lf-...
 LANGFUSE_PUBLIC_KEY=pk-lf-...
 LANGFUSE_BASE_URL=https://cloud.langfuse.com
 
+# Optional per-service chat model overrides; unset or empty keeps the default
+# from services/models.py. Set a var to switch that service's live model without
+# redeploying. Accepts an alias (e.g. claude-opus, claude-sonnet) or a full model ID.
+# APOLLO_GLOBAL_CHAT_MODEL=   # global_chat planner
+# APOLLO_WORKFLOW_CHAT_MODEL= # workflow_chat
+# APOLLO_JOB_CHAT_MODEL=      # job_chat
+
 # HF_ACCESS_TOKEN=hf_YOUR-API-KEY-HERE # llama2 base
 # ZILLIZ_URI =  https://in01-XXXXXXXXXXXXX.aws-us-west-2.vectordb.zillizcloud.com:XXXXX
 # ZILLIZ_TOKEN =db_admin:password (or ApiKey)
diff --git a/pyproject.toml b/pyproject.toml
index 91f181fa..fff1e25b 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -49,6 +49,7 @@ testpaths = [
     "services/workflow_chat/tests",
     "services/job_chat/tests",
     "services/search_docsite/tests",
+    "services/tests",
     "services/tools",
 ]
 
diff --git a/services/doc_agent_chat/agent.py b/services/doc_agent_chat/agent.py
index d9e24856..2e06e437 100644
--- a/services/doc_agent_chat/agent.py
+++ b/services/doc_agent_chat/agent.py
@@ -7,7 +7,7 @@
 from doc_agent_chat.prompt import build_system_prompt
 from doc_agent_chat.tools import TOOL_DEFINITIONS, search_documents, format_search_results_as_documents
 from doc_agent_chat.config_loader import ConfigLoader
-from models import resolve_model
+from models import preferred_chat_model
 
 logger = create_logger("agent")
 
@@ -24,7 +24,7 @@ def __init__(self, api_key: Optional[str] = None):
             raise ValueError("API key must be provided")
 
         self.client = Anthropic(api_key=self.api_key)
-        self.model = resolve_model(config.get("model", "claude-fable"))
+        self.model = preferred_chat_model("doc_agent_chat")
         self.max_tokens = config.get("max_tokens", 49152)
         self.max_tool_calls = config.get("max_tool_calls", 10)
         self.search_top_k = config.get("search_top_k", 5)
diff --git a/services/doc_agent_chat/config.yaml b/services/doc_agent_chat/config.yaml
index 389bf4b0..85202aec 100644
--- a/services/doc_agent_chat/config.yaml
+++ b/services/doc_agent_chat/config.yaml
@@ -1,5 +1,6 @@
 config_version: 1.0
-model: claude-fable
+# The chat model is configured in services/models.py (the default; doc_agent has
+# no per-service env override), not here.
 max_tokens: 49152
 max_tool_calls: 10
 search_top_k: 5
diff --git a/services/global_chat/config.yaml b/services/global_chat/config.yaml
index 376ea662..ef59fc80 100644
--- a/services/global_chat/config.yaml
+++ b/services/global_chat/config.yaml
@@ -7,7 +7,8 @@ router:
   temperature: 0.0
 
 # Planner configuration (complex orchestration)
+# The planner's chat model is configured in services/models.py (the default plus
+# the APOLLO_GLOBAL_CHAT_MODEL env override), not here.
 planner:
-  model: "claude-fable"
   max_tokens: 24576
   max_tool_calls: 10
diff --git a/services/global_chat/planner.py b/services/global_chat/planner.py
index 4918aae3..9e806a91 100644
--- a/services/global_chat/planner.py
+++ b/services/global_chat/planner.py
@@ -24,7 +24,7 @@
     STATUS_PLANNING,
 )
 from global_chat.config_loader import ConfigLoader
-from models import resolve_model
+from models import preferred_chat_model
 from global_chat.tools.tool_definitions import TOOL_DEFINITIONS
 from global_chat.yaml_utils import stitch_job_code, redact_job_bodies, find_job_in_yaml
 from tools.search_documentation.search_documentation import search_documentation_tool
@@ -60,7 +60,7 @@ def __init__(self, config_loader: ConfigLoader, api_key: Optional[str] = None):
         self.tools = TOOL_DEFINITIONS
 
         planner_config = config_loader.config.get("planner", {})
-        self.model = resolve_model(planner_config.get("model", "claude-fable"))
+        self.model = preferred_chat_model("global_chat")
         self.max_tokens = planner_config.get("max_tokens", 24576)
         self.max_tool_calls = planner_config.get("max_tool_calls", 20)
 
diff --git a/services/job_chat/job_chat.py b/services/job_chat/job_chat.py
index 7da4d13f..cc569590 100644
--- a/services/job_chat/job_chat.py
+++ b/services/job_chat/job_chat.py
@@ -1,7 +1,6 @@
 import os
 import json
 import re
-import yaml
 from typing import List, Optional, Dict, Any
 from dataclasses import dataclass
 import httpx
@@ -29,13 +28,9 @@
     STATUS_WORKING,
     STATUS_WRITING_CODE,
 )
-from models import resolve_model
+from models import preferred_chat_model
 
-_dir = os.path.dirname(os.path.abspath(__file__))
-with open(os.path.join(_dir, "rag.yaml")) as _f:
-    _service_config = yaml.safe_load(_f)
-
-_MODEL = resolve_model(_service_config.get("model", "claude-fable"))
+_MODEL = preferred_chat_model("job_chat")
 
 logger = create_logger("job_chat")
 
diff --git a/services/job_chat/rag.yaml b/services/job_chat/rag.yaml
index 82546592..9d16daed 100644
--- a/services/job_chat/rag.yaml
+++ b/services/job_chat/rag.yaml
@@ -1,5 +1,7 @@
 config_version: 1.0
-model: "claude-fable"
+# The main chat model is configured in services/models.py (the default plus the
+# APOLLO_JOB_CHAT_MODEL env override), not here. The llm_* keys below are the
+# smaller RAG/retrieval models and are separate.
 llm_search_decision: "claude-sonnet"
 llm_retrieval: "claude-sonnet"
 threshold: 0.8
diff --git a/services/models.py b/services/models.py
index 10abd40f..e9777598 100644
--- a/services/models.py
+++ b/services/models.py
@@ -3,18 +3,15 @@
 Update values here to change models used across all services.
 """
 
+import os
+
 CLAUDE_MODELS: dict[str, str] = {
     "claude-opus":   "claude-opus-4-8",
-    # Fable rejects temperature/top_p/top_k and any explicit `thinking`
-    # config other than {"type": "adaptive"}; tokenizer yields ~30% more
-    # tokens than Sonnet/Opus for the same content.
-    "claude-fable":  "claude-fable-5",
     "claude-sonnet": "claude-sonnet-4-6",
     "claude-haiku":  "claude-haiku-4-5-20251001",
 }
 
 CLAUDE_OPUS:   str = CLAUDE_MODELS["claude-opus"]
-CLAUDE_FABLE:  str = CLAUDE_MODELS["claude-fable"]
 CLAUDE_SONNET: str = CLAUDE_MODELS["claude-sonnet"]
 CLAUDE_HAIKU:  str = CLAUDE_MODELS["claude-haiku"]
 
@@ -22,3 +19,49 @@
 def resolve_model(alias: str) -> str:
     """Resolve a model alias to its full ID. Passes through unknown strings unchanged."""
     return CLAUDE_MODELS.get(alias, alias)
+
+
+# --- Main chat model selection ----------------------------------------------
+#
+# The "main chat model" is the large model that drives user-facing chat
+# (job_chat, workflow_chat, doc_agent_chat, and the global_chat planner). It is
+# distinct from the smaller models used for RAG/routing (haiku/sonnet), which
+# are configured directly and are NOT affected by the helpers below.
+#
+# The whole per-service model story lives here on purpose, so there is one place
+# to read what each service uses and how to override it. Nothing is configured
+# in the service yamls.
+
+# Default chat model for any service without its own entry below.
+CHAT_MODEL_DEFAULT = CLAUDE_OPUS
+
+# Per-service model config. `default` is the built-in choice; `env` names the
+# environment variable that, when set, overrides it for that service only (one
+# env var per service, no global override). Services not listed (e.g.
+# doc_agent_chat) use CHAT_MODEL_DEFAULT and have no runtime override.
+CHAT_SERVICE_MODELS: dict[str, dict[str, str]] = {
+    # workflow_chat forces JSON/YAML output via structured outputs; Sonnet
+    # handles that better than Opus today, so it defaults to Sonnet.
+    "workflow_chat": {"default": CLAUDE_SONNET, "env": "APOLLO_WORKFLOW_CHAT_MODEL"},
+    "job_chat":      {"default": CLAUDE_OPUS,   "env": "APOLLO_JOB_CHAT_MODEL"},
+    "global_chat":   {"default": CLAUDE_OPUS,   "env": "APOLLO_GLOBAL_CHAT_MODEL"},
+}
+
+
+def preferred_chat_model(service: str | None = None) -> str:
+    """Resolve the main chat model for `service`.
+
+    Precedence: the service's env var if set to a non-blank value (surrounding
+    whitespace is stripped, then the value is alias-resolved), else its
+    per-service default, else CHAT_MODEL_DEFAULT. Each env var is optional and
+    switches only that one service's model.
+    """
+    cfg = CHAT_SERVICE_MODELS.get(service, {})
+    env_name = cfg.get("env")
+    if env_name:
+        # Strip so a padded or whitespace-only value is never sent as a model ID.
+        override = os.getenv(env_name, "").strip()
+        if override:
+            return resolve_model(override)
+
+    return cfg.get("default", CHAT_MODEL_DEFAULT)
diff --git a/services/streaming_util.py b/services/streaming_util.py
index 7b6a4e78..045aa102 100644
--- a/services/streaming_util.py
+++ b/services/streaming_util.py
@@ -96,7 +96,7 @@ class StreamManager:
     block lifecycle and index tracking.
 
     Example usage:
-        manager = StreamManager(model=resolve_model("claude-fable"))
+        manager = StreamManager(model=resolve_model("claude-opus"))
         manager.start_stream()
         manager.send_thinking("Researching...")
         manager.send_text("Here's what I found...")
diff --git a/services/testing/judge.py b/services/testing/judge.py
index 747034d9..4600a76b 100644
--- a/services/testing/judge.py
+++ b/services/testing/judge.py
@@ -34,11 +34,11 @@
 
 from anthropic import Anthropic
 
-from models import CLAUDE_FABLE
+from models import CLAUDE_OPUS
 from testing.judges import load_judge
 
 
-DEFAULT_MODEL = CLAUDE_FABLE
+DEFAULT_MODEL = CLAUDE_OPUS
 DEFAULT_JUDGE = "general"
 
 
@@ -277,7 +277,7 @@ def evaluate(
             guessing.
         judge: Name of the judge (file at services/testing/judges/<name>.md).
             Defaults to "general".
-        model: Model to use. Defaults to CLAUDE_FABLE from services/models.py.
+        model: Model to use. Defaults to CLAUDE_OPUS from services/models.py.
         client: Optional Anthropic client. Constructed from env if not given.
 
     Returns:
diff --git a/services/tests/__init__.py b/services/tests/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/services/tests/unit/__init__.py b/services/tests/unit/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/services/tests/unit/test_models.py b/services/tests/unit/test_models.py
new file mode 100644
index 00000000..b7639a22
--- /dev/null
+++ b/services/tests/unit/test_models.py
@@ -0,0 +1,42 @@
+"""Unit tests for the central chat-model selection in `services/models.py`.
+
+No real model calls, pure resolution logic. The repo-root conftest marks
+everything under a `unit/` dir as `unit` and blocks real client construction.
+"""
+
+import models as m
+import pytest
+
+_WORKFLOW_ENV = m.CHAT_SERVICE_MODELS["workflow_chat"]["env"]
+
+
+@pytest.fixture(autouse=True)
+def _clear_env(monkeypatch):
+    """Unset every declared override var so the real environment can't skew tests."""
+    for env_name in filter(None, (c.get("env") for c in m.CHAT_SERVICE_MODELS.values())):
+        monkeypatch.delenv(env_name, raising=False)
+
+
+def test_unlisted_service_uses_default():
+    # No argument, or a service with no entry (e.g. doc_agent_chat), yields the default.
+    assert m.preferred_chat_model() == m.CHAT_MODEL_DEFAULT
+    assert m.preferred_chat_model("doc_agent_chat") == m.CHAT_MODEL_DEFAULT
+
+
+def test_per_service_defaults():
+    opus, sonnet = m.CLAUDE_OPUS, m.CLAUDE_SONNET
+    for svc, want in (("workflow_chat", sonnet), ("job_chat", opus), ("global_chat", opus)):
+        assert m.preferred_chat_model(svc) == want
+
+
+def test_env_var_overrides_its_service_default(monkeypatch):
+    # The override is given as an alias, so this also checks alias -> full ID resolution.
+    monkeypatch.setenv(_WORKFLOW_ENV, "claude-opus")
+    assert m.preferred_chat_model("workflow_chat") == m.CLAUDE_OPUS
+
+
+def test_env_var_is_scoped_to_one_service(monkeypatch):
+    # Only workflow_chat's variable is set; job_chat must still resolve to its own default.
+    monkeypatch.setenv(_WORKFLOW_ENV, "claude-haiku")
+    assert m.preferred_chat_model("workflow_chat") == m.CLAUDE_HAIKU
+    assert m.preferred_chat_model("job_chat") == m.CLAUDE_OPUS  # unaffected
diff --git a/services/workflow_chat/gen_project_config.yaml b/services/workflow_chat/gen_project_config.yaml
index a88d652d..2cead7fd 100644
--- a/services/workflow_chat/gen_project_config.yaml
+++ b/services/workflow_chat/gen_project_config.yaml
@@ -1,4 +1,5 @@
 config_version: 1.0
-model: "claude-fable"
+# The chat model is configured in services/models.py (the default plus the
+# APOLLO_WORKFLOW_CHAT_MODEL env override), not here.
 threshold: 0.7
 top_k: 5
diff --git a/services/workflow_chat/workflow_chat.py b/services/workflow_chat/workflow_chat.py
index 08571bb6..281a2af0 100644
--- a/services/workflow_chat/workflow_chat.py
+++ b/services/workflow_chat/workflow_chat.py
@@ -6,13 +6,9 @@
 from typing import List, Optional, Dict, Any
 import yaml
 from dataclasses import dataclass
-from models import resolve_model
+from models import preferred_chat_model
 
-_dir = os.path.dirname(os.path.abspath(__file__))
-with open(os.path.join(_dir, "gen_project_config.yaml")) as _f:
-    _service_config = yaml.safe_load(_f)
-
-_MODEL = resolve_model(_service_config.get("model", "claude-fable"))
+_MODEL = preferred_chat_model("workflow_chat")
 
 # JSON schema for structured outputs — guarantees valid JSON from the API
 _OUTPUT_SCHEMA = {