Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .changeset/twelve-lemons-tan.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"apollo": minor
---

Upgrade the planner and job chat to Claude Opus.
5 changes: 2 additions & 3 deletions services/global_chat/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@ router:

# Planner configuration (complex orchestration)
planner:
model: "claude-sonnet"
max_tokens: 8192
temperature: 1.0
model: "claude-opus"
max_tokens: 24576
max_tool_calls: 10
13 changes: 9 additions & 4 deletions services/global_chat/planner.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from typing import List, Dict, Optional
from concurrent.futures import ThreadPoolExecutor, as_completed
from dataclasses import dataclass
import httpx
from anthropic import Anthropic
import sentry_sdk

Expand Down Expand Up @@ -59,9 +60,8 @@ def __init__(self, config_loader: ConfigLoader, api_key: Optional[str] = None):
self.tools = TOOL_DEFINITIONS

planner_config = config_loader.config.get("planner", {})
self.model = resolve_model(planner_config.get("model", "claude-sonnet"))
self.max_tokens = planner_config.get("max_tokens", 8192)
self.temperature = planner_config.get("temperature", 1.0)
self.model = resolve_model(planner_config.get("model", "claude-opus"))
self.max_tokens = planner_config.get("max_tokens", 24576)
self.max_tool_calls = planner_config.get("max_tool_calls", 20)

self.current_yaml: Optional[str] = None
Expand Down Expand Up @@ -285,6 +285,7 @@ def _call_api(self, system_prompt, messages, stream):
messages=messages,
tools=self.tools,
thinking={"type": "adaptive"},
output_config={"effort": "medium"},
) as stream_obj:
for event in stream_obj:
if event.type == "content_block_delta":
Expand All @@ -299,7 +300,11 @@ def _call_api(self, system_prompt, messages, stream):
messages=messages,
tools=self.tools,
thinking={"type": "adaptive"},
output_config={"effort": "high"},
output_config={"effort": "medium"},
# Explicit per-request timeout (same values as the SDK default).
# The SDK rejects non-streaming calls with max_tokens > ~21k unless a
# timeout is passed explicitly.
timeout=httpx.Timeout(600.0, connect=5.0),
betas=["context-management-2025-06-27"],
context_management={
"edits": [
Expand Down
7 changes: 6 additions & 1 deletion services/job_chat/job_chat.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import yaml
from typing import List, Optional, Dict, Any
from dataclasses import dataclass
import httpx
from anthropic import (
Anthropic,
APIConnectionError,
Expand Down Expand Up @@ -138,7 +139,7 @@ def from_dict(cls, data: Dict[str, Any]) -> "Payload":
@dataclass
class ChatConfig:
model: str = _MODEL
max_tokens: int = 16384
max_tokens: int = 24576
api_key: Optional[str] = None


Expand Down Expand Up @@ -288,6 +289,10 @@ def generate(
max_tokens=self.config.max_tokens, messages=prompt, model=self.config.model, system=system_message,
thinking={"type": "adaptive"},
output_config=output_config,
# Explicit per-request timeout (same values as the SDK default).
# The SDK rejects non-streaming calls with max_tokens > ~21k unless a
# timeout is passed explicitly.
timeout=httpx.Timeout(600.0, connect=5.0),
**tool_kwargs
)
message = self.client.messages.create(**create_kwargs)
Expand Down
2 changes: 1 addition & 1 deletion services/job_chat/rag.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
config_version: 1.0
model: "claude-sonnet"
model: "claude-opus"
llm_search_decision: "claude-sonnet"
llm_retrieval: "claude-sonnet"
threshold: 0.8
Expand Down