From 0d0a26360cdf559a59b1af4e5b839e3614210fc8 Mon Sep 17 00:00:00 2001 From: MarioCadenas Date: Thu, 18 Jun 2026 20:43:24 -0700 Subject: [PATCH 1/4] feat: attribute outbound Databricks requests to AppKit consistently Only the service and user WorkspaceClients carried the AppKit User-Agent (@databricks/appkit/<version>). The cache client sent unknown/0.0.0, and the two raw-fetch sites (files upload, MCP connector) sent no AppKit User-Agent at all, since they bypass the SDK apiClient. Extract getClientOptions() into a shared context/client-options module (adding APPKIT_USER_AGENT for fetch sites with no apiClient), pass it to the cache client, and stamp the User-Agent on the files upload fetch (via apiClient.userAgent()) and both MCP fetches. Signed-off-by: MarioCadenas --- packages/appkit/src/cache/index.ts | 3 +- .../appkit/src/connectors/files/client.ts | 3 ++ .../src/connectors/files/tests/client.test.ts | 12 +++++++ packages/appkit/src/connectors/mcp/client.ts | 7 ++++ .../src/connectors/mcp/tests/client.test.ts | 4 +++ packages/appkit/src/context/client-options.ts | 34 +++++++++++++++++++ .../appkit/src/context/service-context.ts | 19 +---------- 7 files changed, 63 insertions(+), 19 deletions(-) create mode 100644 packages/appkit/src/context/client-options.ts diff --git a/packages/appkit/src/cache/index.ts b/packages/appkit/src/cache/index.ts index 7a948a822..641b59fac 100644 --- a/packages/appkit/src/cache/index.ts +++ b/packages/appkit/src/cache/index.ts @@ -2,6 +2,7 @@ import { createHash } from "node:crypto"; import { ApiError, WorkspaceClient } from "@databricks/sdk-experimental"; import type { CacheConfig, CacheEntry, CacheStorage } from "shared"; import { createLakebasePool } from "../connectors/lakebase"; +import { getClientOptions } from "../context/client-options"; import { AppKitError, ExecutionError, InitializationError } from "../errors"; import { createLogger } from "../logging/logger"; import type { Counter, TelemetryProvider } from "../telemetry"; @@ 
-170,7 +171,7 @@ export class CacheManager { // try to use lakebase storage try { - const workspaceClient = new WorkspaceClient({}); + const workspaceClient = new WorkspaceClient({}, getClientOptions()); const pool = createLakebasePool({ workspaceClient }); const persistentStorage = new PersistentStorage(config, pool); diff --git a/packages/appkit/src/connectors/files/client.ts b/packages/appkit/src/connectors/files/client.ts index 93203fdb6..78b3b2de3 100644 --- a/packages/appkit/src/connectors/files/client.ts +++ b/packages/appkit/src/connectors/files/client.ts @@ -311,6 +311,9 @@ export class FilesConnector { const headers = new Headers({ "Content-Type": "application/octet-stream", + // This raw fetch bypasses apiClient, which would otherwise stamp the + // User-Agent; set it explicitly so the upload is attributed to AppKit. + "User-Agent": client.apiClient.userAgent(), }); const fetchOptions: RequestInit = { method: "PUT", headers, body }; diff --git a/packages/appkit/src/connectors/files/tests/client.test.ts b/packages/appkit/src/connectors/files/tests/client.test.ts index e7a4264c3..560764703 100644 --- a/packages/appkit/src/connectors/files/tests/client.test.ts +++ b/packages/appkit/src/connectors/files/tests/client.test.ts @@ -20,9 +20,13 @@ const { mockFilesApi, mockConfig, mockClient, MockApiError } = vi.hoisted( authenticate: vi.fn(), }; + const mockApiClient = { + userAgent: vi.fn(() => "@databricks/appkit/9.9.9"), + }; const mockClient = { files: mockFilesApi, config: mockConfig, + apiClient: mockApiClient, } as unknown as WorkspaceClient; class MockApiError extends Error { @@ -538,6 +542,14 @@ describe("FilesConnector", () => { expect(mockConfig.authenticate).toHaveBeenCalledWith(expect.any(Headers)); }); + test("stamps the AppKit User-Agent from the SDK apiClient", async () => { + await connector.upload(mockClient, "file.txt", "data"); + + const init = fetchSpy.mock.calls[0][1] as RequestInit; + const headers = init.headers as Headers; + 
expect(headers.get("User-Agent")).toBe("@databricks/appkit/9.9.9"); + }); + test("builds URL from client.config.host", async () => { await connector.upload(mockClient, "file.txt", "data"); diff --git a/packages/appkit/src/connectors/mcp/client.ts b/packages/appkit/src/connectors/mcp/client.ts index 5b80997c9..75baf531c 100644 --- a/packages/appkit/src/connectors/mcp/client.ts +++ b/packages/appkit/src/connectors/mcp/client.ts @@ -23,6 +23,7 @@ * transport. */ import type { AgentToolDefinition } from "shared"; +import { APPKIT_USER_AGENT } from "../../context/client-options"; import { createLogger } from "../../logging/logger"; import { assertResolvedHostSafe, @@ -423,6 +424,9 @@ export class AppKitMcpClient { const authHeaders = await this.resolveAuthHeaders(options); const headers: Record = { + // Raw fetch bypasses the SDK's apiClient; stamp the AppKit User-Agent so + // MCP traffic is attributed to AppKit. + "User-Agent": APPKIT_USER_AGENT, "Content-Type": "application/json", Accept: "application/json, text/event-stream", ...authHeaders, @@ -497,6 +501,9 @@ export class AppKitMcpClient { const authHeaders = await this.resolveAuthHeaders(options); const headers: Record = { + // Raw fetch bypasses the SDK's apiClient; stamp the AppKit User-Agent so + // MCP traffic is attributed to AppKit. 
+ "User-Agent": APPKIT_USER_AGENT, "Content-Type": "application/json", Accept: "application/json, text/event-stream", ...authHeaders, diff --git a/packages/appkit/src/connectors/mcp/tests/client.test.ts b/packages/appkit/src/connectors/mcp/tests/client.test.ts index 839d14f95..b6e1dd849 100644 --- a/packages/appkit/src/connectors/mcp/tests/client.test.ts +++ b/packages/appkit/src/connectors/mcp/tests/client.test.ts @@ -1,4 +1,5 @@ import { beforeEach, describe, expect, test, vi } from "vitest"; +import { APPKIT_USER_AGENT } from "../../../context/client-options"; import { AppKitMcpClient } from "../client"; import type { DnsLookup, McpHostPolicy } from "../host-policy"; @@ -143,7 +144,10 @@ describe("AppKitMcpClient — host allowlist", () => { for (const call of calls) { const headers = call.init.headers as Record; expect(headers.Authorization).toBe("Bearer SP-TOKEN"); + // Every MCP request is attributed to AppKit via User-Agent. + expect(headers["User-Agent"]).toBe(APPKIT_USER_AGENT); } + expect(APPKIT_USER_AGENT).toMatch(/^@databricks\/appkit\//); expect(client.canForwardWorkspaceAuth("genie-1")).toBe(true); }); diff --git a/packages/appkit/src/context/client-options.ts b/packages/appkit/src/context/client-options.ts new file mode 100644 index 000000000..0e41daa25 --- /dev/null +++ b/packages/appkit/src/context/client-options.ts @@ -0,0 +1,34 @@ +import type { ClientOptions } from "@databricks/sdk-experimental"; +import { coerce } from "semver"; +import { + name as productName, + version as productVersion, +} from "../../package.json"; + +/** + * SDK client options that stamp every `apiClient.request()` with an AppKit + * User-Agent (`@databricks/appkit/`), so outbound Databricks traffic + * is attributable to AppKit. Use this for every `WorkspaceClient` AppKit + * constructs at runtime. 
+ */ +export function getClientOptions(): ClientOptions { + const isDev = process.env.NODE_ENV === "development"; + const semver = coerce(productVersion); + const normalizedVersion = (semver?.version ?? + productVersion) as ClientOptions["productVersion"]; + + return { + product: productName, + productVersion: normalizedVersion, + ...(isDev && { userAgentExtra: { mode: "dev" } }), + }; +} + +/** + * Product/version User-Agent string matching the SDK stamp, for raw `fetch` + * call sites that bypass the SDK's `apiClient` and have no client to derive it + * from (e.g. the MCP connector). + */ +export const APPKIT_USER_AGENT = `${productName}/${ + coerce(productVersion)?.version ?? productVersion +}`; diff --git a/packages/appkit/src/context/service-context.ts b/packages/appkit/src/context/service-context.ts index fa2f9c3ef..b3c13f422 100644 --- a/packages/appkit/src/context/service-context.ts +++ b/packages/appkit/src/context/service-context.ts @@ -5,16 +5,12 @@ import { type sql, WorkspaceClient, } from "@databricks/sdk-experimental"; -import { coerce } from "semver"; -import { - name as productName, - version as productVersion, -} from "../../package.json"; import { AuthenticationError, ConfigurationError, InitializationError, } from "../errors"; +import { getClientOptions } from "./client-options"; import type { UserContext } from "./user-context"; /** @@ -32,19 +28,6 @@ export interface ServiceContextState { workspaceId: Promise; } -function getClientOptions(): ClientOptions { - const isDev = process.env.NODE_ENV === "development"; - const semver = coerce(productVersion); - const normalizedVersion = (semver?.version ?? - productVersion) as ClientOptions["productVersion"]; - - return { - product: productName, - productVersion: normalizedVersion, - ...(isDev && { userAgentExtra: { mode: "dev" } }), - }; -} - /** * ServiceContext is a singleton that manages the service principal's * WorkspaceClient and shared resources like warehouse/workspace IDs. 
From a792917d1d287ec77f8eafd1a4db36d391ec90a7 Mon Sep 17 00:00:00 2001 From: MarioCadenas Date: Wed, 1 Jul 2026 11:21:35 +0200 Subject: [PATCH 2/4] fix: attribute agent serving + lakebase SP requests to AppKit Close two remaining outbound-request attribution gaps and fix the OBO upload test mock: - agents: fromModelServing() auto-created WorkspaceClient now gets getClientOptions(); the adapter raw-fetch path stamps User-Agent. - lakebase: inject an attributed SP WorkspaceClient into the pool config so the username lookup and OAuth credential generation are attributed (OBO already used the context client). - files plugin test: add apiClient.userAgent to the OBO upload userClient mock so upload reaches authenticate() (fixes the failing unit test). Signed-off-by: MarioCadenas --- packages/appkit/src/agents/databricks.ts | 3 +++ packages/appkit/src/connectors/files/client.ts | 2 -- packages/appkit/src/connectors/mcp/client.ts | 4 ---- packages/appkit/src/plugins/files/tests/plugin.test.ts | 7 +++++-- packages/appkit/src/plugins/lakebase/lakebase.ts | 9 ++++++++- 5 files changed, 16 insertions(+), 9 deletions(-) diff --git a/packages/appkit/src/agents/databricks.ts b/packages/appkit/src/agents/databricks.ts index 6e2e78d60..83f89a59b 100644 --- a/packages/appkit/src/agents/databricks.ts +++ b/packages/appkit/src/agents/databricks.ts @@ -6,6 +6,7 @@ import type { AgentToolDefinition, } from "shared"; import { stream as servingStream } from "../connectors/serving/client"; +import { APPKIT_USER_AGENT, getClientOptions } from "../context/client-options"; /** Default cap for a single incomplete SSE line tail (DoS guard). 
*/ const DEFAULT_MAX_SSE_LINE_CHARS = 1024 * 1024; @@ -262,6 +263,7 @@ export class DatabricksAdapter implements AgentAdapter { const response = await fetch(endpointUrl, { method: "POST", headers: { + "User-Agent": APPKIT_USER_AGENT, "Content-Type": "application/json", ...authHeaders, }, @@ -359,6 +361,7 @@ export class DatabricksAdapter implements AgentAdapter { const sdk = await import("@databricks/sdk-experimental"); workspaceClient = new sdk.WorkspaceClient( {}, + getClientOptions(), ) as unknown as WorkspaceClientLike; } diff --git a/packages/appkit/src/connectors/files/client.ts b/packages/appkit/src/connectors/files/client.ts index 78b3b2de3..ddb7ecdec 100644 --- a/packages/appkit/src/connectors/files/client.ts +++ b/packages/appkit/src/connectors/files/client.ts @@ -311,8 +311,6 @@ export class FilesConnector { const headers = new Headers({ "Content-Type": "application/octet-stream", - // This raw fetch bypasses apiClient, which would otherwise stamp the - // User-Agent; set it explicitly so the upload is attributed to AppKit. "User-Agent": client.apiClient.userAgent(), }); const fetchOptions: RequestInit = { method: "PUT", headers, body }; diff --git a/packages/appkit/src/connectors/mcp/client.ts b/packages/appkit/src/connectors/mcp/client.ts index 75baf531c..cbdc82d58 100644 --- a/packages/appkit/src/connectors/mcp/client.ts +++ b/packages/appkit/src/connectors/mcp/client.ts @@ -424,8 +424,6 @@ export class AppKitMcpClient { const authHeaders = await this.resolveAuthHeaders(options); const headers: Record = { - // Raw fetch bypasses the SDK's apiClient; stamp the AppKit User-Agent so - // MCP traffic is attributed to AppKit. 
"User-Agent": APPKIT_USER_AGENT, "Content-Type": "application/json", Accept: "application/json, text/event-stream", @@ -501,8 +499,6 @@ export class AppKitMcpClient { const authHeaders = await this.resolveAuthHeaders(options); const headers: Record = { - // Raw fetch bypasses the SDK's apiClient; stamp the AppKit User-Agent so - // MCP traffic is attributed to AppKit. "User-Agent": APPKIT_USER_AGENT, "Content-Type": "application/json", Accept: "application/json, text/event-stream", diff --git a/packages/appkit/src/plugins/files/tests/plugin.test.ts b/packages/appkit/src/plugins/files/tests/plugin.test.ts index 3136a21c0..a90c4f3ee 100644 --- a/packages/appkit/src/plugins/files/tests/plugin.test.ts +++ b/packages/appkit/src/plugins/files/tests/plugin.test.ts @@ -2732,8 +2732,11 @@ describe("FilesPlugin", () => { }), }, // `_handleUpload` only routes through the connector's `upload()`, - // which uses host + authenticate + fetch. No `files.*` accessor is - // touched on the user client during this path. + // which uses host + authenticate + apiClient.userAgent() + fetch. No + // `files.*` accessor is touched on the user client during this path. 
+ apiClient: { + userAgent: vi.fn(() => "@databricks/appkit/9.9.9"), + }, }; // Wire `_buildUserContextOrNull → ServiceContext.createUserContext` to diff --git a/packages/appkit/src/plugins/lakebase/lakebase.ts b/packages/appkit/src/plugins/lakebase/lakebase.ts index b8b1b16be..9fac029a6 100644 --- a/packages/appkit/src/plugins/lakebase/lakebase.ts +++ b/packages/appkit/src/plugins/lakebase/lakebase.ts @@ -1,3 +1,4 @@ +import { WorkspaceClient } from "@databricks/sdk-experimental"; import type { QueryResult, QueryResultRow } from "pg"; import type { AgentToolDefinition, ToolProvider } from "shared"; import { z } from "zod"; @@ -11,6 +12,7 @@ import { type LakebasePoolManager, RoutingPool, } from "../../connectors/lakebase"; +import { getClientOptions } from "../../context/client-options"; import { getUserContext } from "../../context/execution-context"; import { buildToolkitEntries } from "../../core/agent/build-toolkit"; import { @@ -78,7 +80,12 @@ export class LakebasePlugin extends Plugin implements ToolProvider { * context (set by `Plugin.asUser(req)` via AsyncLocalStorage). */ async setup() { - const poolConfig = this.config.pool; + const poolConfig = { + ...this.config.pool, + workspaceClient: + this.config.pool?.workspaceClient ?? + new WorkspaceClient({}, getClientOptions()), + }; const user = await getUsernameWithApiLookup(poolConfig); const spPool = createLakebasePool({ ...poolConfig, user }); From 3cbf5f300f3329de2f42a5376bfdb16050d3eccd Mon Sep 17 00:00:00 2001 From: MarioCadenas Date: Wed, 1 Jul 2026 12:25:45 +0200 Subject: [PATCH 3/4] test: verify AppKit User-Agent on the wire (SDK + raw-fetch paths) In-process http.Server captures the User-Agent from a real WorkspaceClient (SDK apiClient.request) and the real raw-fetch upload connector, asserting both carry @databricks/appkit/<version> plus the SDK segments, and that dev mode adds mode/dev. Closes the one gap mocks can't cover: that the SDK actually turns getClientOptions() into the wire header. 
Signed-off-by: MarioCadenas --- .../tests/user-agent-wire.integration.test.ts | 106 ++++++++++++++++++ 1 file changed, 106 insertions(+) create mode 100644 packages/appkit/src/context/tests/user-agent-wire.integration.test.ts diff --git a/packages/appkit/src/context/tests/user-agent-wire.integration.test.ts b/packages/appkit/src/context/tests/user-agent-wire.integration.test.ts new file mode 100644 index 000000000..0c5315310 --- /dev/null +++ b/packages/appkit/src/context/tests/user-agent-wire.integration.test.ts @@ -0,0 +1,106 @@ +import http, { type Server } from "node:http"; +import { WorkspaceClient } from "@databricks/sdk-experimental"; +import { + afterAll, + beforeAll, + beforeEach, + describe, + expect, + test, +} from "vitest"; +import { FilesConnector } from "../../connectors/files/client"; +import { getClientOptions } from "../client-options"; + +/** + * Wire-level attribution check. Everything else asserts against mocks; this + * points a REAL `WorkspaceClient` (SDK path) and the REAL raw-`fetch` upload + * connector at an in-process HTTP server and inspects the User-Agent that + * actually leaves the process — the one contract mocks can't prove, since the + * SDK is what turns `getClientOptions()` into the wire header. + */ +describe("User-Agent attribution (wire)", () => { + let server: Server; + let host: string; + let capturedUserAgents: string[]; + + const newClient = () => + new WorkspaceClient( + { host, token: "test-token", authType: "pat" }, + getClientOptions(), + ); + + beforeAll(async () => { + server = http.createServer((req, res) => { + capturedUserAgents.push(req.headers["user-agent"] ?? 
""); + req.resume(); // drain the body so the socket can complete + req.on("end", () => { + res.writeHead(200, { "Content-Type": "application/json" }); + res.end("{}"); + }); + }); + await new Promise((resolve) => + server.listen(0, "127.0.0.1", resolve), + ); + const address = server.address(); + if (!address || typeof address === "string") { + throw new Error("Failed to bind test server"); + } + host = `http://127.0.0.1:${address.port}`; + }); + + afterAll(async () => { + server.closeAllConnections(); + await new Promise((resolve) => server.close(() => resolve())); + }); + + beforeEach(() => { + capturedUserAgents = []; + }); + + test("SDK apiClient.request() stamps the AppKit User-Agent on the wire", async () => { + await newClient().apiClient.request({ + path: "/api/2.0/preview/scim/v2/Me", + method: "GET", + headers: new Headers(), + raw: false, + }); + + expect(capturedUserAgents).toHaveLength(1); + const ua = capturedUserAgents[0]; + // AppKit product first, and the real SDK segments after it — proving this + // is the SDK-composed User-Agent, not a value a mock stubbed in. 
+ expect(ua).toMatch(/^@databricks\/appkit\/\d+\.\d+\.\d+ /); + expect(ua).toContain("databricks-sdk-js/"); + }); + + test("raw-fetch upload path stamps the AppKit User-Agent on the wire", async () => { + const connector = new FilesConnector({}); + await connector.upload( + newClient(), + "/Volumes/catalog/schema/vol/wire-test.bin", + "hello", + ); + + expect(capturedUserAgents).toHaveLength(1); + expect(capturedUserAgents[0]).toMatch( + /^@databricks\/appkit\/\d+\.\d+\.\d+/, + ); + }); + + test("dev mode adds the mode/dev User-Agent segment", async () => { + const prev = process.env.NODE_ENV; + process.env.NODE_ENV = "development"; + try { + await newClient().apiClient.request({ + path: "/api/2.0/preview/scim/v2/Me", + method: "GET", + headers: new Headers(), + raw: false, + }); + } finally { + process.env.NODE_ENV = prev; + } + + expect(capturedUserAgents[0]).toContain("mode/dev"); + }); +}); From 52d984bde9865e334fd133d9773105fc66575d09 Mon Sep 17 00:00:00 2001 From: MarioCadenas Date: Wed, 1 Jul 2026 13:09:27 +0200 Subject: [PATCH 4/4] refactor: dedupe UA version coercion and drop tautological test assertion Compute the normalized product version once in client-options.ts and reuse it for both getClientOptions() and APPKIT_USER_AGENT. Remove the redundant APPKIT_USER_AGENT format assertion in the MCP client test, which duplicated the header equality check above it. 
Signed-off-by: MarioCadenas --- .../appkit/src/connectors/mcp/tests/client.test.ts | 1 - packages/appkit/src/context/client-options.ts | 10 ++++------ 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/packages/appkit/src/connectors/mcp/tests/client.test.ts b/packages/appkit/src/connectors/mcp/tests/client.test.ts index b6e1dd849..1835d90f3 100644 --- a/packages/appkit/src/connectors/mcp/tests/client.test.ts +++ b/packages/appkit/src/connectors/mcp/tests/client.test.ts @@ -147,7 +147,6 @@ describe("AppKitMcpClient — host allowlist", () => { // Every MCP request is attributed to AppKit via User-Agent. expect(headers["User-Agent"]).toBe(APPKIT_USER_AGENT); } - expect(APPKIT_USER_AGENT).toMatch(/^@databricks\/appkit\//); expect(client.canForwardWorkspaceAuth("genie-1")).toBe(true); }); diff --git a/packages/appkit/src/context/client-options.ts b/packages/appkit/src/context/client-options.ts index 0e41daa25..673984bb2 100644 --- a/packages/appkit/src/context/client-options.ts +++ b/packages/appkit/src/context/client-options.ts @@ -5,6 +5,9 @@ import { version as productVersion, } from "../../package.json"; +const normalizedVersion = (coerce(productVersion)?.version ?? + productVersion) as ClientOptions["productVersion"]; + /** * SDK client options that stamp every `apiClient.request()` with an AppKit * User-Agent (`@databricks/appkit/`), so outbound Databricks traffic @@ -13,9 +16,6 @@ import { */ export function getClientOptions(): ClientOptions { const isDev = process.env.NODE_ENV === "development"; - const semver = coerce(productVersion); - const normalizedVersion = (semver?.version ?? - productVersion) as ClientOptions["productVersion"]; return { product: productName, @@ -29,6 +29,4 @@ export function getClientOptions(): ClientOptions { * call sites that bypass the SDK's `apiClient` and have no client to derive it * from (e.g. the MCP connector). */ -export const APPKIT_USER_AGENT = `${productName}/${ - coerce(productVersion)?.version ?? 
productVersion -}`; +export const APPKIT_USER_AGENT = `${productName}/${normalizedVersion}`;