diff --git a/api/doc/agg-query/schema.json b/api/doc/agg-query/schema.json index 14db247..9056518 100644 --- a/api/doc/agg-query/schema.json +++ b/api/doc/agg-query/schema.json @@ -39,11 +39,16 @@ {"type": "array", "items": {"type": "string"}, "minItems": 1} ] }, + "refererCategory": { + "oneOf": [ + {"type": "string"}, + {"type": "array", "items": {"type": "string"}, "minItems": 1} + ] + }, "topicId": {"type": "string"}, - "processingId": {"type": "string"}, "split": { "type": "array", - "items": { "type": "string", "enum": ["day", "refererApp", "processing", "resource", "refererDomain", "userClass"] } + "items": { "type": "string", "enum": ["day", "resource", "userClass", "refererApp", "refererDomain", "refererCategory"] } } } } \ No newline at end of file diff --git a/api/doc/agg-result/schema.json b/api/doc/agg-result/schema.json index 3583f03..8273dd9 100644 --- a/api/doc/agg-result/schema.json +++ b/api/doc/agg-result/schema.json @@ -35,9 +35,9 @@ "additionalProperties": false, "properties": { "refererApp": {"type": "string"}, - "processing": {"$ref": "https://github.com/data-fair/metrics/daily-api-metric#/$defs/processing"}, "resource": {"$ref": "https://github.com/data-fair/metrics/daily-api-metric#/$defs/resourceDef"}, "refererDomain": {"type": "string"}, + "refererCategory": {"type": "string"}, "userClass": {"$ref": "https://github.com/data-fair/metrics/daily-api-metric#/$defs/userClass"} } }, diff --git a/api/src/daily-api-metrics/service.ts b/api/src/daily-api-metrics/service.ts index 7abc8ea..5d320f6 100644 --- a/api/src/daily-api-metrics/service.ts +++ b/api/src/daily-api-metrics/service.ts @@ -43,7 +43,11 @@ export const agg = async (account: Account, query: AggQuery) => { if (query.resourceType) $match['resource.type'] = query.resourceType if (query.resourceId) $match['resource.id'] = Array.isArray(query.resourceId) ? { $in: query.resourceId } : query.resourceId if (query.refererDomain) $match.refererDomain = Array.isArray(query.refererDomain) ? { $in: query.refererDomain } : query.refererDomain - if (query.processingId) $match['processing._id'] = query.processingId + if (query.refererCategory) { + const refererCategories = Array.isArray(query.refererCategory) ? query.refererCategory : [query.refererCategory] + // legacy data recorded before refererCategory was introduced has no such field, treat it as 'other' + $match.refererCategory = { $in: refererCategories.includes('other') ? [...refererCategories, null] : refererCategories } + } const $group: Record = { _id: {}, @@ -60,17 +64,14 @@ export const agg = async (account: Account, query: AggQuery) => { if (part === 'refererApp') { $match.refererApp = { $ne: null } } - if (part === 'processing') { - $match['processing._id'] = { $ne: null } - $group._id.processingId = '$processing._id' - $group.processing = { $last: '$processing' } - } else if (part === 'resource') { + if (part === 'resource') { $group._id.resourceType = '$resource.type' $group._id.resourceId = '$resource.id' $group.resource = { $last: '$resource' } } else { if (part !== 'day') seriesKey.push(camelCase(part)) - $group._id[camelCase(part)] = '$' + part + // legacy data recorded before refererCategory was introduced has no such field, group it with 'other' + $group._id[camelCase(part)] = part === 'refererCategory' ? { $ifNull: ['$' + part, 'other'] } : '$' + part } } @@ -102,7 +103,6 @@ export const agg = async (account: Account, query: AggQuery) => { for (const item of items) { const key = seriesKey.reduce((a, key) => { a[key] = item[key]; return a }, {} as Record) if (item.resource) key.resource = item.resource - if (item.processing) key.processing = item.processing let serie = result.series.find((s) => equal(s.key, key)) if (!serie) { serie = { diff --git a/api/types/daily-api-metric/schema.json b/api/types/daily-api-metric/schema.json index e535f81..5bc26c2 100644 --- a/api/types/daily-api-metric/schema.json +++ b/api/types/daily-api-metric/schema.json @@ -12,6 +12,7 @@ "statusClass", "userClass", "refererDomain", + "refererCategory", "nbRequests", "bytes", "duration" @@ -28,6 +29,7 @@ "userClass": {"$ref": "#/$defs/userClass"}, "refererDomain":{"type":"string"}, "refererApp":{"type":"string"}, + "refererCategory":{"$ref": "#/$defs/refererCategory"}, "processing":{"$ref": "#/$defs/processing"}, "nbRequests":{ "type":"integer", @@ -107,6 +109,10 @@ "userClass": { "type":"string", "enum": ["anonymous", "owner", "external", "ownerProcessing", "externalProcessing", "ownerAPIKey", "externalAPIKey"] + }, + "refererCategory": { + "type": "string", + "enum": ["backoffice", "embed", "app", "mcp", "other"] } } } \ No newline at end of file diff --git a/daemon/src/mongo.ts b/daemon/src/mongo.ts index 2e21a59..3b96182 100644 --- a/daemon/src/mongo.ts +++ b/daemon/src/mongo.ts @@ -33,6 +33,7 @@ export class MetricsMongo { userClass: 1, refererDomain: 1, refererApp: 1, + refererCategory: 1, 'processing._id': 1 }, { unique: true } diff --git a/daemon/src/service.ts b/daemon/src/service.ts index b26ab10..01df3dc 100644 --- a/daemon/src/service.ts +++ b/daemon/src/service.ts @@ -63,11 +63,23 @@ const getUserClass = (line: LogLine, user: UserRef | null, ownerType: string, ow else userClass = 'external' if (user && line[8]) userClass += 'APIKey' - if (user && line[10]) userClass += 'Processing' return userClass } -const getRefererInfo = (line: LogLine): [string, string | undefined] => { +type RefererCategory = 'backoffice' | 'embed' | 'app' | 'mcp' | 'other' + +// classifies internal traffic by path; a referer whose host doesn't match our +// own host can't be trusted to be internal, no matter what its path looks like +const getRefererCategory = (url: URL, host: string): RefererCategory => { + if (url.hostname !== host) return 'other' + if (url.pathname.startsWith('/mcp')) return 'mcp' + if (url.pathname.startsWith('/data-fair/embed')) return 'embed' + if (url.pathname.startsWith('/data-fair/app/')) return 'app' + if (url.pathname.startsWith('/data-fair/')) return 'backoffice' + return 'other' +} + +const getRefererInfo = (line: LogLine): [string, string | undefined, RefererCategory] => { if (line[1]) { try { const url = new URL(line[1]) @@ -78,19 +90,19 @@ const getRefererInfo = (line: LogLine): [string, string | undefined] => { const searchParamReferer = url.searchParams.get('referer') if (searchParamReferer) refererDomain = searchParamReferer if (url.pathname.startsWith('/data-fair/app/')) refererApp = /** @type {string} */(url.pathname.replace('/data-fair/app/', '').split('/').shift()) - return [refererDomain, refererApp] + const refererCategory = getRefererCategory(url, line[0]) + return [refererDomain, refererApp, refererCategory] } catch (err) { - return [line[1], undefined] + return [line[1], undefined, 'other'] } } else { - return ['none', undefined] + return ['none', undefined, 'other'] } } // cf https://stackoverflow.com/a/14350155 // using regexp is faster and prevents lots of object affectations, garbage collecting, etc const idPropRegexp = /"id":"((\\"|[^"])*)"/ -const _idPropRegexp = /"_id":"((\\"|[^"])*)"/ const depPropRegexp = /"department":"((\\"|[^"])*)"/ const typePropRegexp = /"type":"((\\"|[^"])*)"/ const trackPropRegexp = /"track":"((\\"|[^"])*)"/ @@ -113,11 +125,10 @@ export function pushLogLine (line: LogLine) { const resourceId = line[12].match(idPropRegexp)?.[1] if (!operationId || !operationTrack || !ownerType || !ownerId || !resourceType || !resourceId) return const ownerDep = line[1].match(depPropRegexp)?.[1] - const processingId = line[10].match(_idPropRegexp)?.[1] const statusClass = getStatusClass(line[4]) const user = getUser(line) const userClass = getUserClass(line, user, ownerType, ownerId) - const [refererDomain, refererApp] = getRefererInfo(line) + const [refererDomain, refererApp, refererCategory] = getRefererInfo(line) let bytesSent = line[3] if (line[14] && line[14] !== '-') { @@ -139,11 +150,11 @@ export function pushLogLine (line: LogLine) { operationTrack, statusClass, userClass, - refererDomain + refererDomain, + refererCategory } if (refererApp) patchKey.refererApp = refererApp if (ownerDep) patchKey['owner.department'] = ownerDep - if (processingId) patchKey['processing._id'] = processingId const existingPatch = patches.find(p => equal(p[0], patchKey)) if (existingPatch) { @@ -153,8 +164,6 @@ export function pushLogLine (line: LogLine) { } else { const resource = JSON.parse(line[12]) if (resource.title) resource.title = decodeURIComponent(resource.title) - const processing = line[10] ? JSON.parse(line[10]) : undefined - if (processing?.title) processing.title = decodeURIComponent(processing.title) const set: Record = { owner: JSON.parse(line[5]), @@ -163,9 +172,9 @@ export function pushLogLine (line: LogLine) { operationTrack, statusClass, userClass, - refererDomain + refererDomain, + refererCategory } - if (processing) set.processing = processing if (refererApp) set.refererApp = refererApp patches.push([patchKey, { diff --git a/test-it/01-daily-api-metrics.ts b/test-it/01-daily-api-metrics.ts index 84d5b37..b31429f 100644 --- a/test-it/01-daily-api-metrics.ts +++ b/test-it/01-daily-api-metrics.ts @@ -1,6 +1,7 @@ import { describe, it, before, after, beforeEach } from 'node:test' import { strict as assert } from 'node:assert' import * as testSpies from '@data-fair/lib-node/test-spies.js' +import mongo from '@data-fair/lib-node/mongo.js' import { axiosAuth, clean, startApiServer, stopApiServer, startDaemonServer, stopDaemonServer } from './utils/index.ts' testSpies.registerModuleHooks() @@ -34,4 +35,31 @@ describe('daily api metrics', () => { const bulk1 = await testSpies.waitFor('sentBulkDelay') assert.equal(bulk1, 1) }) + + describe('referer category classification', () => { + const cases: Array<{ referer?: string, expected: string }> = [ + { referer: 'http://localhost/data-fair/dataset/some-dataset', expected: 'backoffice' }, + { referer: 'http://localhost/data-fair/embed/dataset/some-dataset', expected: 'embed' }, + { referer: 'http://localhost/data-fair/app/507f1f77bcf86cd799439011/', expected: 'app' }, + { referer: 'http://localhost/mcp', expected: 'mcp' }, + { referer: 'http://external-site.com/data-fair/embed/dataset/some-dataset', expected: 'other' }, + { referer: undefined, expected: 'other' } + ] + + for (const { referer, expected } of cases) { + it(`classifies referer "${referer ?? 'none'}" as "${expected}"`, async () => { + const dataset = (await adminAx.post('http://localhost:5600/data-fair/api/v1/datasets', { + isRest: true, + title: 'd1', + schema: [{ key: 'prop1', type: 'string' }] + })).data + await Promise.all([ + testSpies.waitFor('sentBulkDelay'), + adminAx.get(`/data-fair/api/v1/datasets/${dataset.id}/lines`, referer ? { headers: { Referer: referer } } : {}) + ]) + const doc = await mongo.db.collection('daily-api-metrics').findOne({ 'resource.id': dataset.id }) + assert.equal(doc?.refererCategory, expected) + }) + } + }) }) diff --git a/ui/src/components/chart/chart-categories.vue b/ui/src/components/chart/chart-categories.vue index d302222..8448d61 100644 --- a/ui/src/components/chart/chart-categories.vue +++ b/ui/src/components/chart/chart-categories.vue @@ -20,15 +20,12 @@ const userClasses: Record = { owner: 'Propriétaire', external: 'Utilisateur externe', ownerAPIKey: "Propriétaire (clé d'API)", - externalAPIKey: "Utilisateur externe (clé d'API)", - ownerProcessing: 'Propriétaire (traitement)', - externalProcessing: 'Utilisateur externe (traitement)' + externalAPIKey: "Utilisateur externe (clé d'API)" } const getLabel = (serie: any, category: string, labels: Record | null) => { if (serie.label) return serie.label if (category === 'resource') return safeDecodeUriComponent(serie.key.resource.title) - if (category === 'processing') return safeDecodeUriComponent(serie.key.processing.title) if (category === 'userClass') return userClasses[serie.key.userClass] || serie.key.userClass if (serie.key[category] === 'none') return 'Inconnu' if (serie.key[category] === null || serie.key[category] === undefined) return 'Aucune' diff --git a/ui/src/components/daily-api-metrics.vue b/ui/src/components/daily-api-metrics.vue index cd9aa0f..823c3ee 100644 --- a/ui/src/components/daily-api-metrics.vue +++ b/ui/src/components/daily-api-metrics.vue @@ -73,9 +73,7 @@ const userClasses = [ { value: 'owner', title: 'Propriétaire' }, { value: 'external', title: 'utilisateur externe' }, { value: 'ownerAPIKey', title: "Propriétaire (clé d'API)" }, - { value: 'externalAPIKey', title: "utilisateur externe (clé d'API)" }, - { value: 'ownerProcessing', title: 'Propriétaire (traitement)' }, - { value: 'externalProcessing', title: 'utilisateur externe (traitement)' } + { value: 'externalAPIKey', title: "utilisateur externe (clé d'API)" } ] const splitItems = [ diff --git a/ui/src/pages/embed/home.vue b/ui/src/pages/embed/home.vue index cf49a1b..0a10934 100644 --- a/ui/src/pages/embed/home.vue +++ b/ui/src/pages/embed/home.vue @@ -197,10 +197,11 @@ :labels="appLabels" /> @@ -229,9 +230,15 @@ const userClassLabels: Record = { owner: 'Propriétaire', external: 'Utilisateur externe', ownerAPIKey: "Propriétaire (clé d'API)", - externalAPIKey: "Utilisateur externe (clé d'API)", - ownerProcessing: 'Propriétaire (traitement)', - externalProcessing: 'Utilisateur externe (traitement)' + externalAPIKey: "Utilisateur externe (clé d'API)" +} + +const refererCategoryLabels: Record = { + backoffice: 'Back-office', + embed: 'Vues embarquées', + app: 'Applications', + mcp: 'MCP', + other: 'Autre' } const datasetItems = computed(() => {