Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 7 additions & 2 deletions api/doc/agg-query/schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -39,11 +39,16 @@
{"type": "array", "items": {"type": "string"}, "minItems": 1}
]
},
"refererCategory": {
"oneOf": [
{"type": "string"},
{"type": "array", "items": {"type": "string"}, "minItems": 1}
]
},
"topicId": {"type": "string"},
"processingId": {"type": "string"},
"split": {
"type": "array",
"items": { "type": "string", "enum": ["day", "refererApp", "processing", "resource", "refererDomain", "userClass"] }
"items": { "type": "string", "enum": ["day", "resource", "userClass", "refererApp", "refererDomain", "refererCategory"] }
}
}
}
2 changes: 1 addition & 1 deletion api/doc/agg-result/schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -35,9 +35,9 @@
"additionalProperties": false,
"properties": {
"refererApp": {"type": "string"},
"processing": {"$ref": "https://github.com/data-fair/metrics/daily-api-metric#/$defs/processing"},
"resource": {"$ref": "https://github.com/data-fair/metrics/daily-api-metric#/$defs/resourceDef"},
"refererDomain": {"type": "string"},
"refererCategory": {"type": "string"},
"userClass": {"$ref": "https://github.com/data-fair/metrics/daily-api-metric#/$defs/userClass"}
}
},
Expand Down
16 changes: 8 additions & 8 deletions api/src/daily-api-metrics/service.ts
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,11 @@ export const agg = async (account: Account, query: AggQuery) => {
if (query.resourceType) $match['resource.type'] = query.resourceType
if (query.resourceId) $match['resource.id'] = Array.isArray(query.resourceId) ? { $in: query.resourceId } : query.resourceId
if (query.refererDomain) $match.refererDomain = Array.isArray(query.refererDomain) ? { $in: query.refererDomain } : query.refererDomain
if (query.processingId) $match['processing._id'] = query.processingId
if (query.refererCategory) {
const refererCategories = Array.isArray(query.refererCategory) ? query.refererCategory : [query.refererCategory]
// legacy data recorded before refererCategory was introduced has no such field, treat it as 'other'
$match.refererCategory = { $in: refererCategories.includes('other') ? [...refererCategories, null] : refererCategories }
}

const $group: Record<string, any> = {
_id: {},
Expand All @@ -60,17 +64,14 @@ export const agg = async (account: Account, query: AggQuery) => {
if (part === 'refererApp') {
$match.refererApp = { $ne: null }
}
if (part === 'processing') {
$match['processing._id'] = { $ne: null }
$group._id.processingId = '$processing._id'
$group.processing = { $last: '$processing' }
} else if (part === 'resource') {
if (part === 'resource') {
$group._id.resourceType = '$resource.type'
$group._id.resourceId = '$resource.id'
$group.resource = { $last: '$resource' }
} else {
if (part !== 'day') seriesKey.push(camelCase(part))
$group._id[camelCase(part)] = '$' + part
// legacy data recorded before refererCategory was introduced has no such field, group it with 'other'
$group._id[camelCase(part)] = part === 'refererCategory' ? { $ifNull: ['$' + part, 'other'] } : '$' + part
}
}

Expand Down Expand Up @@ -102,7 +103,6 @@ export const agg = async (account: Account, query: AggQuery) => {
for (const item of items) {
const key = seriesKey.reduce((a, key) => { a[key] = item[key]; return a }, {} as Record<string, string>)
if (item.resource) key.resource = item.resource
if (item.processing) key.processing = item.processing
let serie = result.series.find((s) => equal(s.key, key))
if (!serie) {
serie = {
Expand Down
6 changes: 6 additions & 0 deletions api/types/daily-api-metric/schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
"statusClass",
"userClass",
"refererDomain",
"refererCategory",
"nbRequests",
"bytes",
"duration"
Expand All @@ -28,6 +29,7 @@
"userClass": {"$ref": "#/$defs/userClass"},
"refererDomain":{"type":"string"},
"refererApp":{"type":"string"},
"refererCategory":{"$ref": "#/$defs/refererCategory"},
"processing":{"$ref": "#/$defs/processing"},
"nbRequests":{
"type":"integer",
Expand Down Expand Up @@ -107,6 +109,10 @@
"userClass": {
"type":"string",
"enum": ["anonymous", "owner", "external", "ownerProcessing", "externalProcessing", "ownerAPIKey", "externalAPIKey"]
},
"refererCategory": {
"type": "string",
"enum": ["backoffice", "embed", "app", "mcp", "other"]
}
}
}
1 change: 1 addition & 0 deletions daemon/src/mongo.ts
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ export class MetricsMongo {
userClass: 1,
refererDomain: 1,
refererApp: 1,
refererCategory: 1,
'processing._id': 1
},
{ unique: true }
Expand Down
37 changes: 23 additions & 14 deletions daemon/src/service.ts
Original file line number Diff line number Diff line change
Expand Up @@ -63,11 +63,23 @@ const getUserClass = (line: LogLine, user: UserRef | null, ownerType: string, ow
else userClass = 'external'

if (user && line[8]) userClass += 'APIKey'
if (user && line[10]) userClass += 'Processing'
return userClass
}

const getRefererInfo = (line: LogLine): [string, string | undefined] => {
/** Coarse consumer category derived from the referer of a request. */
type RefererCategory = 'backoffice' | 'embed' | 'app' | 'mcp' | 'other'

/**
 * Classifies internal traffic by the path of its referer.
 *
 * A referer whose host doesn't match our own host can't be trusted to be
 * internal, no matter what its path looks like, so it is always 'other'.
 *
 * @param url parsed referer URL
 * @param host host the referer is compared against
 * @returns the category of the referer, 'other' when nothing matches
 */
const getRefererCategory = (url: URL, host: string): RefererCategory => {
  // NOTE(review): url.hostname never carries a port, this assumes `host` is a bare hostname too — confirm
  if (url.hostname !== host) return 'other'

  const path = url.pathname
  // match whole path segments, so that "/mcpfoo" or "/data-fair/embedded" on our
  // own host are not mistaken for the "/mcp" and "/data-fair/embed" roots
  const isAtOrUnder = (root: string): boolean => path === root || path.startsWith(root + '/')

  if (isAtOrUnder('/mcp')) return 'mcp'
  if (isAtOrUnder('/data-fair/embed')) return 'embed'
  // trailing slash required: an application id must follow the prefix
  if (path.startsWith('/data-fair/app/')) return 'app'
  if (path.startsWith('/data-fair/')) return 'backoffice'
  return 'other'
}

const getRefererInfo = (line: LogLine): [string, string | undefined, RefererCategory] => {
if (line[1]) {
try {
const url = new URL(line[1])
Expand All @@ -78,19 +90,19 @@ const getRefererInfo = (line: LogLine): [string, string | undefined] => {
const searchParamReferer = url.searchParams.get('referer')
if (searchParamReferer) refererDomain = searchParamReferer
if (url.pathname.startsWith('/data-fair/app/')) refererApp = /** @type {string} */(url.pathname.replace('/data-fair/app/', '').split('/').shift())
return [refererDomain, refererApp]
const refererCategory = getRefererCategory(url, line[0])
return [refererDomain, refererApp, refererCategory]
} catch (err) {
return [line[1], undefined]
return [line[1], undefined, 'other']
}
} else {
return ['none', undefined]
return ['none', undefined, 'other']
}
}

// cf https://stackoverflow.com/a/14350155
// using regexp is faster and prevents lots of object affectations, garbage collecting, etc
const idPropRegexp = /"id":"((\\"|[^"])*)"/
const _idPropRegexp = /"_id":"((\\"|[^"])*)"/
const depPropRegexp = /"department":"((\\"|[^"])*)"/
const typePropRegexp = /"type":"((\\"|[^"])*)"/
const trackPropRegexp = /"track":"((\\"|[^"])*)"/
Expand All @@ -113,11 +125,10 @@ export function pushLogLine (line: LogLine) {
const resourceId = line[12].match(idPropRegexp)?.[1]
if (!operationId || !operationTrack || !ownerType || !ownerId || !resourceType || !resourceId) return
const ownerDep = line[1].match(depPropRegexp)?.[1]
const processingId = line[10].match(_idPropRegexp)?.[1]
const statusClass = getStatusClass(line[4])
const user = getUser(line)
const userClass = getUserClass(line, user, ownerType, ownerId)
const [refererDomain, refererApp] = getRefererInfo(line)
const [refererDomain, refererApp, refererCategory] = getRefererInfo(line)

let bytesSent = line[3]
if (line[14] && line[14] !== '-') {
Expand All @@ -139,11 +150,11 @@ export function pushLogLine (line: LogLine) {
operationTrack,
statusClass,
userClass,
refererDomain
refererDomain,
refererCategory
}
if (refererApp) patchKey.refererApp = refererApp
if (ownerDep) patchKey['owner.department'] = ownerDep
if (processingId) patchKey['processing._id'] = processingId

const existingPatch = patches.find(p => equal(p[0], patchKey))
if (existingPatch) {
Expand All @@ -153,8 +164,6 @@ export function pushLogLine (line: LogLine) {
} else {
const resource = JSON.parse(line[12])
if (resource.title) resource.title = decodeURIComponent(resource.title)
const processing = line[10] ? JSON.parse(line[10]) : undefined
if (processing?.title) processing.title = decodeURIComponent(processing.title)

const set: Record<string, any> = {
owner: JSON.parse(line[5]),
Expand All @@ -163,9 +172,9 @@ export function pushLogLine (line: LogLine) {
operationTrack,
statusClass,
userClass,
refererDomain
refererDomain,
refererCategory
}
if (processing) set.processing = processing
if (refererApp) set.refererApp = refererApp

patches.push([patchKey, {
Expand Down
28 changes: 28 additions & 0 deletions test-it/01-daily-api-metrics.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import { describe, it, before, after, beforeEach } from 'node:test'
import { strict as assert } from 'node:assert'
import * as testSpies from '@data-fair/lib-node/test-spies.js'
import mongo from '@data-fair/lib-node/mongo.js'
import { axiosAuth, clean, startApiServer, stopApiServer, startDaemonServer, stopDaemonServer } from './utils/index.ts'

testSpies.registerModuleHooks()
Expand Down Expand Up @@ -34,4 +35,31 @@ describe('daily api metrics', () => {
const bulk1 = await testSpies.waitFor('sentBulkDelay')
assert.equal(bulk1, 1)
})

// One test per referer scenario: a dataset is created, its lines are requested with
// the scenario's Referer header (or none), and once the 'sentBulkDelay' spy has fired
// the refererCategory of the stored daily-api-metrics document is checked.
describe('referer category classification', () => {
  const scenarios: Array<{ referer?: string, expected: string }> = [
    { referer: 'http://localhost/data-fair/dataset/some-dataset', expected: 'backoffice' },
    { referer: 'http://localhost/data-fair/embed/dataset/some-dataset', expected: 'embed' },
    { referer: 'http://localhost/data-fair/app/507f1f77bcf86cd799439011/', expected: 'app' },
    { referer: 'http://localhost/mcp', expected: 'mcp' },
    // same path as the embed case but on a foreign host
    { referer: 'http://external-site.com/data-fair/embed/dataset/some-dataset', expected: 'other' },
    // no Referer header at all
    { referer: undefined, expected: 'other' }
  ]

  scenarios.forEach(({ referer, expected }) => {
    const refererLabel = referer ?? 'none'

    it(`classifies referer "${refererLabel}" as "${expected}"`, async () => {
      const creation = await adminAx.post('http://localhost:5600/data-fair/api/v1/datasets', {
        isRest: true,
        title: 'd1',
        schema: [{ key: 'prop1', type: 'string' }]
      })
      const datasetId = creation.data.id

      // only send a Referer header when the scenario defines one
      const requestConfig = referer ? { headers: { Referer: referer } } : {}
      // start waiting for the spy before the request is issued
      await Promise.all([
        testSpies.waitFor('sentBulkDelay'),
        adminAx.get(`/data-fair/api/v1/datasets/${datasetId}/lines`, requestConfig)
      ])

      const storedMetric = await mongo.db.collection('daily-api-metrics').findOne({ 'resource.id': datasetId })
      assert.equal(storedMetric?.refererCategory, expected)
    })
  })
})
})
5 changes: 1 addition & 4 deletions ui/src/components/chart/chart-categories.vue
Original file line number Diff line number Diff line change
Expand Up @@ -20,15 +20,12 @@ const userClasses: Record<string, string> = {
owner: 'Propriétaire',
external: 'Utilisateur externe',
ownerAPIKey: "Propriétaire (clé d'API)",
externalAPIKey: "Utilisateur externe (clé d'API)",
ownerProcessing: 'Propriétaire (traitement)',
externalProcessing: 'Utilisateur externe (traitement)'
externalAPIKey: "Utilisateur externe (clé d'API)"
}

const getLabel = (serie: any, category: string, labels: Record<string, string> | null) => {
if (serie.label) return serie.label
if (category === 'resource') return safeDecodeUriComponent(serie.key.resource.title)
if (category === 'processing') return safeDecodeUriComponent(serie.key.processing.title)
if (category === 'userClass') return userClasses[serie.key.userClass] || serie.key.userClass
if (serie.key[category] === 'none') return 'Inconnu'
if (serie.key[category] === null || serie.key[category] === undefined) return 'Aucune'
Expand Down
4 changes: 1 addition & 3 deletions ui/src/components/daily-api-metrics.vue
Original file line number Diff line number Diff line change
Expand Up @@ -73,9 +73,7 @@ const userClasses = [
{ value: 'owner', title: 'Propriétaire' },
{ value: 'external', title: 'utilisateur externe' },
{ value: 'ownerAPIKey', title: "Propriétaire (clé d'API)" },
{ value: 'externalAPIKey', title: "utilisateur externe (clé d'API)" },
{ value: 'ownerProcessing', title: 'Propriétaire (traitement)' },
{ value: 'externalProcessing', title: 'utilisateur externe (traitement)' }
{ value: 'externalAPIKey', title: "utilisateur externe (clé d'API)" }
]

const splitItems = [
Expand Down
17 changes: 12 additions & 5 deletions ui/src/pages/embed/home.vue
Original file line number Diff line number Diff line change
Expand Up @@ -197,10 +197,11 @@
:labels="appLabels"
/>
<chart-categories
title="Requêtes / traitement"
category="processing"
title="Requêtes / consommateurs"
category="refererCategory"
:filter="baseFilter"
:periods="periods"
:labels="refererCategoryLabels"
/>
</v-row>
</template>
Expand Down Expand Up @@ -229,9 +230,15 @@ const userClassLabels: Record<string, string> = {
owner: 'Propriétaire',
external: 'Utilisateur externe',
ownerAPIKey: "Propriétaire (clé d'API)",
externalAPIKey: "Utilisateur externe (clé d'API)",
ownerProcessing: 'Propriétaire (traitement)',
externalProcessing: 'Utilisateur externe (traitement)'
externalAPIKey: "Utilisateur externe (clé d'API)"
}

// Display labels (in French) keyed by refererCategory value; passed as the `labels`
// prop of the chart-categories component whose category is "refererCategory".
// Keys mirror the refererCategory enum of the daily-api-metric schema.
const refererCategoryLabels: Record<string, string> = {
backoffice: 'Back-office',
embed: 'Vues embarquées',
app: 'Applications',
mcp: 'MCP',
other: 'Autre'
}

const datasetItems = computed(() => {
Expand Down
Loading