From 9c61ba7551de5c707620938129d1f61fae21bb0f Mon Sep 17 00:00:00 2001
From: miroslavpojer <miroslav.pojer@absa.africa>
Date: Wed, 24 Jun 2026 14:27:49 +0200
Subject: [PATCH 1/5] Initial version

---
 skills/test-data-management/SKILL.md          | 158 ++++++++++++++++++
 skills/test-data-management/evals/evals.json  | 115 +++++++++++++
 .../evals/files/copy-paste-data-setup.py      | 101 +++++++++++
 .../evals/files/separate-variation-tests.py   |  61 +++++++
 .../test-data-management/evals/fixture-map.md |  33 ++++
 .../evals/trigger-eval.json                   |  98 +++++++++++
 6 files changed, 566 insertions(+)
 create mode 100644 skills/test-data-management/SKILL.md
 create mode 100644 skills/test-data-management/evals/evals.json
 create mode 100644 skills/test-data-management/evals/files/copy-paste-data-setup.py
 create mode 100644 skills/test-data-management/evals/files/separate-variation-tests.py
 create mode 100644 skills/test-data-management/evals/fixture-map.md
 create mode 100644 skills/test-data-management/evals/trigger-eval.json

diff --git a/skills/test-data-management/SKILL.md b/skills/test-data-management/SKILL.md
new file mode 100644
index 0000000..763a9bb
--- /dev/null
+++ b/skills/test-data-management/SKILL.md
@@ -0,0 +1,158 @@
+---
+name: test-data-management
+description: >
+  Test data setup and management. Activate when writing tests that require complex data setup —
+  domain objects, database records, API payloads, or configuration structures with multiple input
+  combinations. Prefers parametrised and data-driven test patterns. Covers factory functions,
+  deterministic seeds, fixture reuse, and no-production-data rules. Triggers on: "how should I
+  manage test data", "test data factory", "fixture builder", "parametrize this test", "how to
+  avoid duplicating test data", "builder pattern for tests", "seed data for tests",
+  "how to vary inputs across tests", "data-driven tests", "my test setup is duplicated everywhere",
+  "how do I inject a fixed timestamp in tests", "expected value changes each run",
+  "can I use production data in tests".
+  Does NOT trigger for: choosing test doubles (use test-mocking-patterns), writing test logic
+  (use test-unit-write), reviewing tests against standards (use test-unit-review),
+  debugging test runtime errors.
+  Pairs with test-unit-write and test-mocking-patterns.
+license: Proprietary
+compatibility: GitHub Copilot
+---
+
+# Test Data Management
+
+## Prefer data-driven and parametrised tests
+
+When a behaviour must be tested with multiple input combinations, prefer parametrised tests over
+duplicated test methods. One parametrised test with a data table is clearer, easier to extend,
+and reduces duplication.
+
+| Language | Tool | Pattern |
+|----------|------|---------|
+| Python | `pytest.mark.parametrize` | `@pytest.mark.parametrize("input,expected", [...])` |
+| Scala | ScalaTest `TableDrivenPropertyChecks` | `forAll(table) { (input, expected) => ... }` |
+| .NET | xUnit `[Theory]` + `[InlineData]` / `[MemberData]` | `[Theory] [InlineData(1, 2)]` |
+| TypeScript | Jest `test.each` | `test.each([[input, expected]])` |
+| Java | JUnit 5 `@ParameterizedTest` | `@ParameterizedTest @MethodSource` |
+
+**Use** parametrised tests when: the same behaviour is tested with ≥ 3 input combinations, or when
+combinations form a clear equivalence class table.
+
+**Do not use** parametrised tests when: each case requires fundamentally different setup or
+assertions — use separate named tests instead.
+
+## Use factory and builder patterns
+
+- Create factory functions or builder classes that produce valid default objects
+- Override only the fields relevant to the specific test case
+- Shared hardcoded data causes **cross-test coupling**: a change to a shared dict or object breaks every test that references it — tests should never share mutable data structures
+- Place factories in the nearest shared location:
+  - Python: `conftest.py` factory fixture or `tests/factories.py`
+  - Scala: `TestFactories.scala` object
+  - .NET: `TestDataBuilder.cs`
+  - TypeScript: `test-factories.ts`
+- Document the factory's defaults and what each parameter controls
+
+```python
+# ✅ — factory with keyword overrides
+def make_order(*, order_id="ORD-1", user_id="u1", sku="SKU-1", quantity=1, status="pending"):
+    return Order(order_id=order_id, user_id=user_id, sku=sku, quantity=quantity, status=status)
+
+# test uses only the fields that matter
+def test_cancel_order_when_shipped_returns_false():
+    order = make_order(status="shipped")
+    assert service.cancel(order.order_id) is False
+```
+
+### Composable nested factories
+
+When an object graph is deeply nested, create a factory per level and compose them:
+
+```python
+# ✅ — each factory owns one level; override at any depth
+def make_address(*, street="1 Test St", city="Cape Town", country="ZA"):
+    return Address(street=street, city=city, country=country)
+
+def make_customer(*, customer_id="CUST-1", email="test@example.com", address=None):
+    return Customer(customer_id=customer_id, email=email,
+                    address=address or make_address())
+
+def make_order(*, order_id="ORD-1", user_id="u1", sku="SKU-1", quantity=1,
+               status="pending", customer=None):
+    return Order(order_id=order_id, user_id=user_id, sku=sku, quantity=quantity,
+                 status=status, customer=customer or make_customer())
+
+# Override at any level without affecting other tests
+order = make_order(customer=make_customer(address=make_address(country="UK")))
+```
+
+Do not hardcode deeply-nested dict literals — they are impossible to override and cannot be reused.
+
+## Never use production data
+
+- Never use real production data in tests — not even anonymised exports
+- Generate synthetic data that represents the shape and constraints of production data
+- If a realistic dataset is needed, write a generator script and commit the generator, not the data
+
+## Keep data deterministic
+
+- No random values without a fixed seed
+- Timestamps must be fixed or injected — never call `datetime.now()`, `new Date()`, or
+  `LocalDateTime.now()` directly in test setup
+- Inject a clock or timestamp provider that can be fixed per test
+
+```python
+# ✅ — fixed timestamp injected
+FIXED_NOW = datetime(2026, 1, 15, 12, 0, 0, tzinfo=timezone.utc)
+
+def test_order_placed_at_is_set(mocker):
+    mock_datetime = mocker.patch("myapp.services.order_service.datetime")
+    mock_datetime.now.return_value = FIXED_NOW
+    order = service.place_order("u1", "SKU-1", 1)
+    assert order["placed_at"] == FIXED_NOW.isoformat()
+
+# ❌ — timestamp is non-deterministic, so only a weak assertion is possible
+def test_order_placed_at_is_set():
+    order = service.place_order("u1", "SKU-1", 1)
+    assert order["placed_at"] is not None   # always passes; proves nothing
+```
+
+## Keep data minimal
+
+- Use the smallest dataset that exercises the behaviour under test
+- Avoid large data files checked into the repo — generate programmatically
+- Prefer inline data for simple cases; external files only for complex domain fixtures with
+  many fields (e.g. realistic JSON payloads, XML documents)
+
+## Clean up after integration tests
+
+- Integration tests must clean up created data after each test run
+- Strategies: transactions rolled back after each test, temp tables, test containers with
+  per-test teardown, or database truncation in `afterEach`
+- Unit tests do not need cleanup — they use no real resources
+
+## Test data for consumer-driven contract tests
+
+When using Pact (see `test-api-standards`), the consumer-side contract is generated from test data
+defined in the Pact interaction builder. Apply the same factory / builder patterns:
+
+```python
+# ✅ — Pact interaction uses factory-produced payload
+from tests.factories import make_order_payload
+
+(
+    pact
+    .given("a valid order exists")
+    .upon_receiving("a request to place an order")
+    .with_request("POST", "/orders", body=make_order_payload())
+    .will_respond_with(201, body={"id": "ORD-1", "status": "placed"})
+)
+```
+
+- Keep Pact interaction data in the same factory layer as all other test data
+- Never hardcode raw dicts in the Pact builder — use overridable factory functions
+
+## Routing
+
+- Choosing which test double to use (mock, stub, spy, fake) → use **test-mocking-patterns**
+- Writing the test logic itself → use **test-unit-write**
+- Reviewing tests against standards → use **test-unit-review**
diff --git a/skills/test-data-management/evals/evals.json b/skills/test-data-management/evals/evals.json
new file mode 100644
index 0000000..e48796a
--- /dev/null
+++ b/skills/test-data-management/evals/evals.json
@@ -0,0 +1,115 @@
+{
+  "skill_name": "test-data-management",
+  "evals": [
+    {
+      "id": 1,
+      "category": "happy-path",
+      "prompt": "The first six tests in evals/files/separate-variation-tests.py all test the same apply_tax() behaviour with different rate values. How should I restructure this?",
+      "expected_output": "Recommend collapsing the six separate test functions into a single @pytest.mark.parametrize test with a data table: @pytest.mark.parametrize('amount,rate,expected', [(Decimal('100.00'), 0, Decimal('100.00')), (Decimal('100.00'), 5, Decimal('105.00')), ...]). The two failure-path tests at the bottom test distinct failure logic and should remain as separate named tests.",
+      "files": [
+        "evals/files/separate-variation-tests.py"
+      ],
+      "expectations": [
+        "Parametrize is recommended as the restructuring approach",
+        "The data table includes all six rate variations",
+        "Parametrize syntax is correct for pytest",
+        "The two failure-path tests are identified as exceptions that should stay separate",
+        "Explanation of why to use vs not use parametrize is given"
+      ]
+    },
+    {
+      "id": 2,
+      "category": "regression",
+      "prompt": "Review the test data setup in evals/files/copy-paste-data-setup.py. The Package construction is repeated everywhere — how do I fix this?",
+      "expected_output": "Recommend a make_package() factory function with keyword overrides for the fields that vary per test. Place it in conftest.py as a fixture or in a shared factories.py module. Each test then calls make_package(weight_kg=10.0) and only specifies the field under test. Also flag the non-deterministic datetime.now() in test_shipping_audit_log_timestamp — it should be patched to a fixed value.",
+      "files": [
+        "evals/files/copy-paste-data-setup.py"
+      ],
+      "expectations": [
+        "Factory function pattern is recommended",
+        "Factory function signature uses keyword overrides with sensible defaults",
+        "Placement in conftest.py or factories.py is mentioned",
+        "Each test example only overrides the relevant field",
+        "Non-deterministic datetime.now() is flagged",
+        "Fixed clock / patched datetime is recommended as the fix"
+      ]
+    },
+    {
+      "id": 3,
+      "category": "happy-path",
+      "prompt": "Can I use an export of production customer records as test data? It's anonymised.",
+      "expected_output": "No. Must never use real production data in tests — not even anonymised exports. Generate synthetic data instead using a factory function or a generator script that produces data with the same shape and constraints. If a realistic dataset is needed, write and commit the generator script, not the generated data.",
+      "files": [],
+      "expectations": [
+        "Production data — even anonymised — is explicitly prohibited",
+        "Synthetic data generation is recommended as the alternative",
+        "Generator script pattern (commit the generator, not the data) is mentioned"
+      ]
+    },
+    {
+      "id": 4,
+      "category": "happy-path",
+      "prompt": "How should I handle timestamps in my test data? My tests keep breaking because the expected 'placed_at' value changes every run.",
+      "expected_output": "Inject a fixed clock or patch datetime.now() to a deterministic value. Show the mocker.patch pattern for pytest, fixing the datetime to a constant like FIXED_NOW = datetime(2026, 1, 15, 12, 0, 0, tzinfo=timezone.utc). Assert the exact isoformat value instead of checking is not None.",
+      "files": [],
+      "expectations": [
+        "Root cause identified: non-deterministic datetime.now() in production code",
+        "mocker.patch or monkeypatch recommended for pytest",
+        "FIXED_NOW constant pattern demonstrated",
+        "Assertion on exact timestamp value (not is-not-None) shown",
+        "Clock injection as a design alternative is mentioned"
+      ]
+    },
+    {
+      "id": 5,
+      "category": "negative",
+      "prompt": "Should I use a mock or a stub for the payment gateway?",
+      "expected_output": "This is a test double selection question that routes to test-mocking-patterns. test-data-management covers data setup strategy, not double selection.",
+      "files": [],
+      "expectations": [
+        "Skill correctly identifies this as a mocking question",
+        "Response defers to test-mocking-patterns for double selection"
+      ]
+    },
+    {
+      "id": 6,
+      "category": "paraphrase",
+      "prompt": "All my tests use the same hardcoded order data. When I change one test, it breaks others. How do I fix this?",
+      "expected_output": "Agent identifies the root cause as shared mutable test data or shared hardcoded values. Recommends the factory function pattern: create a make_order() factory with sensible defaults and keyword overrides. Each test calls make_order(status='PENDING') and only overrides the fields it cares about. Hardcoded data shared across tests causes cross-test coupling — a change to the shared dict/object breaks all tests that reference it. Place the factory in conftest.py.",
+      "files": [],
+      "expectations": [
+        "Identifies shared hardcoded data as the root cause",
+        "Recommends factory function with keyword overrides",
+        "Explains cross-test coupling as the failure mechanism",
+        "Notes conftest.py placement"
+      ]
+    },
+    {
+      "id": 7,
+      "category": "edge-case",
+      "prompt": "My test factory creates an order with an embedded Customer object. The Customer itself has an embedded Address. How deep should the factory defaults go?",
+      "expected_output": "Factories should be composable: create a make_address() factory and a make_customer(address=make_address()) factory, then make_order(customer=make_customer()). This allows any test to override at any level: make_order(customer=make_customer(address=make_address(country='ZA'))). Do not hardcode nested objects as deeply-nested dict literals — they are hard to override and impossible to reuse. Each factory owns its own level of the object graph.",
+      "files": [],
+      "expectations": [
+        "Recommends composable nested factories",
+        "Shows override at any level of nesting",
+        "Warns against deeply-nested dict literals",
+        "Notes each factory owns one level of the object graph"
+      ]
+    },
+    {
+      "id": 8,
+      "category": "output-format",
+      "prompt": "Show me the factory pattern for creating test Order objects with sensible defaults and override support.",
+      "expected_output": "The factory function is in a single fenced ```python code block. The function is named make_order() and accepts keyword arguments with defaults. It returns a dict or dataclass with at least: order_id, user_id, sku, quantity, status. A usage example showing override syntax (make_order(status='CANCELLED')) follows in the same or a second code block. No test assertions appear in the factory definition.",
+      "files": [],
+      "expectations": [
+        "Factory in fenced ```python code block",
+        "Named make_order() with keyword defaults",
+        "Returns dict or dataclass with order_id, user_id, sku, quantity, status",
+        "Usage example with override syntax shown",
+        "No assertions in the factory body"
+      ]
+    }
+  ]
+}
diff --git a/skills/test-data-management/evals/files/copy-paste-data-setup.py b/skills/test-data-management/evals/files/copy-paste-data-setup.py
new file mode 100644
index 0000000..8079358
--- /dev/null
+++ b/skills/test-data-management/evals/files/copy-paste-data-setup.py
@@ -0,0 +1,101 @@
+"""
+ShippingCalculator tests — copy-pasted data setup (factory/fixture candidate).
+
+The Package(...) construction is duplicated across all tests. The weight/dimensions
+override pattern is embedded inside each test body instead of using a factory
+or parametrize. Also contains a non-deterministic datetime.now() call.
+"""
+
+from datetime import datetime, timezone
+from dataclasses import dataclass
+from decimal import Decimal
+
+
+@dataclass
+class Package:
+    weight_kg: float
+    length_cm: float
+    width_cm: float
+    height_cm: float
+    destination: str
+
+
+def calculate_shipping(pkg: Package, express: bool = False) -> Decimal:
+    """Calculate shipping cost for a package."""
+    volume = pkg.length_cm * pkg.width_cm * pkg.height_cm / 5000
+    chargeable = max(pkg.weight_kg, volume)
+    base_rate = Decimal("2.50")
+    cost = base_rate * Decimal(str(chargeable))
+    if express:
+        cost *= Decimal("1.5")
+    return round(cost, 2)
+
+
+# --- copy-pasted setup in every test; should use a factory function ---
+
+def test_standard_shipping_for_light_parcel():
+    # copy-pasted Package construction
+    pkg = Package(
+        weight_kg=1.0,
+        length_cm=20.0,
+        width_cm=15.0,
+        height_cm=10.0,
+        destination="ZA",
+    )
+    result = calculate_shipping(pkg, express=False)
+    assert result == Decimal("2.50")
+
+
+def test_express_shipping_adds_50_percent():
+    # copy-pasted Package construction (same fields, same defaults)
+    pkg = Package(
+        weight_kg=1.0,
+        length_cm=20.0,
+        width_cm=15.0,
+        height_cm=10.0,
+        destination="ZA",
+    )
+    result = calculate_shipping(pkg, express=True)
+    assert result == Decimal("3.75")
+
+
+def test_volumetric_weight_used_when_higher():
+    # copy-pasted Package construction with only length changed
+    pkg = Package(
+        weight_kg=0.1,
+        length_cm=50.0,   # only this field is different
+        width_cm=15.0,
+        height_cm=10.0,
+        destination="ZA",
+    )
+    result = calculate_shipping(pkg, express=False)
+    assert result > Decimal("2.50")
+
+
+def test_heavy_package_uses_actual_weight():
+    # copy-pasted Package construction with only weight changed
+    pkg = Package(
+        weight_kg=10.0,   # only this field is different
+        length_cm=20.0,
+        width_cm=15.0,
+        height_cm=10.0,
+        destination="ZA",
+    )
+    result = calculate_shipping(pkg, express=False)
+    assert result == Decimal("25.00")
+
+
+def test_shipping_audit_log_timestamp():
+    """Non-deterministic datetime.now() in test setup."""
+    pkg = Package(
+        weight_kg=1.0,
+        length_cm=20.0,
+        width_cm=15.0,
+        height_cm=10.0,
+        destination="ZA",
+    )
+    cost = calculate_shipping(pkg)
+    # ❌ datetime.now() called in the test body — non-deterministic
+    logged_at = datetime.now(timezone.utc).isoformat()
+    assert cost > Decimal("0")
+    assert logged_at is not None   # always passes, proves nothing
diff --git a/skills/test-data-management/evals/files/separate-variation-tests.py b/skills/test-data-management/evals/files/separate-variation-tests.py
new file mode 100644
index 0000000..ccfd531
--- /dev/null
+++ b/skills/test-data-management/evals/files/separate-variation-tests.py
@@ -0,0 +1,61 @@
+"""
+TaxCalculator tests — separate test function per variation (parametrize candidate).
+
+Each of the first six test functions tests the same apply_tax() behaviour with a
+different rate. They should be collapsed into a single @pytest.mark.parametrize test.
+The last two tests (failure paths) exercise fundamentally different logic and
+are fine as standalone named tests.
+"""
+
+from decimal import Decimal
+
+
+def apply_tax(amount: Decimal, rate_pct: float) -> Decimal:
+    """Apply a tax rate to an amount. rate_pct is a percentage (e.g. 15 for 15%)."""
+    if amount < 0:
+        raise ValueError("amount must be non-negative")
+    if rate_pct < 0 or rate_pct > 100:
+        raise ValueError("rate_pct must be between 0 and 100")
+    return round(amount * Decimal(1 + rate_pct / 100), 2)
+
+
+# --- same logic, six separate functions — should be parametrized ---
+
+def test_apply_tax_with_0_percent_rate():
+    assert apply_tax(Decimal("100.00"), 0) == Decimal("100.00")
+
+
+def test_apply_tax_with_5_percent_rate():
+    assert apply_tax(Decimal("100.00"), 5) == Decimal("105.00")
+
+
+def test_apply_tax_with_10_percent_rate():
+    assert apply_tax(Decimal("100.00"), 10) == Decimal("110.00")
+
+
+def test_apply_tax_with_15_percent_rate():
+    assert apply_tax(Decimal("100.00"), 15) == Decimal("115.00")
+
+
+def test_apply_tax_with_20_percent_rate():
+    assert apply_tax(Decimal("100.00"), 20) == Decimal("120.00")
+
+
+def test_apply_tax_with_100_percent_rate():
+    assert apply_tax(Decimal("100.00"), 100) == Decimal("200.00")
+
+
+# --- different failure logic — fine as standalone named tests ---
+
+def test_apply_tax_with_negative_amount_raises_value_error():
+    """Failure path: negative amount raises ValueError."""
+    import pytest
+    with pytest.raises(ValueError, match="non-negative"):
+        apply_tax(Decimal("-1.00"), 10)
+
+
+def test_apply_tax_with_rate_above_100_raises_value_error():
+    """Failure path: rate > 100 raises ValueError."""
+    import pytest
+    with pytest.raises(ValueError, match="rate_pct"):
+        apply_tax(Decimal("100.00"), 101)
diff --git a/skills/test-data-management/evals/fixture-map.md b/skills/test-data-management/evals/fixture-map.md
new file mode 100644
index 0000000..0f624f5
--- /dev/null
+++ b/skills/test-data-management/evals/fixture-map.md
@@ -0,0 +1,33 @@
+# Test Data Management — Evals Fixture Map
+
+Links each eval test case to its fixture file(s).
+
+| Test ID | Category   | Fixture |
+|---------|------------|---------|
+| 1       | happy-path | evals/files/separate-variation-tests.py |
+| 2       | regression | evals/files/copy-paste-data-setup.py |
+| 3       | happy-path | *(no file — production data policy question)* |
+| 4       | happy-path | *(no file — timestamp determinism question)* |
+| 5       | negative   | *(no file — double selection routed away)* |
+
+## Fixture → Scenario mapping
+
+| Fixture file | Scenario exercised |
+|---|---|
+| separate-variation-tests.py | Six test functions testing the same logic → collapse to parametrize; two failure tests correctly remain separate |
+| copy-paste-data-setup.py | Repeated Package construction → extract factory function; non-deterministic datetime.now() → patch to fixed value |
+
+## Coverage summary
+
+- happy-path: 3
+- regression: 1
+- negative: 1
+- **total: 5**
+
+## Trigger eval coverage
+
+| Direction | Count |
+|---|---|
+| should_trigger = true | 12 |
+| should_trigger = false | 4 |
+| **total** | **16** |
diff --git a/skills/test-data-management/evals/trigger-eval.json b/skills/test-data-management/evals/trigger-eval.json
new file mode 100644
index 0000000..00c3877
--- /dev/null
+++ b/skills/test-data-management/evals/trigger-eval.json
@@ -0,0 +1,98 @@
+[
+  {
+    "id": "d-t01-manage-test-data",
+    "query": "How should I manage test data for this service?",
+    "should_trigger": true,
+    "reason": "'How should I manage test data' is a listed trigger phrase."
+  },
+  {
+    "id": "d-t02-test-data-factory",
+    "query": "How do I set up a test data factory for my domain objects?",
+    "should_trigger": true,
+    "reason": "'Test data factory' is a listed trigger phrase."
+  },
+  {
+    "id": "d-t03-fixture-builder",
+    "query": "I need a fixture builder for generating test orders.",
+    "should_trigger": true,
+    "reason": "'Fixture builder' is a listed trigger phrase."
+  },
+  {
+    "id": "d-t04-parametrize-test",
+    "query": "Can you help me parametrize this test to avoid duplication?",
+    "should_trigger": true,
+    "reason": "'Parametrize this test' is a listed trigger phrase."
+  },
+  {
+    "id": "d-t05-avoid-duplicating",
+    "query": "How do I avoid duplicating test data setup across ten test functions?",
+    "should_trigger": true,
+    "reason": "'How to avoid duplicating test data' is a listed trigger phrase."
+  },
+  {
+    "id": "d-t06-builder-pattern",
+    "query": "I want to use the builder pattern for my test data objects.",
+    "should_trigger": true,
+    "reason": "'Builder pattern for tests' is a listed trigger phrase."
+  },
+  {
+    "id": "d-t07-seed-data",
+    "query": "What's the best way to seed data for integration tests?",
+    "should_trigger": true,
+    "reason": "'Seed data for tests' is a listed trigger phrase."
+  },
+  {
+    "id": "d-t08-data-driven",
+    "query": "How do I write data-driven tests for this calculator?",
+    "should_trigger": true,
+    "reason": "'Data-driven tests' is a listed trigger phrase."
+  },
+  {
+    "id": "d-t09-vary-inputs",
+    "query": "How do I vary the inputs across multiple test cases without copy-paste?",
+    "should_trigger": true,
+    "reason": "'How to vary inputs across tests' is a listed trigger phrase."
+  },
+  {
+    "id": "d-t10-prod-data",
+    "query": "Can I use production data in my tests if I anonymise it first?",
+    "should_trigger": true,
+    "reason": "No-production-data rule is core to this skill; question directly triggers it."
+  },
+  {
+    "id": "d-t11-timestamp-in-tests",
+    "query": "My test keeps failing because the expected timestamp changes every run.",
+    "should_trigger": true,
+    "reason": "Paraphrase of 'how do I inject a fixed timestamp' — deterministic data rule."
+  },
+  {
+    "id": "d-t12-setup-duplicated",
+    "query": "My test setup is duplicated everywhere — every test creates the same User object.",
+    "should_trigger": true,
+    "reason": "'My test setup is duplicated everywhere' is a listed trigger phrase."
+  },
+  {
+    "id": "d-n01-spy-vs-mock",
+    "query": "Should I use a spy or a mock for the inventory repository?",
+    "should_trigger": false,
+    "reason": "Test-double selection — routes to test-mocking-patterns."
+  },
+  {
+    "id": "d-n02-write-unit-tests",
+    "query": "Write unit tests for the ShippingCalculator class.",
+    "should_trigger": false,
+    "reason": "Test-writing request — routes to test-unit-write."
+  },
+  {
+    "id": "d-n03-review-tests",
+    "query": "Review this test file for standards compliance.",
+    "should_trigger": false,
+    "reason": "Test review request — routes to test-unit-review."
+  },
+  {
+    "id": "d-n04-debug-test",
+    "query": "Why is my parametrize test throwing a TypeError on the second argument?",
+    "should_trigger": false,
+    "reason": "Runtime debugging of a specific error — not test data strategy."
+  }
+]

From 455ea40ac4392b0f814562e42ea4060d39a69337 Mon Sep 17 00:00:00 2001
From: miroslavpojer <miroslav.pojer@absa.africa>
Date: Wed, 24 Jun 2026 15:09:36 +0200
Subject: [PATCH 2/5] feat: add test-data-management skill with documentation
 and evals

---
 README.md                                     |  1 +
 docs/README.md                                |  5 +-
 docs/test-data-management.md                  | 62 +++++++++++++++++++
 skills/test-data-management/SKILL.md          | 52 ++++------------
 skills/test-data-management/evals/evals.json  | 33 +++++++++-
 .../test-data-management/evals/fixture-map.md | 32 ++++++----
 .../evals/trigger-eval.json                   | 24 ++++++-
 7 files changed, 151 insertions(+), 58 deletions(-)
 create mode 100644 docs/test-data-management.md

diff --git a/README.md b/README.md
index 6b1f500..19e386a 100644
--- a/README.md
+++ b/README.md
@@ -78,6 +78,7 @@ its purpose, trigger phrases, and full instructions.
 | Skill                                                | Description                                                                                                                         |
 |------------------------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------|
 | **[pr-review](./skills/pr-review/)**                 | Pull request code review — reviews diffs for risk, security issues, API contract changes, dependency bumps, CI/CD and infrastructure changes. Produces concise Blocker / Important / Nit comments. |
+| **[test-data-management](./skills/test-data-management/)** | Test data setup and management — factory functions, parametrised tests, deterministic seeds, fixture reuse, and production-data rules for unit and integration tests. |
 | **[token-saving](./skills/token-saving/)**           | Always-active response discipline — enforces brevity, no filler openers or closers, structured output, and a What/Why/How footer on code responses. Suspends on explicit "full detail" requests. |
 
 ## Finding More Skills
diff --git a/docs/README.md b/docs/README.md
index 1388ae5..a374d8f 100644
--- a/docs/README.md
+++ b/docs/README.md
@@ -24,8 +24,9 @@ Navigation hub for all guides in this repository. Browse by category below.
 
 | Guide | Description |
 |----|----|
-| [PR Review](./pr-review.md)             | How the PR review skill works, what sections it applies, and how to trigger it     |
-| [Token Saving](./token-saving.md)       | Keeping AI responses concise — how the token-saving skill works and when it applies |
+| [PR Review](./pr-review.md)                         | How the PR review skill works, what sections it applies, and how to trigger it     |
+| [Test Data Management](./test-data-management.md)   | How the test-data-management skill works, what it covers, and when it fires |
+| [Token Saving](./token-saving.md)                   | Keeping AI responses concise — how the token-saving skill works and when it applies |
 
 > **Keep this index up to date.** When you add a new guide, add a row to the appropriate table above.
 
diff --git a/docs/test-data-management.md b/docs/test-data-management.md
new file mode 100644
index 0000000..a750f45
--- /dev/null
+++ b/docs/test-data-management.md
@@ -0,0 +1,62 @@
+# Test Data Management Skill
+
+The `test-data-management` skill guides consistent, maintainable test data setup across unit and integration tests. It activates when the question is about _how_ to structure or supply data to tests — not about writing the tests themselves.
+
+---
+
+## What it covers
+
+| Topic | Guidance |
+|---|---|
+| Parametrised tests | When to collapse repeated test functions into a single data-driven test |
+| Factory / builder pattern | Default-value factories with keyword overrides; composable nested factories |
+| Production data | Why to never use production data (even anonymised); generator-script pattern |
+| Deterministic data | Injecting fixed timestamps; avoiding `datetime.now()` in test setup |
+| Minimal data | Inline data for simple cases; external fixtures only for complex payloads |
+| Integration test cleanup | Transaction rollback, truncation, test containers, run-scoped IDs |
+
+---
+
+## When it fires
+
+The skill activates on intent — it does not require exact phrasing:
+
+```
+my test setup is duplicated everywhere
+how do I avoid copy-pasting test data across 10 tests?
+can I use production data in tests?
+my test keeps breaking because the expected timestamp changes every run
+how do I create a factory for test orders?
+how should I seed data for integration tests?
+how do I clean up after an integration test?
+```
+
+---
+
+## When it does not fire
+
+| Situation | Correct path |
+|---|---|
+| Choosing mock, stub, spy, or fake | Test mocking / doubles guide |
+| Writing test logic and assertions | Test authoring guide |
+| Reviewing tests for standards compliance | Test review guide |
+| Debugging a test runtime error | General debugging |
+| Configuring test infrastructure (containers, DBs) | Infrastructure / DevOps guide |
+
+---
+
+## Language support
+
+The skill covers patterns for Python, TypeScript/JavaScript, Scala, Java, and .NET. Examples default to Python but the language table in the skill body maps each pattern to the idiomatic tool for each ecosystem.
+
+---
+
+## Evals
+
+The skill ships with 10 functional evals (`evals/evals.json`) and 19 trigger evals (`evals/trigger-eval.json`). Run them to validate behaviour after edits — see [Skill Testing](./skill-testing.md).
+
+---
+
+## Installation
+
+See [Getting Started](./getting-started.md) for the full install guide.
diff --git a/skills/test-data-management/SKILL.md b/skills/test-data-management/SKILL.md
index 763a9bb..8a3403e 100644
--- a/skills/test-data-management/SKILL.md
+++ b/skills/test-data-management/SKILL.md
@@ -1,19 +1,13 @@
 ---
 name: test-data-management
 description: >
-  Test data setup and management. Activate when writing tests that require complex data setup —
-  domain objects, database records, API payloads, or configuration structures with multiple input
-  combinations. Prefers parametrised and data-driven test patterns. Covers factory functions,
-  deterministic seeds, fixture reuse, and no-production-data rules. Triggers on: "how should I
-  manage test data", "test data factory", "fixture builder", "parametrize this test", "how to
-  avoid duplicating test data", "builder pattern for tests", "seed data for tests",
-  "how to vary inputs across tests", "data-driven tests", "my test setup is duplicated everywhere",
-  "how do I inject a fixed timestamp in tests", "expected value changes each run",
-  "can I use production data in tests".
-  Does NOT trigger for: choosing test doubles (use test-mocking-patterns), writing test logic
-  (use test-unit-write), reviewing tests against standards (use test-unit-review),
-  debugging test runtime errors.
-  Pairs with test-unit-write and test-mocking-patterns.
+  Test data setup and management. Activate when test setup is duplicated, inputs need
+  parametrisation, factories/builders are needed, timestamps are non-deterministic, or
+  production data rules apply. Triggers on: "test data factory", "fixture builder",
+  "parametrize this test", "data-driven tests", "test setup is duplicated",
+  "inject fixed timestamp", "can I use production data",
+  "clean up after integration tests". Does NOT trigger for: test double selection,
+  writing or reviewing tests, debug errors.
 license: Proprietary
 compatibility: GitHub Copilot
 ---
@@ -65,7 +59,7 @@ def test_cancel_order_when_shipped_returns_false():
 
 ### Composable nested factories
 
-When an object graph is deeply nested, create a factory per level and compose them:
+Create a factory per level and compose them:
 
 ```python
 # ✅ — each factory owns one level; override at any depth
@@ -128,31 +122,11 @@ def test_order_placed_at_is_set():
 - Integration tests must clean up created data after each test run
 - Strategies: transactions rolled back after each test, temp tables, test containers with
   per-test teardown, or database truncation in `afterEach`
+- Use a unique run-scoped prefix or ID for all created records so cleanup is scoped and safe
 - Unit tests do not need cleanup — they use no real resources
 
-## Test data for consumer-driven contract tests
+## Out of scope
 
-When using Pact (see `test-api-standards`) the consumer-side contract is generated from test data
-defined in the Pact interaction builder. Apply the same factory / builder patterns:
-
-```python
-# ✅ — Pact interaction uses factory-produced payload
-from tests.factories import make_order_payload
-
-(
-    pact
-    .given("a valid order exists")
-    .upon_receiving("a request to place an order")
-    .with_request("POST", "/orders", body=make_order_payload())
-    .will_respond_with(201, body={"id": "ORD-1", "status": "placed"})
-)
-```
-
-- Keep Pact interaction data in the same factory layer as all other test data
-- Never hardcode raw dicts in the Pact builder — use overridable factory functions
-
-## Routing
-
-- Choosing which test double to use (mock, stub, spy, fake) → use **test-mocking-patterns**
-- Writing the test logic itself → use **test-unit-write**
-- Reviewing tests against standards → use **test-unit-review**
+- Choosing test doubles (mock, stub, spy, fake) — handle in your project's mocking guide
+- Writing test logic and assertions — handled separately
+- Reviewing tests for standards compliance — handled separately
diff --git a/skills/test-data-management/evals/evals.json b/skills/test-data-management/evals/evals.json
index e48796a..d51ea81 100644
--- a/skills/test-data-management/evals/evals.json
+++ b/skills/test-data-management/evals/evals.json
@@ -64,11 +64,11 @@
       "id": 5,
       "category": "negative",
       "prompt": "Should I use a mock or a stub for the payment gateway?",
-      "expected_output": "This is a test double selection question that routes to test-mocking-patterns. test-data-management covers data setup strategy, not double selection.",
+      "expected_output": "This is a test double selection question — out of scope for test-data-management. Response should redirect to a mocking/test-doubles guide and not provide factory or parametrize advice.",
       "files": [],
       "expectations": [
         "Skill correctly identifies this as a mocking question",
-        "Response defers to test-mocking-patterns for double selection"
+        "Response defers to a mocking/test-doubles guide, not test-data-management"
       ]
     },
     {
@@ -110,6 +110,35 @@
         "Usage example with override syntax shown",
         "No assertions in the factory body"
       ]
+    },
+    {
+      "id": 9,
+      "category": "happy-path",
+      "prompt": "My integration tests are leaving records in the database. How do I clean up after each test?",
+      "expected_output": "Recommend at least two cleanup strategies from: (1) wrap each test in a transaction and roll back after, (2) truncate tables in afterEach/teardown, (3) use test containers with per-test teardown, (4) use a unique run-scoped ID/prefix for all created records so cleanup is targeted. State that unit tests do not need cleanup. Warn that failing to clean up causes test pollution and non-deterministic failures.",
+      "files": [],
+      "expectations": [
+        "At least two cleanup strategies named",
+        "Transaction rollback is mentioned as an option",
+        "Test containers or afterEach teardown mentioned",
+        "Run-scoped prefix/ID strategy mentioned",
+        "States unit tests do not need cleanup",
+        "Test pollution / non-determinism risk is explained"
+      ]
+    },
+    {
+      "id": 10,
+      "category": "edge-case",
+      "prompt": "I'm writing tests in TypeScript with Jest. How do I parametrize a test that checks the same discount calculation with five different discount rates?",
+      "expected_output": "Recommend Jest test.each with an inline data table. Show the test.each([[rate, input, expected], ...]) syntax and the it/test function signature receiving the parameters. Mention that this is the TypeScript/Jest equivalent of pytest.mark.parametrize and xUnit Theory. The five rate variations should collapse into a single test block.",
+      "files": [],
+      "expectations": [
+        "Jest test.each is recommended",
+        "Correct test.each([[...], ...]) syntax shown",
+        "Test function signature using destructured or positional params shown",
+        "Equivalence to pytest.mark.parametrize or xUnit Theory noted",
+        "All five rate variations collapse into one test block"
+      ]
     }
   ]
 }
diff --git a/skills/test-data-management/evals/fixture-map.md b/skills/test-data-management/evals/fixture-map.md
index 0f624f5..6f1d31d 100644
--- a/skills/test-data-management/evals/fixture-map.md
+++ b/skills/test-data-management/evals/fixture-map.md
@@ -2,13 +2,18 @@
 
 Links each eval test case to its fixture file(s).
 
-| Test ID | Category   | Fixture |
-|---------|------------|---------|
-| 1       | happy-path | evals/files/separate-variation-tests.py |
-| 2       | regression | evals/files/copy-paste-data-setup.py |
-| 3       | happy-path | *(no file — production data policy question)* |
-| 4       | happy-path | *(no file — timestamp determinism question)* |
-| 5       | negative   | *(no file — double selection routed away)* |
+| Test ID | Category      | Fixture |
+|---------|---------------|---------|
+| 1       | happy-path    | evals/files/separate-variation-tests.py |
+| 2       | regression    | evals/files/copy-paste-data-setup.py |
+| 3       | happy-path    | *(no file — production data policy question)* |
+| 4       | happy-path    | *(no file — timestamp determinism question)* |
+| 5       | negative      | *(no file — double selection routed away)* |
+| 6       | paraphrase    | *(no file — cross-test coupling / factory question)* |
+| 7       | edge-case     | *(no file — composable nested factories)* |
+| 8       | output-format | *(no file — factory output format check)* |
+| 9       | happy-path    | *(no file — integration test cleanup strategies)* |
+| 10      | edge-case     | *(no file — TypeScript/Jest parametrize)* |
 
 ## Fixture → Scenario mapping
 
@@ -19,15 +24,18 @@ Links each eval test case to its fixture file(s).
 
 ## Coverage summary
 
-- happy-path: 3
+- happy-path: 4
 - regression: 1
 - negative: 1
-- **total: 5**
+- paraphrase: 1
+- edge-case: 2
+- output-format: 1
+- **total: 10**
 
 ## Trigger eval coverage
 
 | Direction | Count |
 |---|---|
-| should_trigger = true | 12 |
-| should_trigger = false | 4 |
-| **total** | **16** |
+| should_trigger = true | 14 |
+| should_trigger = false | 5 |
+| **total** | **19** |
diff --git a/skills/test-data-management/evals/trigger-eval.json b/skills/test-data-management/evals/trigger-eval.json
index 00c3877..7dbfe71 100644
--- a/skills/test-data-management/evals/trigger-eval.json
+++ b/skills/test-data-management/evals/trigger-eval.json
@@ -75,24 +75,42 @@
     "id": "d-n01-spy-vs-mock",
     "query": "Should I use a spy or a mock for the inventory repository?",
     "should_trigger": false,
-    "reason": "Test-double selection — routes to test-mocking-patterns."
+    "reason": "Test-double selection — out of scope for test-data-management."
   },
   {
     "id": "d-n02-write-unit-tests",
     "query": "Write unit tests for the ShippingCalculator class.",
     "should_trigger": false,
-    "reason": "Test-writing request — routes to test-unit-write."
+    "reason": "Test-writing request — does not involve data setup strategy."
   },
   {
     "id": "d-n03-review-tests",
     "query": "Review this test file for standards compliance.",
     "should_trigger": false,
-    "reason": "Test review request — routes to test-unit-review."
+    "reason": "Test review request — out of scope for test-data-management."
   },
   {
     "id": "d-n04-debug-test",
     "query": "Why is my parametrize test throwing a TypeError on the second argument?",
     "should_trigger": false,
     "reason": "Runtime debugging of a specific error — not test data strategy."
+  },
+  {
+    "id": "d-t13-cleanup-integration",
+    "query": "How do I clean up test data after each integration test run?",
+    "should_trigger": true,
+    "reason": "'Clean up after integration tests' is a listed trigger phrase and covered in the integration cleanup section."
+  },
+  {
+    "id": "d-t14-polluted-db",
+    "query": "My integration tests are polluting the database — records from one test affect the next.",
+    "should_trigger": true,
+    "reason": "Test data pollution is a data isolation/cleanup concern — core to the skill."
+  },
+  {
+    "id": "d-n05-setup-test-container",
+    "query": "How do I configure a Testcontainers PostgreSQL instance for my tests?",
+    "should_trigger": false,
+    "reason": "Test infrastructure/environment setup — not test data management strategy."
   }
 ]

From 081bfa4ba2e964f3ae20db0dba753d81234b926b Mon Sep 17 00:00:00 2001
From: miroslavpojer <miroslav.pojer@absa.africa>
Date: Wed, 24 Jun 2026 15:48:36 +0200
Subject: [PATCH 3/5] feat: update test-data-management skill description and
 eval queries for clarity

---
 skills/test-data-management/SKILL.md          |  9 +--
 .../evals/trigger-eval.json                   | 56 +++++++++----------
 2 files changed, 29 insertions(+), 36 deletions(-)

diff --git a/skills/test-data-management/SKILL.md b/skills/test-data-management/SKILL.md
index 8a3403e..beb0c78 100644
--- a/skills/test-data-management/SKILL.md
+++ b/skills/test-data-management/SKILL.md
@@ -1,13 +1,6 @@
 ---
 name: test-data-management
-description: >
-  Test data setup and management. Activate when test setup is duplicated, inputs need
-  parametrisation, factories/builders are needed, timestamps are non-deterministic, or
-  production data rules apply. Triggers on: "test data factory", "fixture builder",
-  "parametrize this test", "data-driven tests", "test setup is duplicated",
-  "inject fixed timestamp", "can I use production data",
-  "clean up after integration tests". Does NOT trigger for: test double selection,
-  writing or reviewing tests, debug errors.
+description: Test data setup factories parametrize
 license: Proprietary
 compatibility: GitHub Copilot
 ---
diff --git a/skills/test-data-management/evals/trigger-eval.json b/skills/test-data-management/evals/trigger-eval.json
index 7dbfe71..66222f8 100644
--- a/skills/test-data-management/evals/trigger-eval.json
+++ b/skills/test-data-management/evals/trigger-eval.json
@@ -1,75 +1,75 @@
 [
   {
     "id": "d-t01-manage-test-data",
-    "query": "How should I manage test data for this service?",
+    "query": "I've got 50 test functions in my service, and each one creates a Customer object with different fields. The test setup code is scattered everywhere and makes tests hard to maintain. How can I organize this?",
     "should_trigger": true,
-    "reason": "'How should I manage test data' is a listed trigger phrase."
+    "reason": "Concrete problem: fragmented test setup across many tests → needs data management strategy"
   },
   {
     "id": "d-t02-test-data-factory",
-    "query": "How do I set up a test data factory for my domain objects?",
+    "query": "I'm writing tests for my domain model and I keep creating Order objects with the same fields repeated in every test. I want to extract this into something reusable. How do I set up a factory for my domain objects?",
     "should_trigger": true,
-    "reason": "'Test data factory' is a listed trigger phrase."
+    "reason": "Concrete problem: duplicated object creation → needs factory pattern guidance"
   },
   {
     "id": "d-t03-fixture-builder",
-    "query": "I need a fixture builder for generating test orders.",
+    "query": "My e-commerce tests need to generate test orders with different combinations of items, customers, and payment methods. Writing out all combinations manually in each test is getting unwieldy. How do I build flexible test data fixtures?",
     "should_trigger": true,
-    "reason": "'Fixture builder' is a listed trigger phrase."
+    "reason": "Concrete problem: complex test data combinations → needs builder/fixture pattern"
   },
   {
     "id": "d-t04-parametrize-test",
-    "query": "Can you help me parametrize this test to avoid duplication?",
+    "query": "I've got a function that calculates shipping costs. I need to test it with 10 different inputs (different regions, weights, speeds) but I don't want to write 10 separate test methods. How do I parametrize this?",
     "should_trigger": true,
-    "reason": "'Parametrize this test' is a listed trigger phrase."
+    "reason": "Concrete problem: multiple test inputs without duplication → needs parametrization guidance"
   },
   {
     "id": "d-t05-avoid-duplicating",
-    "query": "How do I avoid duplicating test data setup across ten test functions?",
+    "query": "I have 15 integration tests for my user service. Every single one starts by creating a test user with the same fields (name, email, role). I'm copying this code into each test. This feels wrong. How do I avoid this duplication?",
     "should_trigger": true,
-    "reason": "'How to avoid duplicating test data' is a listed trigger phrase."
+    "reason": "Concrete problem: repeated setup code across tests → needs data sharing/factory strategy"
   },
   {
     "id": "d-t06-builder-pattern",
-    "query": "I want to use the builder pattern for my test data objects.",
+    "query": "My Order objects have 10 required fields plus 5 optional ones. In my tests, I want to create different variations (just the required ones, or with some optional fields set, etc.) without having to specify all fields every time. What's a good pattern?",
     "should_trigger": true,
-    "reason": "'Builder pattern for tests' is a listed trigger phrase."
+    "reason": "Concrete problem: flexible object construction with defaults → needs builder pattern guidance"
   },
   {
     "id": "d-t07-seed-data",
-    "query": "What's the best way to seed data for integration tests?",
+    "query": "I'm setting up integration tests for my API. Before each test, I need to populate the database with base data (5 test users, 3 regions, 20 products). After the test, I need to clean it up. What's the best approach?",
     "should_trigger": true,
-    "reason": "'Seed data for tests' is a listed trigger phrase."
+    "reason": "Concrete problem: pre-populating and cleaning database for tests → needs seeding/cleanup strategy"
   },
   {
     "id": "d-t08-data-driven",
-    "query": "How do I write data-driven tests for this calculator?",
+    "query": "I have a tax calculation service. I need to verify it works correctly for 12 different scenarios (different tax rates, holidays, regions). Currently I have 12 separate test methods with almost identical code. How can I write this more efficiently?",
     "should_trigger": true,
-    "reason": "'Data-driven tests' is a listed trigger phrase."
+    "reason": "Concrete problem: multiple scenarios with different inputs → needs data-driven testing approach"
   },
   {
     "id": "d-t09-vary-inputs",
-    "query": "How do I vary the inputs across multiple test cases without copy-paste?",
+    "query": "I'm testing a discount calculator. It needs to work correctly for 8 different inputs (different customer types, product types, discount codes, quantities). I could write 8 tests but that's a lot of repetition. How do I test multiple input combinations cleanly?",
     "should_trigger": true,
-    "reason": "'How to vary inputs across tests' is a listed trigger phrase."
+    "reason": "Concrete problem: testing many input combinations → needs parametrization/variation strategy"
   },
   {
     "id": "d-t10-prod-data",
-    "query": "Can I use production data in my tests if I anonymise it first?",
+    "query": "I have a database backup from production that I want to use as test data. I've anonymized the personal info, but I'm not sure if using real production data in my test suite is a good idea. What should I do?",
     "should_trigger": true,
-    "reason": "No-production-data rule is core to this skill; question directly triggers it."
+    "reason": "Concrete problem: production data usage decision → needs guidance on data sourcing policy"
   },
   {
     "id": "d-t11-timestamp-in-tests",
-    "query": "My test keeps failing because the expected timestamp changes every run.",
+    "query": "I'm testing a date-sensitive feature (e.g., expired coupons). My test works fine in the morning but fails in the afternoon because the current time changes. How do I make my tests deterministic?",
     "should_trigger": true,
-    "reason": "Paraphrase of 'how do I inject a fixed timestamp' — deterministic data rule."
+    "reason": "Concrete problem: time-dependent test behavior → needs deterministic test data strategy"
   },
   {
     "id": "d-t12-setup-duplicated",
-    "query": "My test setup is duplicated everywhere — every test creates the same User object.",
+    "query": "I'm reviewing my test suite and I see the same pattern everywhere: every test creates a User object with `id=1, name='Test User', email='test@example.com', role='admin'`. This code is duplicated across maybe 30 tests. What's the best way to fix this?",
     "should_trigger": true,
-    "reason": "'My test setup is duplicated everywhere' is a listed trigger phrase."
+    "reason": "Concrete problem: duplicated object creation across many tests → needs factory/fixture strategy"
   },
   {
     "id": "d-n01-spy-vs-mock",
@@ -97,15 +97,15 @@
   },
   {
     "id": "d-t13-cleanup-integration",
-    "query": "How do I clean up test data after each integration test run?",
+    "query": "My integration tests run against a real database. Each test inserts records during setup. After it finishes, I need to clean up so the next test starts fresh. Currently I'm manually deleting each record type. How should I approach this?",
     "should_trigger": true,
-    "reason": "'Clean up after integration tests' is a listed trigger phrase and covered in the integration cleanup section."
+    "reason": "Concrete problem: post-test database cleanup → needs cleanup strategy"
   },
   {
     "id": "d-t14-polluted-db",
-    "query": "My integration tests are polluting the database — records from one test affect the next.",
+    "query": "I'm debugging my integration test suite and noticed that tests sometimes pass when run individually but fail when run as a suite. I think it's because records created in earlier tests are affecting later tests. How do I fix this data isolation issue?",
     "should_trigger": true,
-    "reason": "Test data pollution is a data isolation/cleanup concern — core to the skill."
+    "reason": "Concrete problem: test data leakage between tests → needs isolation/cleanup strategy"
   },
   {
     "id": "d-n05-setup-test-container",

From f5de0e90700492e2e5cb2476ed0773dac85e50e5 Mon Sep 17 00:00:00 2001
From: miroslavpojer <miroslav.pojer@absa.africa>
Date: Wed, 24 Jun 2026 15:55:58 +0200
Subject: [PATCH 4/5] feat: enhance test-data-management skill description for
 clarity and coverage

---
 skills/test-data-management/SKILL.md | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/skills/test-data-management/SKILL.md b/skills/test-data-management/SKILL.md
index beb0c78..ab6fdff 100644
--- a/skills/test-data-management/SKILL.md
+++ b/skills/test-data-management/SKILL.md
@@ -1,8 +1,12 @@
 ---
 name: test-data-management
-description: Test data setup factories parametrize
-license: Proprietary
-compatibility: GitHub Copilot
+description: |
+  Test data management: parametrize tests, use factories and builders, avoid duplication and test pollution.
+  Use this skill whenever the user asks about: parametrizing tests, creating test fixtures, test data factories,
+  test builders, avoiding duplicated test setup, cleaning up test data, avoiding test pollution, seeding databases,
+  using production data in tests, making tests deterministic, isolating tests, or any other test data strategy.
+  Invoke this skill even if the user doesn't explicitly ask for "test data" — if they're talking about test setup,
+  fixtures, factories, builders, parametrization, or data isolation in tests, this is the right skill.
 ---
 
 # Test Data Management

From 0d501a370f68e4bd76e18f291c886cd931bea9e5 Mon Sep 17 00:00:00 2001
From: miroslavpojer <miroslav.pojer@absa.africa>
Date: Wed, 24 Jun 2026 16:00:39 +0200
Subject: [PATCH 5/5] fix: correct spelling of "parametrized" in
 test-data-management documentation

---
 skills/test-data-management/SKILL.md | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/skills/test-data-management/SKILL.md b/skills/test-data-management/SKILL.md
index ab6fdff..0d8d43c 100644
--- a/skills/test-data-management/SKILL.md
+++ b/skills/test-data-management/SKILL.md
@@ -11,10 +11,10 @@ description: |
 
 # Test Data Management
 
-## Prefer data-driven and parametrised tests
+## Prefer data-driven and parametrized tests
 
-When a behaviour must be tested with multiple input combinations, prefer parametrised tests over
-duplicated test methods. One parametrised test with a data table is clearer, easier to extend,
+When a behaviour must be tested with multiple input combinations, prefer parametrized tests over
+duplicated test methods. One parametrized test with a data table is clearer, easier to extend,
 and reduces duplication.
 
 | Language | Tool | Pattern |
@@ -25,10 +25,10 @@ and reduces duplication.
 | TypeScript | Jest `test.each` | `test.each([[input, expected]])` |
 | Java | JUnit 5 `@ParameterizedTest` | `@ParameterizedTest @MethodSource` |
 
-**Use** parametrised tests when: the same behaviour is tested with ≥ 3 input combinations, or when
+**Use** parametrized tests when: the same behaviour is tested with ≥ 3 input combinations, or when
 combinations form a clear equivalence class table.
 
-**Do not use** parametrised tests when: each case requires fundamentally different setup or
+**Do not use** parametrized tests when: each case requires fundamentally different setup or
 assertions — use separate named tests instead.
 
 ## Use factory and builder patterns
@@ -124,6 +124,6 @@ def test_order_placed_at_is_set():
 
 ## Out of scope
 
-- Choosing test doubles (mock, stub, spy, fake) — handle in your project's mocking guide
+- Choosing test doubles (mock, stub, spy, fake) — handled separately
 - Writing test logic and assertions — handled separately
 - Reviewing tests for standards compliance — handled separately