From c22b7ce4309d8de759ebd4a740b4e36ecc07509e Mon Sep 17 00:00:00 2001 From: miroslavpojer Date: Wed, 24 Jun 2026 13:18:01 +0200 Subject: [PATCH 1/4] Initial version. --- skills/test-mocking-patterns/SKILL.md | 223 ++++++++++++++++++ skills/test-mocking-patterns/evals/evals.json | 128 ++++++++++ .../evals/files/source-audit-service.py | 25 ++ .../source-email-notification-service.py | 34 +++ .../evals/files/wrong-patch-target.py | 51 ++++ .../evals/fixture-map.md | 35 +++ .../evals/trigger-eval.json | 110 +++++++++ 7 files changed, 606 insertions(+) create mode 100644 skills/test-mocking-patterns/SKILL.md create mode 100644 skills/test-mocking-patterns/evals/evals.json create mode 100644 skills/test-mocking-patterns/evals/files/source-audit-service.py create mode 100644 skills/test-mocking-patterns/evals/files/source-email-notification-service.py create mode 100644 skills/test-mocking-patterns/evals/files/wrong-patch-target.py create mode 100644 skills/test-mocking-patterns/evals/fixture-map.md create mode 100644 skills/test-mocking-patterns/evals/trigger-eval.json diff --git a/skills/test-mocking-patterns/SKILL.md b/skills/test-mocking-patterns/SKILL.md new file mode 100644 index 0000000..5e8d367 --- /dev/null +++ b/skills/test-mocking-patterns/SKILL.md @@ -0,0 +1,223 @@ +--- +name: test-mocking-patterns +description: > + Guides selection and implementation of test doubles (mock, stub, spy, fake, dummy) in unit + tests. Activate when deciding which double type to use, implementing a specific double, + debugging a non-working mock or stub, or choosing a patching strategy (where to patch, + scope, partial mocking, resetting between tests). + Triggers on: "should I use a spy or mock", "what test double should I use", "how do I mock + this", "how to stub HTTP calls", "fake vs mock vs stub", "how to patch", "where should I + patch", "mock not being called", "my mock isn't working", "how to spy on a method", + "stub vs mock", "when to use a fake", "how to mock a class method", "how to mock an + environment variable", "how do I verify a method was called". + Does NOT trigger for: writing full test suites (use test-unit-write), reviewing test files + against standards (use test-unit-review), managing test data and fixture builders + (use test-data-management), debugging test errors (TypeError, KeyError). + Pairs with test-unit-write and test-unit-review. +license: Proprietary +compatibility: GitHub Copilot +--- + +# Test Mocking Patterns + +## Step 1 — Classify the dependency + +Determine what kind of interaction the test needs to control or verify: + +| Situation | Recommended double | +|---|---| +| Dependency returns a value; no need to verify it was called | **Stub** | +| Dependency is called for its side effect; verify it was called | **Mock** | +| Need to verify the call AND use the real return value | **Spy** | +| Stateful in-process replacement of an interface | **Fake** | +| Argument must satisfy a type signature but is never used | **Dummy** | + +> Prefer stubs over mocks when possible — stubs make fewer assumptions about how the unit under test works internally, keeping tests less brittle. + +## Step 2 — Implement by language + +### Python — pytest-mock / unittest.mock + +| Double | How | +|---|---| +| **Stub** | `mocker.patch('module.Class.method', return_value=...)` | +| **Mock** | `mocker.patch('module.Class.method')` → assert with `.assert_called_once_with(...)` | +| **Spy** | `mocker.spy(obj, 'method_name')` — calls through, records calls | +| **Fake** | Implement a minimal in-memory class satisfying the same interface | +| **Dummy** | `MagicMock()` passed in but never called | + +#### Where to patch (Python) + +Patch the name **as it is imported in the module under test**, not where the object is defined. + +```python +# ✅ — patch where 'requests' is imported inside myapp.services.user_service +mocker.patch("myapp.services.user_service.requests.get", return_value=stub_response) + +# ❌ — patching the source module has no effect on the module under test +mocker.patch("requests.get", return_value=stub_response) +``` + +#### Scope and cleanup (Python) + +`mocker` fixtures from `pytest-mock` are automatically cleaned up after each test. +For `unittest.mock.patch`, use `with patch(...)` or the `@patch` decorator — both clean up on exit. + +```python +# ✅ — mocker fixture (auto cleanup) +def test_something(mocker): + mocker.patch("myapp.service.Client.send", return_value={"ok": True}) + ... + +# ✅ — context manager (explicit cleanup) +def test_something(): + with patch("myapp.service.Client.send", return_value={"ok": True}): + ... + +# ❌ — manual patch without cleanup leaks into subsequent tests +patcher = patch("myapp.service.Client.send") +mock_send = patcher.start() +# missing patcher.stop() +``` + +#### Assertions on mocks (Python) + +```python +# ✅ — specific argument assertions +mock_send.assert_called_once_with("user@example.com", subject="Welcome") +mock_send.assert_called_with(ANY, subject=ANY) # use ANY for irrelevant args + +# ❌ — always True; proves nothing +assert mock_send.called is not None +assert mock_send.call_count >= 0 +``` + +--- + +### JavaScript / TypeScript — Jest + +| Double | How | +|---|---| +| **Stub** | `jest.fn().mockReturnValue(...)` or `jest.spyOn(obj, 'method').mockReturnValue(...)` | +| **Mock** | `jest.mock('../path/to/module')` — full module replacement | +| **Spy** | `jest.spyOn(obj, 'method')` — calls through by default, records calls | +| **Fake** | Implement a class satisfying the interface | + +#### Module mock (Jest) + +```typescript +// ✅ — jest.mock is hoisted before imports +jest.mock('../httpClient'); +import { httpClient } from '../httpClient'; + +const mockPost = httpClient.post as jest.Mock; + +beforeEach(() => { + jest.clearAllMocks(); + mockPost.mockResolvedValue({ data: { id: 'order_1' } }); +}); + +// ❌ — inline mock does not intercept the already-imported module +it('places order', async () => { + const mockPost = jest.fn().mockResolvedValue(...); // never injected +}); +``` + +#### Cleanup (Jest) + +Call `jest.clearAllMocks()` in `beforeEach` to reset call records. Use `jest.resetAllMocks()` +to also remove implementations. Use `jest.restoreAllMocks()` to restore `jest.spyOn` originals. + +```typescript +beforeEach(() => { + jest.clearAllMocks(); // reset call counts and recorded calls +}); +``` + +#### Assertions (Jest) + +```typescript +// ✅ +expect(mockSend).toHaveBeenCalledWith('user@example.com', expect.objectContaining({ subject: 'Welcome' })); +expect(mockSend).toHaveBeenCalledTimes(1); + +// ❌ — always truthy; proves nothing +expect(mockSend.mock.calls.length).not.toBe(0); +``` + +--- + +### Scala — mockito-scala + +```scala +import org.mockito.MockitoSugar._ + +// Stub +val repo = mock[UserRepository] +when(repo.findById(any[UserId])).thenReturn(Some(testUser)) + +// Mock (verify call) +val notifier = mock[Notifier] +service.process(userId) +verify(notifier).send(eqTo(userId), any[String]) + +// Spy (calls through, records) +val realCache = new InMemoryCache() +val spyCache = spy(realCache) +service.process(userId) +verify(spyCache).get(eqTo(userId)) + +// Fake +class InMemoryUserRepository extends UserRepository { + private val store = mutable.Map.empty[UserId, User] + override def findById(id: UserId): Option[User] = store.get(id) + override def save(user: User): Unit = store.update(user.id, user) +} +``` + +Reset mocks between tests with `reset(mock)` in `beforeEach`, or declare mocks inside the test +body so they are function-scoped. + +## Step 3 — Verify the double is wired correctly + +Common mistakes to check regardless of language: + +| Symptom | Likely cause | Fix | +|---|---|---| +| Mock method never called but test passes | Mock not injected into the unit under test | Confirm the unit receives the mock, not the real object | +| `assert_called` fails but call is visible in logs | Patching the wrong namespace (Python) | Patch where the name is imported, not defined | +| Mock state bleeds between tests | No cleanup between tests | Use `clearAllMocks` / `mocker` fixture / reset | +| Test is brittle — breaks on unrelated internal changes | Over-specified arguments in assertion | Use matchers (`any()`, `expect.any(String)`) for args the test doesn't depend on | +| Test directly calls a private/internal method | Private methods are implementation detail | Test private logic indirectly through the public interface — verify the side effect on the injected collaborator instead | +| Stub returns wrong type | `return_value` is a raw dict but code calls `.json()` on it | Use `MagicMock(json=lambda: {...})` or return a correctly-shaped object | + +## Mocking environment variables (Python) + +When a module reads `os.environ` at call time, use `mocker.patch.dict` or `monkeypatch.setenv` — +both are automatically cleaned up after the test: + +```python +# ✅ — patch.dict approach (pytest-mock) +def test_uses_promotions_url(mocker): + mocker.patch.dict("os.environ", {"PROMOTIONS_API_URL": "http://test-promos"}) + result = service.fetch_promotions() + assert result is not None + +# ✅ — monkeypatch approach (built-in pytest fixture) +def test_uses_promotions_url(monkeypatch): + monkeypatch.setenv("PROMOTIONS_API_URL", "http://test-promos") + result = service.fetch_promotions() + assert result is not None + +# ❌ — direct mutation leaks state into subsequent tests +os.environ["PROMOTIONS_API_URL"] = "http://test-promos" +``` + +> Prefer `monkeypatch.setenv` for single variables; `mocker.patch.dict("os.environ", {...})` when +> setting multiple keys at once. + +## Routing + +- Writing a full test suite (generating all test methods) → use **test-unit-write** +- Reviewing a test file for standards violations → use **test-unit-review** +- Managing test data setup, factories, parametrisation → use **test-data-management** diff --git a/skills/test-mocking-patterns/evals/evals.json b/skills/test-mocking-patterns/evals/evals.json new file mode 100644 index 0000000..0409dfd --- /dev/null +++ b/skills/test-mocking-patterns/evals/evals.json @@ -0,0 +1,128 @@ +{ + "skill_name": "test-mocking-patterns", + "evals": [ + { + "id": 1, + "category": "happy-path", + "prompt": "I'm writing tests for EmailNotificationService in evals/files/source-email-notification-service.py. Should I use a mock or a stub for the HTTP client? What about the SMTP client and the metrics recorder?", + "expected_output": "The HTTP client should be stubbed (it returns a value that the code uses — a preferences response — so it's a query dependency). The SMTP client should be mocked (it's called for its side effect — sending the email — and the test needs to verify it was called with the correct address and message). The metrics recorder can be a mock if the test needs to verify the metric was recorded, or a dummy MagicMock if it's irrelevant to the scenario under test.", + "files": [ + "evals/files/source-email-notification-service.py" + ], + "expectations": [ + "HTTP client is classified as a stub (query — return value used by the code)", + "SMTP client is classified as a mock (command — side effect to be verified)", + "Metrics recorder is classified as mock or dummy depending on test intent", + "Explanation distinguishes between stubs (return value control) and mocks (call verification)", + "Specific implementation guidance for Python pytest-mock is provided" + ] + }, + { + "id": 2, + "category": "regression", + "prompt": "My mock isn't being called — the test is passing but I can see the real HTTP request going out. Here's the test file: evals/files/wrong-patch-target.py", + "expected_output": "The problem is that requests.get is being patched at its source (requests.get) instead of where it is imported in the module under test. The module imports requests as http_lib, so the correct patch target is 'source_email_notification_service.http_lib.get'. Patching 'requests.get' directly has no effect because the module under test already holds a reference to the function through its local import.", + "files": [ + "evals/files/wrong-patch-target.py" + ], + "expectations": [ + "Root cause identified: patching the source module instead of the import location", + "Correct patch target 'source_email_notification_service.http_lib.get' is given", + "Explanation of why the wrong-target patch has no effect is provided", + "Fixed test code with the correct patch target is shown" + ] + }, + { + "id": 3, + "category": "happy-path", + "prompt": "For the AuditService in evals/files/source-audit-service.py, should I use a spy or a mock for audit_writer? I want to verify that the formatted string was passed correctly.", + "expected_output": "Use a mock for audit_writer — it is an injected external dependency called for its side effect (write). To verify the formatted string, assert on mock_writer.write.assert_called_once_with('[AUDIT] user=u1 action=delete resource=doc-42'). A spy is not needed here because audit_writer is not a real object being partially replaced — it is an injected collaborator. The formatting logic (_format_entry) is internal and is best tested indirectly through the mock call assertion on audit_writer.", + "files": [ + "evals/files/source-audit-service.py" + ], + "expectations": [ + "Mock is recommended for audit_writer (injected external dependency, side-effect call)", + "Assertion on write called with the formatted string is demonstrated", + "Explanation of why spy is not needed for an injected dependency is given", + "Note that _format_entry is private and should not be tested directly" + ] + }, + { + "id": 4, + "category": "happy-path", + "prompt": "How do I mock an environment variable in a Python pytest test? The service reads os.environ['PROMOTIONS_API_URL'] at call time.", + "expected_output": "Use mocker.patch.dict('os.environ', {'PROMOTIONS_API_URL': 'http://test-promos'}) or monkeypatch.setenv('PROMOTIONS_API_URL', 'http://test-promos'). Both approaches are automatically cleaned up after the test. Do not set os.environ directly in the test body — that would leak state into subsequent tests.", + "files": [], + "expectations": [ + "mocker.patch.dict or monkeypatch.setenv is recommended", + "Automatic cleanup after the test is mentioned", + "Warning against direct os.environ mutation is given", + "Example code snippet is provided" + ] + }, + { + "id": 5, + "category": "negative", + "prompt": "Write the full unit tests for EmailNotificationService including all the mocks.", + "expected_output": "This is a test-writing request that routes to test-unit-write. test-mocking-patterns owns double-selection guidance, not full test suite generation.", + "files": [ + "evals/files/source-email-notification-service.py" + ], + "expectations": [ + "Skill correctly identifies this as a write task", + "Skill either defers to test-unit-write or notes that full suite generation is handled there", + "Skill may offer mock selection guidance as a precursor but does not generate the full test file" + ] + }, + { + "id": 6, + "category": "negative", + "prompt": "Review my test file and tell me if there are any standards violations.", + "expected_output": "This is a test review request that routes to test-unit-review. test-mocking-patterns does not apply the full standards checklist.", + "files": [], + "expectations": [ + "Skill correctly identifies this as a review task", + "Skill defers to test-unit-review rather than applying mocking guidance to a review request" + ] + }, + { + "id": 7, + "category": "paraphrase", + "prompt": "I want to isolate the OrderService from the database in my tests. What do I need to do?", + "expected_output": "Agent identifies this as a test double selection question. The database is a query dependency (it returns data that the service uses) so use a stub — configure it to return specific values for each test. Use mocker.MagicMock() for the repository and configure .find_by_id.return_value = {...}. Do not use a real database in unit tests. Notes: if the goal is to test the SQL queries themselves, that is an integration test — routes to test-integration-standards.", + "files": [], + "expectations": [ + "Identifies the database as a query dependency — use a stub", + "Provides mocker.MagicMock() as the stub implementation", + "Shows .return_value configuration", + "Notes that testing SQL queries requires Testcontainers (integration)" + ] + }, + { + "id": 8, + "category": "edge-case", + "prompt": "I'm patching a method on a class but the patch doesn't seem to be working — the real method is still being called. The class is imported at the top of the module under test as 'from payments import StripeGateway'.", + "expected_output": "The patch target must be the name in the module under test, not the source module. Because the module imports StripeGateway directly via 'from payments import StripeGateway', the correct patch target is 'order_service.StripeGateway' (where order_service is the module under test), not 'payments.StripeGateway'. Patching 'payments.StripeGateway' replaces the class in the source module but the module under test already has a direct reference to the original class.", + "files": [], + "expectations": [ + "Identifies wrong patch target as the root cause", + "Explains that the correct target is the import location in the module under test", + "Provides the correct patch path: 'order_service.StripeGateway'", + "Explains why patching the source module has no effect" + ] + }, + { + "id": 9, + "category": "output-format", + "prompt": "Show me the difference between a stub and a mock with a concrete Python example for an OrderService.", + "expected_output": "The response contains two fenced ```python code blocks. The first is labelled '# Stub: replaces a query dependency' and uses .return_value. The second is labelled '# Mock: verifies a command dependency' and uses .assert_called_once_with(). The distinction between stub (query) and mock (command) is explained in prose between the two code blocks. No prose appears inside the code blocks.", + "files": [], + "expectations": [ + "Two fenced ```python code blocks", + "First block labelled as Stub using .return_value", + "Second block labelled as Mock using .assert_called_once_with()", + "Stub vs mock distinction explained in prose between blocks" + ] + } + ] +} diff --git a/skills/test-mocking-patterns/evals/files/source-audit-service.py b/skills/test-mocking-patterns/evals/files/source-audit-service.py new file mode 100644 index 0000000..2090fef --- /dev/null +++ b/skills/test-mocking-patterns/evals/files/source-audit-service.py @@ -0,0 +1,25 @@ +""" +AuditService — source file where spy vs mock distinction matters. + +audit_action() delegates to a real in-process formatter before calling +the external audit log writer. Tests need to: + a) verify the external writer is called with the formatted string (mock) + b) also confirm the formatter runs correctly and its output is used (spy candidate) +""" + + +class AuditService: + def __init__(self, audit_writer): + self._writer = audit_writer + + def audit_action(self, user_id: str, action: str, resource: str) -> None: + """Format and persist an audit log entry. + + The formatted entry is passed to the audit_writer. The formatter + is internal to this class — it is not injected. + """ + entry = self._format_entry(user_id, action, resource) + self._writer.write(entry) + + def _format_entry(self, user_id: str, action: str, resource: str) -> str: + return f"[AUDIT] user={user_id} action={action} resource={resource}" diff --git a/skills/test-mocking-patterns/evals/files/source-email-notification-service.py b/skills/test-mocking-patterns/evals/files/source-email-notification-service.py new file mode 100644 index 0000000..a96a586 --- /dev/null +++ b/skills/test-mocking-patterns/evals/files/source-email-notification-service.py @@ -0,0 +1,34 @@ +""" +EmailNotificationService — source file used to pose mocking-decision questions. + +Has three kinds of dependency: + - An HTTP client for fetching user preferences (query — return value matters) + - An SMTP client for sending email (command — side effect, no useful return value) + - A metrics recorder for tracking send counts (fire-and-forget side effect) +""" + +import requests as http_lib + + +class EmailNotificationService: + def __init__(self, http_client=None, smtp_client=None, metrics=None): + self._http = http_client or http_lib + self._smtp = smtp_client + self._metrics = metrics + + def notify(self, user_id: str, message: str) -> bool: + """Send a notification email to a user. + + Fetches the user's email address from the preferences API, sends + the email via SMTP, and records a metric. + + Returns True if the email was sent successfully. + """ + prefs = self._http.get(f"https://prefs.example.com/users/{user_id}") + if prefs.status_code != 200: + return False + + email = prefs.json()["email"] + self._smtp.send(to=email, subject="Notification", body=message) + self._metrics.increment("email.sent", tags={"user_id": user_id}) + return True diff --git a/skills/test-mocking-patterns/evals/files/wrong-patch-target.py b/skills/test-mocking-patterns/evals/files/wrong-patch-target.py new file mode 100644 index 0000000..01bed3b --- /dev/null +++ b/skills/test-mocking-patterns/evals/files/wrong-patch-target.py @@ -0,0 +1,51 @@ +""" +Test file with a wrong-patch-target bug — eval fixture for test-mocking-patterns. + +The developer is patching `requests.get` at its source module location instead of +at the location where it is imported inside the module under test. +As a result, the real HTTP call is still made and the stub is never used. +""" + +import pytest +from unittest.mock import patch, MagicMock + +# The source module imports requests directly: +# import requests as http_lib +# So the correct patch target is: +# "source_email_notification_service.http_lib.get" +# But the test patches the source module instead — this has no effect. + +from source_email_notification_service import EmailNotificationService + + +def test_notify_returns_true_for_valid_user(): + """Should return True when the preferences API returns 200 and email is sent.""" + mock_response = MagicMock() + mock_response.status_code = 200 + mock_response.json.return_value = {"email": "alice@example.com"} + + # ❌ Wrong patch target — patching requests at its source has no effect + # on the module under test which has already imported it as http_lib. + with patch("requests.get", return_value=mock_response): + smtp_mock = MagicMock() + metrics_mock = MagicMock() + service = EmailNotificationService(smtp_client=smtp_mock, metrics=metrics_mock) + + # This will make a real HTTP call because the patch did not intercept http_lib.get + result = service.notify("user-1", "Hello!") + assert result is True + + +def test_notify_returns_false_when_prefs_api_unavailable(): + """Should return False when the preferences API returns a non-200 status.""" + mock_response = MagicMock() + mock_response.status_code = 503 + + # ❌ Same wrong patch target + with patch("requests.get", return_value=mock_response): + smtp_mock = MagicMock() + metrics_mock = MagicMock() + service = EmailNotificationService(smtp_client=smtp_mock, metrics=metrics_mock) + + result = service.notify("user-1", "Hello!") + assert result is False diff --git a/skills/test-mocking-patterns/evals/fixture-map.md b/skills/test-mocking-patterns/evals/fixture-map.md new file mode 100644 index 0000000..f996d98 --- /dev/null +++ b/skills/test-mocking-patterns/evals/fixture-map.md @@ -0,0 +1,35 @@ +# Test Mocking Patterns — Evals Fixture Map + +Links each eval test case to its fixture file(s). + +| Test ID | Category | Fixture | +|---------|--------------|---------| +| 1 | happy-path | evals/files/source-email-notification-service.py | +| 2 | regression | evals/files/wrong-patch-target.py | +| 3 | happy-path | evals/files/source-audit-service.py | +| 4 | happy-path | *(no file — env var patching question)* | +| 5 | negative | evals/files/source-email-notification-service.py | +| 6 | negative | *(no file — test review request)* | + +## Fixture → Scenario mapping + +| Fixture file | Scenario exercised | +|---|---| +| source-email-notification-service.py | Classify three different dependency types (HTTP query → stub, SMTP command → mock, metrics → mock/dummy) | +| wrong-patch-target.py | Diagnose wrong patch namespace; provide correct target | +| source-audit-service.py | Spy vs mock decision for an injected external dependency with internal formatting | + +## Coverage summary + +- happy-path: 3 +- regression: 1 +- negative: 2 +- **total: 6** + +## Trigger eval coverage + +| Direction | Count | +|---|---| +| should_trigger = true | 13 | +| should_trigger = false | 5 | +| **total** | **18** | diff --git a/skills/test-mocking-patterns/evals/trigger-eval.json b/skills/test-mocking-patterns/evals/trigger-eval.json new file mode 100644 index 0000000..a36ef25 --- /dev/null +++ b/skills/test-mocking-patterns/evals/trigger-eval.json @@ -0,0 +1,110 @@ +[ + { + "id": "m-t01-spy-or-mock", + "query": "Should I use a spy or a mock for the email client in this test?", + "should_trigger": true, + "reason": "'Should I use a spy or mock' is a primary listed trigger phrase." + }, + { + "id": "m-t02-what-test-double", + "query": "What test double should I use for the payment gateway?", + "should_trigger": true, + "reason": "'What test double should I use' is a listed trigger phrase." + }, + { + "id": "m-t03-how-to-mock", + "query": "How do I mock the database connection in these tests?", + "should_trigger": true, + "reason": "'How do I mock this' is a listed trigger phrase." + }, + { + "id": "m-t04-stub-http-calls", + "query": "How to stub HTTP calls in a pytest test?", + "should_trigger": true, + "reason": "'How to stub HTTP calls' is a listed trigger phrase." + }, + { + "id": "m-t05-fake-vs-mock", + "query": "What's the difference between a fake and a mock?", + "should_trigger": true, + "reason": "'Fake vs mock vs stub' is a listed trigger phrase." + }, + { + "id": "m-t06-how-to-patch", + "query": "How do I patch the requests library in my test?", + "should_trigger": true, + "reason": "'How to patch' is a listed trigger phrase." + }, + { + "id": "m-t07-where-to-patch", + "query": "Where should I patch the datetime.now() call — in the source or in the test?", + "should_trigger": true, + "reason": "'Where should I patch' is a listed trigger phrase." + }, + { + "id": "m-t08-mock-not-called", + "query": "My mock isn't being called — the real implementation keeps running.", + "should_trigger": true, + "reason": "'Mock not being called' and 'my mock isn't working' are listed trigger phrases." + }, + { + "id": "m-t09-mock-not-working", + "query": "The stub I set up is returning None even though I configured a return value.", + "should_trigger": true, + "reason": "Paraphrase of 'my mock isn't working' — setup/return value not taking effect." + }, + { + "id": "m-t10-spy-on-method", + "query": "How do I spy on the format_entry method without replacing it?", + "should_trigger": true, + "reason": "'How to spy on a method' is a listed trigger phrase." + }, + { + "id": "m-t11-mock-env-var", + "query": "How do I mock an environment variable in a pytest test?", + "should_trigger": true, + "reason": "'How to mock an environment variable' is a listed trigger phrase." + }, + { + "id": "m-t12-verify-method-called", + "query": "How do I verify that a method was called with specific arguments in Jest?", + "should_trigger": true, + "reason": "'How do I verify a method was called' is a listed trigger phrase." + }, + { + "id": "m-t13-dont-own-type", + "query": "Should I mock the boto3 S3 client directly or wrap it?", + "should_trigger": true, + "reason": "Mocking a type you don't own — covered by the 'wrap third-party libraries' rule." + }, + { + "id": "m-n01-write-full-tests", + "query": "Write unit tests for the PaymentService class.", + "should_trigger": false, + "reason": "Test-writing request — routes to test-unit-write." + }, + { + "id": "m-n02-review-tests", + "query": "Review this test file and tell me if it follows our standards.", + "should_trigger": false, + "reason": "Test review request — routes to test-unit-review." + }, + { + "id": "m-n03-test-data-factory", + "query": "How should I organise test data using factory functions?", + "should_trigger": false, + "reason": "Test data strategy — routes to test-data-management." + }, + { + "id": "m-n04-why-test-failing", + "query": "Why is my test throwing KeyError on the mock return value?", + "should_trigger": false, + "reason": "Runtime debugging of a specific error — not a double-selection or patching question." + }, + { + "id": "m-n05-parametrize", + "query": "How do I use parametrize to run the same test with different inputs?", + "should_trigger": false, + "reason": "Parametrised test data — routes to test-data-management." + } +] From 51e4ac8824f86df6e4997250325ec9df9011c12d Mon Sep 17 00:00:00 2001 From: miroslavpojer Date: Wed, 24 Jun 2026 13:29:42 +0200 Subject: [PATCH 2/4] feat: add test-mocking-patterns skill with comprehensive guidance and examples --- README.md | 1 + docs/README.md | 1 + docs/test-mocking-patterns.md | 115 ++++++++++++++++++ skills/test-mocking-patterns/SKILL.md | 106 ++++++++++++++-- skills/test-mocking-patterns/evals/evals.json | 41 +++++++ .../evals/fixture-map.md | 25 ++-- .../evals/trigger-eval.json | 18 +++ 7 files changed, 286 insertions(+), 21 deletions(-) create mode 100644 docs/test-mocking-patterns.md diff --git a/README.md b/README.md index 6b1f500..7e870f9 100644 --- a/README.md +++ b/README.md @@ -78,6 +78,7 @@ its purpose, trigger phrases, and full instructions. | Skill | Description | |------------------------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------| | **[pr-review](./skills/pr-review/)** | Pull request code review — reviews diffs for risk, security issues, API contract changes, dependency bumps, CI/CD and infrastructure changes. Produces concise Blocker / Important / Nit comments. | +| **[test-mocking-patterns](./skills/test-mocking-patterns/)** | Test double selection and implementation — classifies mock, stub, spy, fake, and dummy; guides patching strategy and cleanup for Python (pytest-mock), JavaScript/TypeScript (Jest), and Scala (mockito-scala). | | **[token-saving](./skills/token-saving/)** | Always-active response discipline — enforces brevity, no filler openers or closers, structured output, and a What/Why/How footer on code responses. Suspends on explicit "full detail" requests. | ## Finding More Skills diff --git a/docs/README.md b/docs/README.md index 1388ae5..58ecd7b 100644 --- a/docs/README.md +++ b/docs/README.md @@ -25,6 +25,7 @@ Navigation hub for all guides in this repository. Browse by category below. | Guide | Description | |----|----| | [PR Review](./pr-review.md) | How the PR review skill works, what sections it applies, and how to trigger it | +| [Test Mocking Patterns](./test-mocking-patterns.md) | Double selection, patching strategies, cleanup, and language-specific guidance | | [Token Saving](./token-saving.md) | Keeping AI responses concise — how the token-saving skill works and when it applies | > **Keep this index up to date.** When you add a new guide, add a row to the appropriate table above. diff --git a/docs/test-mocking-patterns.md b/docs/test-mocking-patterns.md new file mode 100644 index 0000000..dfa1b80 --- /dev/null +++ b/docs/test-mocking-patterns.md @@ -0,0 +1,115 @@ +# Test Mocking Patterns Skill + +The `test-mocking-patterns` skill guides test double selection and implementation. It activates when you need to decide which double to use (mock, stub, spy, fake, dummy), implement a double in Python, JavaScript/TypeScript, or Scala, or diagnose a mock that isn't working. + +--- + +## What it does + +Given a dependency in the code under test, the skill: + +1. **Classifies the dependency** — query (return value matters) vs command (side effect to verify) vs fire-and-forget +2. **Recommends the right double** — stub, mock, spy, fake, or dummy based on intent +3. **Provides implementation guidance** — correct patching paths, library calls, and cleanup +4. **Diagnoses broken mocks** — wrong patch target, missing cleanup, over-specified assertions, wrong return type + +--- + +## Double selection quick reference + +| Dependency interaction | Recommended double | +|---|---| +| Returns a value the unit under test uses; no need to verify the call | **Stub** | +| Called for a side effect; test must verify it was called | **Mock** | +| Need to verify the call AND preserve the real return value | **Spy** | +| Stateful in-process replacement of an interface | **Fake** | +| Required by the type signature but never invoked | **Dummy** | + +> Prefer stubs over mocks — stubs make fewer assumptions about internal behaviour, keeping tests less brittle. + +--- + +## Languages covered + +| Language | Libraries | +|---|---| +| Python | `pytest-mock`, `unittest.mock`, `responses`, `pytest-httpx`, `freezegun` | +| JavaScript / TypeScript | Jest (`jest.fn()`, `jest.mock()`, `jest.spyOn()`) | +| Scala | mockito-scala | + +--- + +## How to trigger it + +Ask naturally — the skill fires on intent: + +``` +should I use a mock or stub for the HTTP client? +what test double should I use for the payment gateway? +my mock isn't being called — the real implementation runs instead +where should I patch requests.get? +how do I mock an environment variable? +how do I verify a method was called with specific arguments? +should I mock the boto3 client directly or wrap it? +how do I freeze time in a pytest test? +``` + +> **Does NOT trigger** for writing full test suites (use `test-unit-write`), reviewing test files for standards violations (use `test-unit-review`), or managing test data and fixtures (use `test-data-management`). + +--- + +## Common patching mistakes + +| Symptom | Likely cause | Fix | +|---|---|---| +| Mock is never called; real code runs | Mock not injected, or wrong patch target | Confirm the unit receives the mock; patch where the name is imported | +| `assert_called` fails but real call visible in logs | Patching the source module, not the import location | Patch `mymodule.requests.get`, not `requests.get` | +| Mock state bleeds between tests | No cleanup | Use `mocker` fixture (Python) or `jest.clearAllMocks()` in `beforeEach` | +| Test breaks on unrelated internal changes | Over-specified assertions | Use `ANY` / `expect.any()` for irrelevant args | + +--- + +## Key principles + +### Patch where the name is imported (Python) + +```python +# Module under test: import requests as http_lib +# ✅ — patch the name in the module under test +mocker.patch("myapp.service.http_lib.get", return_value=stub_response) + +# ❌ — patching the source has no effect on the already-imported alias +mocker.patch("requests.get", return_value=stub_response) +``` + +### Don't mock what you don't own + +Avoid mocking third-party types (boto3, SQLAlchemy sessions, gRPC stubs) directly. Wrap them in a thin interface you control and mock that interface instead. The integration test verifying the real connector belongs in an integration test, not a unit test. + +### Use freezegun for datetime (Python) + +`datetime` is a C extension — patching it directly is fragile. Use `freezegun`: + +```python +from freezegun import freeze_time + +@freeze_time("2025-01-15 12:00:00") +def test_timestamp_is_fixed(): + ... +``` + +--- + +## Installation + +The skill is installed with the rest of the toolkit: + +```bash +npx skills add https://github.com/AbsaOSS/agentic-toolkit -g +``` + +To install only this skill: + +```bash +npx skills add https://github.com/AbsaOSS/agentic-toolkit -g --skill test-mocking-patterns +``` diff --git a/skills/test-mocking-patterns/SKILL.md b/skills/test-mocking-patterns/SKILL.md index 5e8d367..56e5ab3 100644 --- a/skills/test-mocking-patterns/SKILL.md +++ b/skills/test-mocking-patterns/SKILL.md @@ -1,19 +1,12 @@ --- name: test-mocking-patterns description: > - Guides selection and implementation of test doubles (mock, stub, spy, fake, dummy) in unit - tests. Activate when deciding which double type to use, implementing a specific double, - debugging a non-working mock or stub, or choosing a patching strategy (where to patch, - scope, partial mocking, resetting between tests). - Triggers on: "should I use a spy or mock", "what test double should I use", "how do I mock - this", "how to stub HTTP calls", "fake vs mock vs stub", "how to patch", "where should I - patch", "mock not being called", "my mock isn't working", "how to spy on a method", - "stub vs mock", "when to use a fake", "how to mock a class method", "how to mock an - environment variable", "how do I verify a method was called". - Does NOT trigger for: writing full test suites (use test-unit-write), reviewing test files - against standards (use test-unit-review), managing test data and fixture builders - (use test-data-management), debugging test errors (TypeError, KeyError). - Pairs with test-unit-write and test-unit-review. + Test double selection and implementation (mock, stub, spy, fake, dummy). Activate when + choosing a double type, implementing it, or debugging a broken mock or patch. + Triggers on: "spy or mock", "what test double", "how to stub HTTP", "fake vs mock vs stub", + "where to patch", "mock not being called", "mock env var", "verify method called". + Not for: writing tests (test-unit-write), test file review (test-unit-review), + test data (test-data-management). Pairs with test-unit-write, test-unit-review. license: Proprietary compatibility: GitHub Copilot --- @@ -92,6 +85,55 @@ assert mock_send.called is not None assert mock_send.call_count >= 0 ``` +#### HTTP stubbing (Python) + +For code that makes HTTP calls, prefer a dedicated interceptor library over patching `requests` by path: + +| Library | Works with | When to prefer | +|---|---|---| +| `responses` | `requests` | Your codebase uses `requests`; avoids maintaining a patch path | +| `pytest-httpx` | `httpx` | Your codebase uses `httpx` | + +```python +# ✅ — responses library: intercepted by URL, no patch path to maintain +import responses as rsps + +@rsps.activate +def test_fetch_preferences_returns_email(): + rsps.add(rsps.GET, "https://prefs.example.com/users/u1", + json={"email": "alice@example.com"}, status=200) + result = service.fetch_preferences("u1") + assert result["email"] == "alice@example.com" +``` + +Fall back to `mocker.patch` only when the HTTP client is injected and a library-level interceptor is unavailable. + +#### Mocking datetime / time (Python) + +When code calls `datetime.now()`, `date.today()`, or `time.time()`, use `freezegun` to fix the +clock rather than patching `datetime` directly — `datetime` is a C extension and partial patching +it is fragile: + +```python +from freezegun import freeze_time + +# ✅ — freeze_time decorator pins datetime.now() for the whole test +@freeze_time("2025-01-15 12:00:00") +def test_audit_entry_includes_timestamp(): + service = AuditService(writer=MagicMock()) + service.audit_action("u1", "delete", "doc-42") + # assertions on timestamp-dependent output here + +# ✅ — freeze_time context manager for a narrower scope +def test_report_uses_today(): + with freeze_time("2025-01-15"): + result = report_service.generate_daily_report() + assert result["date"] == "2025-01-15" + +# ❌ — patching datetime.datetime is fragile; fails for C-extension datetime +mocker.patch("myapp.service.datetime.datetime", ...) +``` + --- ### JavaScript / TypeScript — Jest @@ -216,8 +258,46 @@ os.environ["PROMOTIONS_API_URL"] = "http://test-promos" > Prefer `monkeypatch.setenv` for single variables; `mocker.patch.dict("os.environ", {...})` when > setting multiple keys at once. +## Don't mock what you don't own + +Avoid mocking types defined in third-party libraries (AWS SDKs, database drivers, gRPC stubs, +ORM sessions). Your test becomes coupled to SDK internals that can change without notice, and the +mock no longer represents what the real object does. + +Wrap the third-party type in a thin interface you control, then mock that interface: + +```python +# ❌ — mocking boto3 internals; test is coupled to AWS SDK response shape +mock_s3 = MagicMock() +mock_s3.put_object.return_value = {"ResponseMetadata": {"HTTPStatusCode": 200}} + +# ✅ — define a thin protocol you own; mock that instead +from typing import Protocol + +class StorageClient(Protocol): + def upload(self, key: str, data: bytes) -> None: ... + +class S3StorageClient: + def __init__(self, bucket: str): + self._s3 = boto3.client("s3") + self._bucket = bucket + + def upload(self, key: str, data: bytes) -> None: + self._s3.put_object(Bucket=self._bucket, Key=key, Body=data) + +# In unit tests: mock StorageClient — your interface, your contract +mock_storage = MagicMock(spec=StorageClient) +``` + +The same principle applies to: database driver cursors, gRPC channel stubs, ORM session objects, +and any external SDK type. + +> The integration test verifying that `S3StorageClient.upload` connects correctly to S3 or a +> LocalStack endpoint belongs in an integration test, not a unit test. + ## Routing - Writing a full test suite (generating all test methods) → use **test-unit-write** - Reviewing a test file for standards violations → use **test-unit-review** - Managing test data setup, factories, parametrisation → use **test-data-management** +- Testing that a real external dependency works (database queries, S3, gRPC) → use **test-integration-standards** diff --git a/skills/test-mocking-patterns/evals/evals.json b/skills/test-mocking-patterns/evals/evals.json index 0409dfd..c391b48 100644 --- a/skills/test-mocking-patterns/evals/evals.json +++ b/skills/test-mocking-patterns/evals/evals.json @@ -123,6 +123,47 @@ "Second block labelled as Mock using .assert_called_once_with()", "Stub vs mock distinction explained in prose between blocks" ] + }, + { + "id": 10, + "category": "happy-path", + "prompt": "I'm using Jest and my mocked function's call count keeps bleeding from one test into the next. I have `jest.mock('../emailClient')` at the top of the file and `const mockSend = emailClient.send as jest.Mock`. How do I fix the bleed?", + "expected_output": "Add `jest.clearAllMocks()` in a `beforeEach` block. This resets call counts and recorded arguments but keeps mock implementations in place. If you also want to remove implementations, use `jest.resetAllMocks()`. If you used `jest.spyOn()` and want to restore the original function, use `jest.restoreAllMocks()`. The difference matters: clearAllMocks keeps the mock working for the next test; resetAllMocks forces you to re-configure return values in each test.", + "files": [], + "expectations": [ + "jest.clearAllMocks() in beforeEach is the primary recommendation", + "Distinction between clearAllMocks, resetAllMocks, and restoreAllMocks is explained", + "Example beforeEach block is shown", + "Explanation of why call counts bleed without cleanup" + ] + }, + { + "id": 11, + "category": "edge-case", + "prompt": "Should I mock the boto3 S3 client directly in my unit tests, or is there a better approach?", + "expected_output": "Do not mock boto3 directly — it is a third-party type you don't own. Mocking it couples the test to AWS SDK internals that can change without notice. Instead, define a thin interface (e.g. a StorageClient Protocol) that your application code depends on, and mock that. Provide a concrete S3StorageClient implementation wrapping boto3 for production. The unit test mocks the interface; a separate integration test (using LocalStack or moto) verifies the real S3 connection.", + "files": [], + "expectations": [ + "Direct boto3 mocking is identified as problematic", + "The 'don't mock what you don't own' principle is stated", + "A thin wrapper interface (Protocol) is recommended", + "Integration test context (LocalStack / moto) is mentioned for the real connectivity test", + "Python example with spec=StorageClient or Protocol shown" + ] + }, + { + "id": 12, + "category": "happy-path", + "prompt": "My service calls datetime.now() to timestamp an audit entry. How do I control the timestamp in a pytest test so the assertion doesn't depend on wall-clock time?", + "expected_output": "Use the `freezegun` library. Apply `@freeze_time('2025-01-15 12:00:00')` as a decorator on the test function, or use `with freeze_time('2025-01-15'):` as a context manager for narrower scope. Do not try to patch `datetime.datetime` directly — datetime is a C extension and partial patching is fragile. With freeze_time in place, any call to datetime.now(), date.today(), or time.time() inside the test returns the frozen value.", + "files": [], + "expectations": [ + "freezegun is recommended as the primary tool", + "@freeze_time decorator example is shown", + "Context manager alternative is shown", + "Warning against patching datetime directly is given", + "Explanation of why freezegun works reliably (handles C extension)" + ] } ] } diff --git a/skills/test-mocking-patterns/evals/fixture-map.md b/skills/test-mocking-patterns/evals/fixture-map.md index f996d98..7ea0abc 100644 --- a/skills/test-mocking-patterns/evals/fixture-map.md +++ b/skills/test-mocking-patterns/evals/fixture-map.md @@ -10,26 +10,35 @@ Links each eval test case to its fixture file(s). | 4 | happy-path | *(no file — env var patching question)* | | 5 | negative | evals/files/source-email-notification-service.py | | 6 | negative | *(no file — test review request)* | +| 7 | paraphrase | *(no file — database isolation question)* | +| 8 | edge-case | *(no file — from...import patch target question)* | +| 9 | output-format | *(no file — stub vs mock code output format)* | +| 10 | happy-path | *(no file — Jest clearAllMocks question)* | +| 11 | edge-case | *(no file — don't mock what you don't own / boto3)* | +| 12 | happy-path | *(no file — freezegun datetime mocking)* | ## Fixture → Scenario mapping | Fixture file | Scenario exercised | |---|---| -| source-email-notification-service.py | Classify three different dependency types (HTTP query → stub, SMTP command → mock, metrics → mock/dummy) | +| source-email-notification-service.py | Classify three different dependency types (HTTP query → stub, SMTP command → mock, metrics → mock/dummy); negative routing for test-unit-write | | wrong-patch-target.py | Diagnose wrong patch namespace; provide correct target | | source-audit-service.py | Spy vs mock decision for an injected external dependency with internal formatting | ## Coverage summary -- happy-path: 3 -- regression: 1 -- negative: 2 -- **total: 6** +- happy-path: 5 (1, 3, 4, 10, 12) +- regression: 1 (2) +- negative: 2 (5, 6) +- paraphrase: 1 (7) +- edge-case: 2 (8, 11) +- output-format: 1 (9) +- **total: 12** ## Trigger eval coverage | Direction | Count | |---|---| -| should_trigger = true | 13 | -| should_trigger = false | 5 | -| **total** | **18** | +| should_trigger = true | 15 | +| should_trigger = false | 6 | +| **total** | **21** | diff --git a/skills/test-mocking-patterns/evals/trigger-eval.json b/skills/test-mocking-patterns/evals/trigger-eval.json index a36ef25..ab82be0 100644 --- a/skills/test-mocking-patterns/evals/trigger-eval.json +++ b/skills/test-mocking-patterns/evals/trigger-eval.json @@ -106,5 +106,23 @@ "query": "How do I use parametrize to run the same test with different inputs?", "should_trigger": false, "reason": "Parametrised test data — routes to test-data-management." + }, + { + "id": "m-t14-freeze-time", + "query": "How do I freeze time in a pytest test so datetime.now() returns a fixed value?", + "should_trigger": true, + "reason": "Controlling datetime in tests is a mocking / patching concern covered by the freezegun section." + }, + { + "id": "m-t15-dont-own-orm", + "query": "How should I handle the SQLAlchemy session in my unit tests — should I mock it?", + "should_trigger": true, + "reason": "Paraphrase of 'don't mock what you don't own' for ORM session objects — covered by the wrapping pattern." + }, + { + "id": "m-n06-real-s3-test", + "query": "How do I test that my S3 upload actually works against a real bucket?", + "should_trigger": false, + "reason": "Integration test against real infrastructure — routes to test-integration-standards, not double selection." } ] From a4bae859aa6c2594b65ee53f78eff1f30a4ed281 Mon Sep 17 00:00:00 2001 From: miroslavpojer Date: Wed, 24 Jun 2026 13:53:21 +0200 Subject: [PATCH 3/4] feat: enhance description for test-mocking-patterns skill with detailed guidance --- skills/test-mocking-patterns/SKILL.md | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/skills/test-mocking-patterns/SKILL.md b/skills/test-mocking-patterns/SKILL.md index 56e5ab3..f0d07df 100644 --- a/skills/test-mocking-patterns/SKILL.md +++ b/skills/test-mocking-patterns/SKILL.md @@ -1,12 +1,7 @@ --- name: test-mocking-patterns description: > - Test double selection and implementation (mock, stub, spy, fake, dummy). Activate when - choosing a double type, implementing it, or debugging a broken mock or patch. - Triggers on: "spy or mock", "what test double", "how to stub HTTP", "fake vs mock vs stub", - "where to patch", "mock not being called", "mock env var", "verify method called". - Not for: writing tests (test-unit-write), test file review (test-unit-review), - test data (test-data-management). Pairs with test-unit-write, test-unit-review. + Test double selection and implementation patterns. Use when choosing between mock, stub, spy, fake, or dummy; implementing test doubles; fixing a broken mock or patch target; controlling mocking scope; or troubleshooting why a mock isn't being called. Covers: how to classify dependencies (query vs command), where to patch (patch where it's imported, not where defined), patching with unittest.mock, Jest mocks, Scala mockito, freezegun for time control, and mocking environment variables. Not for: writing complete test suites (test-unit-write), reviewing tests for standards violations (test-unit-review), managing test data and fixtures (test-data-management), or testing actual external services (test-integration-standards). Pairs with test-unit-write, test-unit-review, and test-data-management. license: Proprietary compatibility: GitHub Copilot --- From 1cea09b35c086db54e0d0bcb93b3a2dd61afda51 Mon Sep 17 00:00:00 2001 From: miroslavpojer Date: Wed, 24 Jun 2026 13:59:58 +0200 Subject: [PATCH 4/4] feat: update description for test-mocking-patterns skill and remove license and compatibility sections --- skills/test-mocking-patterns/SKILL.md | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/skills/test-mocking-patterns/SKILL.md b/skills/test-mocking-patterns/SKILL.md index f0d07df..697d2e9 100644 --- a/skills/test-mocking-patterns/SKILL.md +++ b/skills/test-mocking-patterns/SKILL.md @@ -1,9 +1,7 @@ --- name: test-mocking-patterns description: > - Test double selection and implementation patterns. Use when choosing between mock, stub, spy, fake, or dummy; implementing test doubles; fixing a broken mock or patch target; controlling mocking scope; or troubleshooting why a mock isn't being called. Covers: how to classify dependencies (query vs command), where to patch (patch where it's imported, not where defined), patching with unittest.mock, Jest mocks, Scala mockito, freezegun for time control, and mocking environment variables. Not for: writing complete test suites (test-unit-write), reviewing tests for standards violations (test-unit-review), managing test data and fixtures (test-data-management), or testing actual external services (test-integration-standards). Pairs with test-unit-write, test-unit-review, and test-data-management. -license: Proprietary -compatibility: GitHub Copilot + Test double selection and implementation patterns. Use when choosing between mock, stub, spy, fake, or dummy; implementing test doubles; fixing a broken mock or patch target; controlling mocking scope; or troubleshooting why a mock isn't being called. Covers: how to classify dependencies (stub vs mock vs spy), where to patch (patch where it's imported, not where defined), patching with unittest.mock, Jest mocks, Scala mockito, freezegun for time control, and mocking environment variables. Not for: writing complete test suites (test-unit-write), reviewing tests for standards violations (test-unit-review), managing test data and fixtures (test-data-management), or testing actual external services (test-integration-standards). Pairs with test-unit-write, test-unit-review, and test-data-management. --- # Test Mocking Patterns