fix(testing/ci/tooling): consistent unittest, venv guidance, runnable lab commands

- #9: standardize the test chain on stdlib unittest (nothing-to-install, which keeps M13's claims true and its planted bug intact). Aligned M5/M14/M16 prose, M14 lab/test_tasks.py, and ci/gitlab starters; ruff stays the only pip install.
- #20: add venv / PEP 668 / which-python guidance to M20 (+ M14/M15 local installs); point MCP config at the venv's absolute python.
- #21: replace M21 Part D's empty `git diff HEAD~1` with `git log -p` (no .gitignore added — device preserved).
- #22: add a dependency-install step before M23's green baseline on a fresh clone.
- #23: M24 reviewer/triage now tolerate code-fence-wrapped JSON (stdlib only); feature.patch trap untouched.
- #28: fix M27 Part D CI snippet path (working-directory) and require the gate to target a varying candidate; swapped_model regression kept as the fixture.

Closes #9
Closes #20
Closes #21
Closes #22
Closes #23
Closes #28

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
Claude-Session: https://claude.ai/code/session_01TfzV5QvtPDz8LJS3Pu5VLT
2026-06-22 16:07:47 -04:00
parent a6a3cfdc50
commit f98eacb196
17 changed files with 216 additions and 82 deletions
@@ -214,6 +214,10 @@ You're reviewing a branch that adds a `clear` command to the tasks-app. The diff
   python reviewer.py apply my-review.json
   ```

+   (If your assistant wrapped the JSON in a ```` ```json ```` code fence even though the prompt said
+   "JSON only," don't worry — `apply` tolerates a fenced or prose-wrapped response and reads the JSON
+   out of it.)
+
 4. **Make the human decision.** Open `feature.patch` and check the agent's headline claim: the
   `clear` branch in `cli.py` never calls `save(tlist)`, so it prints "cleared all tasks" while
   `tasks.json` is untouched — a silent no-op, the exact kind of plausibility trap Module 10 trained
@@ -19,6 +19,24 @@ from pathlib import Path

 HERE = Path(__file__).parent

+
+def load_json_response(path: Path):
+    """Parse the JSON the AI returned.
+
+    Chat assistants very often wrap their output in a ```json ... ``` code fence (or add a line of
+    prose) even when told to "return only the JSON" — so a strict json.loads on the raw paste fails
+    on the most likely real output. Try a strict parse first; if that fails, fall back to the
+    outermost { ... } block, which survives a code fence or surrounding text. Stdlib only."""
+    raw = path.read_text()
+    try:
+        return json.loads(raw)
+    except json.JSONDecodeError:
+        start, end = raw.find("{"), raw.rfind("}")
+        if start != -1 and end > start:
+            return json.loads(raw[start : end + 1])
+        raise
+
+
 PROMPT_HEADER = """\
 You are an assistive code reviewer. Follow the rubric below exactly, then review the diff that
 follows it. Return ONLY the JSON object the rubric specifies — no prose before or after.
@@ -40,7 +58,7 @@ def cmd_prompt(args: argparse.Namespace) -> int:

 def cmd_apply(args: argparse.Namespace) -> int:
    try:
-        review = json.loads(Path(args.response).read_text())
+        review = load_json_response(Path(args.response))
    except (json.JSONDecodeError, FileNotFoundError) as exc:
        print(f"error: could not read a JSON review from {args.response}: {exc}")
        return 1
@@ -39,6 +39,23 @@ def allowed_labels(taxonomy_text: str) -> set[str]:
    return set(LABEL_RE.findall(taxonomy_text))


+def load_json_response(path: Path):
+    """Parse the JSON the AI returned.
+
+    Chat assistants very often wrap their output in a ```json ... ``` code fence (or add a line of
+    prose) even when told to "return only the JSON" — so a strict json.loads on the raw paste fails
+    on the most likely real output. Try a strict parse first; if that fails, fall back to the
+    outermost { ... } block, which survives a code fence or surrounding text. Stdlib only."""
+    raw = path.read_text()
+    try:
+        return json.loads(raw)
+    except json.JSONDecodeError:
+        start, end = raw.find("{"), raw.rfind("}")
+        if start != -1 and end > start:
+            return json.loads(raw[start : end + 1])
+        raise
+
+
 def cmd_prompt(args: argparse.Namespace) -> int:
    taxonomy = Path(args.taxonomy).read_text()
    issue = Path(args.issue).read_text()
@@ -49,7 +66,7 @@ def cmd_prompt(args: argparse.Namespace) -> int:
 def cmd_apply(args: argparse.Namespace) -> int:
    allowed = allowed_labels(Path(args.taxonomy).read_text())
    try:
-        sug = json.loads(Path(args.response).read_text())
+        sug = load_json_response(Path(args.response))
    except (json.JSONDecodeError, FileNotFoundError) as exc:
        print(f"error: could not read a JSON suggestion from {args.response}: {exc}")
        return 1