Testing/CI/tooling consistency (#9,#20,#21,#22,#23,#28) (#59)
Co-authored-by: claude <claude@jpaul.io> Co-committed-by: claude <claude@jpaul.io>
This commit was merged in pull request #59.
This commit is contained in:
@@ -214,6 +214,10 @@ You're reviewing a branch that adds a `clear` command to the tasks-app. The diff
|
||||
python reviewer.py apply my-review.json
|
||||
```
|
||||
|
||||
(If your assistant wrapped the JSON in a ```` ```json ```` code fence even though the prompt said
|
||||
"JSON only," don't worry — `apply` tolerates a fenced or prose-wrapped response and reads the JSON
|
||||
out of it.)
|
||||
|
||||
4. **Make the human decision.** Open `feature.patch` and check the agent's headline claim: the
|
||||
`clear` branch in `cli.py` never calls `save(tlist)`, so it prints "cleared all tasks" while
|
||||
`tasks.json` is untouched — a silent no-op, the exact kind of plausibility trap Module 10 trained
|
||||
|
||||
@@ -19,6 +19,24 @@ from pathlib import Path
|
||||
|
||||
HERE = Path(__file__).parent
|
||||
|
||||
|
||||
def load_json_response(path: Path):
    """Parse the JSON the AI returned.

    Chat assistants very often wrap their output in a ```json ... ``` code fence (or add a line of
    prose) even when told to "return only the JSON" — so a strict json.loads on the raw paste fails
    on the most likely real output. Try a strict parse first; if that fails, decode the first
    complete JSON value that begins at the first "{" and ignore whatever follows it. That survives
    a code fence or surrounding text, including trailing text that itself contains a "}".
    Stdlib only.

    Args:
        path: File holding the pasted response.

    Returns:
        The decoded JSON value: whatever json.loads yields for a clean paste, otherwise the
        object that starts at the first "{".

    Raises:
        json.JSONDecodeError: The text has no "{" (the strict-parse error is re-raised), or the
            text from the first "{" onward does not start with a valid JSON value.
        OSError: The file cannot be read (for example FileNotFoundError).
    """
    # Decode explicitly instead of relying on the platform default encoding, which is not UTF-8
    # everywhere and would garble the em dashes / smart quotes assistants like to emit.
    # "utf-8-sig" additionally drops a byte-order mark if an editor added one on save.
    raw = path.read_text(encoding="utf-8-sig")
    try:
        return json.loads(raw)
    except json.JSONDecodeError:
        start = raw.find("{")
        if start == -1:
            # Nothing that could be an object: surface the original strict-parse error.
            raise
        # raw_decode stops at the end of the first complete value, so a closing fence or a
        # trailing sentence (even one containing braces) no longer breaks the fallback.
        value, _end = json.JSONDecoder().raw_decode(raw[start:])
        return value
|
||||
|
||||
|
||||
PROMPT_HEADER = """\
|
||||
You are an assistive code reviewer. Follow the rubric below exactly, then review the diff that
|
||||
follows it. Return ONLY the JSON object the rubric specifies — no prose before or after.
|
||||
@@ -40,7 +58,7 @@ def cmd_prompt(args: argparse.Namespace) -> int:
|
||||
|
||||
def cmd_apply(args: argparse.Namespace) -> int:
|
||||
try:
|
||||
review = json.loads(Path(args.response).read_text())
|
||||
review = load_json_response(Path(args.response))
|
||||
except (json.JSONDecodeError, FileNotFoundError) as exc:
|
||||
print(f"error: could not read a JSON review from {args.response}: {exc}")
|
||||
return 1
|
||||
|
||||
@@ -39,6 +39,23 @@ def allowed_labels(taxonomy_text: str) -> set[str]:
|
||||
return set(LABEL_RE.findall(taxonomy_text))
|
||||
|
||||
|
||||
def load_json_response(path: Path):
    """Parse the JSON the AI returned.

    Chat assistants very often wrap their output in a ```json ... ``` code fence (or add a line of
    prose) even when told to "return only the JSON" — so a strict json.loads on the raw paste fails
    on the most likely real output. Try a strict parse first; if that fails, decode the first
    complete JSON value that begins at the first "{" and ignore whatever follows it. That survives
    a code fence or surrounding text, including trailing text that itself contains a "}".
    Stdlib only.

    Args:
        path: File holding the pasted response.

    Returns:
        The decoded JSON value: whatever json.loads yields for a clean paste, otherwise the
        object that starts at the first "{".

    Raises:
        json.JSONDecodeError: The text has no "{" (the strict-parse error is re-raised), or the
            text from the first "{" onward does not start with a valid JSON value.
        OSError: The file cannot be read (for example FileNotFoundError).
    """
    # Decode explicitly instead of relying on the platform default encoding, which is not UTF-8
    # everywhere and would garble the em dashes / smart quotes assistants like to emit.
    # "utf-8-sig" additionally drops a byte-order mark if an editor added one on save.
    raw = path.read_text(encoding="utf-8-sig")
    try:
        return json.loads(raw)
    except json.JSONDecodeError:
        start = raw.find("{")
        if start == -1:
            # Nothing that could be an object: surface the original strict-parse error.
            raise
        # raw_decode stops at the end of the first complete value, so a closing fence or a
        # trailing sentence (even one containing braces) no longer breaks the fallback.
        value, _end = json.JSONDecoder().raw_decode(raw[start:])
        return value
|
||||
|
||||
|
||||
def cmd_prompt(args: argparse.Namespace) -> int:
|
||||
taxonomy = Path(args.taxonomy).read_text()
|
||||
issue = Path(args.issue).read_text()
|
||||
@@ -49,7 +66,7 @@ def cmd_prompt(args: argparse.Namespace) -> int:
|
||||
def cmd_apply(args: argparse.Namespace) -> int:
|
||||
allowed = allowed_labels(Path(args.taxonomy).read_text())
|
||||
try:
|
||||
sug = json.loads(Path(args.response).read_text())
|
||||
sug = load_json_response(Path(args.response))
|
||||
except (json.JSONDecodeError, FileNotFoundError) as exc:
|
||||
print(f"error: could not read a JSON suggestion from {args.response}: {exc}")
|
||||
return 1
|
||||
|
||||
Reference in New Issue
Block a user