Files
ai-workflow-course/modules/25-autonomous-agents/lab/agent_runner.py
T
claude f925fd9645 fix(M7-27+capstone): apply AI-drives-git reframe, lesson=theory, de-slop course-wide
Phase 2 sweep — all modules are post-pivot, so the learner directs the AI agent
(Claude Code as the worked example) to do the git/setup work and verifies, instead
of typing commands by hand; no re-teaching basics. Lesson sections are theory with
example output; all execution lives in the labs. De-slopped ("prose" etc. gone
course-wide, em-dash density thinned). /path/to placeholders -> ~/ai-workflow-course.

Every deliberate teaching device verified intact: M10 ai-change.patch trap,
M12 bad-clear-snippet, M13/M27 planted pending_count bug, M15 secret+typosquat+MD5,
M18 BREAK=1, M21 absent-.gitignore, M22 poisoned skill, M24 no-op patch, M25 --simulate.
Labs compile/parse (py/sh/yaml/json); no junk.

Closes #83
Closes #86
Closes #89

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
Claude-Session: https://claude.ai/code/session_01TfzV5QvtPDz8LJS3Pu5VLT
2026-06-22 21:58:17 -04:00

288 lines
13 KiB
Python

"""Module 25 lab — an autonomous-but-supervised agent orchestrator.
This is the smallest honest version of the two patterns in the module:
* issue-to-pr — read an issue, let an agent implement it, run the gate, produce a PR PROPOSAL.
* self-heal — run the gate; on failure, feed the failure back to the agent for a fix,
bounded by a retry cap; produce a PR PROPOSAL.
The load-bearing idea is in one place and you should be able to point at it: the agent NEVER merges.
Every path ends at `propose_pr()` — a branch, a commit, and the command *you* would run to open the
PR. The CI/review/security gates (Modules 14/15/10) and recovery (Module 12) are what supervise it,
not a human watching it type.
Run it two ways:
1. Simulated (no agent needed, fully deterministic) — see the machinery and the gates:
python agent_runner.py issue-to-pr issue-delete-command.md --simulate good
python agent_runner.py issue-to-pr issue-delete-command.md --simulate bad
python agent_runner.py self-heal --simulate bad
python agent_runner.py self-heal --simulate stuck
Simulation works on a SELF-CONTAINED demo target (agent_demo.py + test_agent_demo.py) so it is
deterministic and never corrupts your real tasks-app files. The gate it runs (ruff + pytest) is
the real one — the same checks Module 14's CI runs.
2. Real agent — drives your own agentic tool against the actual issue. Point AGENT_CMD at your
tool's non-interactive / one-shot mode, then drop --simulate:
export AGENT_CMD='your-agent-cli --print --prompt-file {prompt_file}'
python agent_runner.py issue-to-pr issue-delete-command.md
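Language: Python 3.10+. The script itself imports only the standard library; the gate it runs
shells out to `ruff` and `pytest`, which must be installed (`pip install pytest ruff`).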
"""
from __future__ import annotations
import argparse
import os
import shlex
import subprocess
import sys
import tempfile
from pathlib import Path
# Upper bound on gate runs in self-heal. The loop asks the agent for a fix after every red run
# except the last, so the agent gets at most RETRY_CAP - 1 fix attempts before the orchestrator
# gives up and hands off to a human.
RETRY_CAP = 3
# Demo target the simulator works on, so simulation never touches your real cli.py / tasks.py.
# Both paths are relative to the current working directory.
DEMO_SRC = Path("agent_demo.py")
DEMO_TEST = Path("test_agent_demo.py")
# Vendor-neutral: where your committed AI config (Module 5) might live. Override with AGENT_CONFIG.
# Candidates are tried in this order; the first one that exists wins.
CONFIG_CANDIDATES = ["AGENTS.md", ".agent/instructions.md", "agent-config.md"]
# --------------------------------------------------------------------------------------------------
# The gate — the same lint + test checks Module 14 runs in CI, run locally so they're reproducible.
# This is the structural supervision. It does not care whether a human or an agent wrote the change.
# --------------------------------------------------------------------------------------------------
def run_gate() -> tuple[bool, str]:
    """Run the lint and test checks in the current directory.

    Executes `ruff check .` and then `pytest -q`, always running both so the caller sees every
    failure at once. A tool that is not installed counts as a failed check.

    Returns:
        (passed, transcript) — `passed` is True only when every check exited 0; `transcript`
        is the human-readable log of both checks, joined with newlines.
    """
    checks = {
        "ruff (lint)": ["ruff", "check", "."],
        "pytest (tests)": ["pytest", "-q"],
    }
    transcript: list[str] = []
    failures = 0

    for label, argv in checks.items():
        transcript.append(f"\n=== gate: {label} -> {' '.join(argv)} ===")
        try:
            result = subprocess.run(argv, capture_output=True, text=True)
        except FileNotFoundError:
            # The executable is missing from PATH: report it and move on to the next check.
            transcript.append(
                f" ! {argv[0]} not installed — `pip install pytest ruff`. Treating as a gate FAIL."
            )
            failures += 1
            continue

        transcript.append(result.stdout.rstrip())
        if result.stderr.strip():
            transcript.append(result.stderr.rstrip())
        if result.returncode != 0:
            failures += 1
            transcript.append(f" -> FAILED ({label})")

    return failures == 0, "\n".join(transcript)
# --------------------------------------------------------------------------------------------------
# The agent — real (your tool) or simulated (deterministic, for the lab).
# --------------------------------------------------------------------------------------------------
def find_config() -> Path | None:
    """Locate the project's committed agent-config file, if there is one.

    The AGENT_CONFIG environment variable wins when it names an existing path; otherwise the
    first existing entry of CONFIG_CANDIDATES (relative to the current directory) is used.

    Returns:
        The config path, or None when nothing was found.
    """
    override = os.environ.get("AGENT_CONFIG")
    if override and Path(override).exists():
        return Path(override)
    existing = (Path(candidate) for candidate in CONFIG_CANDIDATES if Path(candidate).exists())
    return next(existing, None)
def build_prompt(task: str, *, issue_path: Path | None = None, failure: str | None = None) -> str:
    """Assemble the agent's brief: standing config (Module 5) + the specific task (issue or failure).

    Args:
        task: Short label supplied by the caller ("implement" / "fix"). It documents intent at
            the call site; it is not currently included in the prompt text.
        issue_path: Markdown file describing the issue to implement; its content is appended.
        failure: Output of a failed gate run; appended inside a fenced block with an instruction
            to fix the code rather than the test.

    Returns:
        The prompt as a single newline-joined string.

    Raises:
        OSError: if the config file or `issue_path` cannot be read.
    """
    parts = ["You are working in a Git repository on the current branch. Make the change directly in",
             "the files. Do not commit, push, or merge — just edit. Follow the project's conventions."]
    config = find_config()
    if config:
        # Read explicitly as UTF-8: the locale default (e.g. cp1252 on Windows) would garble
        # non-ASCII characters in committed markdown before they ever reach the agent.
        parts += ["", f"# Project conventions (from {config})", config.read_text(encoding="utf-8")]
    if issue_path:
        parts += ["", "# Task (issue to implement)", issue_path.read_text(encoding="utf-8")]
    if failure:
        parts += ["", "# A CI check just failed. Fix the CODE so it passes — do not weaken or delete",
                  "# the test to make it pass. Here is the failing output:", "```", failure, "```"]
    return "\n".join(parts)
def run_real_agent(prompt: str) -> None:
    """Drive the learner's agentic tool via the AGENT_CMD environment variable.

    AGENT_CMD is parsed with shell-style quoting rules. If any argument contains the
    placeholder `{prompt_file}`, the prompt is written to a temporary UTF-8 `.md` file and the
    placeholder is replaced with that file's path; otherwise the prompt is piped to the
    command's stdin. Kept vendor-neutral on purpose.

    Args:
        prompt: The full brief to hand to the agent.

    Raises:
        KeyError: if AGENT_CMD is not set.
        SystemExit: if AGENT_CMD is empty, the executable cannot be found, or the command
            exits non-zero.
    """
    argv = shlex.split(os.environ["AGENT_CMD"])
    if not argv:
        sys.exit("AGENT_CMD is set but empty; point it at your agent's headless command.")

    prompt_file: str | None = None
    stdin_text: str | None = prompt
    try:
        if any("{prompt_file}" in arg for arg in argv):
            with tempfile.NamedTemporaryFile(
                "w", suffix=".md", delete=False, encoding="utf-8"
            ) as fh:
                prompt_file = fh.name
                fh.write(prompt)
            # Substitute AFTER splitting so a temp path containing spaces or quotes stays a
            # single argument instead of being re-tokenised.
            argv = [arg.replace("{prompt_file}", prompt_file) for arg in argv]
            stdin_text = None  # the agent reads the file; leave stdin inherited
        try:
            proc = subprocess.run(argv, input=stdin_text, text=True)
        except FileNotFoundError:
            sys.exit(f"agent command not found: {argv[0]} (check AGENT_CMD).")
    finally:
        # The temp file is created with delete=False so the child can open it; remove it here
        # on every exit path, including the sys.exit above.
        if prompt_file is not None:
            os.unlink(prompt_file)

    if proc.returncode != 0:
        sys.exit(f"agent command exited non-zero ({proc.returncode}); aborting.")
# Simulated agent: writes a self-contained demo module so the gate has something real to judge.
def simulate_implement(variant: str) -> None:
    """Write the simulated agent's "implementation" of the demo target.

    Always (re)writes the demo test file, then writes the demo source: the correct percentage
    formula when `variant` is "good", and for any other value a plausible-but-wrong version
    that subtracts the percent as a flat amount.
    """
    test_lines = [
        "from agent_demo import discount",
        "",
        "",
        "def test_discount_takes_a_percentage():",
        " # 10% off 200 is 180. A flat subtraction (200 - 10 = 190) is the plausible-but-wrong bug.",
        " assert discount(200, 10) == 180",
    ]
    DEMO_TEST.write_text("\n".join(test_lines) + "\n")

    correct_body = " return price - price * pct / 100\n"
    flat_amount_body = " return price - pct\n"  # the bug the gate is expected to catch
    body = correct_body if variant == "good" else flat_amount_body
    DEMO_SRC.write_text("def discount(price, pct):\n" + body)
def simulate_fix(variant: str, attempt: int) -> None:
    """Overwrite the demo source with the simulated agent's next "fix".

    For any variant other than "stuck" the fix is the correct formula, so the next gate run
    goes green. For "stuck" each attempt writes a different, still-wrong formula (the attempt
    number is baked into it), which forces the caller's retry cap to end the loop.
    """
    if variant != "stuck":
        DEMO_SRC.write_text("def discount(price, pct):\n return price - price * pct / 100\n")
        return
    # Plausible, still-wrong output that changes every attempt — the loop must give up.
    still_wrong = f"def discount(price, pct):\n return price - pct - {attempt}\n"
    DEMO_SRC.write_text(still_wrong)
def simulate_cleanup() -> None:
    """Delete the simulator's demo source and demo test, ignoring files that are already gone.

    The demo files are new, untracked files, so `git restore` (tracked files only) would not
    remove them; the simulator therefore removes them itself.
    """
    DEMO_SRC.unlink(missing_ok=True)
    DEMO_TEST.unlink(missing_ok=True)
# --------------------------------------------------------------------------------------------------
# The endpoint every path shares: a PR PROPOSAL. Never a merge.
# --------------------------------------------------------------------------------------------------
def in_git_repo() -> bool:
    """Return True when the current directory is inside a Git work tree.

    Returns False — rather than raising — when `git` is not installed, so callers can treat
    "no git" the same as "not a repository", matching how the gate tolerates missing tools.
    """
    try:
        probe = subprocess.run(["git", "rev-parse", "--is-inside-work-tree"],
                               capture_output=True, text=True)
    except FileNotFoundError:
        return False
    # Inside the .git directory or a bare repo, git exits 0 but prints "false"; require "true".
    return probe.returncode == 0 and probe.stdout.strip() == "true"
def ensure_branch(name: str) -> None:
    """Create and switch to the agent's working branch. The orchestrator owns this git step the same
    way agent-job.yml's runner does (`git switch -c`) — you direct the automation and then verify the
    branch (`git branch`), instead of typing `git checkout` by hand. No-op outside a Git repo.

    Args:
        name: Branch to work on; switched to if it already exists locally, otherwise created.

    Raises:
        SystemExit: if git refuses the switch. Continuing would let the agent edit (and later
            commit on) whatever branch happened to be checked out.
    """
    if not in_git_repo():
        return
    # Probe refs/heads/ explicitly so a tag or remote ref with the same name is not mistaken
    # for an existing local branch.
    exists = subprocess.run(["git", "rev-parse", "--verify", "--quiet", f"refs/heads/{name}"],
                            capture_output=True).returncode == 0
    switch_cmd = ["git", "switch", name] if exists else ["git", "switch", "-c", name]
    if subprocess.run(switch_cmd).returncode != 0:
        sys.exit(f"[git] could not switch to branch {name} (see git's message above); aborting "
                 "so the agent does not work on the wrong branch.")
    print(f"[git] working on branch {name} (the orchestrator created/switched it for you).")
def propose_pr(message: str) -> None:
    """Turn the working-tree change into a PR proposal: a local commit plus the push command.

    Inside a Git repo this stages everything (`git add -A`), commits with `message`, and prints
    how to review and push the result. Nothing is pushed or merged. Outside a repo it only
    explains what would have happened.

    Args:
        message: Commit message for the proposed change.
    """
    print("\n" + "=" * 80)
    print("GATE PASSED. Proposing a PR — NOT merging. A human reviews the diff (Module 10).")
    print("=" * 80)
    if in_git_repo():
        # Only print review/push instructions when a commit really exists; otherwise they
        # would point the human at a commit that was never made (e.g. nothing to commit, or
        # no git identity configured).
        staged = subprocess.run(["git", "add", "-A"]).returncode == 0
        committed = staged and subprocess.run(["git", "commit", "-m", message]).returncode == 0
        if committed:
            branch = subprocess.run(["git", "rev-parse", "--abbrev-ref", "HEAD"],
                                    capture_output=True, text=True).stdout.strip()
            print("\nReview the change you're about to propose:")
            print(" git show HEAD # or: git diff main..HEAD")
            print("\nThen open the PR (nothing has left your machine yet):")
            print(f" git push -u origin {branch}")
            print(" # ...and open a pull request on your forge. CI + security gates run there.")
        else:
            print("\n! git did not create a commit (see its output above) — there is nothing "
                  "new to review or push yet.")
    else:
        print("\n(Not a Git repo — skipping commit. In your tasks-app this would commit to the branch.)")
    print("\nThe agent stops here. It cannot merge. That is the whole safety model.")
def reject(reason: str, gate_output: str, *, simulated: bool = False) -> None:
    """Report a failed gate and explain how the rejected change is (or can be) discarded.

    Args:
        reason: One-line explanation printed after "GATE FAILED:".
        gate_output: The gate transcript, printed first.
        simulated: When True, the simulator's demo files are deleted here; when False, the
            branch is left untouched and the discard command is printed.
    """
    rule = "=" * 80
    print(gate_output)
    print("\n" + rule)
    print(f"GATE FAILED: {reason}")
    print("No PR proposed.")
    if not simulated:
        print("The branch is left as-is for you to inspect or discard:")
        print(" git restore . # throw the agent's change away (Module 2)")
    else:
        # The simulated change lives in untracked demo files that `git restore` cannot touch,
        # so remove them directly to leave a clean tree.
        simulate_cleanup()
        print("Discarded the simulated agent's demo files (agent_demo.py, test_agent_demo.py).")
        print("(With a real agent editing tracked files, you'd discard with: git restore . # Module 2)")
    print(rule)
# --------------------------------------------------------------------------------------------------
# The two patterns.
# --------------------------------------------------------------------------------------------------
def cmd_issue_to_pr(issue_path: Path, simulate: str | None) -> int:
    """Issue-to-PR pattern: let the agent implement an issue, gate it, then propose or reject.

    Args:
        issue_path: Markdown file holding the issue. Its stem names the working branch
            (`agent/<stem>`); its content is only read when a real agent is used.
        simulate: "good" / "bad" to use the deterministic simulator, or None to run the real
            agent configured through AGENT_CMD.

    Returns:
        0 when the gate passed and a PR was proposed, 1 when the gate rejected the change.

    Raises:
        SystemExit: in real-agent mode, if `issue_path` is not an existing file.
    """
    print(f"[issue-to-pr] brief: {issue_path}")
    if not simulate and not issue_path.is_file():
        # Fail before touching git: otherwise a typo in the path leaves a stray agent/<typo>
        # branch checked out and ends in a traceback from reading the missing file.
        sys.exit(f"issue file not found: {issue_path}")
    ensure_branch(f"agent/{issue_path.stem}")
    if simulate:
        print(f"[issue-to-pr] simulating a '{simulate}' agent on the self-contained demo target.")
        simulate_implement(simulate)
    else:
        run_real_agent(build_prompt("implement", issue_path=issue_path))
    ok, gate_output = run_gate()
    if ok:
        print(gate_output)
        propose_pr(f"Agent: implement {issue_path.stem}")
        return 0
    reject("the agent's change does not pass the gate", gate_output, simulated=bool(simulate))
    return 1
def cmd_self_heal(simulate: str | None) -> int:
    """Self-heal pattern: run the gate, feed failures back to the agent, stop at the retry cap.

    The gate runs at most RETRY_CAP times; after each red run except the last, the agent is
    asked for a fix (so at most RETRY_CAP - 1 fix attempts).

    Args:
        simulate: "bad" / "stuck" to use the deterministic simulator, or None to run the real
            agent configured through AGENT_CMD against the current working tree.

    Returns:
        0 when the gate is green (a PR is proposed if the agent had to fix something),
        2 when the cap was reached with the gate still red.
    """
    ensure_branch("agent/self-heal")
    # Establish a failing state to heal. In a real pipeline this is "CI just went red on a push".
    if simulate:
        print(f"[self-heal] simulating a red build ('{simulate}') on the demo target.")
        simulate_implement("bad")
    else:
        print("[self-heal] running the gate on the current working tree to find the failure...")

    for attempt in range(1, RETRY_CAP + 1):
        ok, gate_output = run_gate()
        print(gate_output)
        if ok:
            if attempt == 1:
                # Green before the agent did anything: there is no fix to propose, and
                # committing here would sweep unrelated working-tree changes into a "fix" PR.
                print("\n[self-heal] gate is already green — nothing to heal, no PR proposed.")
                return 0
            print(f"\n[self-heal] gate is green after {attempt - 1} fix attempt(s).")
            propose_pr("Agent: self-healing fix for failing CI")
            return 0
        if attempt >= RETRY_CAP:
            break  # final gate run was still red: no further fix attempts
        print(f"\n[self-heal] gate red — attempt {attempt}/{RETRY_CAP - 1}: asking the agent for a fix.")
        if simulate:
            simulate_fix(simulate, attempt)
        else:
            run_real_agent(build_prompt("fix", failure=gate_output))

    print("\n" + "=" * 80)
    print(f"SELF-HEAL GAVE UP after {RETRY_CAP - 1} attempts. Handing off to a human — NOT looping forever.")
    print("This cap is what stops an agent burning a runner bill chasing a flaky or impossible fix.")
    if simulate:
        # Same clean-up the rejection path does: the still-failing demo files are untracked and
        # would otherwise turn every later gate run in this directory red.
        simulate_cleanup()
        print("Discarded the simulated agent's demo files (agent_demo.py, test_agent_demo.py).")
    print("=" * 80)
    return 2
def main(argv: list[str]) -> int:
    """Parse the command line and dispatch to the requested pattern.

    Args:
        argv: Command-line arguments without the program name.

    Returns:
        The exit status of the selected command.

    Raises:
        SystemExit: on invalid arguments (raised by argparse), or when neither --simulate nor
            a non-blank AGENT_CMD is provided.
    """
    parser = argparse.ArgumentParser(description="Autonomous-but-supervised agent orchestrator (Module 25).")
    sub = parser.add_subparsers(dest="command", required=True)
    p_itp = sub.add_parser("issue-to-pr", help="implement an issue and propose a PR")
    p_itp.add_argument("issue", type=Path, help="path to the issue markdown file")
    p_itp.add_argument("--simulate", choices=["good", "bad"], help="run without a real agent")
    p_sh = sub.add_parser("self-heal", help="fix a failing gate, bounded by a retry cap, and propose a PR")
    p_sh.add_argument("--simulate", choices=["bad", "stuck"], help="run without a real agent")
    args = parser.parse_args(argv)
    # A blank AGENT_CMD (e.g. `export AGENT_CMD=`) is as unusable as a missing one; reject it
    # here instead of failing later when the empty command is executed.
    if not args.simulate and not os.environ.get("AGENT_CMD", "").strip():
        sys.exit("No --simulate and no AGENT_CMD set. Set AGENT_CMD to your agent's headless command, "
                 "or pass --simulate to run the deterministic demo.")
    if args.command == "issue-to-pr":
        return cmd_issue_to_pr(args.issue, args.simulate)
    return cmd_self_heal(args.simulate)
# Script entry point: forward the CLI arguments to main() and use its return value as the
# process exit status.
if __name__ == "__main__":
    sys.exit(main(sys.argv[1:]))