"""Module 25 lab: an autonomous-but-supervised agent orchestrator.

This is the smallest honest version of the two patterns in the module:

  * issue-to-pr : read an issue, let an agent implement it, run the gate, produce a PR PROPOSAL.
  * self-heal   : run the gate; on failure, feed the failure back to the agent for a fix,
                  bounded by a retry cap; produce a PR PROPOSAL.

The load-bearing idea is in one place and you should be able to point at it: the agent NEVER
merges. Every path ends at `propose_pr()`: a branch, a commit, and the command *you* would run
to open the PR. The CI/review/security gates (Modules 14/15/10) and recovery (Module 12) are
what supervise it, not a human watching it type.

Run it two ways:

  1. Simulated (no agent needed, fully deterministic); see the machinery and the gates:

       python3 agent_runner.py issue-to-pr issue-delete-command.md --simulate good
       python3 agent_runner.py issue-to-pr issue-delete-command.md --simulate bad
       python3 agent_runner.py self-heal --simulate bad
       python3 agent_runner.py self-heal --simulate stuck

     Simulation works on a SELF-CONTAINED demo target (agent_demo.py + test_agent_demo.py) so
     it is deterministic and never corrupts your real tasks-app files. The gate it runs
     (ruff + pytest) is the real one, the same checks Module 14's CI runs.

  2. Real agent: drives your own agentic tool against the actual issue. Point AGENT_CMD at
     your tool's non-interactive / one-shot mode, then drop --simulate:

       export AGENT_CMD='your-agent-cli --print --prompt-file {prompt_file}'
       python3 agent_runner.py issue-to-pr issue-delete-command.md

Language: Python 3.10+. Standard library only.
"""

from __future__ import annotations

import argparse
import os
import shlex
import subprocess
import sys
import tempfile
from pathlib import Path

# Upper bound on gate runs in one self-heal session. cmd_self_heal asks the agent for a fix
# after every failed run except the last, so the agent gets RETRY_CAP - 1 fix attempts before
# the loop gives up and hands off to a human.
RETRY_CAP = 3

# Demo target the simulator works on, so simulation never touches your real cli.py / tasks.py.
DEMO_SRC = Path("agent_demo.py")
DEMO_TEST = Path("test_agent_demo.py")

# Vendor-neutral: where your committed AI config (Module 5) might live. Override with AGENT_CONFIG.
CONFIG_CANDIDATES = ["AGENTS.md", ".agent/instructions.md", "agent-config.md"]

# The checks run_gate() executes when the caller does not supply its own: (label, argv) pairs,
# run in order in the current directory.
DEFAULT_GATE_CHECKS: list[tuple[str, list[str]]] = [
    ("ruff (lint)", ["ruff", "check", "."]),
    ("pytest (tests)", ["pytest", "-q"]),
]


# --------------------------------------------------------------------------------------------------
# The gate: the same lint + test checks Module 14 runs in CI, run locally so they're reproducible.
# This is the structural supervision. It does not care whether a human or an agent wrote the change.
# --------------------------------------------------------------------------------------------------
def run_gate(checks: list[tuple[str, list[str]]] | None = None) -> tuple[bool, str]:
    """Run every gate check in the current directory and collect the results.

    Args:
        checks: (label, argv) pairs to run in order. Defaults to DEFAULT_GATE_CHECKS
            (ruff, then pytest) when omitted, which is how every caller in this file uses it.

    Returns:
        (passed, combined_output). ``passed`` is False if any command exits non-zero or is
        not installed; all checks are still run so the report covers every failure.
    """
    report: list[str] = []
    passed = True
    for label, cmd in DEFAULT_GATE_CHECKS if checks is None else checks:
        report.append(f"\n=== gate: {label} -> {' '.join(cmd)} ===")
        try:
            # errors="replace": a stray undecodable byte in tool output must not crash the gate.
            proc = subprocess.run(cmd, capture_output=True, text=True, errors="replace")
        except FileNotFoundError:
            report.append(f" ! {cmd[0]} not installed; run `pip install pytest ruff`. Treating as a gate FAIL.")
            passed = False
            continue
        # Only record streams that actually carry text, so the report has no empty entries.
        for stream in (proc.stdout, proc.stderr):
            if stream.strip():
                report.append(stream.rstrip())
        if proc.returncode != 0:
            passed = False
            report.append(f" -> FAILED ({label})")
    return passed, "\n".join(report)


# --------------------------------------------------------------------------------------------------
# The agent: real (your tool) or simulated (deterministic, for the lab).
# --------------------------------------------------------------------------------------------------
def find_config() -> Path | None:
    """Locate the project's standing agent instructions, if any.

    The AGENT_CONFIG environment variable wins when it names an existing file; otherwise the
    first entry of CONFIG_CANDIDATES that exists as a file (relative to the current directory)
    is used.

    Returns:
        The path of the config file, or None when nothing suitable is found.
    """
    override = os.environ.get("AGENT_CONFIG")
    # is_file(), not exists(): a directory here would only blow up later in read_text().
    if override and Path(override).is_file():
        return Path(override)
    for name in CONFIG_CANDIDATES:
        candidate = Path(name)
        if candidate.is_file():
            return candidate
    return None


def build_prompt(task: str, *, issue_path: Path | None = None, failure: str | None = None) -> str:
    """Assemble the agent's brief: standing config (Module 5) + the specific task (issue or failure).

    Args:
        task: Short label for the kind of work ("implement" / "fix" at the call sites). It is
            accepted for the callers' benefit but is not currently included in the prompt text.
        issue_path: Markdown file describing the issue to implement; its contents are embedded.
        failure: Output of a failing gate run; embedded in a fenced block with an instruction
            to fix the code rather than weaken the test.

    Returns:
        The prompt as a single newline-joined string.
    """
    parts = [
        "You are working in a Git repository on the current branch. Make the change directly in",
        "the files. Do not commit, push, or merge; just edit. Follow the project's conventions.",
    ]
    config = find_config()
    if config:
        # Read as UTF-8 explicitly so the prompt does not depend on the machine's locale.
        parts += ["", f"# Project conventions (from {config})", config.read_text(encoding="utf-8")]
    if issue_path:
        parts += ["", "# Task (issue to implement)", issue_path.read_text(encoding="utf-8")]
    if failure:
        parts += [
            "",
            "# A CI check just failed. Fix the CODE so it passes; do not weaken or delete",
            "# the test to make it pass. Here is the failing output:",
            "```",
            failure,
            "```",
        ]
    return "\n".join(parts)


def run_real_agent(prompt: str) -> None:
    """Drive the learner's agentic tool via AGENT_CMD. Kept vendor-neutral on purpose.

    If the AGENT_CMD template contains ``{prompt_file}``, the prompt is written to a temporary
    .md file and the placeholder is replaced with that file's path; otherwise the prompt is
    piped to the command's stdin. The temporary file is always removed afterwards.

    Args:
        prompt: The full brief produced by build_prompt().

    Raises:
        KeyError: AGENT_CMD is not set (main() checks this before any agent is run).
        SystemExit: AGENT_CMD is empty, the command cannot be found, or it exits non-zero.
    """
    template = os.environ["AGENT_CMD"]
    argv = shlex.split(template)
    if not argv:
        sys.exit("AGENT_CMD is empty; set it to your agent's headless command.")

    prompt_file: str | None = None
    try:
        if "{prompt_file}" in template:
            with tempfile.NamedTemporaryFile("w", suffix=".md", delete=False, encoding="utf-8") as fh:
                fh.write(prompt)
                prompt_file = fh.name
            # Substitute AFTER splitting: a temp path containing spaces or quotes must stay a
            # single argument instead of being re-tokenised by shlex.
            cmd = [token.replace("{prompt_file}", prompt_file) for token in argv]
            run_kwargs: dict = {}
        else:
            cmd = argv
            run_kwargs = {"input": prompt, "text": True}
        try:
            proc = subprocess.run(cmd, **run_kwargs)
        except FileNotFoundError:
            sys.exit(f"agent command not found ({cmd[0]}); check AGENT_CMD.")
    finally:
        if prompt_file is not None:
            os.unlink(prompt_file)

    if proc.returncode != 0:
        sys.exit(f"agent command exited non-zero ({proc.returncode}); aborting.")


# Simulated agent: writes a self-contained demo module so the gate has something real to judge.
def simulate_implement(variant: str) -> None:
    """Write the simulator's demo test and an initial implementation of ``discount``.

    Args:
        variant: "good" writes the correct percentage formula. Any other value (the callers
            pass "bad") writes the plausible-but-wrong version that treats the percent as a
            flat amount, which the demo test rejects.
    """
    DEMO_TEST.write_text(
        "from agent_demo import discount\n\n\n"
        "def test_discount_takes_a_percentage():\n"
        " # 10% off 200 is 180. A flat subtraction (200 - 10 = 190) is the plausible-but-wrong bug.\n"
        " assert discount(200, 10) == 180\n"
    )
    if variant == "good":
        DEMO_SRC.write_text("def discount(price, pct):\n return price - price * pct / 100\n")
    else:
        # 'bad': plausible but wrong, treats the percent as a flat amount.
        DEMO_SRC.write_text("def discount(price, pct):\n return price - pct\n")


def simulate_fix(variant: str, attempt: int) -> None:
    """Overwrite the demo implementation the way a simulated agent would "fix" it.

    Args:
        variant: "stuck" keeps writing wrong code; anything else (the CLI passes "bad") writes
            the correct formula.
        attempt: 1-based fix-attempt number; only used to make each "stuck" fix differ.
    """
    if variant == "stuck":
        # The "agent" keeps producing plausible, still-wrong fixes, so the loop must give up,
        # not run forever.
        DEMO_SRC.write_text(f"def discount(price, pct):\n return price - pct - {attempt}\n")
    else:
        # 'bad': the very first fix attempt writes the correct formula, so the next gate run
        # (the second one overall) goes green.
        DEMO_SRC.write_text("def discount(price, pct):\n return price - price * pct / 100\n")


def simulate_cleanup() -> None:
    """Discard the simulator's demo artifacts.

    These are UNTRACKED new files, so `git restore` (which only touches tracked files) can't
    remove them, so the simulator cleans up after itself. Missing files are ignored.
    """
    for path in (DEMO_SRC, DEMO_TEST):
        path.unlink(missing_ok=True)


# --------------------------------------------------------------------------------------------------
# The endpoint every path shares: a PR PROPOSAL. Never a merge.
# --------------------------------------------------------------------------------------------------
def in_git_repo() -> bool:
    """Return True when the current directory is inside a Git work tree.

    Returns False both outside a repository and when the `git` executable is not installed,
    so callers can treat "no Git" uniformly as "skip the Git steps".
    """
    try:
        probe = subprocess.run(["git", "rev-parse", "--is-inside-work-tree"], capture_output=True)
    except FileNotFoundError:
        return False
    return probe.returncode == 0


def ensure_branch(name: str) -> None:
    """Create and switch to the agent's working branch.

    The orchestrator owns this git step the same way agent-job.yml's runner does
    (`git switch -c`): you direct the automation and then verify the branch (`git branch`),
    instead of typing `git checkout` by hand. No-op outside a Git repo.

    Args:
        name: Branch to switch to; it is created first if it does not exist yet.

    Raises:
        SystemExit: `git switch` failed. Continuing would let the agent edit, and
            propose_pr() commit, on whatever branch happens to be checked out.
    """
    if not in_git_repo():
        return
    # Look the name up under refs/heads/ so a tag or remote ref with the same name is not
    # mistaken for an existing local branch.
    exists = subprocess.run(
        ["git", "rev-parse", "--verify", "--quiet", f"refs/heads/{name}"],
        capture_output=True,
    ).returncode == 0
    switch_cmd = ["git", "switch", name] if exists else ["git", "switch", "-c", name]
    if subprocess.run(switch_cmd).returncode != 0:
        sys.exit(f"could not switch to branch {name}; aborting so nothing lands on the wrong branch.")
    print(f"[git] working on branch {name} (the orchestrator created/switched it for you).")


def propose_pr(message: str) -> None:
    """Commit the change on the current branch and print how to open the PR. Never merges.

    Inside a Git repo this stages everything (`git add -A`), commits with ``message`` and
    prints the review/push commands. If staging or committing fails (for example because
    there is nothing to commit) that is reported instead of pointing the user at a commit
    that was never created. Outside a Git repo only an explanatory note is printed.

    Args:
        message: Commit message for the proposed change.
    """
    print("\n" + "=" * 80)
    print("GATE PASSED. Proposing a PR, NOT merging. A human reviews the diff (Module 10).")
    print("=" * 80)
    if in_git_repo():
        staged = subprocess.run(["git", "add", "-A"]).returncode == 0
        committed = staged and subprocess.run(["git", "commit", "-m", message]).returncode == 0
        if committed:
            branch = subprocess.run(
                ["git", "rev-parse", "--abbrev-ref", "HEAD"], capture_output=True, text=True
            ).stdout.strip()
            print("\nReview the change you're about to propose:")
            print(" git show HEAD # or: git diff main..HEAD")
            print("\nThen open the PR (nothing has left your machine yet):")
            print(f" git push -u origin {branch}")
            print(" # ...and open a pull request on your forge. CI + security gates run there.")
        else:
            print("\n[git] no commit was created (nothing to commit, or git reported an error above).")
            print("There is nothing new to propose from this run.")
    else:
        print("\n(Not a Git repo, so skipping commit. In your tasks-app this would commit to the branch.)")
    print("\nThe agent stops here. It cannot merge. That is the whole safety model.")


def reject(reason: str, gate_output: str, *, simulated: bool = False) -> None:
    """Report a failed gate and make clear that no PR was proposed.

    Args:
        reason: One-line explanation printed after "GATE FAILED:".
        gate_output: Combined output of the failing gate run, printed first.
        simulated: When True the simulator's untracked demo files are deleted; otherwise the
            branch is left untouched and the discard command is shown.
    """
    print(gate_output)
    print("\n" + "=" * 80)
    print(f"GATE FAILED: {reason}")
    print("No PR proposed.")
    if simulated:
        # The simulated agent's change is the UNTRACKED demo files, which `git restore` can't
        # touch. Discard them directly so the failed attempt leaves a clean tree.
        simulate_cleanup()
        print("Discarded the simulated agent's demo files (agent_demo.py, test_agent_demo.py).")
        print("(With a real agent editing tracked files, you'd discard with: git restore . # Module 2)")
    else:
        print("The branch is left as-is for you to inspect or discard:")
        print(" git restore . # throw the agent's change away (Module 2)")
    print("=" * 80)


# --------------------------------------------------------------------------------------------------
# The two patterns.
# --------------------------------------------------------------------------------------------------
def cmd_issue_to_pr(issue_path: Path, simulate: str | None) -> int:
    """Implement an issue with the (real or simulated) agent, gate it, and propose a PR.

    Args:
        issue_path: Markdown file describing the issue; its stem names the working branch.
        simulate: "good" / "bad" to use the simulated agent, or None to run the real one.

    Returns:
        0 when the gate passed and a PR was proposed, 1 when the gate rejected the change.

    Raises:
        SystemExit: In real-agent mode, when the issue file does not exist. This is checked
            before any branch is created so a typo leaves the repository untouched.
    """
    # The simulator never reads the issue, so only the real agent needs the file to exist.
    if not simulate and not issue_path.is_file():
        sys.exit(f"issue file not found: {issue_path}")

    print(f"[issue-to-pr] brief: {issue_path}")
    ensure_branch(f"agent/{issue_path.stem}")
    if simulate:
        print(f"[issue-to-pr] simulating a '{simulate}' agent on the self-contained demo target.")
        simulate_implement(simulate)
    else:
        run_real_agent(build_prompt("implement", issue_path=issue_path))

    ok, gate_output = run_gate()
    if ok:
        print(gate_output)
        propose_pr(f"Agent: implement {issue_path.stem}")
        return 0
    reject("the agent's change does not pass the gate", gate_output, simulated=bool(simulate))
    return 1


def cmd_self_heal(simulate: str | None) -> int:
    """Run the gate and, while it is red, ask the agent for fixes up to a hard cap.

    The gate is run at most RETRY_CAP times. After every failed run except the last the agent
    is asked for a fix, so it gets RETRY_CAP - 1 fix attempts before a human takes over.

    Args:
        simulate: "bad" / "stuck" to use the simulated agent, or None to run the real one.

    Returns:
        0 when the gate went green and a PR was proposed, 2 when the loop gave up.
    """
    ensure_branch("agent/self-heal")
    # Establish a failing state to heal. In a real pipeline this is "CI just went red on a push".
    if simulate:
        print(f"[self-heal] simulating a red build ('{simulate}') on the demo target.")
        simulate_implement("bad")
    else:
        print("[self-heal] running the gate on the current working tree to find the failure...")

    fix_budget = RETRY_CAP - 1
    for attempt in range(1, RETRY_CAP + 1):
        ok, gate_output = run_gate()
        print(gate_output)
        if ok:
            print(f"\n[self-heal] gate is green after {attempt - 1} fix attempt(s).")
            propose_pr("Agent: self-healing fix for failing CI")
            return 0
        if attempt > fix_budget:
            # The last gate run was only there to judge the final fix; no more fixes are asked for.
            break
        print(f"\n[self-heal] gate red, attempt {attempt}/{fix_budget}: asking the agent for a fix.")
        if simulate:
            simulate_fix(simulate, attempt)
        else:
            run_real_agent(build_prompt("fix", failure=gate_output))

    print("\n" + "=" * 80)
    print(f"SELF-HEAL GAVE UP after {fix_budget} attempts. Handing off to a human, NOT looping forever.")
    print("This cap is what stops an agent burning a runner bill chasing a flaky or impossible fix.")
    if simulate:
        # Same clean-up reject() performs: a failing test_agent_demo.py left in the tree would
        # turn every later gate run in this directory red.
        simulate_cleanup()
        print("Discarded the simulated agent's demo files (agent_demo.py, test_agent_demo.py).")
    print("=" * 80)
    return 2


def main(argv: list[str]) -> int:
    """Parse the command line and dispatch to the selected pattern.

    Args:
        argv: Command-line arguments without the program name.

    Returns:
        The exit status of the chosen command.

    Raises:
        SystemExit: On invalid arguments (from argparse), or when neither --simulate nor the
            AGENT_CMD environment variable is provided.
    """
    parser = argparse.ArgumentParser(description="Autonomous-but-supervised agent orchestrator (Module 25).")
    sub = parser.add_subparsers(dest="command", required=True)

    p_itp = sub.add_parser("issue-to-pr", help="implement an issue and propose a PR")
    p_itp.add_argument("issue", type=Path, help="path to the issue markdown file")
    p_itp.add_argument("--simulate", choices=["good", "bad"], help="run without a real agent")

    p_sh = sub.add_parser("self-heal", help="fix a failing gate, bounded by a retry cap, and propose a PR")
    p_sh.add_argument("--simulate", choices=["bad", "stuck"], help="run without a real agent")

    args = parser.parse_args(argv)
    if not args.simulate and "AGENT_CMD" not in os.environ:
        sys.exit("No --simulate and no AGENT_CMD set. Set AGENT_CMD to your agent's headless command, "
                 "or pass --simulate to run the deterministic demo.")
    if args.command == "issue-to-pr":
        return cmd_issue_to_pr(args.issue, args.simulate)
    return cmd_self_heal(args.simulate)


if __name__ == "__main__":
    raise SystemExit(main(sys.argv[1:]))