Build out all 27 modules + capstone (#1)

Co-authored-by: claude <claude@jpaul.io> Co-committed-by: claude <claude@jpaul.io>
2026-06-22 12:19:01 -04:00
parent 4bd586bbd0
commit 2684095e2f
117 changed files with 15131 additions and 1 deletions
@@ -0,0 +1,191 @@
+#!/usr/bin/env python3
+"""orient.py — build a factual orientation pack for a repo you didn't write.
+
+Run it from the root of a cloned repo. It prints a Markdown summary of *ground truth*
+about the codebase — size, languages, project signals, the biggest (often most central)
+files, the top-level layout, and likely build/test commands — that you can paste in as the
+opening context for an AI session before asking it to map or change anything.
+
+The point is NOT to replace the AI's own exploration. It's to anchor that exploration in
+facts the model can't hallucinate: real file names, real counts, real entry points. The AI
+then verifies and deepens this; you never let it map from vibes alone.
+
+No dependencies. Standard library only. Works on any OS with Python 3.10+ and git.
+
+    python orient.py                 # print the pack
+    python orient.py > ORIENT.md     # save it to hand to the AI (don't commit it)
+"""
+
+from __future__ import annotations
+
+import subprocess
+import sys
+from collections import Counter
+from pathlib import Path
+
+# Files whose mere presence tells you how the project is built, tested, shipped, and configured.
+# (key file/dir -> what its presence means). Kept tool- and language-agnostic on purpose.
+SIGNALS: dict[str, str] = {
+    "pyproject.toml": "Python project (PEP 621 / poetry / hatch)",
+    "setup.py": "Python project (legacy setuptools)",
+    "requirements.txt": "Python dependencies (pip)",
+    "package.json": "Node/JS project",
+    "pnpm-lock.yaml": "Node project (pnpm)",
+    "yarn.lock": "Node project (yarn)",
+    "go.mod": "Go module",
+    "Cargo.toml": "Rust crate",
+    "pom.xml": "Java/Maven project",
+    "build.gradle": "Java/Kotlin/Gradle project",
+    "Gemfile": "Ruby project",
+    "composer.json": "PHP project",
+    "Makefile": "Make targets (often the real entry point for build/test)",
+    "Dockerfile": "Containerized (Module 16)",
+    "docker-compose.yml": "Multi-service local stack (Module 16)",
+    "compose.yaml": "Multi-service local stack (Module 16)",
+    ".github": "GitHub Actions / project meta",
+    ".gitea": "Gitea Actions",
+    ".gitlab-ci.yml": "GitLab CI",
+    "tox.ini": "Python test matrix",
+    "README.md": "Has a README — read it first",
+    "CONTRIBUTING.md": "Has contributor guidance — read before changing",
+    "ARCHITECTURE.md": "Has an architecture doc — rare and valuable",
+    "AGENTS.md": "Has a committed AI instructions file (Module 5)",
+    "CLAUDE.md": "Has a committed AI instructions file (Module 5)",
+}
+
+# Common test-runner hints keyed off a present signal file.
+TEST_HINTS: dict[str, str] = {
+    "pyproject.toml": "pytest    (or: python -m pytest)",
+    "tox.ini": "tox",
+    "package.json": "npm test    (check the \"scripts\" block for the real command)",
+    "go.mod": "go test ./...",
+    "Cargo.toml": "cargo test",
+    "Makefile": "make test    (if a 'test' target exists)",
+    "pom.xml": "mvn test",
+    "Gemfile": "bundle exec rspec    (or rake test)",
+}
+
+CODE_EXTS = {
+    ".py", ".js", ".ts", ".jsx", ".tsx", ".go", ".rs", ".java", ".kt", ".rb",
+    ".php", ".c", ".h", ".cc", ".cpp", ".hpp", ".cs", ".swift", ".scala", ".sh",
+}
+
+
+def git(*args: str) -> str:
+    """Run a git command, return stdout (stripped), or "" on failure."""
+    try:
+        out = subprocess.run(
+            ["git", *args],
+            capture_output=True, text=True, check=True,
+        )
+        return out.stdout.strip()
+    except (subprocess.CalledProcessError, FileNotFoundError):
+        return ""
+
+
+def tracked_files() -> list[str]:
+    listing = git("ls-files")
+    return [line for line in listing.splitlines() if line]
+
+
+def line_count(path: str) -> int:
+    try:
+        with open(path, "rb") as fh:
+            return sum(1 for _ in fh)
+    except OSError:
+        return 0
+
+
+def main() -> int:
+    if not Path(".git").exists() and not git("rev-parse", "--is-inside-work-tree"):
+        print("Not inside a git repository. cd into a cloned repo first.", file=sys.stderr)
+        return 1
+
+    files = tracked_files()
+    if not files:
+        print("No tracked files found (is this an empty or non-git repo?).", file=sys.stderr)
+        return 1
+
+    out: list[str] = []
+    w = out.append
+
+    # --- identity -----------------------------------------------------------
+    remote = git("remote", "get-url", "origin") or "(no origin remote)"
+    branch = git("rev-parse", "--abbrev-ref", "HEAD") or "(unknown)"
+    total_commits = git("rev-list", "--count", "HEAD") or "?"
+
+    w("# Repo orientation pack\n")
+    w(f"- **Origin:** {remote}")
+    w(f"- **Branch:** {branch}")
+    w(f"- **Total commits:** {total_commits}")
+    w(f"- **Tracked files:** {len(files)}")
+
+    # --- languages ----------------------------------------------------------
+    ext_counts: Counter[str] = Counter()
+    for f in files:
+        ext = Path(f).suffix.lower() or "(none)"
+        ext_counts[ext] += 1
+    w("\n## Languages / file types (top 15 by file count)\n")
+    for ext, n in ext_counts.most_common(15):
+        marker = " <- code" if ext in CODE_EXTS else ""
+        w(f"- `{ext}`: {n}{marker}")
+
+    # --- project signals ----------------------------------------------------
+    present = {name for name in SIGNALS if Path(name).exists()}
+    w("\n## Project signals (what's present at the root)\n")
+    if present:
+        for name in SIGNALS:
+            if name in present:
+                w(f"- `{name}` — {SIGNALS[name]}")
+    else:
+        w("- (none of the usual manifests/CI/docs at the root — look one level down)")
+
+    # --- likely test command ------------------------------------------------
+    hints = [TEST_HINTS[name] for name in TEST_HINTS if name in present]
+    w("\n## Likely build/test command (verify before trusting)\n")
+    if hints:
+        for h in hints:
+            w(f"- `{h}`")
+    else:
+        w("- No obvious runner detected. Search the README and CI config for the real command.")
+
+    # --- biggest files (often the spine) ------------------------------------
+    sized = sorted(
+        ((line_count(f), f) for f in files if Path(f).suffix.lower() in CODE_EXTS),
+        reverse=True,
+    )[:15]
+    w("\n## Largest code files (often where the core logic lives)\n")
+    if sized:
+        for n, f in sized:
+            w(f"- {n:>6} lines  `{f}`")
+    else:
+        w("- (no recognized source files)")
+
+    # --- top-level layout ---------------------------------------------------
+    top_dirs: Counter[str] = Counter()
+    for f in files:
+        head = f.split("/", 1)[0]
+        top_dirs[head] += 1
+    w("\n## Top-level layout (entries by tracked-file count)\n")
+    for name, n in sorted(top_dirs.items(), key=lambda kv: (-kv[1], kv[0])):
+        kind = "dir" if "/" in next(p for p in files if p.split("/", 1)[0] == name) else "file"
+        w(f"- `{name}`{'/' if kind == 'dir' else ''} — {n}")
+
+    # --- recent activity ----------------------------------------------------
+    recent = git("log", "--oneline", "-10")
+    w("\n## Last 10 commits (the project's recent direction)\n")
+    w("```")
+    w(recent or "(no history)")
+    w("```")
+
+    w("\n---")
+    w("> Generated by orient.py. These are *facts*, not conclusions. Hand them to the AI as the")
+    w("> opening context, then make it verify and map the areas you actually care about before")
+    w("> it changes anything.")
+
+    print("\n".join(out))
+    return 0
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())
@@ -0,0 +1,32 @@
+# Skill: Map this repo
+
+A navigation playbook (a Module 21 skill) for orienting in a codebase you didn't write.
+Point your agentic tool at this file as a skill, or paste it in as instructions. The goal is a
+**read-only** mental model — no edits happen here.
+
+## When to use
+At the start of any session on an unfamiliar repo, before any change is discussed.
+
+## Rules
+- **Read only.** Do not edit, create, or delete files while mapping. No exceptions.
+- **Cite real paths.** Every claim about the code must point to a file and, ideally, a line range.
+  If you can't cite it, say "unverified" instead of guessing.
+- **Breadth before depth.** Establish the whole shape before diving into any one area.
+- **No conclusions from file names alone.** A file called `auth.py` may not be where auth lives.
+
+## Steps
+1. Read the orientation pack (from `orient.py`), the README, and any `CONTRIBUTING`,
+   `ARCHITECTURE`, or committed AI-instructions file. Treat these as claims to verify, not truth.
+2. Identify the **entry points**: how does this thing start? (CLI `main`, web server, library
+   exports.) Name the exact file(s).
+3. Trace **one representative request/command end to end** — from entry point to where it does its
+   real work and back. List the files it passes through, in order.
+4. Produce an **architecture summary** (max ~1 page):
+   - One paragraph: what this project does and how it's structured.
+   - A "where things live" table: concern -> directory/file.
+   - The build/test/run commands, confirmed against the README or CI config.
+   - 3-5 things that surprised you or look risky to touch.
+5. List **open questions** you could not resolve from the code. Do not paper over them.
+
+## Output
+A single Markdown summary. End with: "Verified against: <list of files actually read>."
@@ -0,0 +1,39 @@
+# Skill: Safe scoped change
+
+A safe-change playbook (a Module 21 skill) for modifying a codebase you don't fully understand.
+Use it only **after** `map-this-repo` has produced an architecture summary. The whole bet of this
+skill is: small, scoped, tested, reviewable — never a sweeping rewrite.
+
+## When to use
+When making a concrete change to an unfamiliar repo.
+
+## Rules
+- **One change, one branch.** Create a branch first (Module 6). Never work on the default branch.
+- **Smallest diff that solves it.** Touch the fewest files possible. If the change wants to sprawl,
+  stop and re-scope — sprawl in code you don't understand is how you break things invisibly.
+- **No drive-by edits.** Do not reformat, rename, or "clean up" unrelated code. Those bury the real
+  change and make the diff unreviewable (Module 10).
+- **Match local conventions.** Mirror the surrounding code's style, naming, and patterns — not your
+  own defaults.
+- **Tests are the contract.** A change isn't done until it's covered (Module 13) and the existing
+  suite still passes.
+
+## Steps
+1. **State the change in one sentence** and the acceptance criterion ("done when X").
+2. **Find the blast radius first:** search for every caller/usage of what you're about to touch.
+   List them. If you can't enumerate them, you're not ready to change it.
+3. **Run the existing tests before touching anything** — establish a green baseline. If they were
+   already red, note it; don't let a pre-existing failure get blamed on you.
+4. **Make the minimal edit.** Keep it to the files identified in step 2.
+5. **Add or extend a test** that fails without your change and passes with it.
+6. **Run the full suite.** Everything that was green in the step 3 baseline must still be green,
+   and the test from step 5 must pass.
+7. **Self-review the diff** as if reviewing someone else's PR (Module 10): is every changed line
+   necessary and explained? Revert anything that isn't.
+8. **Write the PR description:** what changed, why, blast radius, how it was tested, what you did
+   NOT touch and why.
+
+## Stop conditions (escalate to a human instead of pushing on)
+- The change requires touching more than ~3 files or a "core" file from the architecture summary.
+- You can't enumerate the callers of what you're changing.
+- A test you don't understand starts failing.
+- The fix needs a design decision the existing code doesn't settle.