Use python3 as the canonical command name course-wide (#104) (#105)

2026-06-23 20:25:05 -04:00
parent 7f439212ac
commit 95e5911957
102 changed files with 380 additions and 378 deletions
@@ -14,7 +14,7 @@ than pretending. NOTHING here pins a provider.
    EVAL_JUDGE_MODEL  # the model name to ask for

 Run it standalone to grade one sample:
-    python llm_judge.py "Add count command" "fix"
+    python3 llm_judge.py "Add count command" "fix"
 """

 import json
@@ -1,9 +1,9 @@
 """Run the eval set against one candidate and print a scorecard.

 Usage:
-    python run_eval.py candidates/current_model
-    python run_eval.py candidates/swapped_model
-    python run_eval.py candidates/current_model --threshold 0.9
+    python3 run_eval.py candidates/current_model
+    python3 run_eval.py candidates/swapped_model
+    python3 run_eval.py candidates/current_model --threshold 0.9

 A "candidate" is a directory containing a tasks.py that an agent produced. The
 runner imports that tasks.py, runs every case in eval_set.py against it, prints