CI workflow to run evals and update the leaderboard (#43 )

Lets the leaderboard show real numbers without a local key: the new "Update Skill Leaderboard" workflow (workflow_dispatch) runs the eval harness with the ANTHROPIC_API_KEY secret, commits evals/results.json, and the Pages deploy re-renders the public leaderboard with real data. - .github/workflows/eval-leaderboard.yml: manual trigger, contents: write, runs run-evals.mjs + build-leaderboard.mjs, commits results.json. - deploy-playground.yml: also trigger on evals/results.json (and the build scripts) so the committed results refresh the live page. - evals/README + CHANGELOG document the CI route. Claude-Session: https://claude.ai/code/session_016JWn5jRD5tcEFKrubjQ6Px Co-authored-by: Claude <noreply@anthropic.com>
Dogfood the Action + bump to v20.0.0 (Agentic Tooling) (#42 )
2026-06-18 12:58:45 +01:00 · 2026-06-18 12:52:37 +01:00 · 2026-06-18 08:37:40 +01:00
21 changed files with 825 additions and 16 deletions
@@ -10,6 +10,10 @@ on:
    paths:
      - 'skills/**'
      - 'web/**'
+      - 'evals/results.json'
+      - 'skill-tiers.json'
+      - 'scripts/build-docs.mjs'
+      - 'scripts/build-leaderboard.mjs'
      - '.github/workflows/deploy-playground.yml'
  workflow_dispatch:

@@ -41,6 +45,9 @@ jobs:
      - name: Build the static skill catalog (web/catalog.html)
        run: node scripts/build-docs.mjs

+      - name: Build the skill leaderboard (web/leaderboard.html)
+        run: node scripts/build-leaderboard.mjs
+
      - name: Configure Pages
        uses: actions/configure-pages@v5

@@ -0,0 +1,67 @@
+name: Update Skill Leaderboard
+
+# Runs the eval harness with your ANTHROPIC_API_KEY secret, commits the real
+# results (evals/results.json), and lets the Pages deploy re-render the public
+# leaderboard with real numbers. Manual trigger so it never burns tokens by
+# surprise. (Uncomment the schedule to re-run, e.g. monthly, after model upgrades.)
+
+on:
+  workflow_dispatch:
+    inputs:
+      models:
+        description: 'Comma-separated model ids to score'
+        required: false
+        default: 'claude-sonnet-4-6,claude-haiku-4-5-20251001'
+      judge:
+        description: 'Judge model id'
+        required: false
+        default: 'claude-opus-4-8'
+  # schedule:
+  #   - cron: '0 6 1 * *'   # 06:00 on the 1st of each month
+
+# contents: write is needed only so the job can push evals/results.json.
+permissions:
+  contents: write
+
+# Queue (never cancel) overlapping runs so two runs cannot push at once.
+concurrency:
+  group: eval-leaderboard
+  cancel-in-progress: false
+
+jobs:
+  evaluate:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      - name: Set up Node
+        uses: actions/setup-node@v4
+        with:
+          node-version: '20'
+
+      - name: Run evals
+        # The dispatch inputs reach the script through env vars, not inline
+        # `${{ }}` interpolation: expressions are expanded before bash parses
+        # the script, so a crafted input could otherwise inject shell commands
+        # into the step that holds the API key. The `||` fallbacks also cover
+        # runs that have no inputs at all (e.g. the optional schedule).
+        env:
+          ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
+          EVAL_MODELS: ${{ github.event.inputs.models || 'claude-sonnet-4-6,claude-haiku-4-5-20251001' }}
+          EVAL_JUDGE: ${{ github.event.inputs.judge || 'claude-opus-4-8' }}
+        run: |
+          if [ -z "$ANTHROPIC_API_KEY" ]; then
+            echo "::error::ANTHROPIC_API_KEY secret is not set. Add it in Settings → Secrets and variables → Actions."
+            exit 1
+          fi
+          node evals/run-evals.mjs \
+            --models "$EVAL_MODELS" \
+            --judge "$EVAL_JUDGE"
+
+      - name: Build the leaderboard page (sanity check)
+        run: node scripts/build-leaderboard.mjs
+
+      - name: Commit results
+        # Only evals/results.json is staged; nothing is committed when the
+        # staged file is unchanged.
+        run: |
+          git config user.name "github-actions[bot]"
+          git config user.email "github-actions[bot]@users.noreply.github.com"
+          git add evals/results.json
+          if git diff --cached --quiet; then
+            echo "No change in results."
+          else
+            git commit -m "chore(evals): refresh leaderboard results"
+            git push
+            echo "Committed evals/results.json — the Pages deploy will render real numbers."
+          fi
@@ -0,0 +1,71 @@
+name: Auto PR description
+
+# Dogfoods our own Action: when a PR is opened with an empty body, run the
+# pr-description-writer skill on the diff and fill it in. A living demo of
+# `uses: ./action`. Requires the ANTHROPIC_API_KEY repo secret; skips quietly
+# without it (and on forks, which can't read secrets).
+
+on:
+  pull_request:
+    types: [opened]
+
+permissions:
+  contents: read
+  pull-requests: write
+
+jobs:
+  describe:
+    # Same-repo PRs only: the job is skipped when the head repo is a fork.
+    if: github.event.pull_request.head.repo.full_name == github.repository
+    runs-on: ubuntu-latest
+    env:
+      ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
+    steps:
+      - name: Check for API key and an empty PR body
+        id: gate
+        uses: actions/github-script@v7
+        with:
+          script: |
+            const hasKey = !!process.env.ANTHROPIC_API_KEY;
+            const body = (context.payload.pull_request.body || '').trim();
+            if (!hasKey) core.info('ANTHROPIC_API_KEY not set — skipping.');
+            if (body) core.info('PR already has a description — skipping.');
+            core.setOutput('go', String(hasKey && !body));
+
+      - name: Checkout
+        if: steps.gate.outputs.go == 'true'
+        uses: actions/checkout@v4
+        with: { fetch-depth: 0 }
+
+      - name: Collect the diff
+        if: steps.gate.outputs.go == 'true'
+        id: diff
+        # The PR title and base ref are author-controlled text. They are passed
+        # through env vars instead of inline `${{ }}` interpolation so they can
+        # never be parsed as shell syntax.
+        env:
+          PR_TITLE: ${{ github.event.pull_request.title }}
+          BASE_REF: ${{ github.base_ref }}
+        run: |
+          # Random heredoc delimiter: a fixed one could be forged by a title or
+          # commit subject and cut the output short or inject extra outputs.
+          delim="DIFF_EOF_$(head -c 16 /dev/urandom | od -An -tx1 | tr -d ' \n')"
+          {
+            echo "text<<$delim"
+            printf 'Title: %s\n' "$PR_TITLE"
+            echo "Commits:"; git log --oneline "origin/$BASE_REF..HEAD" | head -30
+            echo; echo "Changed files:"; git diff --stat "origin/$BASE_REF...HEAD" | tail -40
+            echo "$delim"
+          } >> "$GITHUB_OUTPUT"
+
+      - name: Write the PR description with the skill
+        if: steps.gate.outputs.go == 'true'
+        id: skill
+        uses: ./action
+        with:
+          skill: pr-description-writer
+          input: ${{ steps.diff.outputs.text }}
+          api_key: ${{ secrets.ANTHROPIC_API_KEY }}
+
+      - name: Update the PR body
+        if: steps.gate.outputs.go == 'true'
+        uses: actions/github-script@v7
+        env:
+          BODY: ${{ steps.skill.outputs.result }}
+        with:
+          script: |
+            // An empty result would replace the PR body with just the footer.
+            const drafted = (process.env.BODY || '').trim();
+            if (!drafted) {
+              core.info('Skill returned no text — leaving the PR body untouched.');
+              return;
+            }
+            await github.rest.pulls.update({
+              owner: context.repo.owner, repo: context.repo.repo,
+              pull_number: context.issue.number,
+              body: drafted + '\n\n<sub>✍️ Drafted by the pm-claude-skills GitHub Action (pr-description-writer).</sub>',
+            });
@@ -13,3 +13,4 @@ venv/

 # Generated docs catalog (built in CI for Pages)
 web/catalog.html
+web/leaderboard.html
@@ -9,7 +9,31 @@ each new wave of skills bumps the **major** version, extensions and fixes bump

 ## [Unreleased]

-_Nothing yet._
+### Added
+- **One-click leaderboard updates in CI** — `.github/workflows/eval-leaderboard.yml`
+  ("Update Skill Leaderboard") runs the evals with the `ANTHROPIC_API_KEY` secret, commits
+  `evals/results.json`, and the Pages deploy re-renders the public leaderboard with real
+  numbers — no local key needed. The deploy workflow now also triggers on
+  `evals/results.json`.
+
+## [20.0.0] — Agentic Tooling — 2026-06-18
+
+### Added
+- **Dogfooded Action** — `.github/workflows/pr-description.yml` uses our own GitHub Action
+  (`uses: ./action`) to auto-write this repo's PR descriptions when a PR opens with an
+  empty body (skips quietly without the `ANTHROPIC_API_KEY` secret and on forks).
+- **GitHub Action** ([`action/`](action/)) — run any skill in CI: `uses:
+  mohitagw15856/pm-claude-skills/action@main` to auto-write PR descriptions,
+  changelogs, release notes, or code-review checklists. Composite action +
+  dependency-free runner.
+- **`generate` command** — `npx pm-claude-skills generate --from <url|file>` turns a
+  team's documentation into a `SKILL.md` that follows the authoring standard
+  (`bin/generate.mjs`, needs `ANTHROPIC_API_KEY`).
+- **Skill evals + Leaderboard** — `evals/run-evals.mjs` scores skill output across models
+  with an LLM judge (structure / completeness / usefulness / grounding);
+  `scripts/build-leaderboard.mjs` renders a public `web/leaderboard.html` (built in the
+  Pages deploy, linked from the README, catalog, and playground).
+- Shared, dependency-free Anthropic client (`bin/lib/anthropic.mjs`) used by all three.

 ## [19.0.0] — Security Auditor, Personas & Catalog — 2026-06-18

@@ -199,7 +223,8 @@ Earlier releases (v1.0.0 – v5.0.0) predate this changelog. See the
 [article series](README.md#-the-article-series) for the full history of how the
 library grew from the first PM toolkit to 100+ skills.

-[Unreleased]: https://github.com/mohitagw15856/pm-claude-skills/compare/v19.0.0...HEAD
+[Unreleased]: https://github.com/mohitagw15856/pm-claude-skills/compare/v20.0.0...HEAD
+[20.0.0]: https://github.com/mohitagw15856/pm-claude-skills/compare/v19.0.0...v20.0.0
 [19.0.0]: https://github.com/mohitagw15856/pm-claude-skills/compare/v18.0.0...v19.0.0
 [18.0.0]: https://github.com/mohitagw15856/pm-claude-skills/compare/v17.0.0...v18.0.0
 [17.0.0]: https://github.com/mohitagw15856/pm-claude-skills/compare/v16.0.0...v17.0.0
@@ -12,7 +12,7 @@
 [![Platforms](https://img.shields.io/badge/works%20with-Claude%20%7C%20ChatGPT%20%7C%20Gemini%20%7C%20Cursor%20%7C%20Codex%20%7C%20Hermes-8A2BE2)](#-works-with--cross-tool-compatibility)
 [![SkillCheck](https://img.shields.io/github/actions/workflow/status/mohitagw15856/pm-claude-skills/skillcheck.yml?branch=main&label=SkillCheck)](.github/workflows/skillcheck.yml)
 [![Security Audit](https://img.shields.io/github/actions/workflow/status/mohitagw15856/pm-claude-skills/skill-audit.yml?branch=main&label=security%20audit)](.github/workflows/skill-audit.yml)
-[![Version](https://img.shields.io/badge/version-19.0.0-brightgreen)](https://github.com/mohitagw15856/pm-claude-skills/releases)
+[![Version](https://img.shields.io/badge/version-20.0.0-brightgreen)](https://github.com/mohitagw15856/pm-claude-skills/releases)
 [![Install](https://img.shields.io/badge/Install%20in%20Claude%20Code-2%20minutes-orange)](https://github.com/mohitagw15856/pm-claude-skills#-quick-install-2-minutes)
 [![License](https://img.shields.io/badge/license-MIT-lightgrey)](LICENSE)
 [![Sponsor](https://img.shields.io/badge/sponsor-❤️-ff69b4)](https://github.com/sponsors/mohitagw15856)
@@ -22,7 +22,7 @@

 A community-built library of professional skills for every field — product management, engineering, customer success, marketing, social media, writers, design, legal, finance, HR, sales, operations, research, and more. Each skill is a structured `SKILL.md` file that teaches an AI assistant how to produce professional-grade outputs for your workflows. Skills run natively in **Claude Code** and **Hermes Agent** (same open `SKILL.md` standard), and ship as ready-to-paste exports for **ChatGPT** and **Gemini** — see [Works With](#-works-with--cross-tool-compatibility).

-**🆕 Latest release (v19.0.0 — Security Auditor, Personas & Catalog):** a CI **Skill Security Auditor** that flags prompt-injection / unsafe code in any skill, **4 personas** (output-styles), an [orchestration guide](ORCHESTRATION.md), a server-rendered **skill catalog**, and a public [roadmap](ROADMAP.md). See the [changelog](#-changelog).
+**🆕 Latest release (v20.0.0 — Agentic Tooling):** run any skill in CI with the new **[GitHub Action](action/)**, turn your docs into a skill with **`npx pm-claude-skills generate`**, and compare skills across models on the **[Skill Leaderboard](https://mohitagw15856.github.io/pm-claude-skills/leaderboard.html)** (LLM-judge evals). See the [changelog](#-changelog).

 <!-- DEMO: replace web/docs-assets/playground.png below with web/docs-assets/playground-demo.gif
     once recorded (see web/docs-assets/README.md for how). The link goes to the live app. -->
@@ -226,6 +226,30 @@ Then ask: *"search the skills for customer churn, then apply the best one to my

 ---

+## ⚙️ AI-Powered Tooling
+
+Three ways to put the library to work beyond installing files:
+
+**🤖 Run a skill in your CI — [GitHub Action](action/).** Auto-write PR descriptions, changelogs, release notes, or run a code-review checklist on every PR:
+
+```yaml
+- uses: mohitagw15856/pm-claude-skills/action@main
+  with:
+    skill: pr-description-writer
+    input: ${{ steps.diff.outputs.text }}
+    api_key: ${{ secrets.ANTHROPIC_API_KEY }}
+```
+
+**🏗️ Turn your docs into a skill — `generate`.** Point it at a URL or file and it writes a `SKILL.md` that follows the authoring standard:
+
+```bash
+ANTHROPIC_API_KEY=sk-ant-… npx pm-claude-skills generate --from ./team-process.md
+```
+
+**🏆 Skill Leaderboard — [evals](evals/).** An LLM-as-judge harness scores each skill across Claude models on structure, completeness, usefulness, and grounding. **[View the leaderboard →](https://mohitagw15856.github.io/pm-claude-skills/leaderboard.html)**
+
+---
+
 ## 🌐 Skill Playground — Try Any Skill in Your Browser

 **▶ Live: [mohitagw15856.github.io/pm-claude-skills](https://mohitagw15856.github.io/pm-claude-skills/)** · 📚 [Browse the full skill catalog](https://mohitagw15856.github.io/pm-claude-skills/catalog.html)
@@ -379,15 +403,21 @@ More templates will follow. If you want to contribute one, see the [template con

 The highlights are below. For the structured, [Keep a Changelog](https://keepachangelog.com/)-format history, see **[CHANGELOG.md](CHANGELOG.md)**.

-### 🆕 What's New in v19.0.0 — Security Auditor, Personas & Catalog
+### 🆕 What's New in v20.0.0 — Agentic Tooling

-Trust, more content types, and discoverability:
+The library starts *doing* the work, not just describing it:

- **Skill Security Auditor** — `scripts/skill-audit.mjs` scans every skill (and its scripts) for prompt injection, data exfiltration, unsafe code, secrets, and hidden text; **HIGH findings fail CI**. New `security audit` badge + a `skill-security-auditor` skill.
- **Personas** — 4 Claude Code output-styles (Startup CTO, Growth Marketer, Solo Founder, Product Leader) in [`output-styles/`](output-styles/).
- **Orchestration guide** ([`ORCHESTRATION.md`](ORCHESTRATION.md)) — Skill Chain, Multi-Agent Handoff, Domain Deep-Dive, Solo Sprint.
- **Static skill catalog** — a server-rendered, SEO-indexable catalog of every skill (linked from the README + Playground).
- **Public roadmap** ([`ROADMAP.md`](ROADMAP.md)) with now/next/later + good first issues.
+- **GitHub Action** ([`action/`](action/)) — run any skill in a repo's CI (auto PR descriptions, changelogs, release notes, reviews). `uses: mohitagw15856/pm-claude-skills/action@main`. We dogfood it to write this repo's own PR descriptions.
+- **`generate` command** — `npx pm-claude-skills generate --from <url|file>` turns your docs into a standard-compliant `SKILL.md`.
+- **Skill evals + Leaderboard** — LLM-as-judge scoring of skills across models, rendered as a public [leaderboard](https://mohitagw15856.github.io/pm-claude-skills/leaderboard.html).
+
+<details>
+<summary><strong>v19.0.0 — Security Auditor, Personas & Catalog</strong> (click to expand)</summary>
+
+- **Skill Security Auditor** — scans every skill (and its scripts) for prompt injection, exfiltration, unsafe code, secrets, hidden text; HIGH fails CI. Plus a `skill-security-auditor` skill.
+- **4 personas** (output-styles), an [orchestration guide](ORCHESTRATION.md), a server-rendered **skill catalog**, and a public [roadmap](ROADMAP.md).
+
+</details>

 <details>
 <summary><strong>v18.0.0 — Windsurf, Aider & an MCP Server</strong> (click to expand)</summary>
@@ -10,9 +10,9 @@ That said, security matters here in two specific ways: **skill file safety** and

 | Version | Supported |
 |---|---|
-| v19.x (latest) | ✅ Active |
-| v17.x – v18.x | ✅ Security fixes only |
-| < v17.0.0 | ❌ No longer supported |
+| v20.x (latest) | ✅ Active |
+| v18.x – v19.x | ✅ Security fixes only |
+| < v18.0.0 | ❌ No longer supported |

 Because skills are plain markdown, "support" means we review and correct any reported
 safety issue (prompt injection, unsafe instructions) in the listed versions.
@@ -0,0 +1,65 @@
+# PM Skills — GitHub Action
+
+Run any skill from this library inside **your** repo's CI. Turn the library's frameworks
+into automation: auto-write PR descriptions, generate release notes and changelogs, or run
+a code-review checklist — on every push or PR.
+
+```yaml
+- uses: mohitagw15856/pm-claude-skills/action@main
+  with:
+    skill: pr-description-writer
+    input: ${{ steps.diff.outputs.text }}
+    api_key: ${{ secrets.ANTHROPIC_API_KEY }}
+```
+
+## Inputs
+
+| Input | Required | Description |
+|---|---|---|
+| `skill` | ✅ | Skill name, e.g. `pr-description-writer`, `changelog-generator`, `code-review-checklist`. |
+| `input` | — | The text/context to run the skill on. |
+| `input_file` | — | Read input from a file instead of `input`. |
+| `api_key` | ✅ | Anthropic API key (store as a repo secret). |
+| `model` | — | Model id (default `claude-sonnet-4-6`). |
+| `output_file` | — | Also write the result to this file. |
+| `max_tokens` | — | Max output tokens (default `4096`). |
+
+**Output:** `result` — the skill's output (use `output_file` for long, multi-line results).
+
+## Example — auto-write a PR description
+
+```yaml
+name: PR description
+on: { pull_request: { types: [opened] } }
+permissions: { contents: read, pull-requests: write }
+jobs:
+  describe:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+        with: { fetch-depth: 0 }
+      - id: diff
+        run: |
+          echo "text<<EOF" >> "$GITHUB_OUTPUT"
+          git diff origin/${{ github.base_ref }}...HEAD --stat >> "$GITHUB_OUTPUT"
+          echo "EOF" >> "$GITHUB_OUTPUT"
+      - id: skill
+        uses: mohitagw15856/pm-claude-skills/action@main
+        with:
+          skill: pr-description-writer
+          input: ${{ steps.diff.outputs.text }}
+          api_key: ${{ secrets.ANTHROPIC_API_KEY }}
+      - uses: actions/github-script@v7
+        env:
+          BODY: ${{ steps.skill.outputs.result }}
+        with:
+          script: |
+            await github.rest.pulls.update({ owner: context.repo.owner, repo: context.repo.repo,
+              pull_number: context.issue.number, body: process.env.BODY })
+```
+
+## Other ideas
+
+- `skill: changelog-generator` from `git log` → write `CHANGELOG.md`.
+- `skill: release-notes` on tag push → set the GitHub Release body.
+- `skill: code-review-checklist` → post a review checklist as a PR comment.
+
+Pin to a release tag (e.g. `@v20`, the first release that ships the action) for stability once you've tried `@main`.
@@ -0,0 +1,51 @@
+# Composite action: runs one bundled skill via run.mjs and exposes its text
+# output as the `result` output.
+name: 'PM Skills — Run a Skill'
+description: 'Run any pm-claude-skills SKILL.md in CI — auto PR descriptions, changelogs, release notes, code-review checklists, and more.'
+author: 'Mohit Aggarwal'
+branding:
+  icon: 'cpu'
+  color: 'purple'
+
+# Neither `input` nor `input_file` is marked required here; supply whichever
+# one carries the text for the skill.
+inputs:
+  skill:
+    description: 'Skill name to run (e.g. pr-description-writer, changelog-generator, code-review-checklist).'
+    required: true
+  input:
+    description: 'The input/context text the skill should work on.'
+    required: false
+  input_file:
+    description: 'Read the input from this file instead of the `input` string.'
+    required: false
+  api_key:
+    description: 'Anthropic API key (store it as a secret).'
+    required: true
+  model:
+    description: 'Claude model id.'
+    required: false
+    default: 'claude-sonnet-4-6'
+  output_file:
+    description: 'If set, also write the result to this file.'
+    required: false
+  max_tokens:
+    description: 'Max output tokens.'
+    required: false
+    default: '4096'
+
+outputs:
+  result:
+    description: 'The skill output (also use output_file for multi-line results).'
+    # Forwarded from the single `run` step below.
+    value: ${{ steps.run.outputs.result }}
+
+runs:
+  using: composite
+  steps:
+    - id: run
+      shell: bash
+      # run.mjs is resolved relative to this action's own directory.
+      run: node "$GITHUB_ACTION_PATH/run.mjs"
+      # Each input is forwarded explicitly as an INPUT_* variable for the
+      # runner script to read.
+      env:
+        INPUT_SKILL: ${{ inputs.skill }}
+        INPUT_INPUT: ${{ inputs.input }}
+        INPUT_INPUT_FILE: ${{ inputs.input_file }}
+        INPUT_API_KEY: ${{ inputs.api_key }}
+        INPUT_MODEL: ${{ inputs.model }}
+        INPUT_OUTPUT_FILE: ${{ inputs.output_file }}
+        INPUT_MAX_TOKENS: ${{ inputs.max_tokens }}
@@ -0,0 +1,58 @@
+#!/usr/bin/env node
+// Runner for the pm-skills GitHub Action. Loads a bundled SKILL.md, runs it on
+// the provided input via the Anthropic API, and exposes the result as a step
+// output (and optionally a file). Inputs arrive as INPUT_* env vars.
+import { readFileSync, existsSync, writeFileSync, appendFileSync } from 'node:fs';
+import { join, dirname } from 'node:path';
+import { fileURLToPath, pathToFileURL } from 'node:url';
+import { complete, parseSkill } from '../bin/lib/anthropic.mjs';
+
+const ACTION_DIR = dirname(fileURLToPath(import.meta.url));
+const REPO_ROOT = join(ACTION_DIR, '..');
+
+const inp = (name, def = '') => (process.env[`INPUT_${name.toUpperCase()}`] ?? def).trim();
+
+// Pure: assemble the system prompt + user message for a skill run (testable offline).
+export function buildRequest(skillBody, userInput) {
+  const system = skillBody +
+    '\n\n---\nExecute this skill now on the input below and produce the complete output. ' +
+    'Do not ask follow-up questions — work with what is given and note any reasonable assumptions. ' +
+    'Output only the finished artifact (no preamble).';
+  return { system, messages: [{ role: 'user', content: userInput }] };
+}
+
+async function main() {
+  const skill = inp('skill');
+  if (!skill) throw new Error('Input `skill` is required.');
+  const apiKey = inp('api_key') || process.env.ANTHROPIC_API_KEY || '';
+  const model = inp('model', 'claude-sonnet-4-6');
+  const maxTokens = parseInt(inp('max_tokens', '4096'), 10) || 4096;
+
+  let input = inp('input');
+  const inputFile = inp('input_file');
+  if (!input && inputFile && existsSync(inputFile)) input = readFileSync(inputFile, 'utf8');
+  if (!input) throw new Error('Provide `input` or `input_file`.');
+
+  const skillFile = join(REPO_ROOT, 'skills', skill, 'SKILL.md');
+  if (!existsSync(skillFile)) throw new Error(`Unknown skill "${skill}" (no skills/${skill}/SKILL.md).`);
+  const { body } = parseSkill(readFileSync(skillFile, 'utf8'));
+
+  const { system, messages } = buildRequest(body, input);
+  console.log(`Running skill "${skill}" with ${model}…`);
+  const result = await complete({ apiKey, model, system, messages, maxTokens });
+
+  // Step output (multiline-safe heredoc) + optional file.
+  if (process.env.GITHUB_OUTPUT) {
+    const d = `EOF_${Math.random().toString(36).slice(2)}`;
+    appendFileSync(process.env.GITHUB_OUTPUT, `result<<${d}\n${result}\n${d}\n`);
+  }
+  const outFile = inp('output_file');
+  if (outFile) { writeFileSync(outFile, result + '\n'); console.log(`Wrote ${outFile}`); }
+
+  console.log('\n----- skill output -----\n' + result);
+}
+
+// Run only when executed directly (so tests can import buildRequest).
+// The check compares this module's URL with the script path in argv[1]; on
+// failure the error message is printed and the process exits with status 1.
+if (import.meta.url === pathToFileURL(process.argv[1] || '').href) {
+  main().catch((e) => { console.error(`Error: ${e.message}`); process.exit(1); });
+}
@@ -153,6 +153,8 @@ Examples:
  npx pm-claude-skills add --agent cursor     # .mdc rules into ./.cursor/rules
  npx pm-claude-skills add --agent windsurf   # .md rules into ./.windsurf/rules
  npx pm-claude-skills add --agent codex --link
+
+  npx pm-claude-skills generate --from <url|file>   # turn your docs into a SKILL.md (needs ANTHROPIC_API_KEY)
 `;

 const opts = parse(process.argv.slice(2));
@@ -161,4 +163,9 @@ if (opts.version) console.log(VERSION);
 else if (opts.help || !cmd || cmd === 'help') console.log(HELP);
 else if (cmd === 'list') list();
 else if (cmd === 'add') add(opts);
+else if (cmd === 'generate') {
+  const { run } = await import('./generate.mjs');
+  try { process.exit(await run(process.argv.slice(3))); }
+  catch (e) { console.error(`Error: ${e.message}`); process.exit(1); }
+}
 else { console.error(`Unknown command: ${cmd}\n`); console.log(HELP); process.exit(2); }
@@ -0,0 +1,109 @@
+// `pm-claude-skills generate` — turn a doc (URL or file) into a SKILL.md that
+// follows this library's authoring standard. Uses the Anthropic API.
+//
+//   ANTHROPIC_API_KEY=sk-ant-... npx pm-claude-skills generate --from ./process.md
+//   ... generate --from https://example.com/runbook --name incident-runbook
+//   ... generate --from notes.txt --out ./skills --dry-run
+import { writeFileSync, mkdirSync, existsSync, readFileSync } from 'node:fs';
+import { join } from 'node:path';
+import { complete, parseSkill } from './lib/anthropic.mjs';
+
+function getArg(argv, name, def) {
+  const i = argv.indexOf(`--${name}`);
+  return i !== -1 ? argv[i + 1] : def;
+}
+
+// Strip tags/scripts/styles from HTML to rough text (good enough for an LLM).
+function htmlToText(html) {
+  return html
+    .replace(/<script[\s\S]*?<\/script>/gi, ' ')
+    .replace(/<style[\s\S]*?<\/style>/gi, ' ')
+    .replace(/<[^>]+>/g, ' ')
+    .replace(/&[a-z]+;/gi, ' ')
+    .replace(/\s+/g, ' ')
+    .trim();
+}
+
+async function loadSource(from) {
+  if (/^https?:\/\//i.test(from)) {
+    const res = await fetch(from);
+    if (!res.ok) throw new Error(`Could not fetch ${from} (HTTP ${res.status}).`);
+    const text = await res.text();
+    return /<html|<body|<div/i.test(text) ? htmlToText(text) : text;
+  }
+  if (!existsSync(from)) throw new Error(`No such file: ${from}`);
+  return readFileSync(from, 'utf8');
+}
+
+// System prompt sent with every `generate` request. It spells out the SKILL.md
+// layout (frontmatter with name/description, then the fixed section headings)
+// and tells the model to return only the file content.
+const META_PROMPT = `You convert a team's documentation into a single Claude/Agent "skill" file (SKILL.md) that follows this exact standard. Output ONLY the file content, starting with the YAML frontmatter — no code fences, no preamble.
+
+Required structure:
+---
+name: <lowercase-hyphenated, derived from the doc's purpose>
+description: "<one sentence on what it does>. Use when <trigger phrases a user would say>. Produces <the concrete artifact>."
+---
+
+# <Title> Skill
+
+<one-line value summary>
+
+## What This Skill Produces
+- <deliverables>
+
+## Required Inputs
+Ask for (if not provided):
+- <inputs to gather; never invent them>
+
+## Process
+1. <steps>
+
+## Output Format
+<a concrete template — headings/tables — of the final artifact>
+
+## Quality Checks
+- [ ] <checks the output must pass>
+
+## Anti-Patterns
+- [ ] Do not <mistakes this skill prevents>
+
+Rules: be specific to the documentation provided; turn its rules/process into the skill. The description MUST contain "Use when" and "Produces". Do not include any text outside the file.`;
+
+export async function run(argv) {
+  const from = getArg(argv, 'from');
+  if (!from || argv.includes('--help')) {
+    console.log('Usage: pm-claude-skills generate --from <url|file> [--name x] [--out dir] [--model m] [--dry-run]');
+    return from ? 0 : 1;
+  }
+  const apiKey = process.env.ANTHROPIC_API_KEY || '';
+  if (!apiKey) { console.error('Set ANTHROPIC_API_KEY to generate a skill.'); return 1; }
+  const model = getArg(argv, 'model', 'claude-sonnet-4-6');
+  const outDir = getArg(argv, 'out', 'skills');
+  const dryRun = argv.includes('--dry-run');
+
+  console.error(`Reading ${from}…`);
+  const source = (await loadSource(from)).slice(0, 24000); // cap context
+
+  console.error(`Generating a SKILL.md with ${model}…`);
+  const out = await complete({
+    apiKey, model, system: META_PROMPT,
+    messages: [{ role: 'user', content: `Documentation to convert into a skill:\n\n${source}` }],
+    maxTokens: 3000,
+  });
+
+  const cleaned = out.replace(/^```[a-z]*\n?/i, '').replace(/\n?```$/i, '').trim();
+  const { meta } = parseSkill(cleaned);
+  const name = getArg(argv, 'name', meta.name);
+  if (!name) { console.error('Could not determine a skill name — pass --name.'); return 1; }
+
+  if (dryRun) {
+    console.log(cleaned);
+    console.error(`\n[dry-run] Would write ${join(outDir, name, 'SKILL.md')}`);
+    return 0;
+  }
+  const dir = join(outDir, name);
+  mkdirSync(dir, { recursive: true });
+  writeFileSync(join(dir, 'SKILL.md'), cleaned + '\n');
+  console.log(`Created ${join(dir, 'SKILL.md')}`);
+  console.log('Next: review it, then validate — node scripts/skillcheck.mjs && node scripts/skill-audit.mjs');
+  return 0;
+}
@@ -0,0 +1,51 @@
+// Minimal, dependency-free Anthropic Messages API client (Node 18+ global fetch).
+// Shared by the GitHub Action runner, the eval harness, and skill generation.
+// No SDK, no install — just a thin POST wrapper.
+
+const API_URL = 'https://api.anthropic.com/v1/messages';
+
+/**
+ * Call the Anthropic Messages API and return the concatenated text output.
+ * @param {object} o
+ * @param {string} o.apiKey  - Anthropic API key.
+ * @param {string} [o.model] - Model id (default claude-sonnet-4-6).
+ * @param {string} [o.system]- System prompt.
+ * @param {Array}  o.messages- [{role, content}] messages.
+ * @param {number} [o.maxTokens]
+ * @returns {Promise<string>}
+ */
+export async function complete({ apiKey, model = 'claude-sonnet-4-6', system, messages, maxTokens = 4096 }) {
+  if (!apiKey) throw new Error('Missing Anthropic API key (set ANTHROPIC_API_KEY).');
+  const res = await fetch(API_URL, {
+    method: 'POST',
+    headers: {
+      'content-type': 'application/json',
+      'x-api-key': apiKey,
+      'anthropic-version': '2023-06-01',
+    },
+    body: JSON.stringify({ model, max_tokens: maxTokens, ...(system ? { system } : {}), messages }),
+  });
+  if (!res.ok) {
+    const body = await res.text().catch(() => '');
+    throw new Error(`Anthropic API ${res.status}: ${body.slice(0, 500)}`);
+  }
+  const data = await res.json();
+  return (data.content || []).map((c) => c.text || '').join('').trim();
+}
+
+/** Parse "name: value" YAML-ish frontmatter + body from a SKILL.md string. */
+export function parseSkill(text) {
+  const m = text.match(/^---\n([\s\S]*?)\n---\n?([\s\S]*)$/);
+  const meta = {};
+  if (m) {
+    for (const line of m[1].split('\n')) {
+      const kv = line.match(/^(\w[\w-]*):\s*(.*)$/);
+      if (kv) {
+        let v = kv[2].trim();
+        if ((v.startsWith('"') && v.endsWith('"')) || (v.startsWith("'") && v.endsWith("'"))) v = v.slice(1, -1);
+        meta[kv[1]] = v;
+      }
+    }
+  }
+  return { meta, body: m ? m[2].trim() : text.trim() };
+}
@@ -0,0 +1,46 @@
+# Skill Evals
+
+An LLM-as-judge harness that scores skill output quality across models — so claims like
+"production-ready" are backed by numbers, not vibes. Results render as a public
+[Skill Leaderboard](https://mohitagw15856.github.io/pm-claude-skills/leaderboard.html).
+
+## What it measures
+
+For each [case](cases.json), a model runs the skill, then a **judge model** scores the
+output 1–5 on four dimensions:
+
+- **structure** — follows a clear, expected structure
+- **completeness** — covers what the task needs
+- **usefulness** — specific and actually useful, not generic
+- **grounding** — stays grounded in the input, no invented facts
+
+## Run it
+
+Needs an Anthropic API key (this calls the API and costs tokens):
+
+```bash
+ANTHROPIC_API_KEY=sk-ant-... node evals/run-evals.mjs
+#   --models claude-opus-4-8,claude-sonnet-4-6,claude-haiku-4-5-20251001
+#   --judge  claude-opus-4-8
+node scripts/build-leaderboard.mjs       # render web/leaderboard.html
+```
+
+`run-evals.mjs` writes `evals/results.json`; the leaderboard builder prefers it and falls
+back to `results.example.json` (clearly labelled) so the page renders before you run real evals.
+
+### No local key? Run it in CI
+
+Add an `ANTHROPIC_API_KEY` repo secret, then go to **Actions → "Update Skill Leaderboard"
+→ Run workflow**. It runs the evals, commits `evals/results.json`, and the Pages deploy
+re-renders the public leaderboard with real numbers — no laptop required.
+
+## Add a case
+
+Append to [`cases.json`](cases.json): `{ "skill": "<name>", "input": "<a realistic prompt>" }`.
+Keep inputs short but representative of how the skill is actually used.
+
+## Honesty notes
+
+- Scores are an LLM judge's opinion, not ground truth — treat them as a comparative signal.
+- The judge sees the skill's stated purpose and the output, not the model name (reduces bias).
+- Re-run after model upgrades; numbers drift.
@@ -0,0 +1,29 @@
+{
+  "_comment": "Eval cases: a representative input per skill. Run with: node evals/run-evals.mjs",
+  "cases": [
+    {
+      "skill": "rice-prioritisation",
+      "input": "Rank these for next quarter:\n1. Onboarding redesign — reach ~5000 users/qtr, big activation impact, ~3 person-months.\n2. Dark mode — ~8000 users want it, low impact, ~1 person-month.\n3. SSO for enterprise — ~400 accounts, high deal impact, ~4 person-months, low confidence."
+    },
+    {
+      "skill": "prd-template",
+      "input": "Feature: in-app referral program so existing users invite colleagues and both get a credit. Target: activated B2B users. Goal: grow signups 15% in Q3."
+    },
+    {
+      "skill": "cs-health-scorecard",
+      "input": "Account: Acme Corp, enterprise, ARR $120k, renewal in 90 days. DAU/MAU 18%, 2 open P2 tickets, CSAT 7, exec sponsor left last month, seats 80/100 used, payments on time."
+    },
+    {
+      "skill": "executive-summary",
+      "input": "Summarise: our Q2 retention dropped from 82% to 76% driven by a new onboarding flow that confused mobile users; we shipped a fix in week 10 and retention recovered to 80%; we recommend a full mobile onboarding rework next quarter."
+    },
+    {
+      "skill": "competitive-analysis",
+      "input": "Analyse our position vs Notion and Coda for a lightweight team wiki aimed at small startups. We're cheaper and faster to set up but have fewer integrations."
+    },
+    {
+      "skill": "sprint-planning",
+      "input": "Team of 5, 2-week sprint, average velocity 30 points, one engineer out 3 days. Backlog: checkout redesign (8), payment retries (5), analytics events (3), bug bash (3), API rate limiting (5)."
+    }
+  ]
+}
@@ -0,0 +1,22 @@
+{
+  "_comment": "EXAMPLE data so the leaderboard renders before you run real evals. Replace by running: ANTHROPIC_API_KEY=... node evals/run-evals.mjs",
+  "example": true,
+  "generatedAt": "2026-06-18T00:00:00.000Z",
+  "judge": "claude-opus-4-8",
+  "models": ["claude-sonnet-4-6", "claude-haiku-4-5-20251001"],
+  "dimensions": ["structure", "completeness", "usefulness", "grounding"],
+  "results": [
+    { "skill": "rice-prioritisation", "model": "claude-sonnet-4-6", "scores": {"structure":5,"completeness":5,"usefulness":5,"grounding":4}, "overall": 4.75 },
+    { "skill": "rice-prioritisation", "model": "claude-haiku-4-5-20251001", "scores": {"structure":5,"completeness":4,"usefulness":4,"grounding":4}, "overall": 4.25 },
+    { "skill": "prd-template", "model": "claude-sonnet-4-6", "scores": {"structure":5,"completeness":4,"usefulness":5,"grounding":4}, "overall": 4.5 },
+    { "skill": "prd-template", "model": "claude-haiku-4-5-20251001", "scores": {"structure":4,"completeness":4,"usefulness":4,"grounding":4}, "overall": 4.0 },
+    { "skill": "cs-health-scorecard", "model": "claude-sonnet-4-6", "scores": {"structure":5,"completeness":5,"usefulness":5,"grounding":5}, "overall": 5.0 },
+    { "skill": "cs-health-scorecard", "model": "claude-haiku-4-5-20251001", "scores": {"structure":5,"completeness":4,"usefulness":4,"grounding":4}, "overall": 4.25 },
+    { "skill": "executive-summary", "model": "claude-sonnet-4-6", "scores": {"structure":5,"completeness":5,"usefulness":4,"grounding":5}, "overall": 4.75 },
+    { "skill": "executive-summary", "model": "claude-haiku-4-5-20251001", "scores": {"structure":5,"completeness":4,"usefulness":4,"grounding":5}, "overall": 4.5 },
+    { "skill": "competitive-analysis", "model": "claude-sonnet-4-6", "scores": {"structure":4,"completeness":4,"usefulness":5,"grounding":4}, "overall": 4.25 },
+    { "skill": "competitive-analysis", "model": "claude-haiku-4-5-20251001", "scores": {"structure":4,"completeness":4,"usefulness":4,"grounding":4}, "overall": 4.0 },
+    { "skill": "sprint-planning", "model": "claude-sonnet-4-6", "scores": {"structure":5,"completeness":5,"usefulness":5,"grounding":5}, "overall": 5.0 },
+    { "skill": "sprint-planning", "model": "claude-haiku-4-5-20251001", "scores": {"structure":5,"completeness":4,"usefulness":4,"grounding":5}, "overall": 4.5 }
+  ]
+}
@@ -0,0 +1,93 @@
+#!/usr/bin/env node
+// Skill eval harness. For each case × model: run the skill, then score the output
+// with an LLM judge on a fixed rubric. Writes evals/results.json — feed it to
+// scripts/build-leaderboard.mjs to render web/leaderboard.html.
+//
+// Requires an Anthropic API key (this calls the API and costs tokens).
+//
+// Usage:
+//   ANTHROPIC_API_KEY=sk-ant-... node evals/run-evals.mjs
+//   ... node evals/run-evals.mjs --models claude-opus-4-8,claude-sonnet-4-6,claude-haiku-4-5-20251001
+//   ... node evals/run-evals.mjs --judge claude-opus-4-8 --cases evals/cases.json
+import { readFileSync, writeFileSync, existsSync } from 'node:fs';
+import { join, dirname } from 'node:path';
+import { fileURLToPath } from 'node:url';
+import { complete, parseSkill } from '../bin/lib/anthropic.mjs';
+
+const __dirname = dirname(fileURLToPath(import.meta.url));
+const root = join(__dirname, '..');
+
+function arg(name, def) {
+  const i = process.argv.indexOf(`--${name}`);
+  return i !== -1 ? process.argv[i + 1] : def;
+}
+
+const apiKey = process.env.ANTHROPIC_API_KEY || '';
+const models = arg('models', 'claude-sonnet-4-6,claude-haiku-4-5-20251001').split(',').map((s) => s.trim());
+const judge = arg('judge', 'claude-opus-4-8');
+const casesPath = arg('cases', join(__dirname, 'cases.json'));
+const outPath = arg('out', join(__dirname, 'results.json'));
+
+const DIMENSIONS = ['structure', 'completeness', 'usefulness', 'grounding'];
+
+function runPrompt(skillBody) {
+  return skillBody + '\n\n---\nExecute this skill now on the input. Output only the finished artifact.';
+}
+
+function judgePrompt(description, output) {
+  return `You are a strict evaluator of a professional work artifact.
+
+The artifact was produced by a skill whose job is:
+"${description}"
+
+Score the artifact below from 1 (poor) to 5 (excellent) on each dimension:
+- structure: follows a clear, expected structure for this kind of output
+- completeness: covers what the task needs, nothing important missing
+- usefulness: actually useful to a professional, specific not generic
+- grounding: stays grounded in the given input, no invented facts/metrics
+
+Return ONLY a JSON object, no prose: {"structure":N,"completeness":N,"usefulness":N,"grounding":N}
+
+--- ARTIFACT ---
+${output}`;
+}
+
+function parseScores(text) {
+  const m = text.match(/\{[\s\S]*\}/);
+  if (!m) throw new Error('judge did not return JSON');
+  const j = JSON.parse(m[0]);
+  const s = {};
+  for (const d of DIMENSIONS) s[d] = Math.max(1, Math.min(5, Number(j[d]) || 0));
+  return s;
+}
+
+/**
+ * Run every eval case against every model, score each output with the judge
+ * model, and write the collected scores to `outPath` as JSON.
+ *
+ * Exits with status 1 when ANTHROPIC_API_KEY is unset, or when no run could be
+ * scored; in the latter case `outPath` is left untouched.
+ */
+async function main() {
+  if (!apiKey) { console.error('Set ANTHROPIC_API_KEY to run evals.'); process.exit(1); }
+  const { cases } = JSON.parse(readFileSync(casesPath, 'utf8'));
+  const results = [];
+  let failures = 0;
+
+  for (const c of cases) {
+    const skillFile = join(root, 'skills', c.skill, 'SKILL.md');
+    if (!existsSync(skillFile)) { console.error(`skip ${c.skill}: no SKILL.md`); continue; }
+    const { meta, body } = parseSkill(readFileSync(skillFile, 'utf8'));
+    for (const model of models) {
+      process.stderr.write(`Running ${c.skill} on ${model}… `);
+      try {
+        // The skill body (plus the run instruction) is the system prompt; the case input is the user turn.
+        const output = await complete({ apiKey, model, system: runPrompt(body), messages: [{ role: 'user', content: c.input }], maxTokens: 3000 });
+        // The judge is given the skill description (or its name when there is none) and the output.
+        const judged = await complete({ apiKey, model: judge, messages: [{ role: 'user', content: judgePrompt(meta.description || c.skill, output) }], maxTokens: 200 });
+        const scores = parseScores(judged);
+        const overall = DIMENSIONS.reduce((a, d) => a + scores[d], 0) / DIMENSIONS.length;
+        results.push({ skill: c.skill, model, scores, overall: Math.round(overall * 100) / 100 });
+        process.stderr.write(`${overall.toFixed(2)}/5\n`);
+      } catch (e) {
+        // Best effort: one failed run must not abort the remaining runs.
+        failures += 1;
+        process.stderr.write(`FAILED (${e.message})\n`);
+      }
+    }
+  }
+
+  // Never replace an existing results file with an empty one (bad key, unknown
+  // model names, no matching skills): fail loudly so the run is not mistaken
+  // for a successful eval.
+  if (results.length === 0) {
+    console.error(`No runs were scored (${failures} failed); not writing ${outPath}.`);
+    process.exit(1);
+  }
+
+  const out = { generatedAt: new Date().toISOString(), judge, models, dimensions: DIMENSIONS, results };
+  writeFileSync(outPath, JSON.stringify(out, null, 2));
+  console.log(`\nWrote ${outPath} — ${results.length} scored runs. Build the page: node scripts/build-leaderboard.mjs`);
+  if (failures > 0) console.error(`${failures} run(s) failed and are missing from the results.`);
+}
+
+main();
@@ -1,6 +1,6 @@
 {
  "name": "pm-claude-skills",
-  "version": "19.0.0",
+  "version": "20.0.0",
  "type": "module",
  "description": "167 professional Agent Skills (SKILL.md) + subagents + slash commands for Claude, ChatGPT, Gemini, Cursor, Codex & Hermes. Install into any AI coding tool with: npx pm-claude-skills add --agent <tool>.",
  "keywords": [
@@ -89,6 +89,7 @@ const html = `<!DOCTYPE html>
    <a href="https://mohitagw15856.github.io/pm-claude-skills/">▶ Live Playground</a>
    <a href="${REPO}">GitHub</a>
    <a href="${REPO}#-quick-install-2-minutes">Install</a>
+    <a href="leaderboard.html">Leaderboard</a>
    <a href="${REPO}/blob/main/TIERS.md">Tiers</a>
  </div>
 </header>
@@ -0,0 +1,76 @@
+#!/usr/bin/env node
+// Renders web/leaderboard.html from evals/results.json (or evals/results.example.json
+// as a clearly-labelled placeholder). Run after evals/run-evals.mjs. No dependencies.
+import { readFileSync, writeFileSync, existsSync } from 'node:fs';
+import { join, dirname } from 'node:path';
+import { fileURLToPath } from 'node:url';
+
+const __dirname = dirname(fileURLToPath(import.meta.url));
+const root = join(__dirname, '..');
+const REPO = 'https://github.com/mohitagw15856/pm-claude-skills';
+
+const real = join(root, 'evals', 'results.json');
+const example = join(root, 'evals', 'results.example.json');
+const src = existsSync(real) ? real : example;
+const data = JSON.parse(readFileSync(src, 'utf8'));
+const isExample = !!data.example || src === example;
+
+const esc = (s) => String(s).replace(/[&<>"]/g, (c) => ({ '&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;' }[c]));
+const skills = [...new Set(data.results.map((r) => r.skill))].sort();
+const models = data.models || [...new Set(data.results.map((r) => r.model))];
+const cell = (skill, model) => data.results.find((r) => r.skill === skill && r.model === model);
+const colour = (v) => v >= 4.5 ? '#6ee7b7' : v >= 4 ? '#93c5fd' : v >= 3 ? '#fcd34d' : '#fca5a5';
+
+const modelAvg = (m) => {
+  const xs = data.results.filter((r) => r.model === m).map((r) => r.overall);
+  return xs.length ? (xs.reduce((a, b) => a + b, 0) / xs.length) : 0;
+};
+
+const headRow = `<tr><th>Skill</th>${models.map((m) => `<th>${esc(m)}</th>`).join('')}</tr>`;
+const rows = skills.map((s) => `<tr><td class="skill">${esc(s)}</td>${models.map((m) => {
+  const c = cell(s, m);
+  return c ? `<td><span class="score" style="color:${colour(c.overall)}">${c.overall.toFixed(2)}</span></td>` : '<td class="na">—</td>';
+}).join('')}</tr>`).join('\n');
+const avgRow = `<tr class="avg"><td>Average</td>${models.map((m) => `<td><strong>${modelAvg(m).toFixed(2)}</strong></td>`).join('')}</tr>`;
+
+// Full page markup for web/leaderboard.html. Values taken from the results
+// data in this template (judge name, counts, generation date) go through
+// esc(); headRow, rows and avgRow are pre-built HTML strings. The example-data
+// banner is emitted only when isExample is true.
+const html = `<!DOCTYPE html>
+<html lang="en"><head>
+<meta charset="UTF-8" /><meta name="viewport" content="width=device-width, initial-scale=1.0" />
+<title>Skill Leaderboard — how pm-claude-skills score across Claude models</title>
+<meta name="description" content="LLM-judged quality scores for professional Agent Skills across Claude models, on structure, completeness, usefulness, and grounding." />
+<style>
+  :root{--bg:#0f1115;--panel:#161a21;--border:#2a313c;--text:#e7ebf0;--muted:#95a0b0;--accent2:#e89b82}
+  body{margin:0;background:var(--bg);color:var(--text);font:15px/1.5 -apple-system,BlinkMacSystemFont,"Segoe UI",Roboto,sans-serif}
+  a{color:var(--accent2)} header{padding:28px 22px;border-bottom:1px solid var(--border);background:var(--panel)}
+  header h1{margin:0 0 6px;font-size:23px} header p{margin:0;color:var(--muted);font-size:14px}
+  .nav{margin-top:12px;display:flex;gap:14px;font-size:13px;flex-wrap:wrap}
+  main{max-width:900px;margin:0 auto;padding:22px}
+  .banner{background:rgba(245,158,11,.12);border:1px solid rgba(245,158,11,.4);color:#fcd34d;padding:12px 14px;border-radius:10px;margin-bottom:18px;font-size:13.5px}
+  table{width:100%;border-collapse:collapse;font-size:14px}
+  th,td{padding:10px 12px;text-align:center;border-bottom:1px solid var(--border)}
+  th:first-child,td:first-child{text-align:left}
+  th{color:var(--accent2);font-size:12px;text-transform:uppercase;letter-spacing:.04em}
+  td.skill{font-weight:600} .score{font-weight:700} .na{color:var(--muted)}
+  tr.avg td{border-top:2px solid var(--border);color:var(--muted)}
+  .meta{color:var(--muted);font-size:12.5px;margin-top:16px}
+</style></head><body>
+<header>
+  <h1>🏆 Skill Leaderboard</h1>
+  <p>LLM-judged quality (1–5) for each skill across Claude models — scored on structure, completeness, usefulness &amp; grounding by <code>${esc(data.judge || 'an LLM judge')}</code>.</p>
+  <div class="nav"><a href="https://mohitagw15856.github.io/pm-claude-skills/">Playground</a><a href="catalog.html">Catalog</a><a href="${REPO}/tree/main/evals">How it works</a></div>
+</header>
+<main>
+  ${isExample ? '<div class="banner">⚠️ <strong>Example data</strong> — illustrative scores so this page renders. Run <code>ANTHROPIC_API_KEY=… node evals/run-evals.mjs</code> then <code>node scripts/build-leaderboard.mjs</code> for real numbers.</div>' : ''}
+  <table>
+    <thead>${headRow}</thead>
+    <tbody>
+${rows}
+${avgRow}
+    </tbody>
+  </table>
+  <p class="meta">Higher is better (max 5). ${esc(skills.length)} skills × ${esc(models.length)} models${data.generatedAt ? ` · generated ${esc(String(data.generatedAt).slice(0, 10))}` : ''}. Methodology and cases in <a href="${REPO}/tree/main/evals">evals/</a>.</p>
+</main></body></html>
+`;
+
+writeFileSync(join(root, 'web', 'leaderboard.html'), html);
+console.log(`Wrote web/leaderboard.html — ${skills.length} skills × ${models.length} models${isExample ? ' (EXAMPLE data)' : ''}.`);
@@ -34,7 +34,7 @@
  <div class="key-note">
    🔒 Your key is stored only in this browser and sent directly to api.anthropic.com — never to us.
    Get one at <a href="https://console.anthropic.com/settings/keys" target="_blank" rel="noopener">console.anthropic.com</a>.
-    · 📚 <a href="catalog.html">Browse the full skill catalog</a>
+    · 📚 <a href="catalog.html">Catalog</a> · 🏆 <a href="leaderboard.html">Leaderboard</a>
  </div>

  <div class="controls" id="controls">