AI-powered tooling: GitHub Action, generate command, evals + leaderboard (#41)

Three features riding 2026 trends (agentic CI, codegen, evals), sharing one dependency-free Anthropic client (bin/lib/anthropic.mjs). 1. GitHub Action (action/) — run any skill in a consumer repo's CI: uses: mohitagw15856/pm-claude-skills/action@main. Composite action + run.mjs (loads the bundled SKILL.md, calls the API, exposes the result as a step output / file). Docs with auto-PR-description example. 2. generate command — `npx pm-claude-skills generate --from <url|file>` turns a team's docs into a SKILL.md following the authoring standard (bin/generate.mjs, wired into the CLI; needs ANTHROPIC_API_KEY). 3. Skill evals + Leaderboard — evals/run-evals.mjs runs each case across models and scores output with an LLM judge (structure/completeness/usefulness/grounding); scripts/build-leaderboard.mjs renders web/leaderboard.html (built in the Pages deploy, falls back to clearly-labelled example data). Linked from README, catalog, and playground. Offline-testable parts verified (prompt building, skill loading, graceful errors, leaderboard render). SkillCheck/audit/exports all green. Claude-Session: https://claude.ai/code/session_016JWn5jRD5tcEFKrubjQ6Px Co-authored-by: Claude <noreply@anthropic.com>
2026-06-18 08:37:40 +01:00
parent 288a340dbe
commit 51bf4be52f
17 changed files with 644 additions and 2 deletions
@@ -153,6 +153,8 @@ Examples:
  npx pm-claude-skills add --agent cursor     # .mdc rules into ./.cursor/rules
  npx pm-claude-skills add --agent windsurf   # .md rules into ./.windsurf/rules
  npx pm-claude-skills add --agent codex --link
+
+  npx pm-claude-skills generate --from <url|file>   # turn your docs into a SKILL.md (needs ANTHROPIC_API_KEY)
 `;

 const opts = parse(process.argv.slice(2));
@@ -161,4 +163,9 @@ if (opts.version) console.log(VERSION);
 else if (opts.help || !cmd || cmd === 'help') console.log(HELP);
 else if (cmd === 'list') list();
 else if (cmd === 'add') add(opts);
+else if (cmd === 'generate') {
+  const { run } = await import('./generate.mjs');
+  try { process.exit(await run(process.argv.slice(3))); }
+  catch (e) { console.error(`Error: ${e.message}`); process.exit(1); }
+}
 else { console.error(`Unknown command: ${cmd}\n`); console.log(HELP); process.exit(2); }
@@ -0,0 +1,109 @@
+// `pm-claude-skills generate` — turn a doc (URL or file) into a SKILL.md that
+// follows this library's authoring standard. Uses the Anthropic API.
+//
+//   ANTHROPIC_API_KEY=sk-ant-... npx pm-claude-skills generate --from ./process.md
+//   ... generate --from https://example.com/runbook --name incident-runbook
+//   ... generate --from notes.txt --out ./skills --dry-run
+import { writeFileSync, mkdirSync, existsSync, readFileSync } from 'node:fs';
+import { join } from 'node:path';
+import { complete, parseSkill } from './lib/anthropic.mjs';
+
+function getArg(argv, name, def) {
+  const i = argv.indexOf(`--${name}`);
+  return i !== -1 ? argv[i + 1] : def;
+}
+
+// Strip tags/scripts/styles from HTML to rough text (good enough for an LLM).
+function htmlToText(html) {
+  return html
+    .replace(/<script[\s\S]*?<\/script>/gi, ' ')
+    .replace(/<style[\s\S]*?<\/style>/gi, ' ')
+    .replace(/<[^>]+>/g, ' ')
+    .replace(/&[a-z]+;/gi, ' ')
+    .replace(/\s+/g, ' ')
+    .trim();
+}
+
+async function loadSource(from) {
+  if (/^https?:\/\//i.test(from)) {
+    const res = await fetch(from);
+    if (!res.ok) throw new Error(`Could not fetch ${from} (HTTP ${res.status}).`);
+    const text = await res.text();
+    return /<html|<body|<div/i.test(text) ? htmlToText(text) : text;
+  }
+  if (!existsSync(from)) throw new Error(`No such file: ${from}`);
+  return readFileSync(from, 'utf8');
+}
+
+// System prompt sent with every generation request (run() passes it as
+// `system`). It spells out the SKILL.md layout the model must emit: YAML
+// frontmatter with `name` and `description`, followed by fixed sections.
+const META_PROMPT = `You convert a team's documentation into a single Claude/Agent "skill" file (SKILL.md) that follows this exact standard. Output ONLY the file content, starting with the YAML frontmatter — no code fences, no preamble.
+
+Required structure:
+---
+name: <lowercase-hyphenated, derived from the doc's purpose>
+description: "<one sentence on what it does>. Use when <trigger phrases a user would say>. Produces <the concrete artifact>."
+---
+
+# <Title> Skill
+
+<one-line value summary>
+
+## What This Skill Produces
+- <deliverables>
+
+## Required Inputs
+Ask for (if not provided):
+- <inputs to gather; never invent them>
+
+## Process
+1. <steps>
+
+## Output Format
+<a concrete template — headings/tables — of the final artifact>
+
+## Quality Checks
+- [ ] <checks the output must pass>
+
+## Anti-Patterns
+- [ ] Do not <mistakes this skill prevents>
+
+Rules: be specific to the documentation provided; turn its rules/process into the skill. The description MUST contain "Use when" and "Produces". Do not include any text outside the file.`;
+
+export async function run(argv) {
+  const from = getArg(argv, 'from');
+  if (!from || argv.includes('--help')) {
+    console.log('Usage: pm-claude-skills generate --from <url|file> [--name x] [--out dir] [--model m] [--dry-run]');
+    return from ? 0 : 1;
+  }
+  const apiKey = process.env.ANTHROPIC_API_KEY || '';
+  if (!apiKey) { console.error('Set ANTHROPIC_API_KEY to generate a skill.'); return 1; }
+  const model = getArg(argv, 'model', 'claude-sonnet-4-6');
+  const outDir = getArg(argv, 'out', 'skills');
+  const dryRun = argv.includes('--dry-run');
+
+  console.error(`Reading ${from}…`);
+  const source = (await loadSource(from)).slice(0, 24000); // cap context
+
+  console.error(`Generating a SKILL.md with ${model}…`);
+  const out = await complete({
+    apiKey, model, system: META_PROMPT,
+    messages: [{ role: 'user', content: `Documentation to convert into a skill:\n\n${source}` }],
+    maxTokens: 3000,
+  });
+
+  const cleaned = out.replace(/^```[a-z]*\n?/i, '').replace(/\n?```$/i, '').trim();
+  const { meta } = parseSkill(cleaned);
+  const name = getArg(argv, 'name', meta.name);
+  if (!name) { console.error('Could not determine a skill name — pass --name.'); return 1; }
+
+  if (dryRun) {
+    console.log(cleaned);
+    console.error(`\n[dry-run] Would write ${join(outDir, name, 'SKILL.md')}`);
+    return 0;
+  }
+  const dir = join(outDir, name);
+  mkdirSync(dir, { recursive: true });
+  writeFileSync(join(dir, 'SKILL.md'), cleaned + '\n');
+  console.log(`Created ${join(dir, 'SKILL.md')}`);
+  console.log('Next: review it, then validate — node scripts/skillcheck.mjs && node scripts/skill-audit.mjs');
+  return 0;
+}
@@ -0,0 +1,51 @@
+// Minimal, dependency-free Anthropic Messages API client (Node 18+ global fetch).
+// Shared by the GitHub Action runner, the eval harness, and skill generation.
+// No SDK, no install — just a thin POST wrapper.
+
+const API_URL = 'https://api.anthropic.com/v1/messages';
+
+/**
+ * Call the Anthropic Messages API and return the concatenated text output.
+ * @param {object} o
+ * @param {string} o.apiKey  - Anthropic API key.
+ * @param {string} [o.model] - Model id (default claude-sonnet-4-6).
+ * @param {string} [o.system]- System prompt.
+ * @param {Array}  o.messages- [{role, content}] messages.
+ * @param {number} [o.maxTokens]
+ * @returns {Promise<string>}
+ */
+export async function complete({ apiKey, model = 'claude-sonnet-4-6', system, messages, maxTokens = 4096 }) {
+  if (!apiKey) throw new Error('Missing Anthropic API key (set ANTHROPIC_API_KEY).');
+  const res = await fetch(API_URL, {
+    method: 'POST',
+    headers: {
+      'content-type': 'application/json',
+      'x-api-key': apiKey,
+      'anthropic-version': '2023-06-01',
+    },
+    body: JSON.stringify({ model, max_tokens: maxTokens, ...(system ? { system } : {}), messages }),
+  });
+  if (!res.ok) {
+    const body = await res.text().catch(() => '');
+    throw new Error(`Anthropic API ${res.status}: ${body.slice(0, 500)}`);
+  }
+  const data = await res.json();
+  return (data.content || []).map((c) => c.text || '').join('').trim();
+}
+
+/** Parse "name: value" YAML-ish frontmatter + body from a SKILL.md string. */
+export function parseSkill(text) {
+  const m = text.match(/^---\n([\s\S]*?)\n---\n?([\s\S]*)$/);
+  const meta = {};
+  if (m) {
+    for (const line of m[1].split('\n')) {
+      const kv = line.match(/^(\w[\w-]*):\s*(.*)$/);
+      if (kv) {
+        let v = kv[2].trim();
+        if ((v.startsWith('"') && v.endsWith('"')) || (v.startsWith("'") && v.endsWith("'"))) v = v.slice(1, -1);
+        meta[kv[1]] = v;
+      }
+    }
+  }
+  return { meta, body: m ? m[2].trim() : text.trim() };
+}