AI-powered tooling: GitHub Action, generate command, evals + leaderboard (#41)

Three features riding 2026 trends (agentic CI, codegen, evals), sharing one
dependency-free Anthropic client (bin/lib/anthropic.mjs).

1. GitHub Action (action/) — run any skill in a consumer repo's CI:
   uses: mohitagw15856/pm-claude-skills/action@main. Composite action +
   run.mjs (loads the bundled SKILL.md, calls the API, exposes result as a
   step output / file). Docs with auto-PR-description example.

2. generate command — `npx pm-claude-skills generate --from <url|file>` turns
   a team's docs into a SKILL.md following the authoring standard
   (bin/generate.mjs, wired into the CLI; needs ANTHROPIC_API_KEY).

3. Skill evals + Leaderboard — evals/run-evals.mjs runs each case across models
   and scores output with an LLM judge (structure/completeness/usefulness/
   grounding); scripts/build-leaderboard.mjs renders web/leaderboard.html
   (built in the Pages deploy, falls back to clearly-labelled example data).
   Linked from README, catalog, and playground.

Offline-testable parts verified (prompt building, skill loading, graceful
errors, leaderboard render). SkillCheck/audit/exports all green.


Claude-Session: https://claude.ai/code/session_016JWn5jRD5tcEFKrubjQ6Px

Co-authored-by: Claude <noreply@anthropic.com>
This commit is contained in:
mohitagw15856
2026-06-18 08:37:40 +01:00
committed by GitHub
parent 288a340dbe
commit 51bf4be52f
17 changed files with 644 additions and 2 deletions
+7
View File
@@ -153,6 +153,8 @@ Examples:
npx pm-claude-skills add --agent cursor # .mdc rules into ./.cursor/rules
npx pm-claude-skills add --agent windsurf # .md rules into ./.windsurf/rules
npx pm-claude-skills add --agent codex --link
npx pm-claude-skills generate --from <url|file> # turn your docs into a SKILL.md (needs ANTHROPIC_API_KEY)
`;
const opts = parse(process.argv.slice(2));
@@ -161,4 +163,9 @@ if (opts.version) console.log(VERSION);
else if (opts.help || !cmd || cmd === 'help') console.log(HELP);
else if (cmd === 'list') list();
else if (cmd === 'add') add(opts);
else if (cmd === 'generate') {
// generate.mjs is loaded with a dynamic import, so it is only evaluated when this branch runs.
const { run } = await import('./generate.mjs');
// run() receives process.argv from index 3 onward; the value it resolves to becomes the exit code.
try { process.exit(await run(process.argv.slice(3))); }
// A thrown error is reported by its message only, and the process exits with status 1.
catch (e) { console.error(`Error: ${e.message}`); process.exit(1); }
}
else { console.error(`Unknown command: ${cmd}\n`); console.log(HELP); process.exit(2); }
+109
View File
@@ -0,0 +1,109 @@
// `pm-claude-skills generate` — turn a doc (URL or file) into a SKILL.md that
// follows this library's authoring standard. Uses the Anthropic API.
//
// ANTHROPIC_API_KEY=sk-ant-... npx pm-claude-skills generate --from ./process.md
// ... generate --from https://example.com/runbook --name incident-runbook
// ... generate --from notes.txt --out ./skills --dry-run
import { writeFileSync, mkdirSync, existsSync, readFileSync } from 'node:fs';
import { join } from 'node:path';
import { complete, parseSkill } from './lib/anthropic.mjs';
/**
 * Read the value that follows `--<name>` in an argument list.
 *
 * The default is returned when the flag is absent, when it is the last
 * argument (no value follows), or when the next argument is itself a
 * `--flag` — so `--name --dry-run` no longer yields "--dry-run" as the name.
 *
 * @param {string[]} argv - Arguments to search.
 * @param {string} name - Flag name without the leading dashes.
 * @param {string} [def] - Value to return when the flag has no usable value.
 * @returns {string|undefined} The flag's value, or `def`.
 */
function getArg(argv, name, def) {
  const flagIndex = argv.indexOf(`--${name}`);
  if (flagIndex === -1) return def;
  const value = argv[flagIndex + 1];
  // A missing value or another flag means the option was given without a value.
  if (value === undefined || String(value).startsWith('--')) return def;
  return value;
}
/**
 * Reduce HTML to rough plain text (good enough for an LLM).
 *
 * Comments, <script> and <style> elements are dropped, the remaining tags are
 * removed, character references are decoded, and runs of whitespace collapse
 * to a single space.
 *
 * @param {string} html - Raw HTML markup.
 * @returns {string} Whitespace-normalised text.
 */
function htmlToText(html) {
  // Only the handful of named references that matter for readable text are
  // decoded; any other named reference becomes a space.
  const namedEntities = new Map([
    ['amp', '&'],
    ['lt', '<'],
    ['gt', '>'],
    ['quot', '"'],
    ['apos', "'"],
    ['nbsp', ' '],
  ]);
  const decodeEntity = (whole, ref) => {
    if (ref[0] !== '#') return namedEntities.get(ref.toLowerCase()) ?? ' ';
    const isHex = ref[1] === 'x' || ref[1] === 'X';
    const code = Number.parseInt(ref.slice(isHex ? 2 : 1), isHex ? 16 : 10);
    // Out-of-range code points would make String.fromCodePoint throw.
    return Number.isInteger(code) && code > 0 && code <= 0x10ffff ? String.fromCodePoint(code) : ' ';
  };
  return html
    .replace(/<!--[\s\S]*?-->/g, ' ')
    .replace(/<script[\s\S]*?<\/script\s*>/gi, ' ')
    .replace(/<style[\s\S]*?<\/style\s*>/gi, ' ')
    .replace(/<[^>]+>/g, ' ')
    // Decode after tag stripping so an escaped "&lt;b&gt;" is kept as text,
    // and in a single pass so "&amp;lt;" is not decoded twice.
    .replace(/&(#x[0-9a-f]+|#[0-9]+|[a-z][a-z0-9]*);/gi, decodeEntity)
    .replace(/\s+/g, ' ')
    .trim();
}
/**
 * Load the documentation to convert, from either an http(s) URL or a local file.
 *
 * Remote responses that look like HTML are reduced to text via htmlToText;
 * anything else, and every local file, is returned unchanged.
 *
 * @param {string} from - URL (http/https) or filesystem path.
 * @returns {Promise<string>} The source text.
 * @throws {Error} When the URL responds with a non-OK status or the file does not exist.
 */
async function loadSource(from) {
  const isRemote = /^https?:\/\//i.test(from);

  if (!isRemote) {
    if (existsSync(from)) return readFileSync(from, 'utf8');
    throw new Error(`No such file: ${from}`);
  }

  const response = await fetch(from);
  if (!response.ok) {
    throw new Error(`Could not fetch ${from} (HTTP ${response.status}).`);
  }
  const payload = await response.text();
  // Cheap sniff for markup; plain-text and markdown responses pass through untouched.
  const looksLikeHtml = /<html|<body|<div/i.test(payload);
  return looksLikeHtml ? htmlToText(payload) : payload;
}
// System prompt for skill generation: it lays out the SKILL.md structure (YAML
// frontmatter followed by a fixed set of sections) and instructs the model to
// return only the file content. run() passes it as `system` to complete().
const META_PROMPT = `You convert a team's documentation into a single Claude/Agent "skill" file (SKILL.md) that follows this exact standard. Output ONLY the file content, starting with the YAML frontmatter — no code fences, no preamble.
Required structure:
---
name: <lowercase-hyphenated, derived from the doc's purpose>
description: "<one sentence on what it does>. Use when <trigger phrases a user would say>. Produces <the concrete artifact>."
---
# <Title> Skill
<one-line value summary>
## What This Skill Produces
- <deliverables>
## Required Inputs
Ask for (if not provided):
- <inputs to gather; never invent them>
## Process
1. <steps>
## Output Format
<a concrete template — headings/tables — of the final artifact>
## Quality Checks
- [ ] <checks the output must pass>
## Anti-Patterns
- [ ] Do not <mistakes this skill prevents>
Rules: be specific to the documentation provided; turn its rules/process into the skill. The description MUST contain "Use when" and "Produces". Do not include any text outside the file.`;
/**
 * Entry point for `pm-claude-skills generate`: read a doc, ask the model for a
 * SKILL.md, and write it to `<out>/<name>/SKILL.md` (or print it with --dry-run).
 *
 * Recognised arguments: --from <url|file> (required), --name, --out (default
 * "skills"), --model (default "claude-sonnet-4-6"), --dry-run, --help.
 *
 * @param {string[]} argv - Arguments for the command (the CLI passes process.argv.slice(3)).
 * @returns {Promise<number>} Exit code: 0 on success or --help, 1 on a usage,
 *   configuration, or output-validation problem.
 * @throws {Error} Errors from loading the source or from the API call propagate to the caller.
 */
export async function run(argv) {
  const wantsHelp = argv.includes('--help');
  const from = getArg(argv, 'from');
  if (!from || wantsHelp) {
    console.log('Usage: pm-claude-skills generate --from <url|file> [--name x] [--out dir] [--model m] [--dry-run]');
    // Asking for help is a success even without --from; only a missing --from is an error.
    return wantsHelp ? 0 : 1;
  }

  const apiKey = process.env.ANTHROPIC_API_KEY || '';
  if (!apiKey) { console.error('Set ANTHROPIC_API_KEY to generate a skill.'); return 1; }

  const model = getArg(argv, 'model', 'claude-sonnet-4-6');
  const outDir = getArg(argv, 'out', 'skills');
  const dryRun = argv.includes('--dry-run');

  console.error(`Reading ${from}`);
  const source = (await loadSource(from)).slice(0, 24000); // cap context

  console.error(`Generating a SKILL.md with ${model}`);
  const out = await complete({
    apiKey, model, system: META_PROMPT,
    messages: [{ role: 'user', content: `Documentation to convert into a skill:\n\n${source}` }],
    maxTokens: 3000,
  });

  // The prompt forbids code fences, but strip a wrapping pair if the model adds one anyway.
  const cleaned = out.replace(/^```[a-z]*\n?/i, '').replace(/\n?```$/i, '').trim();
  if (!cleaned) { console.error('The model returned no content — nothing to write.'); return 1; }

  const { meta } = parseSkill(cleaned);
  const name = getArg(argv, 'name', meta.name);
  if (!name) { console.error('Could not determine a skill name — pass --name.'); return 1; }

  // The name can come from model output derived from an untrusted document, and
  // it becomes a directory under outDir: refuse anything that could leave it.
  if (/[\\/]/.test(name) || name === '.' || name === '..') {
    console.error(`Invalid skill name "${name}" — it must be a single directory name; pass --name.`);
    return 1;
  }

  const dir = join(outDir, name);
  const target = join(dir, 'SKILL.md');
  if (dryRun) {
    console.log(cleaned);
    console.error(`\n[dry-run] Would write ${target}`);
    return 0;
  }

  mkdirSync(dir, { recursive: true });
  writeFileSync(target, cleaned + '\n');
  console.log(`Created ${target}`);
  console.log('Next: review it, then validate — node scripts/skillcheck.mjs && node scripts/skill-audit.mjs');
  return 0;
}
+51
View File
@@ -0,0 +1,51 @@
// Minimal, dependency-free Anthropic Messages API client (Node 18+ global fetch).
// Shared by the GitHub Action runner, the eval harness, and skill generation.
// No SDK, no install — just a thin POST wrapper.
const API_URL = 'https://api.anthropic.com/v1/messages';

/**
 * Send one request to the Anthropic Messages API and return its text output.
 *
 * The `text` fields of the response's content entries are concatenated in
 * order (entries without text contribute nothing) and the result is trimmed.
 *
 * @param {object} o
 * @param {string} o.apiKey - Anthropic API key.
 * @param {string} [o.model] - Model id (default claude-sonnet-4-6).
 * @param {string} [o.system] - System prompt; left out of the request when empty.
 * @param {Array} o.messages - [{role, content}] messages.
 * @param {number} [o.maxTokens] - Sent as `max_tokens` (default 4096).
 * @returns {Promise<string>} Concatenated, trimmed text output.
 * @throws {Error} When the API key is missing or the API answers with a non-OK status.
 */
export async function complete({ apiKey, model = 'claude-sonnet-4-6', system, messages, maxTokens = 4096 }) {
  if (!apiKey) throw new Error('Missing Anthropic API key (set ANTHROPIC_API_KEY).');

  // Build the payload in request order: model, max_tokens, optional system, messages.
  const payload = { model, max_tokens: maxTokens };
  if (system) payload.system = system;
  payload.messages = messages;

  const headers = {
    'content-type': 'application/json',
    'x-api-key': apiKey,
    'anthropic-version': '2023-06-01',
  };

  const response = await fetch(API_URL, { method: 'POST', headers, body: JSON.stringify(payload) });

  if (!response.ok) {
    // Include at most 500 characters of the error body; an unreadable body is treated as empty.
    const detail = await response.text().catch(() => '');
    throw new Error(`Anthropic API ${response.status}: ${detail.slice(0, 500)}`);
  }

  const data = await response.json();
  const parts = data.content || [];
  return parts.reduce((joined, part) => joined + (part.text || ''), '').trim();
}
/**
 * Parse "name: value" YAML-ish frontmatter + body from a SKILL.md string.
 *
 * Only flat `key: value` lines are understood; a value wrapped in matching
 * single or double quotes has the quotes removed. Both LF and CRLF line
 * endings are accepted, and a leading byte-order mark is ignored, so files
 * saved on Windows parse the same way as Unix ones.
 *
 * @param {string} text - Full SKILL.md content.
 * @returns {{meta: Object<string, string>, body: string}} Frontmatter fields and the
 *   trimmed body; when no frontmatter is found, `meta` is empty and `body` is
 *   the whole trimmed input.
 */
export function parseSkill(text) {
  const m = text.match(/^\uFEFF?---\r?\n([\s\S]*?)\r?\n---(?:\r?\n)?([\s\S]*)$/);
  const meta = {};
  if (!m) return { meta, body: text.trim() };

  // Split on either line ending: `.` never matches "\r", so a stray one would
  // stop the key/value pattern below from matching.
  for (const line of m[1].split(/\r?\n/)) {
    const kv = line.match(/^(\w[\w-]*):\s*(.*)$/);
    if (!kv) continue;
    let value = kv[2].trim();
    const isDoubleQuoted = value.startsWith('"') && value.endsWith('"');
    const isSingleQuoted = value.startsWith("'") && value.endsWith("'");
    if (isDoubleQuoted || isSingleQuoted) value = value.slice(1, -1);
    meta[kv[1]] = value;
  }
  return { meta, body: m[2].trim() };
}