diff --git a/.github/workflows/eval-leaderboard.yml b/.github/workflows/eval-leaderboard.yml
index 4520082..ff24cdc 100644
--- a/.github/workflows/eval-leaderboard.yml
+++ b/.github/workflows/eval-leaderboard.yml
@@ -29,6 +29,7 @@ concurrency:
 jobs:
   evaluate:
     runs-on: ubuntu-latest
+    timeout-minutes: 20
     steps:
       - name: Checkout
         uses: actions/checkout@v4
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 2267b55..32be794 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -9,6 +9,12 @@ each new wave of skills bumps the **major** version, extensions and fixes bump
 
 ## [Unreleased]
 
+### Changed
+- **Faster, hang-proof evals.** The Anthropic client now has a per-request timeout (120s)
+  and limited retries with backoff (429/5xx/timeout/network error); the eval harness runs
+  cases concurrently (default 4). The leaderboard workflow has a 20-minute job timeout. A
+  previously sequential 24-call run now finishes in a few minutes and can't stall a job indefinitely.
+
 ### Added
 - **One-click leaderboard updates in CI** — `.github/workflows/eval-leaderboard.yml`
   ("Update Skill Leaderboard") runs the evals with the `ANTHROPIC_API_KEY` secret, commits
diff --git a/bin/lib/anthropic.mjs b/bin/lib/anthropic.mjs
index 409d5b4..445364e 100644
--- a/bin/lib/anthropic.mjs
+++ b/bin/lib/anthropic.mjs
@@ -6,31 +6,57 @@ const API_URL = 'https://api.anthropic.com/v1/messages';
 
 /**
  * Call the Anthropic Messages API and return the concatenated text output.
+ * Adds a per-request timeout and limited retries so a slow/transient failure
+ * can't hang a CI job forever.
  * @param {object} o
  * @param {string} o.apiKey  - Anthropic API key.
  * @param {string} [o.model] - Model id (default claude-sonnet-4-6).
  * @param {string} [o.system]- System prompt.
  * @param {Array}  o.messages- [{role, content}] messages.
  * @param {number} [o.maxTokens]
+ * @param {number} [o.timeoutMs] - Per-request timeout (default 120s).
+ * @param {number} [o.retries]   - Retries on timeout / 429 / 5xx (default 2).
  * @returns {Promise<string>}
  */
-export async function complete({ apiKey, model = 'claude-sonnet-4-6', system, messages, maxTokens = 4096 }) {
+export async function complete({ apiKey, model = 'claude-sonnet-4-6', system, messages, maxTokens = 4096, timeoutMs = 120000, retries = 2 }) {
   if (!apiKey) throw new Error('Missing Anthropic API key (set ANTHROPIC_API_KEY).');
-  const res = await fetch(API_URL, {
-    method: 'POST',
-    headers: {
-      'content-type': 'application/json',
-      'x-api-key': apiKey,
-      'anthropic-version': '2023-06-01',
-    },
-    body: JSON.stringify({ model, max_tokens: maxTokens, ...(system ? { system } : {}), messages }),
-  });
-  if (!res.ok) {
-    const body = await res.text().catch(() => '');
-    throw new Error(`Anthropic API ${res.status}: ${body.slice(0, 500)}`);
+  let lastErr; // most recent retryable failure
+  for (let attempt = 0; attempt <= retries; attempt++) { // at most retries + 1 attempts in total
+    const ctrl = new AbortController(); // fresh controller per attempt
+    const timer = setTimeout(() => ctrl.abort(), timeoutMs); // armed until the `finally` below clears it
+    try {
+      const res = await fetch(API_URL, {
+        method: 'POST',
+        headers: {
+          'content-type': 'application/json',
+          'x-api-key': apiKey,
+          'anthropic-version': '2023-06-01',
+        },
+        body: JSON.stringify({ model, max_tokens: maxTokens, ...(system ? { system } : {}), messages }),
+        signal: ctrl.signal,
+      });
+      if (res.ok) {
+        const data = await res.json();
+        return (data.content || []).map((c) => c.text || '').join('').trim();
+      }
+      const body = await res.text().catch(() => '');
+      // Retry 429 (rate limit) and 5xx while attempts remain; any other status, or the last attempt, fails now.
+      if ((res.status === 429 || res.status >= 500) && attempt < retries) {
+        lastErr = new Error(`Anthropic API ${res.status}`); // remembered; falls through to the backoff below
+      } else {
+        throw new Error(`Anthropic API ${res.status}: ${body.slice(0, 500)}`);
+      }
+    } catch (e) { // also receives the throw just above; an abort is reworded as a timeout error
+      if (e.name === 'AbortError') e = new Error(`Anthropic API request timed out after ${timeoutMs}ms`);
+      const retryable = /timed out/.test(e.message) || e.name === 'TypeError' || /Anthropic API (429|5\d\d)/.test(e.message);
+      if (!retryable || attempt >= retries) throw e;
+      lastErr = e;
+    } finally { // runs on return, throw, and retry alike
+      clearTimeout(timer);
+    }
+    await new Promise((r) => setTimeout(r, 1000 * 2 ** attempt)); // backoff of 1000 * 2^attempt ms; only reached when attempt < retries (1s, 2s by default)
   }
-  const data = await res.json();
-  return (data.content || []).map((c) => c.text || '').join('').trim();
+  throw lastErr || new Error('Anthropic API request failed.'); // only reachable when the loop body never runs (e.g. retries < 0)
 }
 
 /** Parse "name: value" YAML-ish frontmatter + body from a SKILL.md string. */
diff --git a/evals/run-evals.mjs b/evals/run-evals.mjs
index 9669fdd..9867a67 100644
--- a/evals/run-evals.mjs
+++ b/evals/run-evals.mjs
@@ -61,33 +61,53 @@ function parseScores(text) {
   return s;
 }
 
+// Run an async worker over `items` with at most `limit` in flight.
+async function pool(items, limit, worker) {
+  const out = [];
+  let i = 0;
+  await Promise.all(Array.from({ length: Math.min(limit, items.length) }, async () => {
+    while (i < items.length) {
+      const idx = i++;
+      out[idx] = await worker(items[idx]);
+    }
+  }));
+  return out;
+}
+
+async function scoreTask({ c, body, description, model }) { // one (case, model) run: generate, judge, score
+  try { // any failure in either API call or in scoring is reported below and yields null
+    const output = await complete({ apiKey, model, system: runPrompt(body), messages: [{ role: 'user', content: c.input }], maxTokens: 3000 });
+    const judged = await complete({ apiKey, model: judge, messages: [{ role: 'user', content: judgePrompt(description, output) }], maxTokens: 200 });
+    const scores = parseScores(judged); // indexed by dimension name below
+    const overall = DIMENSIONS.reduce((a, d) => a + scores[d], 0) / DIMENSIONS.length; // mean across DIMENSIONS
+    process.stderr.write(`✓ ${c.skill} on ${model} — ${overall.toFixed(2)}/5\n`);
+    return { skill: c.skill, model, scores, overall: Math.round(overall * 100) / 100 }; // overall rounded to 2 decimals
+  } catch (e) { // log and continue: the error is not rethrown
+    process.stderr.write(`✗ ${c.skill} on ${model} — FAILED (${e.message})\n`);
+    return null; // null marks a failed run for the caller
+  }
+}
+
 async function main() {
   if (!apiKey) { console.error('Set ANTHROPIC_API_KEY to run evals.'); process.exit(1); }
+  const concurrency = parseInt(arg('concurrency', '4'), 10) || 4; // NaN or 0 falls back to 4
   const { cases } = JSON.parse(readFileSync(casesPath, 'utf8'));
-  const results = [];
 
+  // Build the full (case × model) task list; cases without a SKILL.md are skipped.
+  const tasks = [];
   for (const c of cases) {
     const skillFile = join(root, 'skills', c.skill, 'SKILL.md');
     if (!existsSync(skillFile)) { console.error(`skip ${c.skill}: no SKILL.md`); continue; }
     const { meta, body } = parseSkill(readFileSync(skillFile, 'utf8'));
-    for (const model of models) {
-      process.stderr.write(`Running ${c.skill} on ${model}… `);
-      try {
-        const output = await complete({ apiKey, model, system: runPrompt(body), messages: [{ role: 'user', content: c.input }], maxTokens: 3000 });
-        const judged = await complete({ apiKey, model: judge, messages: [{ role: 'user', content: judgePrompt(meta.description || c.skill, output) }], maxTokens: 200 });
-        const scores = parseScores(judged);
-        const overall = DIMENSIONS.reduce((a, d) => a + scores[d], 0) / DIMENSIONS.length;
-        results.push({ skill: c.skill, model, scores, overall: Math.round(overall * 100) / 100 });
-        process.stderr.write(`${overall.toFixed(2)}/5\n`);
-      } catch (e) {
-        process.stderr.write(`FAILED (${e.message})\n`);
-      }
-    }
+    for (const model of models) tasks.push({ c, body, description: meta.description || c.skill, model });
   }
 
+  process.stderr.write(`Scoring ${tasks.length} runs (concurrency ${concurrency})…\n`);
+  const results = (await pool(tasks, concurrency, scoreTask)).filter(Boolean); // scoreTask returns null for a failed run; drop those
+
   const out = { generatedAt: new Date().toISOString(), judge, models, dimensions: DIMENSIONS, results };
   writeFileSync(outPath, JSON.stringify(out, null, 2));
-  console.log(`\nWrote ${outPath} — ${results.length} scored runs. Build the page: node scripts/build-leaderboard.mjs`);
+  console.log(`\nWrote ${outPath} — ${results.length}/${tasks.length} scored runs. Build the page: node scripts/build-leaderboard.mjs`);
 }
 
 main();