From b7aa4aa2d9195aec49f238717db02ac88d404815 Mon Sep 17 00:00:00 2001 From: mohitagw15856 <119053560+mohitagw15856@users.noreply.github.com> Date: Thu, 18 Jun 2026 20:13:31 +0000 Subject: [PATCH] chore(evals): refresh leaderboard results --- evals/results.json | 92 +++++++++++++++++++++++----------------------- 1 file changed, 46 insertions(+), 46 deletions(-) diff --git a/evals/results.json b/evals/results.json index 93bd1e9..b40de59 100644 --- a/evals/results.json +++ b/evals/results.json @@ -1,5 +1,5 @@ { - "generatedAt": "2026-06-18T12:40:14.995Z", + "generatedAt": "2026-06-18T20:13:31.726Z", "judge": "claude-opus-4-8", "models": [ "claude-sonnet-4-6", @@ -41,20 +41,20 @@ "structure": 5, "completeness": 5, "usefulness": 5, - "grounding": 4 + "grounding": 5 }, - "overall": 4.75 + "overall": 5 }, { "skill": "prd-template", "model": "claude-haiku-4-5-20251001", "scores": { "structure": 5, - "completeness": 4, + "completeness": 5, "usefulness": 5, - "grounding": 3 + "grounding": 4 }, - "overall": 4.25 + "overall": 4.75 }, { "skill": "cs-health-scorecard", @@ -63,9 +63,9 @@ "structure": 5, "completeness": 5, "usefulness": 5, - "grounding": 5 + "grounding": 4 }, - "overall": 5 + "overall": 4.75 }, { "skill": "cs-health-scorecard", @@ -84,44 +84,44 @@ "scores": { "structure": 5, "completeness": 5, - "usefulness": 5, - "grounding": 5 - }, - "overall": 5 - }, - { - "skill": "executive-summary", - "model": "claude-haiku-4-5-20251001", - "scores": { - "structure": 5, - "completeness": 5, - "usefulness": 5, - "grounding": 4 - }, - "overall": 4.75 - }, - { - "skill": "competitive-analysis", - "model": "claude-sonnet-4-6", - "scores": { - "structure": 5, - "completeness": 4, - "usefulness": 5, - "grounding": 5 - }, - "overall": 4.75 - }, - { - "skill": "competitive-analysis", - "model": "claude-haiku-4-5-20251001", - "scores": { - "structure": 5, - "completeness": 4, - "usefulness": 5, + "usefulness": 4, "grounding": 4 }, "overall": 4.5 }, + { + "skill": "executive-summary", + "model": "claude-haiku-4-5-20251001", + "scores": { + "structure": 5, + "completeness": 4, + "usefulness": 4, + "grounding": 4 + }, + "overall": 4.25 + }, + { + "skill": "competitive-analysis", + "model": "claude-sonnet-4-6", + "scores": { + "structure": 5, + "completeness": 4, + "usefulness": 5, + "grounding": 5 + }, + "overall": 4.75 + }, + { + "skill": "competitive-analysis", + "model": "claude-haiku-4-5-20251001", + "scores": { + "structure": 5, + "completeness": 5, + "usefulness": 5, + "grounding": 4 + }, + "overall": 4.75 + }, { "skill": "sprint-planning", "model": "claude-sonnet-4-6", @@ -138,11 +138,11 @@ "model": "claude-haiku-4-5-20251001", "scores": { "structure": 5, - "completeness": 4, - "usefulness": 4, - "grounding": 3 + "completeness": 5, + "usefulness": 5, + "grounding": 4 }, - "overall": 4 + "overall": 4.75 } ] } \ No newline at end of file