Merge pull request #59 from mohitagw15856/eval-results

chore(evals): refresh leaderboard results
This commit is contained in:
mohitagw15856
2026-06-18 21:14:03 +01:00
committed by GitHub
+46 -46
View File
@@ -1,5 +1,5 @@
{ {
"generatedAt": "2026-06-18T12:40:14.995Z", "generatedAt": "2026-06-18T20:13:31.726Z",
"judge": "claude-opus-4-8", "judge": "claude-opus-4-8",
"models": [ "models": [
"claude-sonnet-4-6", "claude-sonnet-4-6",
@@ -41,20 +41,20 @@
"structure": 5, "structure": 5,
"completeness": 5, "completeness": 5,
"usefulness": 5, "usefulness": 5,
"grounding": 4 "grounding": 5
}, },
"overall": 4.75 "overall": 5
}, },
{ {
"skill": "prd-template", "skill": "prd-template",
"model": "claude-haiku-4-5-20251001", "model": "claude-haiku-4-5-20251001",
"scores": { "scores": {
"structure": 5, "structure": 5,
"completeness": 4, "completeness": 5,
"usefulness": 5, "usefulness": 5,
"grounding": 3 "grounding": 4
}, },
"overall": 4.25 "overall": 4.75
}, },
{ {
"skill": "cs-health-scorecard", "skill": "cs-health-scorecard",
@@ -63,9 +63,9 @@
"structure": 5, "structure": 5,
"completeness": 5, "completeness": 5,
"usefulness": 5, "usefulness": 5,
"grounding": 5 "grounding": 4
}, },
"overall": 5 "overall": 4.75
}, },
{ {
"skill": "cs-health-scorecard", "skill": "cs-health-scorecard",
@@ -84,44 +84,44 @@
"scores": { "scores": {
"structure": 5, "structure": 5,
"completeness": 5, "completeness": 5,
"usefulness": 5, "usefulness": 4,
"grounding": 5
},
"overall": 5
},
{
"skill": "executive-summary",
"model": "claude-haiku-4-5-20251001",
"scores": {
"structure": 5,
"completeness": 5,
"usefulness": 5,
"grounding": 4
},
"overall": 4.75
},
{
"skill": "competitive-analysis",
"model": "claude-sonnet-4-6",
"scores": {
"structure": 5,
"completeness": 4,
"usefulness": 5,
"grounding": 5
},
"overall": 4.75
},
{
"skill": "competitive-analysis",
"model": "claude-haiku-4-5-20251001",
"scores": {
"structure": 5,
"completeness": 4,
"usefulness": 5,
"grounding": 4 "grounding": 4
}, },
"overall": 4.5 "overall": 4.5
}, },
{
"skill": "executive-summary",
"model": "claude-haiku-4-5-20251001",
"scores": {
"structure": 5,
"completeness": 4,
"usefulness": 4,
"grounding": 4
},
"overall": 4.25
},
{
"skill": "competitive-analysis",
"model": "claude-sonnet-4-6",
"scores": {
"structure": 5,
"completeness": 4,
"usefulness": 5,
"grounding": 5
},
"overall": 4.75
},
{
"skill": "competitive-analysis",
"model": "claude-haiku-4-5-20251001",
"scores": {
"structure": 5,
"completeness": 5,
"usefulness": 5,
"grounding": 4
},
"overall": 4.75
},
{ {
"skill": "sprint-planning", "skill": "sprint-planning",
"model": "claude-sonnet-4-6", "model": "claude-sonnet-4-6",
@@ -138,11 +138,11 @@
"model": "claude-haiku-4-5-20251001", "model": "claude-haiku-4-5-20251001",
"scores": { "scores": {
"structure": 5, "structure": 5,
"completeness": 4, "completeness": 5,
"usefulness": 4, "usefulness": 5,
"grounding": 3 "grounding": 4
}, },
"overall": 4 "overall": 4.75
} }
] ]
} }