Scorecard Format
JSON schema and TypeScript Zod schema for scorecards
Summary
Machine-readable scorecard format (JSON + TypeScript Zod schema) capturing project metadata, dimension scores (0–3 + confidence level), evidence (file paths, grep, command output), findings (structured gaps + recommendations), overall rating (Human-only to Agent-first), and trend comparison to previous scorecard. Standard format enables tool consumption, historical tracking, and cross-project benchmarking. Includes full Zod schema for validation.
- Metadata: Project name, date, repository, audit scope
- Dimension scores: 0–3 per dimension, confidence (high/medium/low)
- Evidence: File paths, grep results, command output, glob findings
- Findings: Structured gaps with severity, file reference, fix steps
- Overall rating: 0–30 scale mapped to human-readable band
- Trend: Delta from previous scorecard (if re-audit)
Agentify emits structured scorecards as JSON. This page documents the schema, provides a TypeScript/Zod definition, and shows a worked example.
Schema Overview
A scorecard captures:
- Project metadata (name, date, repository)
- Dimension scores (0/1/2/3 and confidence)
- Evidence (file paths, grep results, command outputs)
- Findings (structured gaps and recommendations)
- Overall rating (human-only to agent-first)
- Trend (comparison to previous scorecard)
TypeScript Zod Schema
import { z } from 'zod';
// The ten audit dimensions. Every scorecard carries one Dimension entry per
// name (see Scorecard.dimensions below); dimensions that do not apply still
// appear, with null scores.
const DimensionName = z.enum([
  'api-surface', 'cli-design', 'mcp-server', 'discovery-aeo', 'authentication',
  'error-handling', 'tool-design', 'context-files', 'multi-agent', 'testing-evaluation',
]);

// How certain the auditor is about a given dimension's score.
const ConfidenceLevel = z.enum(['high', 'medium', 'low']);
// One piece of raw evidence behind a score: a file that was read, a grep or
// glob that was run, or a command whose output was captured.
const evidenceFields = {
  type: z.enum(['file', 'grep', 'glob', 'command']),
  description: z.string(),
  location: z.string().describe('File path, grep pattern, glob pattern, or command'),
  result: z.string().describe('Quoted output or finding'),
};
const EvidenceItem = z.object(evidenceFields);
// One structured gap discovered during the audit.
//
// Fix: Dimension.findings is documented as holding "Finding IDs referencing
// FindingCluster.findings", and the example scorecard uses slugs such as
// 'api-missing-arazzo' — but Finding previously had no ID field, so those
// references could not resolve against anything. An optional `id` makes the
// cross-reference representable while staying backward-compatible with
// scorecards produced before IDs existed.
const Finding = z.object({
  id: z.string().optional().describe('Stable slug referenced by Dimension.findings'),
  what: z.string().describe('Specific issue with file:line reference'),
  why: z.string().describe('Why this matters for agent consumption'),
  fix: z.string().describe('Concrete steps to resolve'),
  dimension: DimensionName,
  currentScore: z.number().int().min(0).max(3),
  targetScore: z.number().int().min(0).max(3),
  severity: z.enum(['critical', 'high', 'medium', 'low']),
});
// A named group of related findings meant to be tackled together, with an
// ordering hint (`dependencies`) relative to other clusters.
const clusterFields = {
  name: z.string().describe('Descriptive cluster name'),
  rationale: z.string().describe('Why these findings belong together'),
  findings: z.array(Finding),
  suggestedApproach: z.string().describe('1-2 sentences on how to tackle'),
  dependencies: z.array(z.string()).describe('Other cluster names to complete first'),
};
const FindingCluster = z.object(clusterFields);
// Result for a single audit dimension. A dimension that does not apply to
// the project carries null for both score and confidence.
const dimensionScore = z.number().int().min(0).max(3);
const Dimension = z.object({
  name: DimensionName,
  score: dimensionScore.nullable().describe('null if N/A'),
  confidence: ConfidenceLevel.nullable().describe('null if N/A'),
  evidence: z.array(EvidenceItem),
  findings: z.array(z.string()).describe('Finding IDs referencing FindingCluster.findings'),
  notes: z.string().optional(),
});
// Maturity bands, from least to most agent-capable (human-only → agent-first).
const OverallRating = z.enum(['human-only', 'agent-tolerant', 'agent-ready', 'agent-first']);
// Sub-sections of the top-level scorecard, split out as named schemas for
// readability. Behavior is identical to declaring them inline.

// Identity of the audited project.
const ProjectInfo = z.object({
  name: z.string(),
  repository: z.string().url().optional(),
  description: z.string().optional(),
});

// Aggregate result across all scored dimensions.
const OverallResult = z.object({
  totalScore: z.number().int().min(0).max(30),
  scaledScore: z.number().int().min(0).max(30).describe('Scaled if dimensions are N/A'),
  rating: OverallRating,
  percentile: z.number().min(0).max(100).optional(),
});

// Comparison against a previous scorecard; only present on re-audits.
const TrendInfo = z.object({
  previousDate: z.string().datetime().optional(),
  previousScore: z.number().int().optional(),
  delta: z.number().int().optional(),
  status: z.enum(['improved', 'stable', 'regressed']).optional(),
});

// Top-level scorecard document (format version '1.0').
const Scorecard = z.object({
  version: z.literal('1.0'),
  project: ProjectInfo,
  date: z.string().datetime().describe('ISO 8601, e.g., 2026-04-17T15:30:00Z'),
  auditor: z.string().optional(),
  dimensions: z.array(Dimension).length(10), // always 10 entries; N/A dimensions use null scores
  findingClusters: z.array(FindingCluster),
  overall: OverallResult,
  trend: TrendInfo.optional(),
});
export type Scorecard = z.infer<typeof Scorecard>;
Example Scorecard
{
"version": "1.0",
"project": {
"name": "acme-api",
"repository": "https://github.com/acme/api",
"description": "Customer-facing REST API for SaaS platform"
},
"date": "2026-04-17T15:30:00Z",
"auditor": "surface@1.2.0",
"dimensions": [
{
"name": "api-surface",
"score": 2,
"confidence": "high",
"evidence": [
{
"type": "file",
"description": "OpenAPI spec exists",
"location": "docs/api/openapi.yaml",
"result": "40 endpoints, operationId on 38 (95%), descriptions 30-50 words"
},
{
"type": "grep",
"description": "Agent-oriented descriptions",
"location": "grep -A 2 'description:' docs/api/openapi.yaml",
"result": "Sample: 'Search for users by email. Use when you need to find user records.' (agent context present)"
},
{
"type": "grep",
"description": "Arazzo workflows",
"location": "grep -r 'Arazzo\\|x-action' docs/",
"result": "0 matches. No workflow definitions."
}
],
"findings": ["api-missing-arazzo"],
"notes": "OpenAPI is well-documented with agent-oriented descriptions. Missing Arazzo workflows prevent multi-step operation definition."
},
{
"name": "cli-design",
"score": null,
"confidence": null,
"evidence": [],
"findings": [],
"notes": "N/A: Project is a web API with no CLI tool"
},
{
"name": "mcp-server",
"score": 0,
"confidence": "high",
"evidence": [
{
"type": "glob",
"description": "MCP server config",
"location": "glob('**/.mcp.json')",
"result": "0 files"
},
{
"type": "grep",
"description": "MCP SDK imports",
"location": "grep -r '@modelcontextprotocol/sdk' .",
"result": "0 matches"
}
],
"findings": ["mcp-not-implemented"],
"notes": "No MCP server. Project could expose tools to agents via MCP."
},
{
"name": "discovery-aeo",
"score": 1,
"confidence": "medium",
"evidence": [
{
"type": "file",
"description": "AGENTS.md",
"location": "AGENTS.md (repo root)",
"result": "45 lines, covers commands and conventions"
},
{
"type": "glob",
"description": "Discovery files",
"location": "glob('**/llms.txt')",
"result": "0 files"
},
{
"type": "grep",
"description": "JSON-LD markup",
"location": "grep -r 'application/ld+json' docs/",
"result": "0 matches"
}
],
"findings": ["discovery-llms-txt-missing", "discovery-json-ld-missing"],
"notes": "AGENTS.md present but minimal. No llms.txt or structured data for agent discovery."
},
{
"name": "authentication",
"score": 2,
"confidence": "high",
"evidence": [
{
"type": "file",
"description": "OAuth config",
"location": "src/auth/oauth.ts:10–45",
"result": "Client credentials grant implemented. Token expires in 1 hour."
},
{
"type": "grep",
"description": "JWT validation",
"location": "grep -A 5 'verify' src/auth/jwt.ts",
"result": "Validates signature, iss, aud, exp claims"
},
{
"type": "grep",
"description": "Token exchange (RFC 8693)",
"location": "grep -r 'token-exchange\\|RFC 8693' src/",
"result": "0 matches"
}
],
"findings": ["auth-missing-token-exchange"],
"notes": "OAuth 2.1 M2M with scoped tokens. Missing RFC 8693 Token Exchange for narrowly-scoped ephemeral tokens."
},
{
"name": "error-handling",
"score": 2,
"confidence": "high",
"evidence": [
{
"type": "file",
"description": "Error middleware",
"location": "src/middleware/errors.ts:50–80",
"result": "Returns RFC 7807 Problem Details: { type, title, status, detail, instance }"
},
{
"type": "grep",
"description": "is_retriable field",
"location": "grep -r 'is_retriable\\|isRetriable' src/",
"result": "4 matches in error handler (not complete across all errors)"
},
{
"type": "command",
"description": "Error response test",
"location": "curl -s http://localhost:3000/api/users/invalid -H 'Accept: application/json'",
"result": "{ type: 'https://api.example.com/errors/not-found', title: 'Not Found', status: 404, detail: 'User ID not valid', is_retriable: false }"
}
],
"findings": ["error-missing-suggestions", "error-incomplete-retriable"],
"notes": "RFC 7807 implemented. is_retriable present but not on all error types. Missing suggestions array and doc_uri."
},
{
"name": "tool-design",
"score": 1,
"confidence": "medium",
"evidence": [
{
"type": "glob",
"description": "Tool definitions",
"location": "glob('**/tools/**')",
"result": "tools/ directory found with 6 tool files"
},
{
"type": "file",
"description": "Tool descriptions",
"location": "tools/search-users.ts:1–20",
"result": "Description: 'Search for users' (3 words, no agent context)"
},
{
"type": "grep",
"description": "toModelOutput",
"location": "grep -r 'toModelOutput' tools/",
"result": "0 matches"
}
],
"findings": ["tools-weak-descriptions", "tools-no-token-optimization"],
"notes": "Basic tool schemas exist but descriptions lack 'use when' context and disambiguation. No token optimization."
},
{
"name": "context-files",
"score": 2,
"confidence": "high",
"evidence": [
{
"type": "file",
"description": "AGENTS.md quality",
"location": "AGENTS.md",
"result": "Commands section present with exact invocations. Permission boundaries defined (always/ask-first/never). 45 lines."
},
{
"type": "file",
"description": "CLAUDE.md presence",
"location": "CLAUDE.md",
"result": "Not found"
},
{
"type": "grep",
"description": "Cursor rules",
"location": "glob('**/.cursor/rules/**')",
"result": "0 files"
}
],
"findings": ["context-multi-tool-missing"],
"notes": "Good AGENTS.md. Missing Claude-specific and Cursor-specific overrides for multi-tool context."
},
{
"name": "multi-agent",
"score": null,
"confidence": null,
"evidence": [],
"findings": [],
"notes": "N/A: Project is a web API, not an agent system"
},
{
"name": "testing-evaluation",
"score": 1,
"confidence": "medium",
"evidence": [
{
"type": "glob",
"description": "Test files",
"location": "glob('**/*.test.ts')",
"result": "8 test files found"
},
{
"type": "grep",
"description": "MCP server tests",
"location": "grep -r 'InMemoryTransport' tests/",
"result": "0 matches (no MCP server to test)"
},
{
"type": "grep",
"description": "Error recovery tests",
"location": "grep -r 'isRetriable\\|retry' tests/",
"result": "2 matches (minimal coverage)"
},
{
"type": "command",
"description": "Test suite run",
"location": "npm test -- --coverage",
"result": "42 tests, 81% coverage. No error recovery or multi-step flow tests."
}
],
"findings": ["testing-error-recovery-missing", "testing-multi-step-missing"],
"notes": "Basic tests exist but no agent-specific evaluation. Missing error recovery and multi-step flow testing."
}
],
"findingClusters": [
{
"name": "Agent tool discoverability",
"rationale": "Agents struggle to find and understand tools because OpenAPI descriptions lack agent context and no MCP server exists.",
"findings": [
{
"what": "tools/search-users.ts:1–20: Tool descriptions are terse (3 words) with no 'use when' context",
"why": "Agents use tool descriptions as prompts. Terse descriptions lead to incorrect tool selection.",
"fix": "Rewrite descriptions to include: when to use, when NOT to use, required parameters, return type. Target 30-50 words.",
"dimension": "tool-design",
"currentScore": 1,
"targetScore": 2,
"severity": "high"
},
{
"what": "mcp-server: 0 files matching **/.mcp.json (no MCP server exists)",
"why": "Agents consume MCP servers to discover tools. Without MCP, agents must rely on API docs or CLI help.",
"fix": "Create .mcp.json and MCP server implementation. Expose 6 existing tools via MCP. Use InMemoryTransport for testing.",
"dimension": "mcp-server",
"currentScore": 0,
"targetScore": 2,
"severity": "critical"
},
{
"what": "llms.txt missing from web root",
"why": "Agent web crawlers look for llms.txt to understand project scope and API surface. Without it, discovery is harder.",
"fix": "Create llms.txt at project root with H2 sections for API, CLI, tools, and links to key docs.",
"dimension": "discovery-aeo",
"currentScore": 1,
"targetScore": 2,
"severity": "medium"
}
],
"suggestedApproach": "Start with tool descriptions (1 hour). Add MCP server (4 hours). Add llms.txt (30 min). This cluster improves tool discoverability score from 1→2 across three dimensions.",
"dependencies": []
},
{
"name": "Error recovery for agents",
"rationale": "Agents cannot recover from errors because error responses lack suggestions, complete is_retriable markers, and doc_uri links.",
"findings": [
{
"what": "src/middleware/errors.ts: is_retriable present on ~50% of error types, missing on validation errors",
"why": "Agents need to know which errors are retriable. Partial implementation leaves ambiguity.",
"fix": "Add is_retriable to all error handlers. Validation errors are not retriable. Rate limit 429 is retriable.",
"dimension": "error-handling",
"currentScore": 2,
"targetScore": 3,
"severity": "high"
},
{
"what": "Error responses lack suggestions array (e.g., 'retry after 5s', 'increase timeout')",
"why": "Agents use suggestions to decide next steps. Without them, agents may retry immediately or give up.",
"fix": "Add suggestions[] array to error schema. Examples: rate limit 429 → ['wait 60s', 'reduce request size'].",
"dimension": "error-handling",
"currentScore": 2,
"targetScore": 3,
"severity": "high"
},
{
"what": "tests/: No error recovery tests. grep 'retry' returns 2 matches (minimal).",
"why": "Without tests, error recovery logic regresses. Agents depend on consistent behavior.",
"fix": "Add tests for: rate limit → retry, validation error → no retry, timeout → retry. Cover multi-step recovery.",
"dimension": "testing-evaluation",
"currentScore": 1,
"targetScore": 2,
"severity": "medium"
}
],
"suggestedApproach": "Update error schema (1 hour). Add suggestions logic (1 hour). Write recovery tests (2 hours). Total: 4 hours.",
"dependencies": []
}
],
"overall": {
"totalScore": 11,
"scaledScore": 14,
"rating": "agent-tolerant",
"percentile": 35
},
"trend": {
"previousDate": "2026-03-15T10:00:00Z",
"previousScore": 8,
"delta": 3,
"status": "improved"
}
}
Parsing the Scorecard in Code
import { Scorecard } from './scorecard-schema';
/**
 * Read a scorecard JSON file from disk and validate it against the schema.
 * Throws if the file cannot be read, the JSON is malformed, or the document
 * does not satisfy the Scorecard schema (Zod's parse throws on failure).
 */
async function loadScorecard(filePath: string): Promise<Scorecard> {
  const contents = await Deno.readTextFile(filePath);
  const parsed: unknown = JSON.parse(contents);
  return Scorecard.parse(parsed);
}
/**
 * Print a human-readable summary of a scorecard to stdout: project name,
 * date, overall rating, and one line per scored dimension.
 */
function displaySummary(scorecard: Scorecard): void {
  const { overall, project } = scorecard;
  // Build per-dimension lines first; N/A dimensions (null score) are omitted.
  const scoreLines: string[] = [];
  for (const d of scorecard.dimensions) {
    if (d.score !== null) {
      scoreLines.push(`  ${d.name}: ${d.score}/3 (${d.confidence})`);
    }
  }
  console.log(`
Project: ${project.name}
Date: ${scorecard.date}
Rating: ${overall.rating} (${overall.scaledScore}/30)
Scores:
${scoreLines.join('\n')}
`);
}
// Fix: the return type referenced `Finding`, but only `Scorecard` is imported
// from './scorecard-schema' — `Finding` was an undeclared name and did not
// compile. Derive the finding element type from Scorecard via indexed access
// instead of changing the schema module's exports.
type Finding = Scorecard['findingClusters'][number]['findings'][number];

/**
 * Flatten the findings of every cluster and return only critical/high ones,
 * critical first. Returns a new array (flatMap allocates a fresh array,
 * which sort then reorders in place); the scorecard is not mutated.
 */
function findHighSeverityIssues(scorecard: Scorecard): Finding[] {
  // Hoisted out of the comparator so it is not rebuilt on every comparison.
  const severityRank = { critical: 3, high: 2, medium: 1, low: 0 } as const;
  return scorecard.findingClusters
    .flatMap((cluster) => cluster.findings)
    .filter((f) => f.severity === 'critical' || f.severity === 'high')
    .sort((a, b) => severityRank[b.severity] - severityRank[a.severity]);
}
Storage & Versioning
Scorecards are stored in the audited project:
docs/surface/scorecard-[YYYY-MM-DD].json # Versioned snapshots
docs/surface/scorecard-latest.json # Current snapshot (symlink or copy)
docs/surface/history.md               # Human-readable changelog
Store version history in git so you can track trends over time:
git log docs/surface/scorecard-*.json