my-evals.js
Copy
Ask AI
import { createApp } from 'claudeye';
// Single app instance on which every eval, enrichment and alert in this file is registered.
const app = createApp();
// ── Evals ────────────────────────────────────────────────────────
// Passes while the session has at most 50 turns. The score drops linearly
// with the turn count (1 at zero turns) and is clamped so it never goes below 0.
app.eval('under-50-turns', ({ stats }) => {
  const { turnCount } = stats;
  const withinBudget = turnCount <= 50;
  const linearScore = 1 - turnCount / 100;

  return {
    pass: withinBudget,
    score: Math.max(0, linearScore),
    message: `${turnCount} turn(s)`,
  };
});
// Checks whether the most recent assistant entry contains at least one
// block whose type is 'text'.
app.eval('has-completion', ({ entries }) => {
  // Copy first so the caller's entries are never touched, then scan from the end.
  const snapshot = [...entries];
  let finalAssistant;
  for (let i = snapshot.length - 1; i >= 0; i -= 1) {
    if (snapshot[i].type === 'assistant') {
      finalAssistant = snapshot[i];
      break;
    }
  }

  // Optional calls keep this safe when there is no assistant entry, or when
  // `content` is missing or has no `some` method.
  const hasText = finalAssistant?.message?.content?.some?.((block) => block.type === 'text');

  if (hasText) {
    return { pass: true, score: 1.0, message: 'Ended with text' };
  }
  return { pass: false, score: 0, message: 'No final text response' };
});
// ── Enrichments ──────────────────────────────────────────────────
// Summarizes the session stats as labelled values: turn count, tool call
// count, and a comma-separated model list ('none' when the list is empty).
app.enrich('overview', ({ stats }) => {
  const { turnCount, toolCallCount, models } = stats;
  const modelList = models.join(', ');

  return {
    'Turns': turnCount,
    'Tool Calls': toolCallCount,
    'Models': modelList || 'none',
  };
});
// ── Alerts ───────────────────────────────────────────────────────
// Log a summary line for every session
app.alert('log-results', ({ projectName, sessionId, evalSummary, enrichSummary }) => {
  // Start from the "nothing ran" wording and override it when a summary is present.
  let evals = 'no evals';
  if (evalSummary) {
    const { passCount, failCount, errorCount } = evalSummary;
    evals = `${passCount} pass, ${failCount} fail, ${errorCount} error`;
  }

  let enrichments = 'no enrichments';
  if (enrichSummary) {
    enrichments = `${enrichSummary.results.length} enrichments`;
  }

  console.log(`[ALERT] ${projectName}/${sessionId}: ${evals} | ${enrichments}`);
});
// Warn when evals fail
app.alert('warn-on-failure', ({ projectName, sessionId, evalSummary }) => {
  // Nothing to report without a summary or without at least one failure.
  if (!evalSummary || !(evalSummary.failCount > 0)) {
    return;
  }

  // A result counts as failed only if it neither errored, was skipped, nor passed.
  const failedNames = evalSummary.results.reduce((names, result) => {
    const isFailure = !(result.error || result.skipped || result.pass);
    if (isFailure) {
      names.push(result.name);
    }
    return names;
  }, []);

  console.warn(`[FAILURE] ${projectName}/${sessionId}: ${failedNames.join(', ')} failed`);
});
// Slack webhook on failure (replace URL to enable)
// app.alert('slack-on-failure', async ({ projectName, sessionId, evalSummary }) => {
// if (evalSummary && evalSummary.failCount > 0) {
// await fetch('https://hooks.slack.com/services/YOUR/WEBHOOK/URL', {
// method: 'POST',
// headers: { 'Content-Type': 'application/json' },
// body: JSON.stringify({
// text: `${evalSummary.failCount} evals failed for ${projectName}/${sessionId}`,
// }),
// });
// }
// });
// Called last, after every eval, enrichment and alert has been registered.
// NOTE(review): presumably this starts the claudeye app with those registrations — confirm against the claudeye docs.
app.listen();
Copy
Ask AI
# Alerts fire during background processing
claudeye --evals ./my-evals.js --queue-interval 30

