Skip to main content
Two named views - performance metrics and quality checks - each with its own focused filter tiles, followed by app-level evals and an enrichment.
my-evals.js
import { createApp } from 'claudeye';

// The one app object that every registration in this script is attached to.
const app = createApp();

// Predicate is true only when the session has at least one log entry.
// NOTE(review): presumably this acts as a global gate for the evals and
// enrichments below - confirm against the Claudeye docs.
app.condition((session) => session.entries.length > 0);

// ── Performance view ─────────────────────────────────────────────
// View id 'performance', shown under the label 'Performance Metrics'.
// The calls stay chained because each `.filter` is invoked on the value the
// previous call returned.
app.dashboard.view('performance', { label: 'Performance Metrics' })
  // Pre-built filter requested by name rather than computed here.
  .filter({ preBuilt: ['lastModified'] })
  .filter('turn-count', (session) => session.stats.turnCount, { label: 'Turn Count' })
  .filter('tool-calls', (session) => session.stats.toolCallCount, { label: 'Tool Calls' })
  .filter(
    'avg-tools-per-turn',
    ({ stats }) => {
      // Sessions without a positive turn count report 0 instead of dividing.
      if (stats.turnCount > 0) {
        // Scale by 10, round, scale back: keeps one decimal place.
        return Math.round(stats.toolCallCount / stats.turnCount * 10) / 10;
      }
      return 0;
    },
    {
      label: 'Avg Tools/Turn',
      // Per-filter condition: true only when the session made tool calls.
      condition: (session) => session.stats.toolCallCount > 0,
    }
  );

// ── Quality view ─────────────────────────────────────────────────
// View id 'quality', shown under the label 'Quality Checks'.
app.dashboard.view('quality', { label: 'Quality Checks' })
  // True when any assistant entry carries a tool_use block flagged is_error.
  // NOTE(review): in the Anthropic Messages API `is_error` belongs to
  // `tool_result` blocks, not `tool_use` blocks - confirm Claudeye attaches it
  // to tool_use blocks, otherwise this filter can never match.
  .filter(
    'has-errors',
    ({ entries }) =>
      entries.some((entry) => {
        if (entry.type !== 'assistant') {
          return false;
        }
        const blocks = entry.message?.content;
        // Content that is not an array has no blocks to inspect.
        if (!Array.isArray(blocks)) {
          return false;
        }
        return blocks.some((block) => block.type === 'tool_use' && block.is_error);
      }),
    { label: 'Has Errors' }
  )
  // First listed model, or the literal 'unknown' when that slot is falsy.
  .filter(
    'primary-model',
    (session) => session.stats.models[0] || 'unknown',
    { label: 'Primary Model' }
  )
  // Boolean: did the session use one or more subagents?
  .filter(
    'uses-subagents',
    (session) => session.stats.subagentCount > 0,
    { label: 'Uses Subagents' }
  );

// ── Evals ────────────────────────────────────────────────────────
// Eval 'under-50-turns': passes at 50 turns or fewer. The score starts at 1
// for zero turns, drops linearly, and is clamped at 0 from 100 turns onward.
app.eval('under-50-turns', ({ stats }) => {
  const { turnCount } = stats;

  return {
    pass: turnCount <= 50,
    score: Math.max(0, 1 - turnCount / 100),
    message: `${turnCount} turn(s)`,
  };
});

// Eval 'has-completion': looks at the last assistant entry in the session and
// passes when its content holds at least one block of type 'text'.
app.eval('has-completion', ({ entries }) => {
  // Work on a copy and walk it from the end to find the latest assistant entry.
  const ordered = [...entries];
  let lastAssistant;
  for (let i = ordered.length - 1; i >= 0; i -= 1) {
    if (ordered[i].type === 'assistant') {
      lastAssistant = ordered[i];
      break;
    }
  }

  // Optional chaining (including on `.some` itself) makes a missing entry,
  // missing message, or content without a `some` method count as "no text".
  const endedWithText = Boolean(
    lastAssistant?.message?.content?.some?.((block) => block.type === 'text')
  );

  if (endedWithText) {
    return { pass: true, score: 1, message: 'Ended with text' };
  }
  return { pass: false, score: 0, message: 'No final text response' };
});

// ── Enrichments ──────────────────────────────────────────────────
// Enrichment 'session-overview': returns a flat label -> value summary built
// from the session stats. 'Models' is the comma-separated model list, or the
// literal 'none' when that joined string is empty.
app.enrich('session-overview', ({ stats }) => {
  const { turnCount, toolCallCount, subagentCount, duration, models } = stats;

  return {
    'Turns': turnCount,
    'Tool Calls': toolCallCount,
    'Subagents': subagentCount,
    'Duration': duration,
    'Models': models.join(', ') || 'none',
  };
});

// Last statement of the script, after every view, eval and enrichment above
// has been registered on `app`.
// NOTE(review): presumably this hands control to Claudeye / starts serving -
// confirm against the Claudeye docs.
app.listen();
claudeye --evals ./my-evals.js