Interface EvalTestCase

A test case for evaluation.

/**
 * A test case for evaluation.
 *
 * Only `id`, `name`, and `input` are required; every other field is optional.
 */
interface EvalTestCase {
    /** Unique test case ID. */
    id: string;
    /** Test case name. */
    name: string;
    /** Category or tag. */
    category?: string;
    /** Input to the agent. */
    input: string;
    /** Expected output (for comparison). */
    expectedOutput?: string;
    /** Reference outputs for similarity comparison. */
    referenceOutputs?: string[];
    /** Context or system prompt. */
    context?: string;
    /**
     * Expected tool calls. Each entry names a tool via `toolName` and may
     * optionally carry an `args` record.
     */
    expectedToolCalls?: {
        toolName: string;
        args?: Record<string, unknown>;
    }[];
    /** Evaluation criteria. */
    criteria?: EvalCriteria[];
    /** Metadata. */
    metadata?: Record<string, unknown>;
}

Properties

id: string

Unique test case ID

name: string

Test case name

category?: string

Category or tag

input: string

Input to the agent

expectedOutput?: string

Expected output (for comparison)

referenceOutputs?: string[]

Reference outputs for similarity comparison

context?: string

Context or system prompt

expectedToolCalls?: {
    toolName: string;
    args?: Record<string, unknown>;
}[]

Expected tool calls

Type declaration

  • toolName: string
  • Optional args?: Record<string, unknown>

criteria?: EvalCriteria[]

Evaluation criteria

metadata?: Record<string, unknown>

Metadata