Interface: IEvaluator
Defined in: packages/agentos/src/core/evaluation/IEvaluator.ts:268
Interface for the agent evaluator.
Example
const evaluator = new Evaluator();

// Create a test suite
const testCases: EvalTestCase[] = [
  {
    id: 'greet-1',
    name: 'Basic greeting',
    input: 'Hello!',
    expectedOutput: 'Hello! How can I help you today?',
    criteria: [
      { name: 'relevance', description: 'Is the greeting appropriate?', weight: 0.5, scorer: 'llm_judge' },
      { name: 'politeness', description: 'Is the response polite?', weight: 0.5, scorer: 'contains' },
    ],
  },
];

// Run the evaluation (agentFn maps an input string to the agent's output)
const run = await evaluator.runEvaluation('greeting-test', testCases, agentFn);
console.log(`Pass rate: ${run.aggregateMetrics.passRate * 100}%`);
Methods
compareRuns()
compareRuns(runId1, runId2): Promise<EvalComparison>
Defined in: packages/agentos/src/core/evaluation/IEvaluator.ts:339
Compares two evaluation runs.
Parameters
runId1
string
First run ID
runId2
string
Second run ID
Returns
Promise<EvalComparison>
Comparison results
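Example
A minimal sketch, assuming an evaluator instance and two completed runs; the run IDs below are placeholders:
// Compare a baseline run against a candidate run
const comparison = await evaluator.compareRuns('run-baseline', 'run-candidate');
console.log(comparison);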
evaluateTestCase()
evaluateTestCase(testCase, actualOutput, config?): Promise<EvalTestResult>
Defined in: packages/agentos/src/core/evaluation/IEvaluator.ts:291
Evaluates a single test case.
Parameters
testCase
EvalTestCase
The test case to evaluate
actualOutput
string
The agent's actual output
config?
Evaluation configuration
Returns
Promise<EvalTestResult>
Test result
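Example
A minimal sketch, assuming the evaluator, testCases, and agentFn from the interface-level example above:
// Produce output for the first test case, then evaluate that case in isolation
const output = await agentFn(testCases[0].input);
const result = await evaluator.evaluateTestCase(testCases[0], output);
console.log(result);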
generateReport()
generateReport(runId, format): Promise<string>
Defined in: packages/agentos/src/core/evaluation/IEvaluator.ts:347
Generates a report for a run.
Parameters
runId
string
Run ID
format
Report format
"json" | "markdown" | "html"
Returns
Promise<string>
Report content
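Example
A minimal sketch, assuming a completed run (the run ID is a placeholder) and Node's fs module for writing the result:
import { promises as fs } from 'node:fs';

// Render the run as Markdown and save it to disk
const report = await evaluator.generateReport('run-123', 'markdown');
await fs.writeFile('eval-report.md', report);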
getRun()
getRun(runId): Promise<EvalRun | undefined>
Defined in: packages/agentos/src/core/evaluation/IEvaluator.ts:324
Gets an evaluation run by ID.
Parameters
runId
string
Run ID
Returns
Promise<EvalRun | undefined>
The evaluation run, or undefined if no run has that ID
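Example
A minimal sketch, assuming an evaluator instance; the run ID is a placeholder:
const run = await evaluator.getRun('run-123');
if (run === undefined) {
  console.warn('No evaluation run with that ID');
}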
listRuns()
listRuns(limit?): Promise<EvalRun[]>
Defined in: packages/agentos/src/core/evaluation/IEvaluator.ts:331
Lists recent evaluation runs.
Parameters
limit?
number
Maximum number of runs to return
Returns
Promise<EvalRun[]>
Array of runs
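Example
A minimal sketch showing the optional limit:
// Fetch up to the ten most recent runs
const recent = await evaluator.listRuns(10);
console.log(`Retrieved ${recent.length} run(s)`);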
registerScorer()
registerScorer(name, fn): void
Defined in: packages/agentos/src/core/evaluation/IEvaluator.ts:317
Registers a custom scorer.
Parameters
name
string
Scorer name
fn
Scoring function
Returns
void
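Example
A minimal sketch; the (actual, expected) => number shape of the scoring function is an assumption inferred from score()'s parameters, not confirmed by this reference:
// Hypothetical exact-match scorer: 1 on a match, 0 otherwise
evaluator.registerScorer('exact_match', (actual, expected) =>
  actual.trim() === (expected ?? '').trim() ? 1 : 0,
);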
runEvaluation()
runEvaluation(name, testCases, agentFn, config?): Promise<EvalRun>
Defined in: packages/agentos/src/core/evaluation/IEvaluator.ts:277
Runs an evaluation suite against an agent.
Parameters
name
string
Name for this evaluation run
testCases
EvalTestCase[]
Test cases to evaluate
agentFn
(input, context?) => Promise<string>
Function that takes an input and returns the agent's output
config?
Evaluation configuration
Returns
Promise<EvalRun>
The completed evaluation run
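Example
A minimal sketch, assuming the testCases from the interface-level example; myAgent is a hypothetical agent object with a respond(input) method returning Promise<string>:
// Adapt the agent to the (input, context?) => Promise<string> shape
const agentFn = (input: string) => myAgent.respond(input);
const run = await evaluator.runEvaluation('smoke-test', testCases, agentFn);
console.log(`Pass rate: ${run.aggregateMetrics.passRate * 100}%`);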
score()
score(scorer, actual, expected?, references?): Promise<number>
Defined in: packages/agentos/src/core/evaluation/IEvaluator.ts:305
Scores output using a specific scorer.
Parameters
scorer
string
Scorer name
actual
string
Actual output
expected?
string
Expected output
references?
string[]
Reference outputs
Returns
Promise<number>
Normalized score between 0 and 1
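Example
A minimal sketch using the 'contains' scorer named in the interface-level example:
// Returns a score between 0 and 1
const similarity = await evaluator.score('contains', 'Hello! How can I help?', 'Hello');
console.log(similarity);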