# Evaluation

Evaluation and evaluator endpoints.

## List evaluators

> List evaluators for the authenticated project, optionally filtered by container.

```json
{"openapi":"3.1.0","info":{"title":"Maniac Inference Gateway API","version":"1.0.0"},"tags":[{"name":"Evaluation","description":"Evaluation and evaluator endpoints."}],"servers":[{"url":"https://platform.maniac.ai","description":"The Maniac API"}],"security":[{"ApiKeyAuth":[]}],"components":{"securitySchemes":{"ApiKeyAuth":{"type":"http","scheme":"bearer","bearerFormat":"API key","description":"API key in Authorization header using Bearer <token>."}},"schemas":{"EvaluatorListResponse":{"properties":{"object":{"type":"string","const":"list","title":"Object","description":"Object type identifier.","default":"list"},"data":{"items":{"anyOf":[{"$ref":"#/components/schemas/JudgeEvaluator"},{"$ref":"#/components/schemas/CodeEvaluator"}]},"type":"array","title":"Data","description":"Returned items."},"total":{"type":"integer","minimum":0,"title":"Total","description":"Total number of items available for this resource."}},"additionalProperties":false,"type":"object","required":["data","total"],"title":"EvaluatorListResponse"},"JudgeEvaluator":{"properties":{"object":{"type":"string","const":"evaluator.judge","title":"Object","description":"Object type.","default":"evaluator.judge"},"project_id":{"type":"string","title":"Project Id","description":"Project id."},"container_id":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Container Id","description":"Container id."},"id":{"type":"string","title":"Id","description":"Evaluator id."},"name":{"type":"string","title":"Name","description":"Evaluator name."},"description":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Description","description":"Evaluator description."},"created_at":{"type":"string","title":"Created At","description":"Creation timestamp."},"metadata":{"anyOf":[{"additionalProperties":true,"type":"object"},{"type":"null"}],"title":"Metadata","description":"Optional metadata."},"pass_threshold":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"Pass Threshold","description":"Optional pass 
threshold."},"mode":{"type":"string","enum":["pointwise","reference","pairwise"],"title":"Mode","description":"Evaluation mode: score a candidate directly, against a reference, or against a baseline.","default":"pointwise"},"api":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Api","description":"API type."},"container":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Container","description":"Container label (if container-specific)."},"model":{"type":"string","title":"Model","description":"Judge model slug."},"prompt":{"type":"string","title":"Prompt","description":"Judge prompt."},"inference_parameters":{"anyOf":[{"additionalProperties":true,"type":"object"},{"type":"null"}],"title":"Inference Parameters","description":"Optional inference parameters for the judge model (e.g. response_format, temperature)."}},"type":"object","required":["project_id","id","name","created_at","model","prompt"],"title":"JudgeEvaluator"},"CodeEvaluator":{"properties":{"object":{"type":"string","const":"evaluator.code","title":"Object","description":"Object type.","default":"evaluator.code"},"project_id":{"type":"string","title":"Project Id","description":"Project id."},"container_id":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Container Id","description":"Container id."},"id":{"type":"string","title":"Id","description":"Evaluator id."},"name":{"type":"string","title":"Name","description":"Evaluator name."},"description":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Description","description":"Evaluator description."},"created_at":{"type":"string","title":"Created At","description":"Creation timestamp."},"metadata":{"anyOf":[{"additionalProperties":true,"type":"object"},{"type":"null"}],"title":"Metadata","description":"Optional metadata."},"pass_threshold":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"Pass Threshold","description":"Optional pass 
threshold."},"mode":{"type":"string","enum":["pointwise","reference","pairwise"],"title":"Mode","description":"Evaluation mode: score a candidate directly, against a reference, or against a baseline.","default":"pointwise"},"api":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Api","description":"API type."},"container":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Container","description":"Container label (if container-specific)."},"source":{"type":"string","title":"Source","description":"Source code to evaluate."},"requirements":{"anyOf":[{"items":{"type":"string"},"type":"array"},{"type":"null"}],"title":"Requirements","description":"Optional list of package requirements."}},"type":"object","required":["project_id","id","name","created_at","source"],"title":"CodeEvaluator"},"ErrorResponse":{"properties":{"error":{"$ref":"#/components/schemas/ManiacError","description":"Error payload."}},"additionalProperties":false,"type":"object","required":["error"],"title":"ErrorResponse","description":"Response body for errors."},"ManiacError":{"properties":{"code":{"type":"string","title":"Code","description":"Machine-readable error code."},"message":{"type":"string","title":"Message","description":"Human-readable error message."},"details":{"anyOf":[{"additionalProperties":true,"type":"object"},{"type":"null"}],"title":"Details","description":"Additional error details."}},"additionalProperties":false,"type":"object","required":["code","message"],"title":"ManiacError","description":"Standard Maniac API error envelope.\n\nThis matches the shape already used by v2 auth (`detail={\"error\": 
{...}}`)."},"HTTPValidationError":{"properties":{"detail":{"items":{"$ref":"#/components/schemas/ValidationError"},"type":"array","title":"Detail"}},"type":"object","title":"HTTPValidationError"},"ValidationError":{"properties":{"loc":{"items":{"anyOf":[{"type":"string"},{"type":"integer"}]},"type":"array","title":"Location"},"msg":{"type":"string","title":"Message"},"type":{"type":"string","title":"Error Type"}},"type":"object","required":["loc","msg","type"],"title":"ValidationError"}}},"paths":{"/v1/evaluators":{"get":{"tags":["Evaluation"],"summary":"List evaluators","description":"List evaluators for the authenticated project, optionally filtered by container.","operationId":"evaluators_list","parameters":[{"name":"container","in":"query","required":false,"schema":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Container"}},{"name":"limit","in":"query","required":false,"schema":{"type":"integer","maximum":100,"minimum":1,"default":20,"title":"Limit"}},{"name":"offset","in":"query","required":false,"schema":{"type":"integer","minimum":0,"default":0,"title":"Offset"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/EvaluatorListResponse"}}}},"400":{"description":"Bad Request","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}},"401":{"description":"Unauthorized","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}},"403":{"description":"Forbidden","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}},"429":{"description":"Too Many Requests","headers":{"X-RateLimit-Limit":{"description":"Request limit per window.","schema":{"type":"integer"}},"X-RateLimit-Remaining":{"description":"Remaining requests in current 
window.","schema":{"type":"integer"}},"X-RateLimit-Reset":{"description":"Unix timestamp when the rate limit resets.","schema":{"type":"integer"}},"Retry-After":{"description":"Seconds to wait before retrying.","schema":{"type":"integer"}}},"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}},"500":{"description":"Internal Server Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}},"501":{"description":"Not Implemented","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}},"503":{"description":"Upstream Unavailable","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}}}}}}}
```

## Create an evaluator

> Create a new evaluator scoped to a project or container.

```json
{"openapi":"3.1.0","info":{"title":"Maniac Inference Gateway API","version":"1.0.0"},"tags":[{"name":"Evaluation","description":"Evaluation and evaluator endpoints."}],"servers":[{"url":"https://platform.maniac.ai","description":"The Maniac API"}],"security":[{"ApiKeyAuth":[]}],"components":{"securitySchemes":{"ApiKeyAuth":{"type":"http","scheme":"bearer","bearerFormat":"API key","description":"API key in Authorization header using Bearer <token>."}},"schemas":{"EvaluatorReq":{"properties":{"container":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Container","description":"Container label."},"type":{"type":"string","enum":["judge","code"],"title":"Type","description":"Evaluator type."},"model":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Model","description":"Judge model slug."},"prompt":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Prompt","description":"Judge prompt."},"source":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Source","description":"Code evaluator source."},"requirements":{"anyOf":[{"items":{"type":"string"},"type":"array"},{"type":"null"}],"title":"Requirements","description":"Optional requirements list."},"pass_threshold":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"Pass Threshold","description":"Optional pass threshold."},"mode":{"type":"string","enum":["pointwise","reference","pairwise"],"title":"Mode","description":"Evaluation mode.","default":"pointwise"},"metadata":{"anyOf":[{"additionalProperties":true,"type":"object"},{"type":"null"}],"title":"Metadata","description":"Optional metadata."},"name":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Name","description":"Evaluator name."},"description":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Description","description":"Evaluator description."},"api":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Api","description":"API 
type."},"inference_parameters":{"anyOf":[{"additionalProperties":true,"type":"object"},{"type":"null"}],"title":"Inference Parameters","description":"Optional inference parameters for judge evaluators (e.g. response_format, temperature)."}},"additionalProperties":false,"type":"object","required":["type"],"title":"EvaluatorReq"},"JudgeEvaluator":{"properties":{"object":{"type":"string","const":"evaluator.judge","title":"Object","description":"Object type.","default":"evaluator.judge"},"project_id":{"type":"string","title":"Project Id","description":"Project id."},"container_id":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Container Id","description":"Container id."},"id":{"type":"string","title":"Id","description":"Evaluator id."},"name":{"type":"string","title":"Name","description":"Evaluator name."},"description":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Description","description":"Evaluator description."},"created_at":{"type":"string","title":"Created At","description":"Creation timestamp."},"metadata":{"anyOf":[{"additionalProperties":true,"type":"object"},{"type":"null"}],"title":"Metadata","description":"Optional metadata."},"pass_threshold":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"Pass Threshold","description":"Optional pass threshold."},"mode":{"type":"string","enum":["pointwise","reference","pairwise"],"title":"Mode","description":"Evaluation mode: score a candidate directly, against a reference, or against a baseline.","default":"pointwise"},"api":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Api","description":"API type."},"container":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Container","description":"Container label (if container-specific)."},"model":{"type":"string","title":"Model","description":"Judge model slug."},"prompt":{"type":"string","title":"Prompt","description":"Judge 
prompt."},"inference_parameters":{"anyOf":[{"additionalProperties":true,"type":"object"},{"type":"null"}],"title":"Inference Parameters","description":"Optional inference parameters for the judge model (e.g. response_format, temperature)."}},"type":"object","required":["project_id","id","name","created_at","model","prompt"],"title":"JudgeEvaluator"},"CodeEvaluator":{"properties":{"object":{"type":"string","const":"evaluator.code","title":"Object","description":"Object type.","default":"evaluator.code"},"project_id":{"type":"string","title":"Project Id","description":"Project id."},"container_id":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Container Id","description":"Container id."},"id":{"type":"string","title":"Id","description":"Evaluator id."},"name":{"type":"string","title":"Name","description":"Evaluator name."},"description":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Description","description":"Evaluator description."},"created_at":{"type":"string","title":"Created At","description":"Creation timestamp."},"metadata":{"anyOf":[{"additionalProperties":true,"type":"object"},{"type":"null"}],"title":"Metadata","description":"Optional metadata."},"pass_threshold":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"Pass Threshold","description":"Optional pass threshold."},"mode":{"type":"string","enum":["pointwise","reference","pairwise"],"title":"Mode","description":"Evaluation mode: score a candidate directly, against a reference, or against a baseline.","default":"pointwise"},"api":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Api","description":"API type."},"container":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Container","description":"Container label (if container-specific)."},"source":{"type":"string","title":"Source","description":"Source code to evaluate."},"requirements":{"anyOf":[{"items":{"type":"string"},"type":"array"},{"type":"null"}],"title":"Requirements","description":"Optional list of package 
requirements."}},"type":"object","required":["project_id","id","name","created_at","source"],"title":"CodeEvaluator"},"ErrorResponse":{"properties":{"error":{"$ref":"#/components/schemas/ManiacError","description":"Error payload."}},"additionalProperties":false,"type":"object","required":["error"],"title":"ErrorResponse","description":"Response body for errors."},"ManiacError":{"properties":{"code":{"type":"string","title":"Code","description":"Machine-readable error code."},"message":{"type":"string","title":"Message","description":"Human-readable error message."},"details":{"anyOf":[{"additionalProperties":true,"type":"object"},{"type":"null"}],"title":"Details","description":"Additional error details."}},"additionalProperties":false,"type":"object","required":["code","message"],"title":"ManiacError","description":"Standard Maniac API error envelope.\n\nThis matches the shape already used by v2 auth (`detail={\"error\": {...}}`)."},"HTTPValidationError":{"properties":{"detail":{"items":{"$ref":"#/components/schemas/ValidationError"},"type":"array","title":"Detail"}},"type":"object","title":"HTTPValidationError"},"ValidationError":{"properties":{"loc":{"items":{"anyOf":[{"type":"string"},{"type":"integer"}]},"type":"array","title":"Location"},"msg":{"type":"string","title":"Message"},"type":{"type":"string","title":"Error Type"}},"type":"object","required":["loc","msg","type"],"title":"ValidationError"}}},"paths":{"/v1/evaluators":{"post":{"tags":["Evaluation"],"summary":"Create an evaluator","description":"Create a new evaluator scoped to a project or container.","operationId":"evaluators_create","requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/EvaluatorReq"}}}},"responses":{"201":{"description":"Successful Response","content":{"application/json":{"schema":{"anyOf":[{"$ref":"#/components/schemas/JudgeEvaluator"},{"$ref":"#/components/schemas/CodeEvaluator"}],"title":"Response Evaluators 
Create"}}}},"400":{"description":"Bad Request","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}},"401":{"description":"Unauthorized","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}},"403":{"description":"Forbidden","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}},"404":{"description":"Not Found","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}},"409":{"description":"Conflict","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}},"429":{"description":"Too Many Requests","headers":{"X-RateLimit-Limit":{"description":"Request limit per window.","schema":{"type":"integer"}},"X-RateLimit-Remaining":{"description":"Remaining requests in current window.","schema":{"type":"integer"}},"X-RateLimit-Reset":{"description":"Unix timestamp when the rate limit resets.","schema":{"type":"integer"}},"Retry-After":{"description":"Seconds to wait before retrying.","schema":{"type":"integer"}}},"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}},"500":{"description":"Internal Server Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}},"501":{"description":"Not Implemented","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}},"503":{"description":"Upstream Unavailable","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}}}}}}}
```

## Get an evaluator

> Fetch a single evaluator by id or name within the authenticated project.

```json
{"openapi":"3.1.0","info":{"title":"Maniac Inference Gateway API","version":"1.0.0"},"tags":[{"name":"Evaluation","description":"Evaluation and evaluator endpoints."}],"servers":[{"url":"https://platform.maniac.ai","description":"The Maniac API"}],"security":[{"ApiKeyAuth":[]}],"components":{"securitySchemes":{"ApiKeyAuth":{"type":"http","scheme":"bearer","bearerFormat":"API key","description":"API key in Authorization header using Bearer <token>."}},"schemas":{"JudgeEvaluator":{"properties":{"object":{"type":"string","const":"evaluator.judge","title":"Object","description":"Object type.","default":"evaluator.judge"},"project_id":{"type":"string","title":"Project Id","description":"Project id."},"container_id":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Container Id","description":"Container id."},"id":{"type":"string","title":"Id","description":"Evaluator id."},"name":{"type":"string","title":"Name","description":"Evaluator name."},"description":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Description","description":"Evaluator description."},"created_at":{"type":"string","title":"Created At","description":"Creation timestamp."},"metadata":{"anyOf":[{"additionalProperties":true,"type":"object"},{"type":"null"}],"title":"Metadata","description":"Optional metadata."},"pass_threshold":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"Pass Threshold","description":"Optional pass threshold."},"mode":{"type":"string","enum":["pointwise","reference","pairwise"],"title":"Mode","description":"Evaluation mode: score a candidate directly, against a reference, or against a baseline.","default":"pointwise"},"api":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Api","description":"API type."},"container":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Container","description":"Container label (if container-specific)."},"model":{"type":"string","title":"Model","description":"Judge model 
slug."},"prompt":{"type":"string","title":"Prompt","description":"Judge prompt."},"inference_parameters":{"anyOf":[{"additionalProperties":true,"type":"object"},{"type":"null"}],"title":"Inference Parameters","description":"Optional inference parameters for the judge model (e.g. response_format, temperature)."}},"type":"object","required":["project_id","id","name","created_at","model","prompt"],"title":"JudgeEvaluator"},"CodeEvaluator":{"properties":{"object":{"type":"string","const":"evaluator.code","title":"Object","description":"Object type.","default":"evaluator.code"},"project_id":{"type":"string","title":"Project Id","description":"Project id."},"container_id":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Container Id","description":"Container id."},"id":{"type":"string","title":"Id","description":"Evaluator id."},"name":{"type":"string","title":"Name","description":"Evaluator name."},"description":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Description","description":"Evaluator description."},"created_at":{"type":"string","title":"Created At","description":"Creation timestamp."},"metadata":{"anyOf":[{"additionalProperties":true,"type":"object"},{"type":"null"}],"title":"Metadata","description":"Optional metadata."},"pass_threshold":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"Pass Threshold","description":"Optional pass threshold."},"mode":{"type":"string","enum":["pointwise","reference","pairwise"],"title":"Mode","description":"Evaluation mode: score a candidate directly, against a reference, or against a baseline.","default":"pointwise"},"api":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Api","description":"API type."},"container":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Container","description":"Container label (if container-specific)."},"source":{"type":"string","title":"Source","description":"Source code to 
evaluate."},"requirements":{"anyOf":[{"items":{"type":"string"},"type":"array"},{"type":"null"}],"title":"Requirements","description":"Optional list of package requirements."}},"type":"object","required":["project_id","id","name","created_at","source"],"title":"CodeEvaluator"},"ErrorResponse":{"properties":{"error":{"$ref":"#/components/schemas/ManiacError","description":"Error payload."}},"additionalProperties":false,"type":"object","required":["error"],"title":"ErrorResponse","description":"Response body for errors."},"ManiacError":{"properties":{"code":{"type":"string","title":"Code","description":"Machine-readable error code."},"message":{"type":"string","title":"Message","description":"Human-readable error message."},"details":{"anyOf":[{"additionalProperties":true,"type":"object"},{"type":"null"}],"title":"Details","description":"Additional error details."}},"additionalProperties":false,"type":"object","required":["code","message"],"title":"ManiacError","description":"Standard Maniac API error envelope.\n\nThis matches the shape already used by v2 auth (`detail={\"error\": {...}}`)."},"HTTPValidationError":{"properties":{"detail":{"items":{"$ref":"#/components/schemas/ValidationError"},"type":"array","title":"Detail"}},"type":"object","title":"HTTPValidationError"},"ValidationError":{"properties":{"loc":{"items":{"anyOf":[{"type":"string"},{"type":"integer"}]},"type":"array","title":"Location"},"msg":{"type":"string","title":"Message"},"type":{"type":"string","title":"Error Type"}},"type":"object","required":["loc","msg","type"],"title":"ValidationError"}}},"paths":{"/v1/evaluators/{evaluator}":{"get":{"tags":["Evaluation"],"summary":"Get an evaluator","description":"Fetch a single evaluator by id or name within the authenticated project.","operationId":"evaluators_retrieve","parameters":[{"name":"evaluator","in":"path","required":true,"schema":{"type":"string","title":"Evaluator"}}],"responses":{"200":{"description":"Successful 
Response","content":{"application/json":{"schema":{"anyOf":[{"$ref":"#/components/schemas/JudgeEvaluator"},{"$ref":"#/components/schemas/CodeEvaluator"}],"title":"Response Evaluators Retrieve"}}}},"400":{"description":"Bad Request","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}},"401":{"description":"Unauthorized","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}},"403":{"description":"Forbidden","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}},"404":{"description":"Not Found","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}},"409":{"description":"Conflict","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}},"429":{"description":"Too Many Requests","headers":{"X-RateLimit-Limit":{"description":"Request limit per window.","schema":{"type":"integer"}},"X-RateLimit-Remaining":{"description":"Remaining requests in current window.","schema":{"type":"integer"}},"X-RateLimit-Reset":{"description":"Unix timestamp when the rate limit resets.","schema":{"type":"integer"}},"Retry-After":{"description":"Seconds to wait before retrying.","schema":{"type":"integer"}}},"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}},"500":{"description":"Internal Server Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}},"501":{"description":"Not Implemented","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}},"503":{"description":"Upstream Unavailable","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}}}}}}}
```

## Update an evaluator

> Update an existing evaluator by id or name.

```json
{"openapi":"3.1.0","info":{"title":"Maniac Inference Gateway API","version":"1.0.0"},"tags":[{"name":"Evaluation","description":"Evaluation and evaluator endpoints."}],"servers":[{"url":"https://platform.maniac.ai","description":"The Maniac API"}],"security":[{"ApiKeyAuth":[]}],"components":{"securitySchemes":{"ApiKeyAuth":{"type":"http","scheme":"bearer","bearerFormat":"API key","description":"API key in Authorization header using Bearer <token>."}},"schemas":{"UpdateEvaluatorReq":{"properties":{"container":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Container","description":"Container label."},"model":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Model","description":"Judge model slug."},"prompt":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Prompt","description":"Judge prompt."},"source":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Source","description":"Code evaluator source."},"requirements":{"anyOf":[{"items":{"type":"string"},"type":"array"},{"type":"null"}],"title":"Requirements","description":"Optional requirements list."},"pass_threshold":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"Pass Threshold","description":"Optional pass threshold."},"mode":{"anyOf":[{"type":"string","enum":["pointwise","reference","pairwise"]},{"type":"null"}],"title":"Mode","description":"Evaluation mode."},"metadata":{"anyOf":[{"additionalProperties":true,"type":"object"},{"type":"null"}],"title":"Metadata","description":"Optional metadata."},"name":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Name","description":"Evaluator name."},"description":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Description","description":"Evaluator description."},"api":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Api","description":"API type."},"inference_parameters":{"anyOf":[{"additionalProperties":true,"type":"object"},{"type":"null"}],"title":"Inference Parameters","description":"Optional inference parameters for judge evaluators (e.g. 
response_format, temperature)."}},"additionalProperties":false,"type":"object","title":"UpdateEvaluatorReq","description":"Patchable fields for an evaluator."},"JudgeEvaluator":{"properties":{"object":{"type":"string","const":"evaluator.judge","title":"Object","description":"Object type.","default":"evaluator.judge"},"project_id":{"type":"string","title":"Project Id","description":"Project id."},"container_id":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Container Id","description":"Container id."},"id":{"type":"string","title":"Id","description":"Evaluator id."},"name":{"type":"string","title":"Name","description":"Evaluator name."},"description":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Description","description":"Evaluator description."},"created_at":{"type":"string","title":"Created At","description":"Creation timestamp."},"metadata":{"anyOf":[{"additionalProperties":true,"type":"object"},{"type":"null"}],"title":"Metadata","description":"Optional metadata."},"pass_threshold":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"Pass Threshold","description":"Optional pass threshold."},"mode":{"type":"string","enum":["pointwise","reference","pairwise"],"title":"Mode","description":"Evaluation mode: score a candidate directly, against a reference, or against a baseline.","default":"pointwise"},"api":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Api","description":"API type."},"container":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Container","description":"Container label (if container-specific)."},"model":{"type":"string","title":"Model","description":"Judge model slug."},"prompt":{"type":"string","title":"Prompt","description":"Judge prompt."},"inference_parameters":{"anyOf":[{"additionalProperties":true,"type":"object"},{"type":"null"}],"title":"Inference Parameters","description":"Optional inference parameters for the judge model (e.g. 
response_format, temperature)."}},"type":"object","required":["project_id","id","name","created_at","model","prompt"],"title":"JudgeEvaluator"},"CodeEvaluator":{"properties":{"object":{"type":"string","const":"evaluator.code","title":"Object","description":"Object type.","default":"evaluator.code"},"project_id":{"type":"string","title":"Project Id","description":"Project id."},"container_id":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Container Id","description":"Container id."},"id":{"type":"string","title":"Id","description":"Evaluator id."},"name":{"type":"string","title":"Name","description":"Evaluator name."},"description":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Description","description":"Evaluator description."},"created_at":{"type":"string","title":"Created At","description":"Creation timestamp."},"metadata":{"anyOf":[{"additionalProperties":true,"type":"object"},{"type":"null"}],"title":"Metadata","description":"Optional metadata."},"pass_threshold":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"Pass Threshold","description":"Optional pass threshold."},"mode":{"type":"string","enum":["pointwise","reference","pairwise"],"title":"Mode","description":"Evaluation mode: score a candidate directly, against a reference, or against a baseline.","default":"pointwise"},"api":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Api","description":"API type."},"container":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Container","description":"Container label (if container-specific)."},"source":{"type":"string","title":"Source","description":"Source code to evaluate."},"requirements":{"anyOf":[{"items":{"type":"string"},"type":"array"},{"type":"null"}],"title":"Requirements","description":"Optional list of package 
requirements."}},"type":"object","required":["project_id","id","name","created_at","source"],"title":"CodeEvaluator"},"ErrorResponse":{"properties":{"error":{"$ref":"#/components/schemas/ManiacError","description":"Error payload."}},"additionalProperties":false,"type":"object","required":["error"],"title":"ErrorResponse","description":"Response body for errors."},"ManiacError":{"properties":{"code":{"type":"string","title":"Code","description":"Machine-readable error code."},"message":{"type":"string","title":"Message","description":"Human-readable error message."},"details":{"anyOf":[{"additionalProperties":true,"type":"object"},{"type":"null"}],"title":"Details","description":"Additional error details."}},"additionalProperties":false,"type":"object","required":["code","message"],"title":"ManiacError","description":"Standard Maniac API error envelope.\n\nThis matches the shape already used by v2 auth (`detail={\"error\": {...}}`)."},"HTTPValidationError":{"properties":{"detail":{"items":{"$ref":"#/components/schemas/ValidationError"},"type":"array","title":"Detail"}},"type":"object","title":"HTTPValidationError"},"ValidationError":{"properties":{"loc":{"items":{"anyOf":[{"type":"string"},{"type":"integer"}]},"type":"array","title":"Location"},"msg":{"type":"string","title":"Message"},"type":{"type":"string","title":"Error Type"}},"type":"object","required":["loc","msg","type"],"title":"ValidationError"}}},"paths":{"/v1/evaluators/{evaluator}":{"patch":{"tags":["Evaluation"],"summary":"Update an evaluator","description":"Update an existing evaluator by id or name.","operationId":"evaluators_update","parameters":[{"name":"evaluator","in":"path","required":true,"schema":{"type":"string","title":"Evaluator"}}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateEvaluatorReq"}}}},"responses":{"200":{"description":"Successful 
Response","content":{"application/json":{"schema":{"anyOf":[{"$ref":"#/components/schemas/JudgeEvaluator"},{"$ref":"#/components/schemas/CodeEvaluator"}],"title":"Response Evaluators Update"}}}},"400":{"description":"Bad Request","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}},"401":{"description":"Unauthorized","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}},"403":{"description":"Forbidden","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}},"404":{"description":"Not Found","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}},"409":{"description":"Conflict","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}},"429":{"description":"Too Many Requests","headers":{"X-RateLimit-Limit":{"description":"Request limit per window.","schema":{"type":"integer"}},"X-RateLimit-Remaining":{"description":"Remaining requests in current window.","schema":{"type":"integer"}},"X-RateLimit-Reset":{"description":"Unix timestamp when the rate limit resets.","schema":{"type":"integer"}},"Retry-After":{"description":"Seconds to wait before retrying.","schema":{"type":"integer"}}},"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}},"500":{"description":"Internal Server Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}},"501":{"description":"Not Implemented","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}},"503":{"description":"Upstream Unavailable","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}}}}}}}
```

## Delete an evaluator

> Delete an evaluator by id.

```json
{"openapi":"3.1.0","info":{"title":"Maniac Inference Gateway API","version":"1.0.0"},"tags":[{"name":"Evaluation","description":"Evaluation and evaluator endpoints."}],"servers":[{"url":"https://platform.maniac.ai","description":"The Maniac API"}],"security":[{"ApiKeyAuth":[]}],"components":{"securitySchemes":{"ApiKeyAuth":{"type":"http","scheme":"bearer","bearerFormat":"API key","description":"API key in Authorization header using Bearer <token>."}},"schemas":{"DeleteEvaluatorRes":{"properties":{"object":{"type":"string","const":"evaluator","title":"Object","description":"Object type.","default":"evaluator"},"id":{"type":"string","title":"Id","description":"Evaluator id."},"deleted":{"type":"boolean","const":true,"title":"Deleted","description":"Deletion status.","default":true}},"additionalProperties":false,"type":"object","required":["id"],"title":"DeleteEvaluatorRes"},"ErrorResponse":{"properties":{"error":{"$ref":"#/components/schemas/ManiacError","description":"Error payload."}},"additionalProperties":false,"type":"object","required":["error"],"title":"ErrorResponse","description":"Response body for errors."},"ManiacError":{"properties":{"code":{"type":"string","title":"Code","description":"Machine-readable error code."},"message":{"type":"string","title":"Message","description":"Human-readable error message."},"details":{"anyOf":[{"additionalProperties":true,"type":"object"},{"type":"null"}],"title":"Details","description":"Additional error details."}},"additionalProperties":false,"type":"object","required":["code","message"],"title":"ManiacError","description":"Standard Maniac API error envelope.\n\nThis matches the shape already used by v2 auth (`detail={\"error\": 
{...}}`)."},"HTTPValidationError":{"properties":{"detail":{"items":{"$ref":"#/components/schemas/ValidationError"},"type":"array","title":"Detail"}},"type":"object","title":"HTTPValidationError"},"ValidationError":{"properties":{"loc":{"items":{"anyOf":[{"type":"string"},{"type":"integer"}]},"type":"array","title":"Location"},"msg":{"type":"string","title":"Message"},"type":{"type":"string","title":"Error Type"}},"type":"object","required":["loc","msg","type"],"title":"ValidationError"}}},"paths":{"/v1/evaluators/{evaluator_id}":{"delete":{"tags":["Evaluation"],"summary":"Delete an evaluator","description":"Delete an evaluator by id.","operationId":"evaluators_delete","parameters":[{"name":"evaluator_id","in":"path","required":true,"schema":{"type":"string","title":"Evaluator Id"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/DeleteEvaluatorRes"}}}},"400":{"description":"Bad Request","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}},"401":{"description":"Unauthorized","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}},"403":{"description":"Forbidden","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}},"404":{"description":"Not Found","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}},"409":{"description":"Conflict","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}},"429":{"description":"Too Many Requests","headers":{"X-RateLimit-Limit":{"description":"Request limit per window.","schema":{"type":"integer"}},"X-RateLimit-Remaining":{"description":"Remaining requests in current window.","schema":{"type":"integer"}},"X-RateLimit-Reset":{"description":"Unix timestamp 
when the rate limit resets.","schema":{"type":"integer"}},"Retry-After":{"description":"Seconds to wait before retrying.","schema":{"type":"integer"}}},"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}},"500":{"description":"Internal Server Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}},"501":{"description":"Not Implemented","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}},"503":{"description":"Upstream Unavailable","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}}}}}}}
```

## List evaluation runs

> List evaluation runs for the authenticated project. Optionally filter by container and/or status, and page through results with `limit` (1-100, default 20) and `offset` (default 0).

```json
{"openapi":"3.1.0","info":{"title":"Maniac Inference Gateway API","version":"1.0.0"},"tags":[{"name":"Evaluation","description":"Evaluation and evaluator endpoints."}],"servers":[{"url":"https://platform.maniac.ai","description":"The Maniac API"}],"security":[{"ApiKeyAuth":[]}],"components":{"securitySchemes":{"ApiKeyAuth":{"type":"http","scheme":"bearer","bearerFormat":"API key","description":"API key in Authorization header using Bearer <token>."}},"schemas":{"EvaluationRunListResponse":{"properties":{"object":{"type":"string","const":"list","title":"Object","description":"Object type identifier.","default":"list"},"data":{"items":{"$ref":"#/components/schemas/EvaluationRun"},"type":"array","title":"Data","description":"Returned items."},"total":{"type":"integer","minimum":0,"title":"Total","description":"Total number of items available for this resource."}},"additionalProperties":false,"type":"object","required":["data","total"],"title":"EvaluationRunListResponse"},"EvaluationRun":{"properties":{"created_at":{"type":"string","title":"Created At"},"finished_at":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Finished At"},"error_at":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Error At"},"status":{"type":"string","title":"Status"},"error":{"anyOf":[{},{"type":"null"}],"title":"Error"},"object":{"type":"string","const":"evaluation.run","title":"Object","description":"Object type.","default":"evaluation.run"},"id":{"type":"string","title":"Id","description":"Evaluation run id (run group id)."},"process_id":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Process Id","description":"Process id for lifecycle tracking."},"evaluators":{"anyOf":[{"items":{"type":"string"},"type":"array"},{"type":"null"}],"title":"Evaluators","description":"Evaluator ids used in this run."},"container":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Container","description":"Container 
id."},"dataset_id":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Dataset Id","description":"Dataset id (if a dataset was used)."},"sample":{"anyOf":[{"oneOf":[{"$ref":"#/components/schemas/DatasetDataSource"},{"$ref":"#/components/schemas/ContainerDataSource"},{"$ref":"#/components/schemas/GenerateDataSource"}],"discriminator":{"propertyName":"type","mapping":{"container":"#/components/schemas/ContainerDataSource","dataset":"#/components/schemas/DatasetDataSource","generate":"#/components/schemas/GenerateDataSource"}}},{"type":"null"}],"title":"Sample","description":"Resolved sample-side data source."},"ground_truth":{"anyOf":[{"oneOf":[{"$ref":"#/components/schemas/DatasetDataSource"},{"$ref":"#/components/schemas/ContainerDataSource"},{"$ref":"#/components/schemas/GenerateDataSource"}],"discriminator":{"propertyName":"type","mapping":{"container":"#/components/schemas/ContainerDataSource","dataset":"#/components/schemas/DatasetDataSource","generate":"#/components/schemas/GenerateDataSource"}}},{"type":"null"}],"title":"Ground Truth","description":"Resolved ground-truth-side data source."},"baseline":{"anyOf":[{"oneOf":[{"$ref":"#/components/schemas/DatasetDataSource"},{"$ref":"#/components/schemas/ContainerDataSource"},{"$ref":"#/components/schemas/GenerateDataSource"}],"discriminator":{"propertyName":"type","mapping":{"container":"#/components/schemas/ContainerDataSource","dataset":"#/components/schemas/DatasetDataSource","generate":"#/components/schemas/GenerateDataSource"}}},{"type":"null"}],"title":"Baseline","description":"Resolved baseline-side data source for pairwise evaluation."},"results":{"anyOf":[{"$ref":"#/components/schemas/EvaluationRunResults-Output"},{"type":"null"}],"description":"Evaluation results (populated on completion)."},"metrics":{"anyOf":[{"additionalProperties":true,"type":"object"},{"type":"null"}],"title":"Metrics","description":"Evaluation metrics (populated on 
completion)."},"config":{"anyOf":[{"additionalProperties":true,"type":"object"},{"type":"null"}],"title":"Config","description":"Run configuration as submitted."},"spend":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"Spend","description":"Estimated spend."},"metadata":{"anyOf":[{"additionalProperties":true,"type":"object"},{"type":"null"}],"title":"Metadata","description":"Optional metadata."}},"type":"object","required":["created_at","status","id"],"title":"EvaluationRun","description":"Response model for an evaluation run."},"DatasetDataSource":{"properties":{"range":{"anyOf":[{"type":"string","pattern":"^\\d+:\\d+$","description":"Range to evaluate, as 'start:end' (e.g. '0:200')"},{"type":"null"}],"title":"Range","default":"0:100"},"type":{"type":"string","const":"dataset","title":"Type"},"dataset":{"type":"string","minLength":1,"title":"Dataset","description":"Resource id or label"}},"additionalProperties":false,"type":"object","required":["type","dataset"],"title":"DatasetDataSource"},"ContainerDataSource":{"properties":{"range":{"anyOf":[{"type":"string","pattern":"^\\d+:\\d+$","description":"Range to evaluate, as 'start:end' (e.g. '0:200')"},{"type":"null"}],"title":"Range","default":"0:100"},"type":{"type":"string","const":"container","title":"Type"},"container":{"type":"string","minLength":1,"title":"Container","description":"Resource id or label"}},"additionalProperties":false,"type":"object","required":["type","container"],"title":"ContainerDataSource"},"GenerateDataSource":{"properties":{"range":{"anyOf":[{"type":"string","pattern":"^\\d+:\\d+$","description":"Range to evaluate, as 'start:end' (e.g. 
'0:200')"},{"type":"null"}],"title":"Range","default":"0:100"},"type":{"type":"string","const":"generate","title":"Type","default":"generate"},"models":{"items":{"type":"string","minLength":1,"description":"Resource id or label"},"type":"array","minItems":1,"title":"Models","description":"Model ids or slugs to generate with."}},"additionalProperties":false,"type":"object","required":["models"],"title":"GenerateDataSource","description":"Data source that generates completions via one or more models."},"EvaluationRunResults-Output":{"properties":{"overall":{"anyOf":[{"$ref":"#/components/schemas/EvaluationRunOverallResults"},{"type":"null"}],"description":"Aggregate scores across all models and evaluators."},"metadata":{"anyOf":[{"additionalProperties":true,"type":"object"},{"type":"null"}],"title":"Metadata","description":"Additional result metadata."},"per_model":{"anyOf":[{"items":{"$ref":"#/components/schemas/EvaluationRunPerModelResults"},"type":"array"},{"type":"null"}],"title":"Per Model","description":"Per-model result breakdowns."},"launch_count":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Launch Count","description":"Number of model launches in this run."}},"additionalProperties":true,"type":"object","title":"EvaluationRunResults","description":"Typed representation of the evaluation run results payload."},"EvaluationRunOverallResults":{"properties":{"avg_score":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"Avg Score","description":"Average score across all evaluators."},"avg_accuracy":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"Avg Accuracy","description":"Average accuracy across all evaluators."}},"additionalProperties":true,"type":"object","title":"EvaluationRunOverallResults","description":"Aggregated scores across all models and evaluators."},"EvaluationRunPerModelResults":{"properties":{"model":{"anyOf":[{"additionalProperties":true,"type":"object"},{"type":"null"}],"title":"Model","description":"Model configuration used 
(sample and ground-truth generation models)."},"per_eval":{"anyOf":[{"additionalProperties":{"$ref":"#/components/schemas/EvaluationRunPerEvalResults"},"type":"object"},{"type":"null"}],"title":"Per Eval","description":"Results keyed by evaluator UUID."},"avg_score":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"Avg Score","description":"Average score for this model."},"avg_accuracy":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"Avg Accuracy","description":"Average accuracy for this model."},"launch_index":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Launch Index","description":"Index of this model launch."},"launch_call_id":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Launch Call Id","description":"Modal function call id for this launch."}},"additionalProperties":true,"type":"object","title":"EvaluationRunPerModelResults","description":"Results for a single model within an evaluation run."},"EvaluationRunPerEvalResults":{"properties":{"accuracy":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"Accuracy","description":"Accuracy ratio."},"avg_score":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"Avg Score","description":"Average score."},"num_total":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Num Total","description":"Total number of evaluation samples."},"num_errors":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Num Errors","description":"Number of samples that errored."},"num_failed":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Num Failed","description":"Number of samples that failed."},"num_passed":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Num Passed","description":"Number of samples that passed."},"num_scored":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Num Scored","description":"Number of samples that were scored."},"num_missing":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Num Missing","description":"Number of samples with missing 
data."},"avg_accuracy":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"Avg Accuracy","description":"Average accuracy for this evaluator."},"break_reason":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Break Reason","description":"Why evaluation stopped (e.g. 'expected_count_reached')."},"expected_count":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Expected Count","description":"Expected sample count for this evaluator."}},"additionalProperties":true,"type":"object","title":"EvaluationRunPerEvalResults","description":"Per-evaluator breakdown within a single model run."},"ErrorResponse":{"properties":{"error":{"$ref":"#/components/schemas/ManiacError","description":"Error payload."}},"additionalProperties":false,"type":"object","required":["error"],"title":"ErrorResponse","description":"Response body for errors."},"ManiacError":{"properties":{"code":{"type":"string","title":"Code","description":"Machine-readable error code."},"message":{"type":"string","title":"Message","description":"Human-readable error message."},"details":{"anyOf":[{"additionalProperties":true,"type":"object"},{"type":"null"}],"title":"Details","description":"Additional error details."}},"additionalProperties":false,"type":"object","required":["code","message"],"title":"ManiacError","description":"Standard Maniac API error envelope.\n\nThis matches the shape already used by v2 auth (`detail={\"error\": {...}}`)."},"HTTPValidationError":{"properties":{"detail":{"items":{"$ref":"#/components/schemas/ValidationError"},"type":"array","title":"Detail"}},"type":"object","title":"HTTPValidationError"},"ValidationError":{"properties":{"loc":{"items":{"anyOf":[{"type":"string"},{"type":"integer"}]},"type":"array","title":"Location"},"msg":{"type":"string","title":"Message"},"type":{"type":"string","title":"Error Type"}},"type":"object","required":["loc","msg","type"],"title":"ValidationError"}}},"paths":{"/v1/evaluation/runs":{"get":{"tags":["Evaluation"],"summary":"List evaluation 
runs","description":"List evaluation runs for the authenticated project. Optionally filter by container and status.","operationId":"evaluation_runs_list","parameters":[{"name":"container","in":"query","required":false,"schema":{"anyOf":[{"type":"string"},{"type":"null"}],"description":"Container ID or label to filter by.","title":"Container"},"description":"Container ID or label to filter by."},{"name":"status","in":"query","required":false,"schema":{"anyOf":[{"type":"string"},{"type":"null"}],"description":"Filter by run status (e.g. 'running', 'completed', 'error').","title":"Status"},"description":"Filter by run status (e.g. 'running', 'completed', 'error')."},{"name":"limit","in":"query","required":false,"schema":{"type":"integer","maximum":100,"minimum":1,"default":20,"title":"Limit"}},{"name":"offset","in":"query","required":false,"schema":{"type":"integer","minimum":0,"default":0,"title":"Offset"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/EvaluationRunListResponse"}}}},"400":{"description":"Bad Request","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}},"401":{"description":"Unauthorized","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}},"403":{"description":"Forbidden","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}},"429":{"description":"Too Many Requests","headers":{"X-RateLimit-Limit":{"description":"Request limit per window.","schema":{"type":"integer"}},"X-RateLimit-Remaining":{"description":"Remaining requests in current window.","schema":{"type":"integer"}},"X-RateLimit-Reset":{"description":"Unix timestamp when the rate limit resets.","schema":{"type":"integer"}},"Retry-After":{"description":"Seconds to wait before 
retrying.","schema":{"type":"integer"}}},"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}},"500":{"description":"Internal Server Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}},"501":{"description":"Not Implemented","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}},"503":{"description":"Upstream Unavailable","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}}}}}}}
```

## Create an evaluation run

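> Launch an evaluation run. Validates access to the specified container, evaluators, data sources, and models, then dispatches the run through the backend gateway interface. At least one of `sample` or `ground_truth` must be provided, and at least one resolved side must not be `type='generate'` so there is seed input to evaluate against.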

```json
{"openapi":"3.1.0","info":{"title":"Maniac Inference Gateway API","version":"1.0.0"},"tags":[{"name":"Evaluation","description":"Evaluation and evaluator endpoints."}],"servers":[{"url":"https://platform.maniac.ai","description":"The Maniac API"}],"security":[{"ApiKeyAuth":[]}],"components":{"securitySchemes":{"ApiKeyAuth":{"type":"http","scheme":"bearer","bearerFormat":"API key","description":"API key in Authorization header using Bearer <token>."}},"schemas":{"EvaluationRunReq":{"properties":{"container":{"type":"string","title":"Container","description":"Container id or label."},"evaluators":{"items":{"type":"string"},"type":"array","minItems":1,"title":"Evaluators","description":"Evaluator ids or labels."},"sample":{"anyOf":[{"oneOf":[{"$ref":"#/components/schemas/DatasetDataSource"},{"$ref":"#/components/schemas/ContainerDataSource"},{"$ref":"#/components/schemas/GenerateDataSource"}],"discriminator":{"propertyName":"type","mapping":{"container":"#/components/schemas/ContainerDataSource","dataset":"#/components/schemas/DatasetDataSource","generate":"#/components/schemas/GenerateDataSource"}}},{"type":"null"}],"title":"Sample","description":"Sample-side data source. Omit to default to the container's task logs. Use type='dataset' to pull from a dataset, type='container' to pull from task logs, or type='generate' to generate completions with the specified models."},"ground_truth":{"anyOf":[{"oneOf":[{"$ref":"#/components/schemas/DatasetDataSource"},{"$ref":"#/components/schemas/ContainerDataSource"},{"$ref":"#/components/schemas/GenerateDataSource"}],"discriminator":{"propertyName":"type","mapping":{"container":"#/components/schemas/ContainerDataSource","dataset":"#/components/schemas/DatasetDataSource","generate":"#/components/schemas/GenerateDataSource"}}},{"type":"null"}],"title":"Ground Truth","description":"Ground-truth-side data source. Omit to default to the container's task logs. 
Use type='dataset' to pull from a dataset, type='container' to pull from task logs, or type='generate' to generate completions with a model."},"baseline":{"anyOf":[{"oneOf":[{"$ref":"#/components/schemas/DatasetDataSource"},{"$ref":"#/components/schemas/ContainerDataSource"},{"$ref":"#/components/schemas/GenerateDataSource"}],"discriminator":{"propertyName":"type","mapping":{"container":"#/components/schemas/ContainerDataSource","dataset":"#/components/schemas/DatasetDataSource","generate":"#/components/schemas/GenerateDataSource"}}},{"type":"null"}],"title":"Baseline","description":"Baseline-side data source for pairwise evaluation. Use type='dataset' to pull from a dataset, type='container' to pull from task logs, or type='generate' to generate completions with a model."},"metadata":{"anyOf":[{"additionalProperties":true,"type":"object"},{"type":"null"}],"title":"Metadata","description":"Optional metadata."},"environment":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Environment","description":"Execution environment name (maps to Modal app suffix).","default":"main"}},"additionalProperties":false,"type":"object","required":["container","evaluators"],"title":"EvaluationRunReq","description":"Request body for creating an evaluation run.\n\nEach side of the evaluation (``sample`` and ``ground_truth``) is described\nby a single data-source object whose ``type`` discriminator determines how\ndata is obtained:\n\n- ``\"dataset\"``   — pull from a dataset.\n- ``\"container\"`` — pull from the container's task logs.\n- ``\"generate\"``  — generate completions using one or more models.\n\nBoth fields are optional, but **at least one must be provided**.  When a\nside is omitted it defaults to the top-level container's task logs.  
At\nleast one resolved side must not be ``type='generate'`` so there is seed\ninput to evaluate against."},"DatasetDataSource":{"properties":{"range":{"anyOf":[{"type":"string","pattern":"^\\d+:\\d+$","description":"Range to evaluate, as 'start:end' (e.g. '0:200')"},{"type":"null"}],"title":"Range","default":"0:100"},"type":{"type":"string","const":"dataset","title":"Type"},"dataset":{"type":"string","minLength":1,"title":"Dataset","description":"Resource id or label"}},"additionalProperties":false,"type":"object","required":["type","dataset"],"title":"DatasetDataSource"},"ContainerDataSource":{"properties":{"range":{"anyOf":[{"type":"string","pattern":"^\\d+:\\d+$","description":"Range to evaluate, as 'start:end' (e.g. '0:200')"},{"type":"null"}],"title":"Range","default":"0:100"},"type":{"type":"string","const":"container","title":"Type"},"container":{"type":"string","minLength":1,"title":"Container","description":"Resource id or label"}},"additionalProperties":false,"type":"object","required":["type","container"],"title":"ContainerDataSource"},"GenerateDataSource":{"properties":{"range":{"anyOf":[{"type":"string","pattern":"^\\d+:\\d+$","description":"Range to evaluate, as 'start:end' (e.g. 
'0:200')"},{"type":"null"}],"title":"Range","default":"0:100"},"type":{"type":"string","const":"generate","title":"Type","default":"generate"},"models":{"items":{"type":"string","minLength":1,"description":"Resource id or label"},"type":"array","minItems":1,"title":"Models","description":"Model ids or slugs to generate with."}},"additionalProperties":false,"type":"object","required":["models"],"title":"GenerateDataSource","description":"Data source that generates completions via one or more models."},"EvaluationRun":{"properties":{"created_at":{"type":"string","title":"Created At"},"finished_at":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Finished At"},"error_at":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Error At"},"status":{"type":"string","title":"Status"},"error":{"anyOf":[{},{"type":"null"}],"title":"Error"},"object":{"type":"string","const":"evaluation.run","title":"Object","description":"Object type.","default":"evaluation.run"},"id":{"type":"string","title":"Id","description":"Evaluation run id (run group id)."},"process_id":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Process Id","description":"Process id for lifecycle tracking."},"evaluators":{"anyOf":[{"items":{"type":"string"},"type":"array"},{"type":"null"}],"title":"Evaluators","description":"Evaluator ids used in this run."},"container":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Container","description":"Container id."},"dataset_id":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Dataset Id","description":"Dataset id (if a dataset was 
used)."},"sample":{"anyOf":[{"oneOf":[{"$ref":"#/components/schemas/DatasetDataSource"},{"$ref":"#/components/schemas/ContainerDataSource"},{"$ref":"#/components/schemas/GenerateDataSource"}],"discriminator":{"propertyName":"type","mapping":{"container":"#/components/schemas/ContainerDataSource","dataset":"#/components/schemas/DatasetDataSource","generate":"#/components/schemas/GenerateDataSource"}}},{"type":"null"}],"title":"Sample","description":"Resolved sample-side data source."},"ground_truth":{"anyOf":[{"oneOf":[{"$ref":"#/components/schemas/DatasetDataSource"},{"$ref":"#/components/schemas/ContainerDataSource"},{"$ref":"#/components/schemas/GenerateDataSource"}],"discriminator":{"propertyName":"type","mapping":{"container":"#/components/schemas/ContainerDataSource","dataset":"#/components/schemas/DatasetDataSource","generate":"#/components/schemas/GenerateDataSource"}}},{"type":"null"}],"title":"Ground Truth","description":"Resolved ground-truth-side data source."},"baseline":{"anyOf":[{"oneOf":[{"$ref":"#/components/schemas/DatasetDataSource"},{"$ref":"#/components/schemas/ContainerDataSource"},{"$ref":"#/components/schemas/GenerateDataSource"}],"discriminator":{"propertyName":"type","mapping":{"container":"#/components/schemas/ContainerDataSource","dataset":"#/components/schemas/DatasetDataSource","generate":"#/components/schemas/GenerateDataSource"}}},{"type":"null"}],"title":"Baseline","description":"Resolved baseline-side data source for pairwise evaluation."},"results":{"anyOf":[{"$ref":"#/components/schemas/EvaluationRunResults-Output"},{"type":"null"}],"description":"Evaluation results (populated on completion)."},"metrics":{"anyOf":[{"additionalProperties":true,"type":"object"},{"type":"null"}],"title":"Metrics","description":"Evaluation metrics (populated on completion)."},"config":{"anyOf":[{"additionalProperties":true,"type":"object"},{"type":"null"}],"title":"Config","description":"Run configuration as 
submitted."},"spend":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"Spend","description":"Estimated spend."},"metadata":{"anyOf":[{"additionalProperties":true,"type":"object"},{"type":"null"}],"title":"Metadata","description":"Optional metadata."}},"type":"object","required":["created_at","status","id"],"title":"EvaluationRun","description":"Response model for an evaluation run."},"EvaluationRunResults-Output":{"properties":{"overall":{"anyOf":[{"$ref":"#/components/schemas/EvaluationRunOverallResults"},{"type":"null"}],"description":"Aggregate scores across all models and evaluators."},"metadata":{"anyOf":[{"additionalProperties":true,"type":"object"},{"type":"null"}],"title":"Metadata","description":"Additional result metadata."},"per_model":{"anyOf":[{"items":{"$ref":"#/components/schemas/EvaluationRunPerModelResults"},"type":"array"},{"type":"null"}],"title":"Per Model","description":"Per-model result breakdowns."},"launch_count":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Launch Count","description":"Number of model launches in this run."}},"additionalProperties":true,"type":"object","title":"EvaluationRunResults","description":"Typed representation of the evaluation run results payload."},"EvaluationRunOverallResults":{"properties":{"avg_score":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"Avg Score","description":"Average score across all evaluators."},"avg_accuracy":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"Avg Accuracy","description":"Average accuracy across all evaluators."}},"additionalProperties":true,"type":"object","title":"EvaluationRunOverallResults","description":"Aggregated scores across all models and evaluators."},"EvaluationRunPerModelResults":{"properties":{"model":{"anyOf":[{"additionalProperties":true,"type":"object"},{"type":"null"}],"title":"Model","description":"Model configuration used (sample and ground-truth generation 
models)."},"per_eval":{"anyOf":[{"additionalProperties":{"$ref":"#/components/schemas/EvaluationRunPerEvalResults"},"type":"object"},{"type":"null"}],"title":"Per Eval","description":"Results keyed by evaluator UUID."},"avg_score":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"Avg Score","description":"Average score for this model."},"avg_accuracy":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"Avg Accuracy","description":"Average accuracy for this model."},"launch_index":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Launch Index","description":"Index of this model launch."},"launch_call_id":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Launch Call Id","description":"Modal function call id for this launch."}},"additionalProperties":true,"type":"object","title":"EvaluationRunPerModelResults","description":"Results for a single model within an evaluation run."},"EvaluationRunPerEvalResults":{"properties":{"accuracy":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"Accuracy","description":"Accuracy ratio."},"avg_score":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"Avg Score","description":"Average score."},"num_total":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Num Total","description":"Total number of evaluation samples."},"num_errors":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Num Errors","description":"Number of samples that errored."},"num_failed":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Num Failed","description":"Number of samples that failed."},"num_passed":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Num Passed","description":"Number of samples that passed."},"num_scored":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Num Scored","description":"Number of samples that were scored."},"num_missing":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Num Missing","description":"Number of samples with missing 
data."},"avg_accuracy":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"Avg Accuracy","description":"Average accuracy for this evaluator."},"break_reason":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Break Reason","description":"Why evaluation stopped (e.g. 'expected_count_reached')."},"expected_count":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Expected Count","description":"Expected sample count for this evaluator."}},"additionalProperties":true,"type":"object","title":"EvaluationRunPerEvalResults","description":"Per-evaluator breakdown within a single model run."},"ErrorResponse":{"properties":{"error":{"$ref":"#/components/schemas/ManiacError","description":"Error payload."}},"additionalProperties":false,"type":"object","required":["error"],"title":"ErrorResponse","description":"Response body for errors."},"ManiacError":{"properties":{"code":{"type":"string","title":"Code","description":"Machine-readable error code."},"message":{"type":"string","title":"Message","description":"Human-readable error message."},"details":{"anyOf":[{"additionalProperties":true,"type":"object"},{"type":"null"}],"title":"Details","description":"Additional error details."}},"additionalProperties":false,"type":"object","required":["code","message"],"title":"ManiacError","description":"Standard Maniac API error envelope.\n\nThis matches the shape already used by v2 auth (`detail={\"error\": {...}}`)."},"HTTPValidationError":{"properties":{"detail":{"items":{"$ref":"#/components/schemas/ValidationError"},"type":"array","title":"Detail"}},"type":"object","title":"HTTPValidationError"},"ValidationError":{"properties":{"loc":{"items":{"anyOf":[{"type":"string"},{"type":"integer"}]},"type":"array","title":"Location"},"msg":{"type":"string","title":"Message"},"type":{"type":"string","title":"Error Type"}},"type":"object","required":["loc","msg","type"],"title":"ValidationError"}}},"paths":{"/v1/evaluation/runs":{"post":{"tags":["Evaluation"],"summary":"Create an 
evaluation run","description":"Launch an evaluation run. Validates access to the specified container, evaluators, data sources, and models, then dispatches the run through the backend gateway interface.","operationId":"evaluation_runs_create","requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/EvaluationRunReq"}}}},"responses":{"201":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/EvaluationRun"}}}},"400":{"description":"Bad Request","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}},"401":{"description":"Unauthorized","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}},"403":{"description":"Forbidden","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}},"404":{"description":"Not Found","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}},"409":{"description":"Conflict","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}},"429":{"description":"Too Many Requests","headers":{"X-RateLimit-Limit":{"description":"Request limit per window.","schema":{"type":"integer"}},"X-RateLimit-Remaining":{"description":"Remaining requests in current window.","schema":{"type":"integer"}},"X-RateLimit-Reset":{"description":"Unix timestamp when the rate limit resets.","schema":{"type":"integer"}},"Retry-After":{"description":"Seconds to wait before retrying.","schema":{"type":"integer"}}},"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}},"500":{"description":"Internal Server Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}},"501":{"description":"Not 
Implemented","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}},"503":{"description":"Upstream Unavailable","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}}}}}}}
```

## Get an evaluation run

> Retrieve a single evaluation run by ID within the authenticated project.

```json
{"openapi":"3.1.0","info":{"title":"Maniac Inference Gateway API","version":"1.0.0"},"tags":[{"name":"Evaluation","description":"Evaluation and evaluator endpoints."}],"servers":[{"url":"https://platform.maniac.ai","description":"The Maniac API"}],"security":[{"ApiKeyAuth":[]}],"components":{"securitySchemes":{"ApiKeyAuth":{"type":"http","scheme":"bearer","bearerFormat":"API key","description":"API key in Authorization header using Bearer <token>."}},"schemas":{"EvaluationRun":{"properties":{"created_at":{"type":"string","title":"Created At"},"finished_at":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Finished At"},"error_at":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Error At"},"status":{"type":"string","title":"Status"},"error":{"anyOf":[{},{"type":"null"}],"title":"Error"},"object":{"type":"string","const":"evaluation.run","title":"Object","description":"Object type.","default":"evaluation.run"},"id":{"type":"string","title":"Id","description":"Evaluation run id (run group id)."},"process_id":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Process Id","description":"Process id for lifecycle tracking."},"evaluators":{"anyOf":[{"items":{"type":"string"},"type":"array"},{"type":"null"}],"title":"Evaluators","description":"Evaluator ids used in this run."},"container":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Container","description":"Container id."},"dataset_id":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Dataset Id","description":"Dataset id (if a dataset was used)."},"sample":{"anyOf":[{"oneOf":[{"$ref":"#/components/schemas/DatasetDataSource"},{"$ref":"#/components/schemas/ContainerDataSource"},{"$ref":"#/components/schemas/GenerateDataSource"}],"discriminator":{"propertyName":"type","mapping":{"container":"#/components/schemas/ContainerDataSource","dataset":"#/components/schemas/DatasetDataSource","generate":"#/components/schemas/GenerateDataSource"}}},{"type":"null"}],"title":"Sample","description":"Resolved sample-side data source."},"ground_truth":{"anyOf":[{"oneOf":[{"$ref":"#/components/schemas/DatasetDataSource"},{"$ref":"#/components/schemas/ContainerDataSource"},{"$ref":"#/components/schemas/GenerateDataSource"}],"discriminator":{"propertyName":"type","mapping":{"container":"#/components/schemas/ContainerDataSource","dataset":"#/components/schemas/DatasetDataSource","generate":"#/components/schemas/GenerateDataSource"}}},{"type":"null"}],"title":"Ground Truth","description":"Resolved ground-truth-side data source."},"baseline":{"anyOf":[{"oneOf":[{"$ref":"#/components/schemas/DatasetDataSource"},{"$ref":"#/components/schemas/ContainerDataSource"},{"$ref":"#/components/schemas/GenerateDataSource"}],"discriminator":{"propertyName":"type","mapping":{"container":"#/components/schemas/ContainerDataSource","dataset":"#/components/schemas/DatasetDataSource","generate":"#/components/schemas/GenerateDataSource"}}},{"type":"null"}],"title":"Baseline","description":"Resolved baseline-side data source for pairwise evaluation."},"results":{"anyOf":[{"$ref":"#/components/schemas/EvaluationRunResults-Output"},{"type":"null"}],"description":"Evaluation results (populated on completion)."},"metrics":{"anyOf":[{"additionalProperties":true,"type":"object"},{"type":"null"}],"title":"Metrics","description":"Evaluation metrics (populated on completion)."},"config":{"anyOf":[{"additionalProperties":true,"type":"object"},{"type":"null"}],"title":"Config","description":"Run configuration as submitted."},"spend":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"Spend","description":"Estimated spend."},"metadata":{"anyOf":[{"additionalProperties":true,"type":"object"},{"type":"null"}],"title":"Metadata","description":"Optional metadata."}},"type":"object","required":["created_at","status","id"],"title":"EvaluationRun","description":"Response model for an evaluation run."},"DatasetDataSource":{"properties":{"range":{"anyOf":[{"type":"string","pattern":"^\\d+:\\d+$","description":"Range to evaluate, as 'start:end' (e.g. '0:200')"},{"type":"null"}],"title":"Range","default":"0:100"},"type":{"type":"string","const":"dataset","title":"Type"},"dataset":{"type":"string","minLength":1,"title":"Dataset","description":"Resource id or label"}},"additionalProperties":false,"type":"object","required":["type","dataset"],"title":"DatasetDataSource"},"ContainerDataSource":{"properties":{"range":{"anyOf":[{"type":"string","pattern":"^\\d+:\\d+$","description":"Range to evaluate, as 'start:end' (e.g. '0:200')"},{"type":"null"}],"title":"Range","default":"0:100"},"type":{"type":"string","const":"container","title":"Type"},"container":{"type":"string","minLength":1,"title":"Container","description":"Resource id or label"}},"additionalProperties":false,"type":"object","required":["type","container"],"title":"ContainerDataSource"},"GenerateDataSource":{"properties":{"range":{"anyOf":[{"type":"string","pattern":"^\\d+:\\d+$","description":"Range to evaluate, as 'start:end' (e.g. '0:200')"},{"type":"null"}],"title":"Range","default":"0:100"},"type":{"type":"string","const":"generate","title":"Type","default":"generate"},"models":{"items":{"type":"string","minLength":1,"description":"Resource id or label"},"type":"array","minItems":1,"title":"Models","description":"Model ids or slugs to generate with."}},"additionalProperties":false,"type":"object","required":["models"],"title":"GenerateDataSource","description":"Data source that generates completions via one or more models."},"EvaluationRunResults-Output":{"properties":{"overall":{"anyOf":[{"$ref":"#/components/schemas/EvaluationRunOverallResults"},{"type":"null"}],"description":"Aggregate scores across all models and evaluators."},"metadata":{"anyOf":[{"additionalProperties":true,"type":"object"},{"type":"null"}],"title":"Metadata","description":"Additional result metadata."},"per_model":{"anyOf":[{"items":{"$ref":"#/components/schemas/EvaluationRunPerModelResults"},"type":"array"},{"type":"null"}],"title":"Per Model","description":"Per-model result breakdowns."},"launch_count":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Launch Count","description":"Number of model launches in this run."}},"additionalProperties":true,"type":"object","title":"EvaluationRunResults","description":"Typed representation of the evaluation run results payload."},"EvaluationRunOverallResults":{"properties":{"avg_score":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"Avg Score","description":"Average score across all evaluators."},"avg_accuracy":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"Avg Accuracy","description":"Average accuracy across all evaluators."}},"additionalProperties":true,"type":"object","title":"EvaluationRunOverallResults","description":"Aggregated scores across all models and evaluators."},"EvaluationRunPerModelResults":{"properties":{"model":{"anyOf":[{"additionalProperties":true,"type":"object"},{"type":"null"}],"title":"Model","description":"Model configuration used (sample and ground-truth generation models)."},"per_eval":{"anyOf":[{"additionalProperties":{"$ref":"#/components/schemas/EvaluationRunPerEvalResults"},"type":"object"},{"type":"null"}],"title":"Per Eval","description":"Results keyed by evaluator UUID."},"avg_score":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"Avg Score","description":"Average score for this model."},"avg_accuracy":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"Avg Accuracy","description":"Average accuracy for this model."},"launch_index":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Launch Index","description":"Index of this model launch."},"launch_call_id":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Launch Call Id","description":"Modal function call id for this launch."}},"additionalProperties":true,"type":"object","title":"EvaluationRunPerModelResults","description":"Results for a single model within an evaluation run."},"EvaluationRunPerEvalResults":{"properties":{"accuracy":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"Accuracy","description":"Accuracy ratio."},"avg_score":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"Avg Score","description":"Average score."},"num_total":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Num Total","description":"Total number of evaluation samples."},"num_errors":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Num Errors","description":"Number of samples that errored."},"num_failed":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Num Failed","description":"Number of samples that failed."},"num_passed":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Num Passed","description":"Number of samples that passed."},"num_scored":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Num Scored","description":"Number of samples that were scored."},"num_missing":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Num Missing","description":"Number of samples with missing data."},"avg_accuracy":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"Avg Accuracy","description":"Average accuracy for this evaluator."},"break_reason":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Break Reason","description":"Why evaluation stopped (e.g. 'expected_count_reached')."},"expected_count":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Expected Count","description":"Expected sample count for this evaluator."}},"additionalProperties":true,"type":"object","title":"EvaluationRunPerEvalResults","description":"Per-evaluator breakdown within a single model run."},"ErrorResponse":{"properties":{"error":{"$ref":"#/components/schemas/ManiacError","description":"Error payload."}},"additionalProperties":false,"type":"object","required":["error"],"title":"ErrorResponse","description":"Response body for errors."},"ManiacError":{"properties":{"code":{"type":"string","title":"Code","description":"Machine-readable error code."},"message":{"type":"string","title":"Message","description":"Human-readable error message."},"details":{"anyOf":[{"additionalProperties":true,"type":"object"},{"type":"null"}],"title":"Details","description":"Additional error details."}},"additionalProperties":false,"type":"object","required":["code","message"],"title":"ManiacError","description":"Standard Maniac API error envelope.\n\nThis matches the shape already used by v2 auth (`detail={\"error\": {...}}`)."},"HTTPValidationError":{"properties":{"detail":{"items":{"$ref":"#/components/schemas/ValidationError"},"type":"array","title":"Detail"}},"type":"object","title":"HTTPValidationError"},"ValidationError":{"properties":{"loc":{"items":{"anyOf":[{"type":"string"},{"type":"integer"}]},"type":"array","title":"Location"},"msg":{"type":"string","title":"Message"},"type":{"type":"string","title":"Error Type"}},"type":"object","required":["loc","msg","type"],"title":"ValidationError"}}},"paths":{"/v1/evaluation/runs/{run_id}":{"get":{"tags":["Evaluation"],"summary":"Get an evaluation run","description":"Retrieve a single evaluation run by ID within the authenticated project.","operationId":"evaluation_runs_retrieve","parameters":[{"name":"run_id","in":"path","required":true,"schema":{"type":"string","title":"Run Id"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/EvaluationRun"}}}},"400":{"description":"Bad Request","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}},"401":{"description":"Unauthorized","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}},"403":{"description":"Forbidden","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}},"404":{"description":"Not Found","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}},"409":{"description":"Conflict","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}},"429":{"description":"Too Many Requests","headers":{"X-RateLimit-Limit":{"description":"Request limit per window.","schema":{"type":"integer"}},"X-RateLimit-Remaining":{"description":"Remaining requests in current window.","schema":{"type":"integer"}},"X-RateLimit-Reset":{"description":"Unix timestamp when the rate limit resets.","schema":{"type":"integer"}},"Retry-After":{"description":"Seconds to wait before retrying.","schema":{"type":"integer"}}},"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}},"500":{"description":"Internal Server Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}},"501":{"description":"Not Implemented","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}},"503":{"description":"Upstream Unavailable","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}}}}}}}
```


---

# Agent Instructions: Querying This Documentation

If you need additional information that is not directly available on this page, you can query the documentation dynamically by asking a question.

Send an HTTP GET request to the current page URL, passing your question in the `ask` query parameter:

```
GET https://docs.maniac.ai/api-reference/evaluation.md?ask=<question>
```

The question should be specific, self-contained, and written in natural language; URL-encode it before placing it in the query string.
The response will contain a direct answer to the question, along with relevant excerpts and sources from the documentation.

Use this mechanism when the answer is not explicitly present on the current page, when you need clarification or additional context, or when you want to retrieve related documentation sections.
