# Evaluation

Evaluation and evaluator endpoints.

## List evaluators

> List evaluators for the authenticated project, optionally filtered by container.

```json
{"openapi":"3.1.0","info":{"title":"Maniac Inference Gateway API","version":"1.0.0"},"tags":[{"name":"Evaluation","description":"Evaluation and evaluator endpoints."}],"servers":[{"url":"https://platform.maniac.ai","description":"The Maniac API"}],"security":[{"ApiKeyAuth":[]}],
"components":{"securitySchemes":{"ApiKeyAuth":{"type":"http","scheme":"bearer","bearerFormat":"API key","description":"API key in Authorization header using Bearer <token>."}},
"schemas":{
"EvaluatorListResponse":{"properties":{"object":{"type":"string","const":"list","title":"Object","description":"Object type identifier.","default":"list"},"data":{"items":{"anyOf":[{"$ref":"#/components/schemas/JudgeEvaluator"},{"$ref":"#/components/schemas/CodeEvaluator"}]},"type":"array","title":"Data","description":"Returned items."},"total":{"type":"integer","minimum":0,"title":"Total","description":"Total number of items available for this resource."}},"additionalProperties":false,"type":"object","required":["data","total"],"title":"EvaluatorListResponse"},
"JudgeEvaluator":{"properties":{"object":{"type":"string","const":"evaluator.judge","title":"Object","description":"Object type.","default":"evaluator.judge"},"project_id":{"type":"string","title":"Project Id","description":"Project id."},"container_id":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Container Id","description":"Container id."},"id":{"type":"string","title":"Id","description":"Evaluator id."},"name":{"type":"string","title":"Name","description":"Evaluator name."},"description":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Description","description":"Evaluator description."},"created_at":{"type":"string","title":"Created At","description":"Creation timestamp."},"metadata":{"anyOf":[{"additionalProperties":true,"type":"object"},{"type":"null"}],"title":"Metadata","description":"Optional metadata."},"pass_threshold":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"Pass Threshold","description":"Optional pass threshold."},"mode":{"type":"string","enum":["pointwise","reference","pairwise"],"title":"Mode","description":"Evaluation mode: score a candidate directly, against a reference, or against a baseline.","default":"pointwise"},"api":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Api","description":"API type."},"container":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Container","description":"Container label (if container-specific)."},"model":{"type":"string","title":"Model","description":"Judge model slug."},"prompt":{"type":"string","title":"Prompt","description":"Judge prompt."},"inference_parameters":{"anyOf":[{"additionalProperties":true,"type":"object"},{"type":"null"}],"title":"Inference Parameters","description":"Optional inference parameters for the judge model (e.g. response_format, temperature)."}},"type":"object","required":["project_id","id","name","created_at","model","prompt"],"title":"JudgeEvaluator"},
"CodeEvaluator":{"properties":{"object":{"type":"string","const":"evaluator.code","title":"Object","description":"Object type.","default":"evaluator.code"},"project_id":{"type":"string","title":"Project Id","description":"Project id."},"container_id":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Container Id","description":"Container id."},"id":{"type":"string","title":"Id","description":"Evaluator id."},"name":{"type":"string","title":"Name","description":"Evaluator name."},"description":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Description","description":"Evaluator description."},"created_at":{"type":"string","title":"Created At","description":"Creation timestamp."},"metadata":{"anyOf":[{"additionalProperties":true,"type":"object"},{"type":"null"}],"title":"Metadata","description":"Optional metadata."},"pass_threshold":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"Pass Threshold","description":"Optional pass threshold."},"mode":{"type":"string","enum":["pointwise","reference","pairwise"],"title":"Mode","description":"Evaluation mode: score a candidate directly, against a reference, or against a baseline.","default":"pointwise"},"api":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Api","description":"API type."},"container":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Container","description":"Container label (if container-specific)."},"source":{"type":"string","title":"Source","description":"Source code to evaluate."},"requirements":{"anyOf":[{"items":{"type":"string"},"type":"array"},{"type":"null"}],"title":"Requirements","description":"Optional list of package requirements."}},"type":"object","required":["project_id","id","name","created_at","source"],"title":"CodeEvaluator"},
"ErrorResponse":{"properties":{"error":{"$ref":"#/components/schemas/ManiacError","description":"Error payload."}},"additionalProperties":false,"type":"object","required":["error"],"title":"ErrorResponse","description":"Response body for errors."},
"ManiacError":{"properties":{"code":{"type":"string","title":"Code","description":"Machine-readable error code."},"message":{"type":"string","title":"Message","description":"Human-readable error message."},"details":{"anyOf":[{"additionalProperties":true,"type":"object"},{"type":"null"}],"title":"Details","description":"Additional error details."}},"additionalProperties":false,"type":"object","required":["code","message"],"title":"ManiacError","description":"Standard Maniac API error envelope.\n\nThis matches the shape already used by v2 auth (`detail={\"error\": {...}}`)."},
"HTTPValidationError":{"properties":{"detail":{"items":{"$ref":"#/components/schemas/ValidationError"},"type":"array","title":"Detail"}},"type":"object","title":"HTTPValidationError"},
"ValidationError":{"properties":{"loc":{"items":{"anyOf":[{"type":"string"},{"type":"integer"}]},"type":"array","title":"Location"},"msg":{"type":"string","title":"Message"},"type":{"type":"string","title":"Error Type"}},"type":"object","required":["loc","msg","type"],"title":"ValidationError"}}},
"paths":{"/v1/evaluators":{"get":{"tags":["Evaluation"],"summary":"List evaluators","description":"List evaluators for the authenticated project, optionally filtered by container.","operationId":"evaluators_list",
"parameters":[{"name":"container","in":"query","required":false,"schema":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Container"}},{"name":"limit","in":"query","required":false,"schema":{"type":"integer","maximum":100,"minimum":1,"default":20,"title":"Limit"}},{"name":"offset","in":"query","required":false,"schema":{"type":"integer","minimum":0,"default":0,"title":"Offset"}}],
"responses":{
"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/EvaluatorListResponse"}}}},
"400":{"description":"Bad Request","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}},
"401":{"description":"Unauthorized","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}},
"403":{"description":"Forbidden","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}},
"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}},
"429":{"description":"Too Many Requests","headers":{"X-RateLimit-Limit":{"description":"Request limit per window.","schema":{"type":"integer"}},"X-RateLimit-Remaining":{"description":"Remaining requests in current window.","schema":{"type":"integer"}},"X-RateLimit-Reset":{"description":"Unix timestamp when the rate limit resets.","schema":{"type":"integer"}},"Retry-After":{"description":"Seconds to wait before retrying.","schema":{"type":"integer"}}},"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}},
"500":{"description":"Internal Server Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}},
"501":{"description":"Not Implemented","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}},
"503":{"description":"Upstream Unavailable","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}}}}}}}
```

## Create an evaluator

> Create a new evaluator scoped to a project or container.

```json
{"openapi":"3.1.0","info":{"title":"Maniac Inference Gateway API","version":"1.0.0"},"tags":[{"name":"Evaluation","description":"Evaluation and evaluator endpoints."}],"servers":[{"url":"https://platform.maniac.ai","description":"The Maniac API"}],"security":[{"ApiKeyAuth":[]}],
"components":{"securitySchemes":{"ApiKeyAuth":{"type":"http","scheme":"bearer","bearerFormat":"API key","description":"API key in Authorization header using Bearer <token>."}},
"schemas":{
"EvaluatorReq":{"properties":{"container":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Container","description":"Container label."},"type":{"type":"string","enum":["judge","code"],"title":"Type","description":"Evaluator type."},"model":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Model","description":"Judge model slug."},"prompt":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Prompt","description":"Judge prompt."},"source":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Source","description":"Code evaluator source."},"requirements":{"anyOf":[{"items":{"type":"string"},"type":"array"},{"type":"null"}],"title":"Requirements","description":"Optional requirements list."},"pass_threshold":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"Pass Threshold","description":"Optional pass threshold."},"mode":{"type":"string","enum":["pointwise","reference","pairwise"],"title":"Mode","description":"Evaluation mode.","default":"pointwise"},"metadata":{"anyOf":[{"additionalProperties":true,"type":"object"},{"type":"null"}],"title":"Metadata","description":"Optional metadata."},"name":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Name","description":"Evaluator name."},"description":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Description","description":"Evaluator description."},"api":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Api","description":"API type."},"inference_parameters":{"anyOf":[{"additionalProperties":true,"type":"object"},{"type":"null"}],"title":"Inference Parameters","description":"Optional inference parameters for judge evaluators (e.g. response_format, temperature)."}},"additionalProperties":false,"type":"object","required":["type"],"title":"EvaluatorReq"},
"JudgeEvaluator":{"properties":{"object":{"type":"string","const":"evaluator.judge","title":"Object","description":"Object type.","default":"evaluator.judge"},"project_id":{"type":"string","title":"Project Id","description":"Project id."},"container_id":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Container Id","description":"Container id."},"id":{"type":"string","title":"Id","description":"Evaluator id."},"name":{"type":"string","title":"Name","description":"Evaluator name."},"description":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Description","description":"Evaluator description."},"created_at":{"type":"string","title":"Created At","description":"Creation timestamp."},"metadata":{"anyOf":[{"additionalProperties":true,"type":"object"},{"type":"null"}],"title":"Metadata","description":"Optional metadata."},"pass_threshold":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"Pass Threshold","description":"Optional pass threshold."},"mode":{"type":"string","enum":["pointwise","reference","pairwise"],"title":"Mode","description":"Evaluation mode: score a candidate directly, against a reference, or against a baseline.","default":"pointwise"},"api":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Api","description":"API type."},"container":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Container","description":"Container label (if container-specific)."},"model":{"type":"string","title":"Model","description":"Judge model slug."},"prompt":{"type":"string","title":"Prompt","description":"Judge prompt."},"inference_parameters":{"anyOf":[{"additionalProperties":true,"type":"object"},{"type":"null"}],"title":"Inference Parameters","description":"Optional inference parameters for the judge model (e.g. response_format, temperature)."}},"type":"object","required":["project_id","id","name","created_at","model","prompt"],"title":"JudgeEvaluator"},
"CodeEvaluator":{"properties":{"object":{"type":"string","const":"evaluator.code","title":"Object","description":"Object type.","default":"evaluator.code"},"project_id":{"type":"string","title":"Project Id","description":"Project id."},"container_id":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Container Id","description":"Container id."},"id":{"type":"string","title":"Id","description":"Evaluator id."},"name":{"type":"string","title":"Name","description":"Evaluator name."},"description":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Description","description":"Evaluator description."},"created_at":{"type":"string","title":"Created At","description":"Creation timestamp."},"metadata":{"anyOf":[{"additionalProperties":true,"type":"object"},{"type":"null"}],"title":"Metadata","description":"Optional metadata."},"pass_threshold":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"Pass Threshold","description":"Optional pass threshold."},"mode":{"type":"string","enum":["pointwise","reference","pairwise"],"title":"Mode","description":"Evaluation mode: score a candidate directly, against a reference, or against a baseline.","default":"pointwise"},"api":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Api","description":"API type."},"container":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Container","description":"Container label (if container-specific)."},"source":{"type":"string","title":"Source","description":"Source code to evaluate."},"requirements":{"anyOf":[{"items":{"type":"string"},"type":"array"},{"type":"null"}],"title":"Requirements","description":"Optional list of package requirements."}},"type":"object","required":["project_id","id","name","created_at","source"],"title":"CodeEvaluator"},
"ErrorResponse":{"properties":{"error":{"$ref":"#/components/schemas/ManiacError","description":"Error payload."}},"additionalProperties":false,"type":"object","required":["error"],"title":"ErrorResponse","description":"Response body for errors."},
"ManiacError":{"properties":{"code":{"type":"string","title":"Code","description":"Machine-readable error code."},"message":{"type":"string","title":"Message","description":"Human-readable error message."},"details":{"anyOf":[{"additionalProperties":true,"type":"object"},{"type":"null"}],"title":"Details","description":"Additional error details."}},"additionalProperties":false,"type":"object","required":["code","message"],"title":"ManiacError","description":"Standard Maniac API error envelope.\n\nThis matches the shape already used by v2 auth (`detail={\"error\": {...}}`)."},
"HTTPValidationError":{"properties":{"detail":{"items":{"$ref":"#/components/schemas/ValidationError"},"type":"array","title":"Detail"}},"type":"object","title":"HTTPValidationError"},
"ValidationError":{"properties":{"loc":{"items":{"anyOf":[{"type":"string"},{"type":"integer"}]},"type":"array","title":"Location"},"msg":{"type":"string","title":"Message"},"type":{"type":"string","title":"Error Type"}},"type":"object","required":["loc","msg","type"],"title":"ValidationError"}}},
"paths":{"/v1/evaluators":{"post":{"tags":["Evaluation"],"summary":"Create an evaluator","description":"Create a new evaluator scoped to a project or container.","operationId":"evaluators_create",
"requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/EvaluatorReq"}}}},
"responses":{
"201":{"description":"Successful Response","content":{"application/json":{"schema":{"anyOf":[{"$ref":"#/components/schemas/JudgeEvaluator"},{"$ref":"#/components/schemas/CodeEvaluator"}],"title":"Response Evaluators Create"}}}},
"400":{"description":"Bad Request","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}},
"401":{"description":"Unauthorized","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}},
"403":{"description":"Forbidden","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}},
"404":{"description":"Not Found","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}},
"409":{"description":"Conflict","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}},
"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}},
"429":{"description":"Too Many Requests","headers":{"X-RateLimit-Limit":{"description":"Request limit per window.","schema":{"type":"integer"}},"X-RateLimit-Remaining":{"description":"Remaining requests in current window.","schema":{"type":"integer"}},"X-RateLimit-Reset":{"description":"Unix timestamp when the rate limit resets.","schema":{"type":"integer"}},"Retry-After":{"description":"Seconds to wait before retrying.","schema":{"type":"integer"}}},"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}},
"500":{"description":"Internal Server Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}},
"501":{"description":"Not Implemented","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}},
"503":{"description":"Upstream Unavailable","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}}}}}}}
```

## Get an evaluator

> Fetch a single evaluator by id or name within the authenticated project.

```json
{"openapi":"3.1.0","info":{"title":"Maniac Inference Gateway API","version":"1.0.0"},"tags":[{"name":"Evaluation","description":"Evaluation and evaluator endpoints."}],"servers":[{"url":"https://platform.maniac.ai","description":"The Maniac API"}],"security":[{"ApiKeyAuth":[]}],
"components":{"securitySchemes":{"ApiKeyAuth":{"type":"http","scheme":"bearer","bearerFormat":"API key","description":"API key in Authorization header using Bearer <token>."}},
"schemas":{
"JudgeEvaluator":{"properties":{"object":{"type":"string","const":"evaluator.judge","title":"Object","description":"Object type.","default":"evaluator.judge"},"project_id":{"type":"string","title":"Project Id","description":"Project id."},"container_id":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Container Id","description":"Container id."},"id":{"type":"string","title":"Id","description":"Evaluator id."},"name":{"type":"string","title":"Name","description":"Evaluator name."},"description":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Description","description":"Evaluator description."},"created_at":{"type":"string","title":"Created At","description":"Creation timestamp."},"metadata":{"anyOf":[{"additionalProperties":true,"type":"object"},{"type":"null"}],"title":"Metadata","description":"Optional metadata."},"pass_threshold":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"Pass Threshold","description":"Optional pass threshold."},"mode":{"type":"string","enum":["pointwise","reference","pairwise"],"title":"Mode","description":"Evaluation mode: score a candidate directly, against a reference, or against a baseline.","default":"pointwise"},"api":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Api","description":"API type."},"container":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Container","description":"Container label (if container-specific)."},"model":{"type":"string","title":"Model","description":"Judge model slug."},"prompt":{"type":"string","title":"Prompt","description":"Judge prompt."},"inference_parameters":{"anyOf":[{"additionalProperties":true,"type":"object"},{"type":"null"}],"title":"Inference Parameters","description":"Optional inference parameters for the judge model (e.g. response_format, temperature)."}},"type":"object","required":["project_id","id","name","created_at","model","prompt"],"title":"JudgeEvaluator"},
"CodeEvaluator":{"properties":{"object":{"type":"string","const":"evaluator.code","title":"Object","description":"Object type.","default":"evaluator.code"},"project_id":{"type":"string","title":"Project Id","description":"Project id."},"container_id":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Container Id","description":"Container id."},"id":{"type":"string","title":"Id","description":"Evaluator id."},"name":{"type":"string","title":"Name","description":"Evaluator name."},"description":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Description","description":"Evaluator description."},"created_at":{"type":"string","title":"Created At","description":"Creation timestamp."},"metadata":{"anyOf":[{"additionalProperties":true,"type":"object"},{"type":"null"}],"title":"Metadata","description":"Optional metadata."},"pass_threshold":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"Pass Threshold","description":"Optional pass threshold."},"mode":{"type":"string","enum":["pointwise","reference","pairwise"],"title":"Mode","description":"Evaluation mode: score a candidate directly, against a reference, or against a baseline.","default":"pointwise"},"api":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Api","description":"API type."},"container":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Container","description":"Container label (if container-specific)."},"source":{"type":"string","title":"Source","description":"Source code to evaluate."},"requirements":{"anyOf":[{"items":{"type":"string"},"type":"array"},{"type":"null"}],"title":"Requirements","description":"Optional list of package requirements."}},"type":"object","required":["project_id","id","name","created_at","source"],"title":"CodeEvaluator"},
"ErrorResponse":{"properties":{"error":{"$ref":"#/components/schemas/ManiacError","description":"Error payload."}},"additionalProperties":false,"type":"object","required":["error"],"title":"ErrorResponse","description":"Response body for errors."},
"ManiacError":{"properties":{"code":{"type":"string","title":"Code","description":"Machine-readable error code."},"message":{"type":"string","title":"Message","description":"Human-readable error message."},"details":{"anyOf":[{"additionalProperties":true,"type":"object"},{"type":"null"}],"title":"Details","description":"Additional error details."}},"additionalProperties":false,"type":"object","required":["code","message"],"title":"ManiacError","description":"Standard Maniac API error envelope.\n\nThis matches the shape already used by v2 auth (`detail={\"error\": {...}}`)."},
"HTTPValidationError":{"properties":{"detail":{"items":{"$ref":"#/components/schemas/ValidationError"},"type":"array","title":"Detail"}},"type":"object","title":"HTTPValidationError"},
"ValidationError":{"properties":{"loc":{"items":{"anyOf":[{"type":"string"},{"type":"integer"}]},"type":"array","title":"Location"},"msg":{"type":"string","title":"Message"},"type":{"type":"string","title":"Error Type"}},"type":"object","required":["loc","msg","type"],"title":"ValidationError"}}},
"paths":{"/v1/evaluators/{evaluator}":{"get":{"tags":["Evaluation"],"summary":"Get an evaluator","description":"Fetch a single evaluator by id or name within the authenticated project.","operationId":"evaluators_retrieve",
"parameters":[{"name":"evaluator","in":"path","required":true,"schema":{"type":"string","title":"Evaluator"}}],
"responses":{
"200":{"description":"Successful Response","content":{"application/json":{"schema":{"anyOf":[{"$ref":"#/components/schemas/JudgeEvaluator"},{"$ref":"#/components/schemas/CodeEvaluator"}],"title":"Response Evaluators Retrieve"}}}},
"400":{"description":"Bad Request","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}},
"401":{"description":"Unauthorized","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}},
"403":{"description":"Forbidden","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}},
"404":{"description":"Not Found","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}},
"409":{"description":"Conflict","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}},
"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}},
"429":{"description":"Too Many Requests","headers":{"X-RateLimit-Limit":{"description":"Request limit per window.","schema":{"type":"integer"}},"X-RateLimit-Remaining":{"description":"Remaining requests in current window.","schema":{"type":"integer"}},"X-RateLimit-Reset":{"description":"Unix timestamp when the rate limit resets.","schema":{"type":"integer"}},"Retry-After":{"description":"Seconds to wait before retrying.","schema":{"type":"integer"}}},"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}},
"500":{"description":"Internal Server Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}},
"501":{"description":"Not Implemented","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}},
"503":{"description":"Upstream Unavailable","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}}}}}}}
```

## Update an evaluator

> Update an existing evaluator by id or name.

```json
{"openapi":"3.1.0","info":{"title":"Maniac Inference Gateway API","version":"1.0.0"},"tags":[{"name":"Evaluation","description":"Evaluation and evaluator endpoints."}],"servers":[{"url":"https://platform.maniac.ai","description":"The Maniac API"}],"security":[{"ApiKeyAuth":[]}],
"components":{"securitySchemes":{"ApiKeyAuth":{"type":"http","scheme":"bearer","bearerFormat":"API key","description":"API key in Authorization header using Bearer <token>."}},
"schemas":{
"UpdateEvaluatorReq":{"properties":{"container":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Container","description":"Container label."},"model":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Model","description":"Judge model slug."},"prompt":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Prompt","description":"Judge prompt."},"source":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Source","description":"Code evaluator source."},"requirements":{"anyOf":[{"items":{"type":"string"},"type":"array"},{"type":"null"}],"title":"Requirements","description":"Optional requirements list."},"pass_threshold":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"Pass Threshold","description":"Optional pass threshold."},"mode":{"anyOf":[{"type":"string","enum":["pointwise","reference","pairwise"]},{"type":"null"}],"title":"Mode","description":"Evaluation mode."},"metadata":{"anyOf":[{"additionalProperties":true,"type":"object"},{"type":"null"}],"title":"Metadata","description":"Optional metadata."},"name":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Name","description":"Evaluator name."},"description":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Description","description":"Evaluator description."},"api":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Api","description":"API type."},"inference_parameters":{"anyOf":[{"additionalProperties":true,"type":"object"},{"type":"null"}],"title":"Inference Parameters","description":"Optional inference parameters for judge evaluators (e.g. response_format, temperature)."}},"additionalProperties":false,"type":"object","title":"UpdateEvaluatorReq","description":"Patchable fields for an evaluator."},
"JudgeEvaluator":{"properties":{"object":{"type":"string","const":"evaluator.judge","title":"Object","description":"Object type.","default":"evaluator.judge"},"project_id":{"type":"string","title":"Project Id","description":"Project id."},"container_id":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Container Id","description":"Container id."},"id":{"type":"string","title":"Id","description":"Evaluator id."},"name":{"type":"string","title":"Name","description":"Evaluator name."},"description":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Description","description":"Evaluator description."},"created_at":{"type":"string","title":"Created At","description":"Creation timestamp."},"metadata":{"anyOf":[{"additionalProperties":true,"type":"object"},{"type":"null"}],"title":"Metadata","description":"Optional metadata."},"pass_threshold":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"Pass Threshold","description":"Optional pass threshold."},"mode":{"type":"string","enum":["pointwise","reference","pairwise"],"title":"Mode","description":"Evaluation mode: score a candidate directly, against a reference, or against a baseline.","default":"pointwise"},"api":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Api","description":"API type."},"container":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Container","description":"Container label (if container-specific)."},"model":{"type":"string","title":"Model","description":"Judge model slug."},"prompt":{"type":"string","title":"Prompt","description":"Judge prompt."},"inference_parameters":{"anyOf":[{"additionalProperties":true,"type":"object"},{"type":"null"}],"title":"Inference Parameters","description":"Optional inference parameters for the judge model (e.g. response_format, temperature)."}},"type":"object","required":["project_id","id","name","created_at","model","prompt"],"title":"JudgeEvaluator"},
"CodeEvaluator":{"properties":{"object":{"type":"string","const":"evaluator.code","title":"Object","description":"Object type.","default":"evaluator.code"},"project_id":{"type":"string","title":"Project Id","description":"Project id."},"container_id":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Container Id","description":"Container id."},"id":{"type":"string","title":"Id","description":"Evaluator id."},"name":{"type":"string","title":"Name","description":"Evaluator name."},"description":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Description","description":"Evaluator description."},"created_at":{"type":"string","title":"Created At","description":"Creation timestamp."},"metadata":{"anyOf":[{"additionalProperties":true,"type":"object"},{"type":"null"}],"title":"Metadata","description":"Optional metadata."},"pass_threshold":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"Pass Threshold","description":"Optional pass threshold."},"mode":{"type":"string","enum":["pointwise","reference","pairwise"],"title":"Mode","description":"Evaluation mode: score a candidate directly, against a reference, or against a baseline.","default":"pointwise"},"api":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Api","description":"API type."},"container":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Container","description":"Container label (if container-specific)."},"source":{"type":"string","title":"Source","description":"Source code to evaluate."},"requirements":{"anyOf":[{"items":{"type":"string"},"type":"array"},{"type":"null"}],"title":"Requirements","description":"Optional list of package requirements."}},"type":"object","required":["project_id","id","name","created_at","source"],"title":"CodeEvaluator"},
"ErrorResponse":{"properties":{"error":{"$ref":"#/components/schemas/ManiacError","description":"Error payload."}},"additionalProperties":false,"type":"object","required":["error"],"title":"ErrorResponse","description":"Response body for errors."},
"ManiacError":{"properties":{"code":{"type":"string","title":"Code","description":"Machine-readable error code."},"message":{"type":"string","title":"Message","description":"Human-readable error message."},"details":{"anyOf":[{"additionalProperties":true,"type":"object"},{"type":"null"}],"title":"Details","description":"Additional error details."}},"additionalProperties":false,"type":"object","required":["code","message"],"title":"ManiacError","description":"Standard Maniac API error envelope.\n\nThis matches the shape already used by v2 auth (`detail={\"error\": {...}}`)."},
"HTTPValidationError":{"properties":{"detail":{"items":{"$ref":"#/components/schemas/ValidationError"},"type":"array","title":"Detail"}},"type":"object","title":"HTTPValidationError"},
"ValidationError":{"properties":{"loc":{"items":{"anyOf":[{"type":"string"},{"type":"integer"}]},"type":"array","title":"Location"},"msg":{"type":"string","title":"Message"},"type":{"type":"string","title":"Error Type"}},"type":"object","required":["loc","msg","type"],"title":"ValidationError"}}},
"paths":{"/v1/evaluators/{evaluator}":{"patch":{"tags":["Evaluation"],"summary":"Update an evaluator","description":"Update an existing evaluator by id or name.","operationId":"evaluators_update",
"parameters":[{"name":"evaluator","in":"path","required":true,"schema":{"type":"string","title":"Evaluator"}}],
"requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateEvaluatorReq"}}}},
"responses":{
"200":{"description":"Successful Response","content":{"application/json":{"schema":{"anyOf":[{"$ref":"#/components/schemas/JudgeEvaluator"},{"$ref":"#/components/schemas/CodeEvaluator"}],"title":"Response Evaluators Update"}}}},
"400":{"description":"Bad Request","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}},
"401":{"description":"Unauthorized","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}},
"403":{"description":"Forbidden","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}},
"404":{"description":"Not Found","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}},
"409":{"description":"Conflict","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}},
"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}},
"429":{"description":"Too Many Requests","headers":{"X-RateLimit-Limit":{"description":"Request limit per window.","schema":{"type":"integer"}},"X-RateLimit-Remaining":{"description":"Remaining requests in current window.","schema":{"type":"integer"}},"X-RateLimit-Reset":{"description":"Unix timestamp when the rate limit resets.","schema":{"type":"integer"}},"Retry-After":{"description":"Seconds to wait before retrying.","schema":{"type":"integer"}}},"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}},
"500":{"description":"Internal Server Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}},
"501":{"description":"Not Implemented","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}},
"503":{"description":"Upstream Unavailable","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}}}}}}}
```

## Delete an evaluator

> Delete an evaluator by id.

```json
{"openapi":"3.1.0","info":{"title":"Maniac Inference Gateway API","version":"1.0.0"},"tags":[{"name":"Evaluation","description":"Evaluation and evaluator endpoints."}],"servers":[{"url":"https://platform.maniac.ai","description":"The Maniac API"}],"security":[{"ApiKeyAuth":[]}],"components":{"securitySchemes":{"ApiKeyAuth":{"type":"http","scheme":"bearer","bearerFormat":"API key","description":"API key in Authorization header using Bearer <token>."}},"schemas":{"DeleteEvaluatorRes":{"properties":{"object":{"type":"string","const":"evaluator","title":"Object","description":"Object type.","default":"evaluator"},"id":{"type":"string","title":"Id","description":"Evaluator id."},"deleted":{"type":"boolean","const":true,"title":"Deleted","description":"Deletion status.","default":true}},"additionalProperties":false,"type":"object","required":["id"],"title":"DeleteEvaluatorRes"},"ErrorResponse":{"properties":{"error":{"$ref":"#/components/schemas/ManiacError","description":"Error payload."}},"additionalProperties":false,"type":"object","required":["error"],"title":"ErrorResponse","description":"Response body for errors."},"ManiacError":{"properties":{"code":{"type":"string","title":"Code","description":"Machine-readable error code."},"message":{"type":"string","title":"Message","description":"Human-readable error message."},"details":{"anyOf":[{"additionalProperties":true,"type":"object"},{"type":"null"}],"title":"Details","description":"Additional error details."}},"additionalProperties":false,"type":"object","required":["code","message"],"title":"ManiacError","description":"Standard Maniac API error envelope.\n\nThis matches the shape already used by v2 auth (`detail={\"error\": {...}}`)."},"HTTPValidationError":{"properties":{"detail":{"items":{"$ref":"#/components/schemas/ValidationError"},"type":"array","title":"Detail"}},"type":"object","title":"HTTPValidationError"},"ValidationError":{"properties":{"loc":{"items":{"anyOf":[{"type":"string"},{"type":"integer"}]},"type":"array","title":"Location"},"msg":{"type":"string","title":"Message"},"type":{"type":"string","title":"Error Type"}},"type":"object","required":["loc","msg","type"],"title":"ValidationError"}}},"paths":{"/v1/evaluators/{evaluator_id}":{"delete":{"tags":["Evaluation"],"summary":"Delete an evaluator","description":"Delete an evaluator by id.","operationId":"evaluators_delete","parameters":[{"name":"evaluator_id","in":"path","required":true,"schema":{"type":"string","title":"Evaluator Id"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/DeleteEvaluatorRes"}}}},"400":{"description":"Bad Request","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}},"401":{"description":"Unauthorized","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}},"403":{"description":"Forbidden","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}},"404":{"description":"Not Found","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}},"409":{"description":"Conflict","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}},"429":{"description":"Too Many Requests","headers":{"X-RateLimit-Limit":{"description":"Request limit per window.","schema":{"type":"integer"}},"X-RateLimit-Remaining":{"description":"Remaining requests in current window.","schema":{"type":"integer"}},"X-RateLimit-Reset":{"description":"Unix timestamp when the rate limit resets.","schema":{"type":"integer"}},"Retry-After":{"description":"Seconds to wait before retrying.","schema":{"type":"integer"}}},"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}},"500":{"description":"Internal Server Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}},"501":{"description":"Not Implemented","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}},"503":{"description":"Upstream Unavailable","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}}}}}}}
```

## List evaluation runs

> List evaluation runs for the authenticated project. Optionally filter by container and status.

```json
{"openapi":"3.1.0","info":{"title":"Maniac Inference Gateway API","version":"1.0.0"},"tags":[{"name":"Evaluation","description":"Evaluation and evaluator endpoints."}],"servers":[{"url":"https://platform.maniac.ai","description":"The Maniac API"}],"security":[{"ApiKeyAuth":[]}],"components":{"securitySchemes":{"ApiKeyAuth":{"type":"http","scheme":"bearer","bearerFormat":"API key","description":"API key in Authorization header using Bearer <token>."}},"schemas":{"EvaluationRunListResponse":{"properties":{"object":{"type":"string","const":"list","title":"Object","description":"Object type identifier.","default":"list"},"data":{"items":{"$ref":"#/components/schemas/EvaluationRun"},"type":"array","title":"Data","description":"Returned items."},"total":{"type":"integer","minimum":0,"title":"Total","description":"Total number of items available for this resource."}},"additionalProperties":false,"type":"object","required":["data","total"],"title":"EvaluationRunListResponse"},"EvaluationRun":{"properties":{"created_at":{"type":"string","title":"Created At"},"finished_at":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Finished At"},"error_at":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Error At"},"status":{"type":"string","title":"Status"},"error":{"anyOf":[{},{"type":"null"}],"title":"Error"},"object":{"type":"string","const":"evaluation.run","title":"Object","description":"Object type.","default":"evaluation.run"},"id":{"type":"string","title":"Id","description":"Evaluation run id (run group id)."},"process_id":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Process Id","description":"Process id for lifecycle tracking."},"evaluators":{"anyOf":[{"items":{"type":"string"},"type":"array"},{"type":"null"}],"title":"Evaluators","description":"Evaluator ids used in this run."},"container":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Container","description":"Container id."},"dataset_id":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Dataset Id","description":"Dataset id (if a dataset was used)."},"sample":{"anyOf":[{"oneOf":[{"$ref":"#/components/schemas/DatasetDataSource"},{"$ref":"#/components/schemas/ContainerDataSource"},{"$ref":"#/components/schemas/GenerateDataSource"}],"discriminator":{"propertyName":"type","mapping":{"container":"#/components/schemas/ContainerDataSource","dataset":"#/components/schemas/DatasetDataSource","generate":"#/components/schemas/GenerateDataSource"}}},{"type":"null"}],"title":"Sample","description":"Resolved sample-side data source."},"ground_truth":{"anyOf":[{"oneOf":[{"$ref":"#/components/schemas/DatasetDataSource"},{"$ref":"#/components/schemas/ContainerDataSource"},{"$ref":"#/components/schemas/GenerateDataSource"}],"discriminator":{"propertyName":"type","mapping":{"container":"#/components/schemas/ContainerDataSource","dataset":"#/components/schemas/DatasetDataSource","generate":"#/components/schemas/GenerateDataSource"}}},{"type":"null"}],"title":"Ground Truth","description":"Resolved ground-truth-side data source."},"baseline":{"anyOf":[{"oneOf":[{"$ref":"#/components/schemas/DatasetDataSource"},{"$ref":"#/components/schemas/ContainerDataSource"},{"$ref":"#/components/schemas/GenerateDataSource"}],"discriminator":{"propertyName":"type","mapping":{"container":"#/components/schemas/ContainerDataSource","dataset":"#/components/schemas/DatasetDataSource","generate":"#/components/schemas/GenerateDataSource"}}},{"type":"null"}],"title":"Baseline","description":"Resolved baseline-side data source for pairwise evaluation."},"results":{"anyOf":[{"$ref":"#/components/schemas/EvaluationRunResults-Output"},{"type":"null"}],"description":"Evaluation results (populated on completion)."},"metrics":{"anyOf":[{"additionalProperties":true,"type":"object"},{"type":"null"}],"title":"Metrics","description":"Evaluation metrics (populated on completion)."},"config":{"anyOf":[{"additionalProperties":true,"type":"object"},{"type":"null"}],"title":"Config","description":"Run configuration as submitted."},"spend":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"Spend","description":"Estimated spend."},"metadata":{"anyOf":[{"additionalProperties":true,"type":"object"},{"type":"null"}],"title":"Metadata","description":"Optional metadata."}},"type":"object","required":["created_at","status","id"],"title":"EvaluationRun","description":"Response model for an evaluation run."},"DatasetDataSource":{"properties":{"range":{"anyOf":[{"type":"string","pattern":"^\\d+:\\d+$","description":"Range to evaluate, as 'start:end' (e.g. '0:200')"},{"type":"null"}],"title":"Range","default":"0:100"},"type":{"type":"string","const":"dataset","title":"Type"},"dataset":{"type":"string","minLength":1,"title":"Dataset","description":"Resource id or label"}},"additionalProperties":false,"type":"object","required":["type","dataset"],"title":"DatasetDataSource"},"ContainerDataSource":{"properties":{"range":{"anyOf":[{"type":"string","pattern":"^\\d+:\\d+$","description":"Range to evaluate, as 'start:end' (e.g. '0:200')"},{"type":"null"}],"title":"Range","default":"0:100"},"type":{"type":"string","const":"container","title":"Type"},"container":{"type":"string","minLength":1,"title":"Container","description":"Resource id or label"}},"additionalProperties":false,"type":"object","required":["type","container"],"title":"ContainerDataSource"},"GenerateDataSource":{"properties":{"range":{"anyOf":[{"type":"string","pattern":"^\\d+:\\d+$","description":"Range to evaluate, as 'start:end' (e.g. '0:200')"},{"type":"null"}],"title":"Range","default":"0:100"},"type":{"type":"string","const":"generate","title":"Type","default":"generate"},"models":{"items":{"type":"string","minLength":1,"description":"Resource id or label"},"type":"array","minItems":1,"title":"Models","description":"Model ids or slugs to generate with."}},"additionalProperties":false,"type":"object","required":["models"],"title":"GenerateDataSource","description":"Data source that generates completions via one or more models."},"EvaluationRunResults-Output":{"properties":{"overall":{"anyOf":[{"$ref":"#/components/schemas/EvaluationRunOverallResults"},{"type":"null"}],"description":"Aggregate scores across all models and evaluators."},"metadata":{"anyOf":[{"additionalProperties":true,"type":"object"},{"type":"null"}],"title":"Metadata","description":"Additional result metadata."},"per_model":{"anyOf":[{"items":{"$ref":"#/components/schemas/EvaluationRunPerModelResults"},"type":"array"},{"type":"null"}],"title":"Per Model","description":"Per-model result breakdowns."},"launch_count":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Launch Count","description":"Number of model launches in this run."}},"additionalProperties":true,"type":"object","title":"EvaluationRunResults","description":"Typed representation of the evaluation run results payload."},"EvaluationRunOverallResults":{"properties":{"avg_score":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"Avg Score","description":"Average score across all evaluators."},"avg_accuracy":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"Avg Accuracy","description":"Average accuracy across all evaluators."}},"additionalProperties":true,"type":"object","title":"EvaluationRunOverallResults","description":"Aggregated scores across all models and evaluators."},"EvaluationRunPerModelResults":{"properties":{"model":{"anyOf":[{"additionalProperties":true,"type":"object"},{"type":"null"}],"title":"Model","description":"Model configuration used (sample and ground-truth generation models)."},"per_eval":{"anyOf":[{"additionalProperties":{"$ref":"#/components/schemas/EvaluationRunPerEvalResults"},"type":"object"},{"type":"null"}],"title":"Per Eval","description":"Results keyed by evaluator UUID."},"avg_score":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"Avg Score","description":"Average score for this model."},"avg_accuracy":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"Avg Accuracy","description":"Average accuracy for this model."},"launch_index":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Launch Index","description":"Index of this model launch."},"launch_call_id":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Launch Call Id","description":"Modal function call id for this launch."}},"additionalProperties":true,"type":"object","title":"EvaluationRunPerModelResults","description":"Results for a single model within an evaluation run."},"EvaluationRunPerEvalResults":{"properties":{"accuracy":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"Accuracy","description":"Accuracy ratio."},"avg_score":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"Avg Score","description":"Average score."},"num_total":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Num Total","description":"Total number of evaluation samples."},"num_errors":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Num Errors","description":"Number of samples that errored."},"num_failed":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Num Failed","description":"Number of samples that failed."},"num_passed":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Num Passed","description":"Number of samples that passed."},"num_scored":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Num Scored","description":"Number of samples that were scored."},"num_missing":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Num Missing","description":"Number of samples with missing data."},"avg_accuracy":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"Avg Accuracy","description":"Average accuracy for this evaluator."},"break_reason":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Break Reason","description":"Why evaluation stopped (e.g. 'expected_count_reached')."},"expected_count":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Expected Count","description":"Expected sample count for this evaluator."}},"additionalProperties":true,"type":"object","title":"EvaluationRunPerEvalResults","description":"Per-evaluator breakdown within a single model run."},"ErrorResponse":{"properties":{"error":{"$ref":"#/components/schemas/ManiacError","description":"Error payload."}},"additionalProperties":false,"type":"object","required":["error"],"title":"ErrorResponse","description":"Response body for errors."},"ManiacError":{"properties":{"code":{"type":"string","title":"Code","description":"Machine-readable error code."},"message":{"type":"string","title":"Message","description":"Human-readable error message."},"details":{"anyOf":[{"additionalProperties":true,"type":"object"},{"type":"null"}],"title":"Details","description":"Additional error details."}},"additionalProperties":false,"type":"object","required":["code","message"],"title":"ManiacError","description":"Standard Maniac API error envelope.\n\nThis matches the shape already used by v2 auth (`detail={\"error\": {...}}`)."},"HTTPValidationError":{"properties":{"detail":{"items":{"$ref":"#/components/schemas/ValidationError"},"type":"array","title":"Detail"}},"type":"object","title":"HTTPValidationError"},"ValidationError":{"properties":{"loc":{"items":{"anyOf":[{"type":"string"},{"type":"integer"}]},"type":"array","title":"Location"},"msg":{"type":"string","title":"Message"},"type":{"type":"string","title":"Error Type"}},"type":"object","required":["loc","msg","type"],"title":"ValidationError"}}},"paths":{"/v1/evaluation/runs":{"get":{"tags":["Evaluation"],"summary":"List evaluation runs","description":"List evaluation runs for the authenticated project. Optionally filter by container and status.","operationId":"evaluation_runs_list","parameters":[{"name":"container","in":"query","required":false,"schema":{"anyOf":[{"type":"string"},{"type":"null"}],"description":"Container ID or label to filter by.","title":"Container"},"description":"Container ID or label to filter by."},{"name":"status","in":"query","required":false,"schema":{"anyOf":[{"type":"string"},{"type":"null"}],"description":"Filter by run status (e.g. 'running', 'completed', 'error').","title":"Status"},"description":"Filter by run status (e.g. 'running', 'completed', 'error')."},{"name":"limit","in":"query","required":false,"schema":{"type":"integer","maximum":100,"minimum":1,"default":20,"title":"Limit"}},{"name":"offset","in":"query","required":false,"schema":{"type":"integer","minimum":0,"default":0,"title":"Offset"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/EvaluationRunListResponse"}}}},"400":{"description":"Bad Request","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}},"401":{"description":"Unauthorized","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}},"403":{"description":"Forbidden","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}},"429":{"description":"Too Many Requests","headers":{"X-RateLimit-Limit":{"description":"Request limit per window.","schema":{"type":"integer"}},"X-RateLimit-Remaining":{"description":"Remaining requests in current window.","schema":{"type":"integer"}},"X-RateLimit-Reset":{"description":"Unix timestamp when the rate limit resets.","schema":{"type":"integer"}},"Retry-After":{"description":"Seconds to wait before retrying.","schema":{"type":"integer"}}},"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}},"500":{"description":"Internal Server Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}},"501":{"description":"Not Implemented","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}},"503":{"description":"Upstream Unavailable","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}}}}}}}
```

## Create an evaluation run

> Launch an evaluation run. Validates access to the specified container, evaluators, data sources, and models, then dispatches the run through the backend gateway interface.

```json
{"openapi":"3.1.0","info":{"title":"Maniac Inference Gateway API","version":"1.0.0"},"tags":[{"name":"Evaluation","description":"Evaluation and evaluator endpoints."}],"servers":[{"url":"https://platform.maniac.ai","description":"The Maniac API"}],"security":[{"ApiKeyAuth":[]}],"components":{"securitySchemes":{"ApiKeyAuth":{"type":"http","scheme":"bearer","bearerFormat":"API key","description":"API key in Authorization header using Bearer <token>."}},"schemas":{"EvaluationRunReq":{"properties":{"container":{"type":"string","title":"Container","description":"Container id or label."},"evaluators":{"items":{"type":"string"},"type":"array","minItems":1,"title":"Evaluators","description":"Evaluator ids or labels."},"sample":{"anyOf":[{"oneOf":[{"$ref":"#/components/schemas/DatasetDataSource"},{"$ref":"#/components/schemas/ContainerDataSource"},{"$ref":"#/components/schemas/GenerateDataSource"}],"discriminator":{"propertyName":"type","mapping":{"container":"#/components/schemas/ContainerDataSource","dataset":"#/components/schemas/DatasetDataSource","generate":"#/components/schemas/GenerateDataSource"}}},{"type":"null"}],"title":"Sample","description":"Sample-side data source. Omit to default to the container's task logs. Use type='dataset' to pull from a dataset, type='container' to pull from task logs, or type='generate' to generate completions with the specified models."},"ground_truth":{"anyOf":[{"oneOf":[{"$ref":"#/components/schemas/DatasetDataSource"},{"$ref":"#/components/schemas/ContainerDataSource"},{"$ref":"#/components/schemas/GenerateDataSource"}],"discriminator":{"propertyName":"type","mapping":{"container":"#/components/schemas/ContainerDataSource","dataset":"#/components/schemas/DatasetDataSource","generate":"#/components/schemas/GenerateDataSource"}}},{"type":"null"}],"title":"Ground Truth","description":"Ground-truth-side data source. Omit to default to the container's task logs. 
Use type='dataset' to pull from a dataset, type='container' to pull from task logs, or type='generate' to generate completions with a model."},"baseline":{"anyOf":[{"oneOf":[{"$ref":"#/components/schemas/DatasetDataSource"},{"$ref":"#/components/schemas/ContainerDataSource"},{"$ref":"#/components/schemas/GenerateDataSource"}],"discriminator":{"propertyName":"type","mapping":{"container":"#/components/schemas/ContainerDataSource","dataset":"#/components/schemas/DatasetDataSource","generate":"#/components/schemas/GenerateDataSource"}}},{"type":"null"}],"title":"Baseline","description":"Baseline-side data source for pairwise evaluation. Use type='dataset' to pull from a dataset, type='container' to pull from task logs, or type='generate' to generate completions with a model."},"metadata":{"anyOf":[{"additionalProperties":true,"type":"object"},{"type":"null"}],"title":"Metadata","description":"Optional metadata."},"environment":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Environment","description":"Execution environment name (maps to Modal app suffix).","default":"main"}},"additionalProperties":false,"type":"object","required":["container","evaluators"],"title":"EvaluationRunReq","description":"Request body for creating an evaluation run.\n\nEach side of the evaluation (``sample`` and ``ground_truth``) is described\nby a single data-source object whose ``type`` discriminator determines how\ndata is obtained:\n\n- ``\"dataset\"``   — pull from a dataset.\n- ``\"container\"`` — pull from the container's task logs.\n- ``\"generate\"``  — generate completions using one or more models.\n\nBoth fields are optional, but **at least one must be provided**.  When a\nside is omitted it defaults to the top-level container's task logs.  
At\nleast one resolved side must not be ``type='generate'`` so there is seed\ninput to evaluate against."},"DatasetDataSource":{"properties":{"range":{"anyOf":[{"type":"string","pattern":"^\\d+:\\d+$","description":"Range to evaluate, as 'start:end' (e.g. '0:200')"},{"type":"null"}],"title":"Range","default":"0:100"},"type":{"type":"string","const":"dataset","title":"Type"},"dataset":{"type":"string","minLength":1,"title":"Dataset","description":"Resource id or label"}},"additionalProperties":false,"type":"object","required":["type","dataset"],"title":"DatasetDataSource"},"ContainerDataSource":{"properties":{"range":{"anyOf":[{"type":"string","pattern":"^\\d+:\\d+$","description":"Range to evaluate, as 'start:end' (e.g. '0:200')"},{"type":"null"}],"title":"Range","default":"0:100"},"type":{"type":"string","const":"container","title":"Type"},"container":{"type":"string","minLength":1,"title":"Container","description":"Resource id or label"}},"additionalProperties":false,"type":"object","required":["type","container"],"title":"ContainerDataSource"},"GenerateDataSource":{"properties":{"range":{"anyOf":[{"type":"string","pattern":"^\\d+:\\d+$","description":"Range to evaluate, as 'start:end' (e.g. 
'0:200')"},{"type":"null"}],"title":"Range","default":"0:100"},"type":{"type":"string","const":"generate","title":"Type","default":"generate"},"models":{"items":{"type":"string","minLength":1,"description":"Resource id or label"},"type":"array","minItems":1,"title":"Models","description":"Model ids or slugs to generate with."}},"additionalProperties":false,"type":"object","required":["models"],"title":"GenerateDataSource","description":"Data source that generates completions via one or more models."},"EvaluationRun":{"properties":{"created_at":{"type":"string","title":"Created At"},"finished_at":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Finished At"},"error_at":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Error At"},"status":{"type":"string","title":"Status"},"error":{"anyOf":[{},{"type":"null"}],"title":"Error"},"object":{"type":"string","const":"evaluation.run","title":"Object","description":"Object type.","default":"evaluation.run"},"id":{"type":"string","title":"Id","description":"Evaluation run id (run group id)."},"process_id":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Process Id","description":"Process id for lifecycle tracking."},"evaluators":{"anyOf":[{"items":{"type":"string"},"type":"array"},{"type":"null"}],"title":"Evaluators","description":"Evaluator ids used in this run."},"container":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Container","description":"Container id."},"dataset_id":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Dataset Id","description":"Dataset id (if a dataset was 
used)."},"sample":{"anyOf":[{"oneOf":[{"$ref":"#/components/schemas/DatasetDataSource"},{"$ref":"#/components/schemas/ContainerDataSource"},{"$ref":"#/components/schemas/GenerateDataSource"}],"discriminator":{"propertyName":"type","mapping":{"container":"#/components/schemas/ContainerDataSource","dataset":"#/components/schemas/DatasetDataSource","generate":"#/components/schemas/GenerateDataSource"}}},{"type":"null"}],"title":"Sample","description":"Resolved sample-side data source."},"ground_truth":{"anyOf":[{"oneOf":[{"$ref":"#/components/schemas/DatasetDataSource"},{"$ref":"#/components/schemas/ContainerDataSource"},{"$ref":"#/components/schemas/GenerateDataSource"}],"discriminator":{"propertyName":"type","mapping":{"container":"#/components/schemas/ContainerDataSource","dataset":"#/components/schemas/DatasetDataSource","generate":"#/components/schemas/GenerateDataSource"}}},{"type":"null"}],"title":"Ground Truth","description":"Resolved ground-truth-side data source."},"baseline":{"anyOf":[{"oneOf":[{"$ref":"#/components/schemas/DatasetDataSource"},{"$ref":"#/components/schemas/ContainerDataSource"},{"$ref":"#/components/schemas/GenerateDataSource"}],"discriminator":{"propertyName":"type","mapping":{"container":"#/components/schemas/ContainerDataSource","dataset":"#/components/schemas/DatasetDataSource","generate":"#/components/schemas/GenerateDataSource"}}},{"type":"null"}],"title":"Baseline","description":"Resolved baseline-side data source for pairwise evaluation."},"results":{"anyOf":[{"$ref":"#/components/schemas/EvaluationRunResults-Output"},{"type":"null"}],"description":"Evaluation results (populated on completion)."},"metrics":{"anyOf":[{"additionalProperties":true,"type":"object"},{"type":"null"}],"title":"Metrics","description":"Evaluation metrics (populated on completion)."},"config":{"anyOf":[{"additionalProperties":true,"type":"object"},{"type":"null"}],"title":"Config","description":"Run configuration as 
submitted."},"spend":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"Spend","description":"Estimated spend."},"metadata":{"anyOf":[{"additionalProperties":true,"type":"object"},{"type":"null"}],"title":"Metadata","description":"Optional metadata."}},"type":"object","required":["created_at","status","id"],"title":"EvaluationRun","description":"Response model for an evaluation run."},"EvaluationRunResults-Output":{"properties":{"overall":{"anyOf":[{"$ref":"#/components/schemas/EvaluationRunOverallResults"},{"type":"null"}],"description":"Aggregate scores across all models and evaluators."},"metadata":{"anyOf":[{"additionalProperties":true,"type":"object"},{"type":"null"}],"title":"Metadata","description":"Additional result metadata."},"per_model":{"anyOf":[{"items":{"$ref":"#/components/schemas/EvaluationRunPerModelResults"},"type":"array"},{"type":"null"}],"title":"Per Model","description":"Per-model result breakdowns."},"launch_count":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Launch Count","description":"Number of model launches in this run."}},"additionalProperties":true,"type":"object","title":"EvaluationRunResults","description":"Typed representation of the evaluation run results payload."},"EvaluationRunOverallResults":{"properties":{"avg_score":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"Avg Score","description":"Average score across all evaluators."},"avg_accuracy":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"Avg Accuracy","description":"Average accuracy across all evaluators."}},"additionalProperties":true,"type":"object","title":"EvaluationRunOverallResults","description":"Aggregated scores across all models and evaluators."},"EvaluationRunPerModelResults":{"properties":{"model":{"anyOf":[{"additionalProperties":true,"type":"object"},{"type":"null"}],"title":"Model","description":"Model configuration used (sample and ground-truth generation 
models)."},"per_eval":{"anyOf":[{"additionalProperties":{"$ref":"#/components/schemas/EvaluationRunPerEvalResults"},"type":"object"},{"type":"null"}],"title":"Per Eval","description":"Results keyed by evaluator UUID."},"avg_score":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"Avg Score","description":"Average score for this model."},"avg_accuracy":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"Avg Accuracy","description":"Average accuracy for this model."},"launch_index":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Launch Index","description":"Index of this model launch."},"launch_call_id":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Launch Call Id","description":"Modal function call id for this launch."}},"additionalProperties":true,"type":"object","title":"EvaluationRunPerModelResults","description":"Results for a single model within an evaluation run."},"EvaluationRunPerEvalResults":{"properties":{"accuracy":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"Accuracy","description":"Accuracy ratio."},"avg_score":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"Avg Score","description":"Average score."},"num_total":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Num Total","description":"Total number of evaluation samples."},"num_errors":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Num Errors","description":"Number of samples that errored."},"num_failed":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Num Failed","description":"Number of samples that failed."},"num_passed":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Num Passed","description":"Number of samples that passed."},"num_scored":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Num Scored","description":"Number of samples that were scored."},"num_missing":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Num Missing","description":"Number of samples with missing 
data."},"avg_accuracy":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"Avg Accuracy","description":"Average accuracy for this evaluator."},"break_reason":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Break Reason","description":"Why evaluation stopped (e.g. 'expected_count_reached')."},"expected_count":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Expected Count","description":"Expected sample count for this evaluator."}},"additionalProperties":true,"type":"object","title":"EvaluationRunPerEvalResults","description":"Per-evaluator breakdown within a single model run."},"ErrorResponse":{"properties":{"error":{"$ref":"#/components/schemas/ManiacError","description":"Error payload."}},"additionalProperties":false,"type":"object","required":["error"],"title":"ErrorResponse","description":"Response body for errors."},"ManiacError":{"properties":{"code":{"type":"string","title":"Code","description":"Machine-readable error code."},"message":{"type":"string","title":"Message","description":"Human-readable error message."},"details":{"anyOf":[{"additionalProperties":true,"type":"object"},{"type":"null"}],"title":"Details","description":"Additional error details."}},"additionalProperties":false,"type":"object","required":["code","message"],"title":"ManiacError","description":"Standard Maniac API error envelope.\n\nThis matches the shape already used by v2 auth (`detail={\"error\": {...}}`)."},"HTTPValidationError":{"properties":{"detail":{"items":{"$ref":"#/components/schemas/ValidationError"},"type":"array","title":"Detail"}},"type":"object","title":"HTTPValidationError"},"ValidationError":{"properties":{"loc":{"items":{"anyOf":[{"type":"string"},{"type":"integer"}]},"type":"array","title":"Location"},"msg":{"type":"string","title":"Message"},"type":{"type":"string","title":"Error Type"}},"type":"object","required":["loc","msg","type"],"title":"ValidationError"}}},"paths":{"/v1/evaluation/runs":{"post":{"tags":["Evaluation"],"summary":"Create an 
evaluation run","description":"Launch an evaluation run. Validates access to the specified container, evaluators, data sources, and models, then dispatches the run through the backend gateway interface.","operationId":"evaluation_runs_create","requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/EvaluationRunReq"}}}},"responses":{"201":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/EvaluationRun"}}}},"400":{"description":"Bad Request","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}},"401":{"description":"Unauthorized","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}},"403":{"description":"Forbidden","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}},"404":{"description":"Not Found","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}},"409":{"description":"Conflict","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}},"429":{"description":"Too Many Requests","headers":{"X-RateLimit-Limit":{"description":"Request limit per window.","schema":{"type":"integer"}},"X-RateLimit-Remaining":{"description":"Remaining requests in current window.","schema":{"type":"integer"}},"X-RateLimit-Reset":{"description":"Unix timestamp when the rate limit resets.","schema":{"type":"integer"}},"Retry-After":{"description":"Seconds to wait before retrying.","schema":{"type":"integer"}}},"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}},"500":{"description":"Internal Server Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}},"501":{"description":"Not 
Implemented","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}},"503":{"description":"Upstream Unavailable","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}}}}}}}
```

## Get an evaluation run

> Retrieve a single evaluation run by ID within the authenticated project.

```json
{"openapi":"3.1.0","info":{"title":"Maniac Inference Gateway API","version":"1.0.0"},"tags":[{"name":"Evaluation","description":"Evaluation and evaluator endpoints."}],"servers":[{"url":"https://platform.maniac.ai","description":"The Maniac API"}],"security":[{"ApiKeyAuth":[]}],"components":{"securitySchemes":{"ApiKeyAuth":{"type":"http","scheme":"bearer","bearerFormat":"API key","description":"API key in Authorization header using Bearer <token>."}},"schemas":{"EvaluationRun":{"properties":{"created_at":{"type":"string","title":"Created At"},"finished_at":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Finished At"},"error_at":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Error At"},"status":{"type":"string","title":"Status"},"error":{"anyOf":[{},{"type":"null"}],"title":"Error"},"object":{"type":"string","const":"evaluation.run","title":"Object","description":"Object type.","default":"evaluation.run"},"id":{"type":"string","title":"Id","description":"Evaluation run id (run group id)."},"process_id":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Process Id","description":"Process id for lifecycle tracking."},"evaluators":{"anyOf":[{"items":{"type":"string"},"type":"array"},{"type":"null"}],"title":"Evaluators","description":"Evaluator ids used in this run."},"container":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Container","description":"Container id."},"dataset_id":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Dataset Id","description":"Dataset id (if a dataset was used)."},"sample":{"anyOf":[{"oneOf":[{"$ref":"#/components/schemas/DatasetDataSource"},{"$ref":"#/components/schemas/ContainerDataSource"},{"$ref":"#/components/schemas/GenerateDataSource"}],"discriminator":{"propertyName":"type","mapping":{"container":"#/components/schemas/ContainerDataSource","dataset":"#/components/schemas/DatasetDataSource","generate":"#/components/schemas/GenerateDataSource"}}},{"type":"null"}],"title":"Sample","description":"Resolved 
sample-side data source."},"ground_truth":{"anyOf":[{"oneOf":[{"$ref":"#/components/schemas/DatasetDataSource"},{"$ref":"#/components/schemas/ContainerDataSource"},{"$ref":"#/components/schemas/GenerateDataSource"}],"discriminator":{"propertyName":"type","mapping":{"container":"#/components/schemas/ContainerDataSource","dataset":"#/components/schemas/DatasetDataSource","generate":"#/components/schemas/GenerateDataSource"}}},{"type":"null"}],"title":"Ground Truth","description":"Resolved ground-truth-side data source."},"baseline":{"anyOf":[{"oneOf":[{"$ref":"#/components/schemas/DatasetDataSource"},{"$ref":"#/components/schemas/ContainerDataSource"},{"$ref":"#/components/schemas/GenerateDataSource"}],"discriminator":{"propertyName":"type","mapping":{"container":"#/components/schemas/ContainerDataSource","dataset":"#/components/schemas/DatasetDataSource","generate":"#/components/schemas/GenerateDataSource"}}},{"type":"null"}],"title":"Baseline","description":"Resolved baseline-side data source for pairwise evaluation."},"results":{"anyOf":[{"$ref":"#/components/schemas/EvaluationRunResults-Output"},{"type":"null"}],"description":"Evaluation results (populated on completion)."},"metrics":{"anyOf":[{"additionalProperties":true,"type":"object"},{"type":"null"}],"title":"Metrics","description":"Evaluation metrics (populated on completion)."},"config":{"anyOf":[{"additionalProperties":true,"type":"object"},{"type":"null"}],"title":"Config","description":"Run configuration as submitted."},"spend":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"Spend","description":"Estimated spend."},"metadata":{"anyOf":[{"additionalProperties":true,"type":"object"},{"type":"null"}],"title":"Metadata","description":"Optional metadata."}},"type":"object","required":["created_at","status","id"],"title":"EvaluationRun","description":"Response model for an evaluation run."},"DatasetDataSource":{"properties":{"range":{"anyOf":[{"type":"string","pattern":"^\\d+:\\d+$","description":"Range 
to evaluate, as 'start:end' (e.g. '0:200')"},{"type":"null"}],"title":"Range","default":"0:100"},"type":{"type":"string","const":"dataset","title":"Type"},"dataset":{"type":"string","minLength":1,"title":"Dataset","description":"Resource id or label"}},"additionalProperties":false,"type":"object","required":["type","dataset"],"title":"DatasetDataSource"},"ContainerDataSource":{"properties":{"range":{"anyOf":[{"type":"string","pattern":"^\\d+:\\d+$","description":"Range to evaluate, as 'start:end' (e.g. '0:200')"},{"type":"null"}],"title":"Range","default":"0:100"},"type":{"type":"string","const":"container","title":"Type"},"container":{"type":"string","minLength":1,"title":"Container","description":"Resource id or label"}},"additionalProperties":false,"type":"object","required":["type","container"],"title":"ContainerDataSource"},"GenerateDataSource":{"properties":{"range":{"anyOf":[{"type":"string","pattern":"^\\d+:\\d+$","description":"Range to evaluate, as 'start:end' (e.g. '0:200')"},{"type":"null"}],"title":"Range","default":"0:100"},"type":{"type":"string","const":"generate","title":"Type","default":"generate"},"models":{"items":{"type":"string","minLength":1,"description":"Resource id or label"},"type":"array","minItems":1,"title":"Models","description":"Model ids or slugs to generate with."}},"additionalProperties":false,"type":"object","required":["models"],"title":"GenerateDataSource","description":"Data source that generates completions via one or more models."},"EvaluationRunResults-Output":{"properties":{"overall":{"anyOf":[{"$ref":"#/components/schemas/EvaluationRunOverallResults"},{"type":"null"}],"description":"Aggregate scores across all models and evaluators."},"metadata":{"anyOf":[{"additionalProperties":true,"type":"object"},{"type":"null"}],"title":"Metadata","description":"Additional result metadata."},"per_model":{"anyOf":[{"items":{"$ref":"#/components/schemas/EvaluationRunPerModelResults"},"type":"array"},{"type":"null"}],"title":"Per 
Model","description":"Per-model result breakdowns."},"launch_count":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Launch Count","description":"Number of model launches in this run."}},"additionalProperties":true,"type":"object","title":"EvaluationRunResults","description":"Typed representation of the evaluation run results payload."},"EvaluationRunOverallResults":{"properties":{"avg_score":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"Avg Score","description":"Average score across all evaluators."},"avg_accuracy":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"Avg Accuracy","description":"Average accuracy across all evaluators."}},"additionalProperties":true,"type":"object","title":"EvaluationRunOverallResults","description":"Aggregated scores across all models and evaluators."},"EvaluationRunPerModelResults":{"properties":{"model":{"anyOf":[{"additionalProperties":true,"type":"object"},{"type":"null"}],"title":"Model","description":"Model configuration used (sample and ground-truth generation models)."},"per_eval":{"anyOf":[{"additionalProperties":{"$ref":"#/components/schemas/EvaluationRunPerEvalResults"},"type":"object"},{"type":"null"}],"title":"Per Eval","description":"Results keyed by evaluator UUID."},"avg_score":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"Avg Score","description":"Average score for this model."},"avg_accuracy":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"Avg Accuracy","description":"Average accuracy for this model."},"launch_index":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Launch Index","description":"Index of this model launch."},"launch_call_id":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Launch Call Id","description":"Modal function call id for this launch."}},"additionalProperties":true,"type":"object","title":"EvaluationRunPerModelResults","description":"Results for a single model within an evaluation 
run."},"EvaluationRunPerEvalResults":{"properties":{"accuracy":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"Accuracy","description":"Accuracy ratio."},"avg_score":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"Avg Score","description":"Average score."},"num_total":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Num Total","description":"Total number of evaluation samples."},"num_errors":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Num Errors","description":"Number of samples that errored."},"num_failed":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Num Failed","description":"Number of samples that failed."},"num_passed":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Num Passed","description":"Number of samples that passed."},"num_scored":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Num Scored","description":"Number of samples that were scored."},"num_missing":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Num Missing","description":"Number of samples with missing data."},"avg_accuracy":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"Avg Accuracy","description":"Average accuracy for this evaluator."},"break_reason":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Break Reason","description":"Why evaluation stopped (e.g. 
'expected_count_reached')."},"expected_count":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Expected Count","description":"Expected sample count for this evaluator."}},"additionalProperties":true,"type":"object","title":"EvaluationRunPerEvalResults","description":"Per-evaluator breakdown within a single model run."},"ErrorResponse":{"properties":{"error":{"$ref":"#/components/schemas/ManiacError","description":"Error payload."}},"additionalProperties":false,"type":"object","required":["error"],"title":"ErrorResponse","description":"Response body for errors."},"ManiacError":{"properties":{"code":{"type":"string","title":"Code","description":"Machine-readable error code."},"message":{"type":"string","title":"Message","description":"Human-readable error message."},"details":{"anyOf":[{"additionalProperties":true,"type":"object"},{"type":"null"}],"title":"Details","description":"Additional error details."}},"additionalProperties":false,"type":"object","required":["code","message"],"title":"ManiacError","description":"Standard Maniac API error envelope.\n\nThis matches the shape already used by v2 auth (`detail={\"error\": {...}}`)."},"HTTPValidationError":{"properties":{"detail":{"items":{"$ref":"#/components/schemas/ValidationError"},"type":"array","title":"Detail"}},"type":"object","title":"HTTPValidationError"},"ValidationError":{"properties":{"loc":{"items":{"anyOf":[{"type":"string"},{"type":"integer"}]},"type":"array","title":"Location"},"msg":{"type":"string","title":"Message"},"type":{"type":"string","title":"Error Type"}},"type":"object","required":["loc","msg","type"],"title":"ValidationError"}}},"paths":{"/v1/evaluation/runs/{run_id}":{"get":{"tags":["Evaluation"],"summary":"Get an evaluation run","description":"Retrieve a single evaluation run by ID within the authenticated project.","operationId":"evaluation_runs_retrieve","parameters":[{"name":"run_id","in":"path","required":true,"schema":{"type":"string","title":"Run 
Id"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/EvaluationRun"}}}},"400":{"description":"Bad Request","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}},"401":{"description":"Unauthorized","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}},"403":{"description":"Forbidden","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}},"404":{"description":"Not Found","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}},"409":{"description":"Conflict","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}},"429":{"description":"Too Many Requests","headers":{"X-RateLimit-Limit":{"description":"Request limit per window.","schema":{"type":"integer"}},"X-RateLimit-Remaining":{"description":"Remaining requests in current window.","schema":{"type":"integer"}},"X-RateLimit-Reset":{"description":"Unix timestamp when the rate limit resets.","schema":{"type":"integer"}},"Retry-After":{"description":"Seconds to wait before retrying.","schema":{"type":"integer"}}},"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}},"500":{"description":"Internal Server Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}},"501":{"description":"Not Implemented","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}},"503":{"description":"Upstream Unavailable","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}}}}}}}
```
