> For the complete documentation index, see [llms.txt](https://docs.maniac.ai/llms.txt). Markdown versions of documentation pages are available by appending `.md` to page URLs; this page is available as [Markdown](https://docs.maniac.ai/api-reference/rest-api.md).

# REST API

## List evaluation runs

> List evaluation runs for the authenticated project. Optionally filter by container and status.

```json
{"openapi":"3.1.0","info":{"title":"Maniac Inference Gateway API","version":"1.0.0"},"tags":[{"name":"Evaluation","description":"Evaluation and evaluator endpoints."}],"servers":[{"url":"https://platform.maniac.ai","description":"The Maniac API"}],"security":[{"ApiKeyAuth":[]}],"components":{"securitySchemes":{"ApiKeyAuth":{"type":"http","scheme":"bearer","bearerFormat":"API key","description":"API key in Authorization header using Bearer <token>."}},"schemas":{"EvaluationRunListResponse":{"properties":{"object":{"type":"string","const":"list","title":"Object","description":"Object type identifier.","default":"list"},"data":{"items":{"$ref":"#/components/schemas/EvaluationRun"},"type":"array","title":"Data","description":"Returned items."},"total":{"type":"integer","minimum":0,"title":"Total","description":"Total number of items available for this resource."}},"additionalProperties":false,"type":"object","required":["data","total"],"title":"EvaluationRunListResponse"},"EvaluationRun":{"properties":{"created_at":{"type":"string","title":"Created At"},"finished_at":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Finished At"},"error_at":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Error At"},"status":{"type":"string","title":"Status"},"error":{"anyOf":[{},{"type":"null"}],"title":"Error"},"object":{"type":"string","const":"evaluation.run","title":"Object","description":"Object type.","default":"evaluation.run"},"id":{"type":"string","title":"Id","description":"Evaluation run id (run group id)."},"process_id":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Process Id","description":"Process id for lifecycle tracking."},"evaluators":{"anyOf":[{"items":{"type":"string"},"type":"array"},{"type":"null"}],"title":"Evaluators","description":"Evaluator ids used in this run."},"container":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Container","description":"Container 
id."},"dataset_id":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Dataset Id","description":"Dataset id (if a dataset was used)."},"sample":{"anyOf":[{"oneOf":[{"$ref":"#/components/schemas/DatasetDataSource"},{"$ref":"#/components/schemas/ContainerDataSource"},{"$ref":"#/components/schemas/GenerateDataSource"}],"discriminator":{"propertyName":"type","mapping":{"container":"#/components/schemas/ContainerDataSource","dataset":"#/components/schemas/DatasetDataSource","generate":"#/components/schemas/GenerateDataSource"}}},{"type":"null"}],"title":"Sample","description":"Resolved sample-side data source."},"ground_truth":{"anyOf":[{"oneOf":[{"$ref":"#/components/schemas/DatasetDataSource"},{"$ref":"#/components/schemas/ContainerDataSource"},{"$ref":"#/components/schemas/GenerateDataSource"}],"discriminator":{"propertyName":"type","mapping":{"container":"#/components/schemas/ContainerDataSource","dataset":"#/components/schemas/DatasetDataSource","generate":"#/components/schemas/GenerateDataSource"}}},{"type":"null"}],"title":"Ground Truth","description":"Resolved ground-truth-side data source."},"baseline":{"anyOf":[{"oneOf":[{"$ref":"#/components/schemas/DatasetDataSource"},{"$ref":"#/components/schemas/ContainerDataSource"},{"$ref":"#/components/schemas/GenerateDataSource"}],"discriminator":{"propertyName":"type","mapping":{"container":"#/components/schemas/ContainerDataSource","dataset":"#/components/schemas/DatasetDataSource","generate":"#/components/schemas/GenerateDataSource"}}},{"type":"null"}],"title":"Baseline","description":"Resolved baseline-side data source for pairwise evaluation."},"results":{"anyOf":[{"$ref":"#/components/schemas/EvaluationRunResults-Output"},{"type":"null"}],"description":"Evaluation results (populated on completion)."},"metrics":{"anyOf":[{"additionalProperties":true,"type":"object"},{"type":"null"}],"title":"Metrics","description":"Evaluation metrics (populated on 
completion)."},"config":{"anyOf":[{"additionalProperties":true,"type":"object"},{"type":"null"}],"title":"Config","description":"Run configuration as submitted."},"spend":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"Spend","description":"Estimated spend."},"metadata":{"anyOf":[{"additionalProperties":true,"type":"object"},{"type":"null"}],"title":"Metadata","description":"Optional metadata."}},"type":"object","required":["created_at","status","id"],"title":"EvaluationRun","description":"Response model for an evaluation run."},"DatasetDataSource":{"properties":{"range":{"anyOf":[{"type":"string","pattern":"^\\d+:\\d+$","description":"Range to evaluate, as 'start:end' (e.g. '0:200')"},{"type":"null"}],"title":"Range","default":"0:100"},"type":{"type":"string","const":"dataset","title":"Type"},"dataset":{"type":"string","minLength":1,"title":"Dataset","description":"Resource id or label"}},"additionalProperties":false,"type":"object","required":["type","dataset"],"title":"DatasetDataSource"},"ContainerDataSource":{"properties":{"range":{"anyOf":[{"type":"string","pattern":"^\\d+:\\d+$","description":"Range to evaluate, as 'start:end' (e.g. '0:200')"},{"type":"null"}],"title":"Range","default":"0:100"},"type":{"type":"string","const":"container","title":"Type"},"container":{"type":"string","minLength":1,"title":"Container","description":"Resource id or label"}},"additionalProperties":false,"type":"object","required":["type","container"],"title":"ContainerDataSource"},"GenerateDataSource":{"properties":{"range":{"anyOf":[{"type":"string","pattern":"^\\d+:\\d+$","description":"Range to evaluate, as 'start:end' (e.g. 
'0:200')"},{"type":"null"}],"title":"Range","default":"0:100"},"type":{"type":"string","const":"generate","title":"Type","default":"generate"},"models":{"items":{"type":"string","minLength":1,"description":"Resource id or label"},"type":"array","minItems":1,"title":"Models","description":"Model ids or slugs to generate with."}},"additionalProperties":false,"type":"object","required":["models"],"title":"GenerateDataSource","description":"Data source that generates completions via one or more models."},"EvaluationRunResults-Output":{"properties":{"overall":{"anyOf":[{"$ref":"#/components/schemas/EvaluationRunOverallResults"},{"type":"null"}],"description":"Aggregate scores across all models and evaluators."},"metadata":{"anyOf":[{"additionalProperties":true,"type":"object"},{"type":"null"}],"title":"Metadata","description":"Additional result metadata."},"per_model":{"anyOf":[{"items":{"$ref":"#/components/schemas/EvaluationRunPerModelResults"},"type":"array"},{"type":"null"}],"title":"Per Model","description":"Per-model result breakdowns."},"launch_count":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Launch Count","description":"Number of model launches in this run."}},"additionalProperties":true,"type":"object","title":"EvaluationRunResults","description":"Typed representation of the evaluation run results payload."},"EvaluationRunOverallResults":{"properties":{"avg_score":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"Avg Score","description":"Average score across all evaluators."},"avg_accuracy":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"Avg Accuracy","description":"Average accuracy across all evaluators."}},"additionalProperties":true,"type":"object","title":"EvaluationRunOverallResults","description":"Aggregated scores across all models and evaluators."},"EvaluationRunPerModelResults":{"properties":{"model":{"anyOf":[{"additionalProperties":true,"type":"object"},{"type":"null"}],"title":"Model","description":"Model configuration used 
(sample and ground-truth generation models)."},"per_eval":{"anyOf":[{"additionalProperties":{"$ref":"#/components/schemas/EvaluationRunPerEvalResults"},"type":"object"},{"type":"null"}],"title":"Per Eval","description":"Results keyed by evaluator UUID."},"avg_score":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"Avg Score","description":"Average score for this model."},"avg_accuracy":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"Avg Accuracy","description":"Average accuracy for this model."},"launch_index":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Launch Index","description":"Index of this model launch."},"launch_call_id":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Launch Call Id","description":"Modal function call id for this launch."}},"additionalProperties":true,"type":"object","title":"EvaluationRunPerModelResults","description":"Results for a single model within an evaluation run."},"EvaluationRunPerEvalResults":{"properties":{"accuracy":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"Accuracy","description":"Accuracy ratio."},"avg_score":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"Avg Score","description":"Average score."},"num_total":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Num Total","description":"Total number of evaluation samples."},"num_errors":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Num Errors","description":"Number of samples that errored."},"num_failed":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Num Failed","description":"Number of samples that failed."},"num_passed":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Num Passed","description":"Number of samples that passed."},"num_scored":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Num Scored","description":"Number of samples that were scored."},"num_missing":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Num Missing","description":"Number of samples with missing 
data."},"avg_accuracy":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"Avg Accuracy","description":"Average accuracy for this evaluator."},"break_reason":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Break Reason","description":"Why evaluation stopped (e.g. 'expected_count_reached')."},"expected_count":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Expected Count","description":"Expected sample count for this evaluator."}},"additionalProperties":true,"type":"object","title":"EvaluationRunPerEvalResults","description":"Per-evaluator breakdown within a single model run."},"ErrorResponse":{"properties":{"error":{"$ref":"#/components/schemas/ManiacError","description":"Error payload."}},"additionalProperties":false,"type":"object","required":["error"],"title":"ErrorResponse","description":"Response body for errors."},"ManiacError":{"properties":{"code":{"type":"string","title":"Code","description":"Machine-readable error code."},"message":{"type":"string","title":"Message","description":"Human-readable error message."},"details":{"anyOf":[{"additionalProperties":true,"type":"object"},{"type":"null"}],"title":"Details","description":"Additional error details."}},"additionalProperties":false,"type":"object","required":["code","message"],"title":"ManiacError","description":"Standard Maniac API error envelope.\n\nThis matches the shape already used by v2 auth (`detail={\"error\": {...}}`)."},"HTTPValidationError":{"properties":{"detail":{"items":{"$ref":"#/components/schemas/ValidationError"},"type":"array","title":"Detail"}},"type":"object","title":"HTTPValidationError"},"ValidationError":{"properties":{"loc":{"items":{"anyOf":[{"type":"string"},{"type":"integer"}]},"type":"array","title":"Location"},"msg":{"type":"string","title":"Message"},"type":{"type":"string","title":"Error Type"}},"type":"object","required":["loc","msg","type"],"title":"ValidationError"}}},"paths":{"/v1/evaluation/runs":{"get":{"tags":["Evaluation"],"summary":"List evaluation 
runs","description":"List evaluation runs for the authenticated project. Optionally filter by container and status.","operationId":"evaluation_runs_list","parameters":[{"name":"container","in":"query","required":false,"schema":{"anyOf":[{"type":"string"},{"type":"null"}],"description":"Container ID or label to filter by.","title":"Container"},"description":"Container ID or label to filter by."},{"name":"status","in":"query","required":false,"schema":{"anyOf":[{"type":"string"},{"type":"null"}],"description":"Filter by run status (e.g. 'running', 'completed', 'error').","title":"Status"},"description":"Filter by run status (e.g. 'running', 'completed', 'error')."},{"name":"limit","in":"query","required":false,"schema":{"type":"integer","maximum":100,"minimum":1,"default":20,"title":"Limit"}},{"name":"offset","in":"query","required":false,"schema":{"type":"integer","minimum":0,"default":0,"title":"Offset"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/EvaluationRunListResponse"}}}},"400":{"description":"Bad Request","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}},"401":{"description":"Unauthorized","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}},"403":{"description":"Forbidden","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}},"429":{"description":"Too Many Requests","headers":{"X-RateLimit-Limit":{"description":"Request limit per window.","schema":{"type":"integer"}},"X-RateLimit-Remaining":{"description":"Remaining requests in current window.","schema":{"type":"integer"}},"X-RateLimit-Reset":{"description":"Unix timestamp when the rate limit resets.","schema":{"type":"integer"}},"Retry-After":{"description":"Seconds to wait before 
retrying.","schema":{"type":"integer"}}},"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}},"500":{"description":"Internal Server Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}},"501":{"description":"Not Implemented","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}},"503":{"description":"Upstream Unavailable","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}}}}}}}
```

## Create an evaluation run

> Launch an evaluation run. Validates access to the specified container, evaluators, data sources, and models, then dispatches the run through the backend gateway interface.

```json
{"openapi":"3.1.0","info":{"title":"Maniac Inference Gateway API","version":"1.0.0"},"tags":[{"name":"Evaluation","description":"Evaluation and evaluator endpoints."}],"servers":[{"url":"https://platform.maniac.ai","description":"The Maniac API"}],"security":[{"ApiKeyAuth":[]}],"components":{"securitySchemes":{"ApiKeyAuth":{"type":"http","scheme":"bearer","bearerFormat":"API key","description":"API key in Authorization header using Bearer <token>."}},"schemas":{"EvaluationRunReq":{"properties":{"container":{"type":"string","title":"Container","description":"Container id or label."},"evaluators":{"items":{"type":"string"},"type":"array","minItems":1,"title":"Evaluators","description":"Evaluator ids or labels."},"sample":{"anyOf":[{"oneOf":[{"$ref":"#/components/schemas/DatasetDataSource"},{"$ref":"#/components/schemas/ContainerDataSource"},{"$ref":"#/components/schemas/GenerateDataSource"}],"discriminator":{"propertyName":"type","mapping":{"container":"#/components/schemas/ContainerDataSource","dataset":"#/components/schemas/DatasetDataSource","generate":"#/components/schemas/GenerateDataSource"}}},{"type":"null"}],"title":"Sample","description":"Sample-side data source. Omit to default to the container's task logs. Use type='dataset' to pull from a dataset, type='container' to pull from task logs, or type='generate' to generate completions with the specified models."},"ground_truth":{"anyOf":[{"oneOf":[{"$ref":"#/components/schemas/DatasetDataSource"},{"$ref":"#/components/schemas/ContainerDataSource"},{"$ref":"#/components/schemas/GenerateDataSource"}],"discriminator":{"propertyName":"type","mapping":{"container":"#/components/schemas/ContainerDataSource","dataset":"#/components/schemas/DatasetDataSource","generate":"#/components/schemas/GenerateDataSource"}}},{"type":"null"}],"title":"Ground Truth","description":"Ground-truth-side data source. Omit to default to the container's task logs. 
Use type='dataset' to pull from a dataset, type='container' to pull from task logs, or type='generate' to generate completions with a model."},"baseline":{"anyOf":[{"oneOf":[{"$ref":"#/components/schemas/DatasetDataSource"},{"$ref":"#/components/schemas/ContainerDataSource"},{"$ref":"#/components/schemas/GenerateDataSource"}],"discriminator":{"propertyName":"type","mapping":{"container":"#/components/schemas/ContainerDataSource","dataset":"#/components/schemas/DatasetDataSource","generate":"#/components/schemas/GenerateDataSource"}}},{"type":"null"}],"title":"Baseline","description":"Baseline-side data source for pairwise evaluation. Use type='dataset' to pull from a dataset, type='container' to pull from task logs, or type='generate' to generate completions with a model."},"metadata":{"anyOf":[{"additionalProperties":true,"type":"object"},{"type":"null"}],"title":"Metadata","description":"Optional metadata."},"environment":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Environment","description":"Execution environment name (maps to Modal app suffix).","default":"main"}},"additionalProperties":false,"type":"object","required":["container","evaluators"],"title":"EvaluationRunReq","description":"Request body for creating an evaluation run.\n\nEach side of the evaluation (``sample`` and ``ground_truth``) is described\nby a single data-source object whose ``type`` discriminator determines how\ndata is obtained:\n\n- ``\"dataset\"``   — pull from a dataset.\n- ``\"container\"`` — pull from the container's task logs.\n- ``\"generate\"``  — generate completions using one or more models.\n\nBoth fields are optional, but **at least one must be provided**.  When a\nside is omitted it defaults to the top-level container's task logs.  
At\nleast one resolved side must not be ``type='generate'`` so there is seed\ninput to evaluate against."},"DatasetDataSource":{"properties":{"range":{"anyOf":[{"type":"string","pattern":"^\\d+:\\d+$","description":"Range to evaluate, as 'start:end' (e.g. '0:200')"},{"type":"null"}],"title":"Range","default":"0:100"},"type":{"type":"string","const":"dataset","title":"Type"},"dataset":{"type":"string","minLength":1,"title":"Dataset","description":"Resource id or label"}},"additionalProperties":false,"type":"object","required":["type","dataset"],"title":"DatasetDataSource"},"ContainerDataSource":{"properties":{"range":{"anyOf":[{"type":"string","pattern":"^\\d+:\\d+$","description":"Range to evaluate, as 'start:end' (e.g. '0:200')"},{"type":"null"}],"title":"Range","default":"0:100"},"type":{"type":"string","const":"container","title":"Type"},"container":{"type":"string","minLength":1,"title":"Container","description":"Resource id or label"}},"additionalProperties":false,"type":"object","required":["type","container"],"title":"ContainerDataSource"},"GenerateDataSource":{"properties":{"range":{"anyOf":[{"type":"string","pattern":"^\\d+:\\d+$","description":"Range to evaluate, as 'start:end' (e.g. 
'0:200')"},{"type":"null"}],"title":"Range","default":"0:100"},"type":{"type":"string","const":"generate","title":"Type","default":"generate"},"models":{"items":{"type":"string","minLength":1,"description":"Resource id or label"},"type":"array","minItems":1,"title":"Models","description":"Model ids or slugs to generate with."}},"additionalProperties":false,"type":"object","required":["models"],"title":"GenerateDataSource","description":"Data source that generates completions via one or more models."},"EvaluationRun":{"properties":{"created_at":{"type":"string","title":"Created At"},"finished_at":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Finished At"},"error_at":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Error At"},"status":{"type":"string","title":"Status"},"error":{"anyOf":[{},{"type":"null"}],"title":"Error"},"object":{"type":"string","const":"evaluation.run","title":"Object","description":"Object type.","default":"evaluation.run"},"id":{"type":"string","title":"Id","description":"Evaluation run id (run group id)."},"process_id":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Process Id","description":"Process id for lifecycle tracking."},"evaluators":{"anyOf":[{"items":{"type":"string"},"type":"array"},{"type":"null"}],"title":"Evaluators","description":"Evaluator ids used in this run."},"container":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Container","description":"Container id."},"dataset_id":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Dataset Id","description":"Dataset id (if a dataset was 
used)."},"sample":{"anyOf":[{"oneOf":[{"$ref":"#/components/schemas/DatasetDataSource"},{"$ref":"#/components/schemas/ContainerDataSource"},{"$ref":"#/components/schemas/GenerateDataSource"}],"discriminator":{"propertyName":"type","mapping":{"container":"#/components/schemas/ContainerDataSource","dataset":"#/components/schemas/DatasetDataSource","generate":"#/components/schemas/GenerateDataSource"}}},{"type":"null"}],"title":"Sample","description":"Resolved sample-side data source."},"ground_truth":{"anyOf":[{"oneOf":[{"$ref":"#/components/schemas/DatasetDataSource"},{"$ref":"#/components/schemas/ContainerDataSource"},{"$ref":"#/components/schemas/GenerateDataSource"}],"discriminator":{"propertyName":"type","mapping":{"container":"#/components/schemas/ContainerDataSource","dataset":"#/components/schemas/DatasetDataSource","generate":"#/components/schemas/GenerateDataSource"}}},{"type":"null"}],"title":"Ground Truth","description":"Resolved ground-truth-side data source."},"baseline":{"anyOf":[{"oneOf":[{"$ref":"#/components/schemas/DatasetDataSource"},{"$ref":"#/components/schemas/ContainerDataSource"},{"$ref":"#/components/schemas/GenerateDataSource"}],"discriminator":{"propertyName":"type","mapping":{"container":"#/components/schemas/ContainerDataSource","dataset":"#/components/schemas/DatasetDataSource","generate":"#/components/schemas/GenerateDataSource"}}},{"type":"null"}],"title":"Baseline","description":"Resolved baseline-side data source for pairwise evaluation."},"results":{"anyOf":[{"$ref":"#/components/schemas/EvaluationRunResults-Output"},{"type":"null"}],"description":"Evaluation results (populated on completion)."},"metrics":{"anyOf":[{"additionalProperties":true,"type":"object"},{"type":"null"}],"title":"Metrics","description":"Evaluation metrics (populated on completion)."},"config":{"anyOf":[{"additionalProperties":true,"type":"object"},{"type":"null"}],"title":"Config","description":"Run configuration as 
submitted."},"spend":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"Spend","description":"Estimated spend."},"metadata":{"anyOf":[{"additionalProperties":true,"type":"object"},{"type":"null"}],"title":"Metadata","description":"Optional metadata."}},"type":"object","required":["created_at","status","id"],"title":"EvaluationRun","description":"Response model for an evaluation run."},"EvaluationRunResults-Output":{"properties":{"overall":{"anyOf":[{"$ref":"#/components/schemas/EvaluationRunOverallResults"},{"type":"null"}],"description":"Aggregate scores across all models and evaluators."},"metadata":{"anyOf":[{"additionalProperties":true,"type":"object"},{"type":"null"}],"title":"Metadata","description":"Additional result metadata."},"per_model":{"anyOf":[{"items":{"$ref":"#/components/schemas/EvaluationRunPerModelResults"},"type":"array"},{"type":"null"}],"title":"Per Model","description":"Per-model result breakdowns."},"launch_count":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Launch Count","description":"Number of model launches in this run."}},"additionalProperties":true,"type":"object","title":"EvaluationRunResults","description":"Typed representation of the evaluation run results payload."},"EvaluationRunOverallResults":{"properties":{"avg_score":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"Avg Score","description":"Average score across all evaluators."},"avg_accuracy":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"Avg Accuracy","description":"Average accuracy across all evaluators."}},"additionalProperties":true,"type":"object","title":"EvaluationRunOverallResults","description":"Aggregated scores across all models and evaluators."},"EvaluationRunPerModelResults":{"properties":{"model":{"anyOf":[{"additionalProperties":true,"type":"object"},{"type":"null"}],"title":"Model","description":"Model configuration used (sample and ground-truth generation 
models)."},"per_eval":{"anyOf":[{"additionalProperties":{"$ref":"#/components/schemas/EvaluationRunPerEvalResults"},"type":"object"},{"type":"null"}],"title":"Per Eval","description":"Results keyed by evaluator UUID."},"avg_score":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"Avg Score","description":"Average score for this model."},"avg_accuracy":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"Avg Accuracy","description":"Average accuracy for this model."},"launch_index":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Launch Index","description":"Index of this model launch."},"launch_call_id":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Launch Call Id","description":"Modal function call id for this launch."}},"additionalProperties":true,"type":"object","title":"EvaluationRunPerModelResults","description":"Results for a single model within an evaluation run."},"EvaluationRunPerEvalResults":{"properties":{"accuracy":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"Accuracy","description":"Accuracy ratio."},"avg_score":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"Avg Score","description":"Average score."},"num_total":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Num Total","description":"Total number of evaluation samples."},"num_errors":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Num Errors","description":"Number of samples that errored."},"num_failed":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Num Failed","description":"Number of samples that failed."},"num_passed":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Num Passed","description":"Number of samples that passed."},"num_scored":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Num Scored","description":"Number of samples that were scored."},"num_missing":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Num Missing","description":"Number of samples with missing 
data."},"avg_accuracy":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"Avg Accuracy","description":"Average accuracy for this evaluator."},"break_reason":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Break Reason","description":"Why evaluation stopped (e.g. 'expected_count_reached')."},"expected_count":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Expected Count","description":"Expected sample count for this evaluator."}},"additionalProperties":true,"type":"object","title":"EvaluationRunPerEvalResults","description":"Per-evaluator breakdown within a single model run."},"ErrorResponse":{"properties":{"error":{"$ref":"#/components/schemas/ManiacError","description":"Error payload."}},"additionalProperties":false,"type":"object","required":["error"],"title":"ErrorResponse","description":"Response body for errors."},"ManiacError":{"properties":{"code":{"type":"string","title":"Code","description":"Machine-readable error code."},"message":{"type":"string","title":"Message","description":"Human-readable error message."},"details":{"anyOf":[{"additionalProperties":true,"type":"object"},{"type":"null"}],"title":"Details","description":"Additional error details."}},"additionalProperties":false,"type":"object","required":["code","message"],"title":"ManiacError","description":"Standard Maniac API error envelope.\n\nThis matches the shape already used by v2 auth (`detail={\"error\": {...}}`)."},"HTTPValidationError":{"properties":{"detail":{"items":{"$ref":"#/components/schemas/ValidationError"},"type":"array","title":"Detail"}},"type":"object","title":"HTTPValidationError"},"ValidationError":{"properties":{"loc":{"items":{"anyOf":[{"type":"string"},{"type":"integer"}]},"type":"array","title":"Location"},"msg":{"type":"string","title":"Message"},"type":{"type":"string","title":"Error Type"}},"type":"object","required":["loc","msg","type"],"title":"ValidationError"}}},"paths":{"/v1/evaluation/runs":{"post":{"tags":["Evaluation"],"summary":"Create an 
evaluation run","description":"Launch an evaluation run. Validates access to the specified container, evaluators, data sources, and models, then dispatches the run through the backend gateway interface.","operationId":"evaluation_runs_create","requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/EvaluationRunReq"}}}},"responses":{"201":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/EvaluationRun"}}}},"400":{"description":"Bad Request","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}},"401":{"description":"Unauthorized","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}},"403":{"description":"Forbidden","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}},"404":{"description":"Not Found","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}},"409":{"description":"Conflict","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}},"429":{"description":"Too Many Requests","headers":{"X-RateLimit-Limit":{"description":"Request limit per window.","schema":{"type":"integer"}},"X-RateLimit-Remaining":{"description":"Remaining requests in current window.","schema":{"type":"integer"}},"X-RateLimit-Reset":{"description":"Unix timestamp when the rate limit resets.","schema":{"type":"integer"}},"Retry-After":{"description":"Seconds to wait before retrying.","schema":{"type":"integer"}}},"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}},"500":{"description":"Internal Server Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}},"501":{"description":"Not 
Implemented","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}},"503":{"description":"Upstream Unavailable","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}}}}}}}
```

## Get an evaluation run

> Retrieve a single evaluation run by ID within the authenticated project.

```json
{"openapi":"3.1.0","info":{"title":"Maniac Inference Gateway API","version":"1.0.0"},"tags":[{"name":"Evaluation","description":"Evaluation and evaluator endpoints."}],"servers":[{"url":"https://platform.maniac.ai","description":"The Maniac API"}],"security":[{"ApiKeyAuth":[]}],"components":{"securitySchemes":{"ApiKeyAuth":{"type":"http","scheme":"bearer","bearerFormat":"API key","description":"API key in Authorization header using Bearer <token>."}},"schemas":{"EvaluationRun":{"properties":{"created_at":{"type":"string","title":"Created At"},"finished_at":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Finished At"},"error_at":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Error At"},"status":{"type":"string","title":"Status"},"error":{"anyOf":[{},{"type":"null"}],"title":"Error"},"object":{"type":"string","const":"evaluation.run","title":"Object","description":"Object type.","default":"evaluation.run"},"id":{"type":"string","title":"Id","description":"Evaluation run id (run group id)."},"process_id":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Process Id","description":"Process id for lifecycle tracking."},"evaluators":{"anyOf":[{"items":{"type":"string"},"type":"array"},{"type":"null"}],"title":"Evaluators","description":"Evaluator ids used in this run."},"container":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Container","description":"Container id."},"dataset_id":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Dataset Id","description":"Dataset id (if a dataset was used)."},"sample":{"anyOf":[{"oneOf":[{"$ref":"#/components/schemas/DatasetDataSource"},{"$ref":"#/components/schemas/ContainerDataSource"},{"$ref":"#/components/schemas/GenerateDataSource"}],"discriminator":{"propertyName":"type","mapping":{"container":"#/components/schemas/ContainerDataSource","dataset":"#/components/schemas/DatasetDataSource","generate":"#/components/schemas/GenerateDataSource"}}},{"type":"null"}],"title":"Sample","description":"Resolved 
sample-side data source."},"ground_truth":{"anyOf":[{"oneOf":[{"$ref":"#/components/schemas/DatasetDataSource"},{"$ref":"#/components/schemas/ContainerDataSource"},{"$ref":"#/components/schemas/GenerateDataSource"}],"discriminator":{"propertyName":"type","mapping":{"container":"#/components/schemas/ContainerDataSource","dataset":"#/components/schemas/DatasetDataSource","generate":"#/components/schemas/GenerateDataSource"}}},{"type":"null"}],"title":"Ground Truth","description":"Resolved ground-truth-side data source."},"baseline":{"anyOf":[{"oneOf":[{"$ref":"#/components/schemas/DatasetDataSource"},{"$ref":"#/components/schemas/ContainerDataSource"},{"$ref":"#/components/schemas/GenerateDataSource"}],"discriminator":{"propertyName":"type","mapping":{"container":"#/components/schemas/ContainerDataSource","dataset":"#/components/schemas/DatasetDataSource","generate":"#/components/schemas/GenerateDataSource"}}},{"type":"null"}],"title":"Baseline","description":"Resolved baseline-side data source for pairwise evaluation."},"results":{"anyOf":[{"$ref":"#/components/schemas/EvaluationRunResults-Output"},{"type":"null"}],"description":"Evaluation results (populated on completion)."},"metrics":{"anyOf":[{"additionalProperties":true,"type":"object"},{"type":"null"}],"title":"Metrics","description":"Evaluation metrics (populated on completion)."},"config":{"anyOf":[{"additionalProperties":true,"type":"object"},{"type":"null"}],"title":"Config","description":"Run configuration as submitted."},"spend":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"Spend","description":"Estimated spend."},"metadata":{"anyOf":[{"additionalProperties":true,"type":"object"},{"type":"null"}],"title":"Metadata","description":"Optional metadata."}},"type":"object","required":["created_at","status","id"],"title":"EvaluationRun","description":"Response model for an evaluation run."},"DatasetDataSource":{"properties":{"range":{"anyOf":[{"type":"string","pattern":"^\\d+:\\d+$","description":"Range 
to evaluate, as 'start:end' (e.g. '0:200')"},{"type":"null"}],"title":"Range","default":"0:100"},"type":{"type":"string","const":"dataset","title":"Type"},"dataset":{"type":"string","minLength":1,"title":"Dataset","description":"Resource id or label"}},"additionalProperties":false,"type":"object","required":["type","dataset"],"title":"DatasetDataSource"},"ContainerDataSource":{"properties":{"range":{"anyOf":[{"type":"string","pattern":"^\\d+:\\d+$","description":"Range to evaluate, as 'start:end' (e.g. '0:200')"},{"type":"null"}],"title":"Range","default":"0:100"},"type":{"type":"string","const":"container","title":"Type"},"container":{"type":"string","minLength":1,"title":"Container","description":"Resource id or label"}},"additionalProperties":false,"type":"object","required":["type","container"],"title":"ContainerDataSource"},"GenerateDataSource":{"properties":{"range":{"anyOf":[{"type":"string","pattern":"^\\d+:\\d+$","description":"Range to evaluate, as 'start:end' (e.g. '0:200')"},{"type":"null"}],"title":"Range","default":"0:100"},"type":{"type":"string","const":"generate","title":"Type","default":"generate"},"models":{"items":{"type":"string","minLength":1,"description":"Resource id or label"},"type":"array","minItems":1,"title":"Models","description":"Model ids or slugs to generate with."}},"additionalProperties":false,"type":"object","required":["models"],"title":"GenerateDataSource","description":"Data source that generates completions via one or more models."},"EvaluationRunResults-Output":{"properties":{"overall":{"anyOf":[{"$ref":"#/components/schemas/EvaluationRunOverallResults"},{"type":"null"}],"description":"Aggregate scores across all models and evaluators."},"metadata":{"anyOf":[{"additionalProperties":true,"type":"object"},{"type":"null"}],"title":"Metadata","description":"Additional result metadata."},"per_model":{"anyOf":[{"items":{"$ref":"#/components/schemas/EvaluationRunPerModelResults"},"type":"array"},{"type":"null"}],"title":"Per 
Model","description":"Per-model result breakdowns."},"launch_count":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Launch Count","description":"Number of model launches in this run."}},"additionalProperties":true,"type":"object","title":"EvaluationRunResults","description":"Typed representation of the evaluation run results payload."},"EvaluationRunOverallResults":{"properties":{"avg_score":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"Avg Score","description":"Average score across all evaluators."},"avg_accuracy":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"Avg Accuracy","description":"Average accuracy across all evaluators."}},"additionalProperties":true,"type":"object","title":"EvaluationRunOverallResults","description":"Aggregated scores across all models and evaluators."},"EvaluationRunPerModelResults":{"properties":{"model":{"anyOf":[{"additionalProperties":true,"type":"object"},{"type":"null"}],"title":"Model","description":"Model configuration used (sample and ground-truth generation models)."},"per_eval":{"anyOf":[{"additionalProperties":{"$ref":"#/components/schemas/EvaluationRunPerEvalResults"},"type":"object"},{"type":"null"}],"title":"Per Eval","description":"Results keyed by evaluator UUID."},"avg_score":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"Avg Score","description":"Average score for this model."},"avg_accuracy":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"Avg Accuracy","description":"Average accuracy for this model."},"launch_index":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Launch Index","description":"Index of this model launch."},"launch_call_id":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Launch Call Id","description":"Modal function call id for this launch."}},"additionalProperties":true,"type":"object","title":"EvaluationRunPerModelResults","description":"Results for a single model within an evaluation 
run."},"EvaluationRunPerEvalResults":{"properties":{"accuracy":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"Accuracy","description":"Accuracy ratio."},"avg_score":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"Avg Score","description":"Average score."},"num_total":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Num Total","description":"Total number of evaluation samples."},"num_errors":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Num Errors","description":"Number of samples that errored."},"num_failed":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Num Failed","description":"Number of samples that failed."},"num_passed":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Num Passed","description":"Number of samples that passed."},"num_scored":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Num Scored","description":"Number of samples that were scored."},"num_missing":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Num Missing","description":"Number of samples with missing data."},"avg_accuracy":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"Avg Accuracy","description":"Average accuracy for this evaluator."},"break_reason":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Break Reason","description":"Why evaluation stopped (e.g. 
'expected_count_reached')."},"expected_count":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Expected Count","description":"Expected sample count for this evaluator."}},"additionalProperties":true,"type":"object","title":"EvaluationRunPerEvalResults","description":"Per-evaluator breakdown within a single model run."},"ErrorResponse":{"properties":{"error":{"$ref":"#/components/schemas/ManiacError","description":"Error payload."}},"additionalProperties":false,"type":"object","required":["error"],"title":"ErrorResponse","description":"Response body for errors."},"ManiacError":{"properties":{"code":{"type":"string","title":"Code","description":"Machine-readable error code."},"message":{"type":"string","title":"Message","description":"Human-readable error message."},"details":{"anyOf":[{"additionalProperties":true,"type":"object"},{"type":"null"}],"title":"Details","description":"Additional error details."}},"additionalProperties":false,"type":"object","required":["code","message"],"title":"ManiacError","description":"Standard Maniac API error envelope.\n\nThis matches the shape already used by v2 auth (`detail={\"error\": {...}}`)."},"HTTPValidationError":{"properties":{"detail":{"items":{"$ref":"#/components/schemas/ValidationError"},"type":"array","title":"Detail"}},"type":"object","title":"HTTPValidationError"},"ValidationError":{"properties":{"loc":{"items":{"anyOf":[{"type":"string"},{"type":"integer"}]},"type":"array","title":"Location"},"msg":{"type":"string","title":"Message"},"type":{"type":"string","title":"Error Type"}},"type":"object","required":["loc","msg","type"],"title":"ValidationError"}}},"paths":{"/v1/evaluation/runs/{run_id}":{"get":{"tags":["Evaluation"],"summary":"Get an evaluation run","description":"Retrieve a single evaluation run by ID within the authenticated project.","operationId":"evaluation_runs_retrieve","parameters":[{"name":"run_id","in":"path","required":true,"schema":{"type":"string","title":"Run 
Id"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/EvaluationRun"}}}},"400":{"description":"Bad Request","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}},"401":{"description":"Unauthorized","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}},"403":{"description":"Forbidden","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}},"404":{"description":"Not Found","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}},"409":{"description":"Conflict","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}},"429":{"description":"Too Many Requests","headers":{"X-RateLimit-Limit":{"description":"Request limit per window.","schema":{"type":"integer"}},"X-RateLimit-Remaining":{"description":"Remaining requests in current window.","schema":{"type":"integer"}},"X-RateLimit-Reset":{"description":"Unix timestamp when the rate limit resets.","schema":{"type":"integer"}},"Retry-After":{"description":"Seconds to wait before retrying.","schema":{"type":"integer"}}},"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}},"500":{"description":"Internal Server Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}},"501":{"description":"Not Implemented","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}},"503":{"description":"Upstream Unavailable","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}}}}}}}
```

## Healthz

> Health check endpoint for load balancers and uptime monitors.

```json
{"openapi":"3.1.0","info":{"title":"Maniac Inference Gateway API","version":"1.0.0"},"tags":[{"name":"Health","description":"Health and status endpoints."}],"servers":[{"url":"https://platform.maniac.ai","description":"The Maniac API"}],"security":[],"paths":{"/healthz":{"get":{"tags":["Health"],"summary":"Healthz","description":"Health check endpoint for load balancers and uptime monitors.","operationId":"healthz_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HealthResponse"}}}}}}}},"components":{"schemas":{"HealthResponse":{"properties":{"ok":{"type":"boolean","title":"Ok"}},"additionalProperties":false,"type":"object","required":["ok"],"title":"HealthResponse","description":"Health check response."}}}}
```


---

# Agent Instructions
This documentation is published with GitBook. GitBook is the documentation platform designed so that both humans and AI agents can read, navigate, and reason over technical content effectively. Learn more at gitbook.com.

## Querying This Documentation
If you need additional information that is not directly available in this page, you can query the documentation dynamically by asking a question.

Perform an HTTP GET request on the current page URL with the `ask` query parameter and, optionally, the `goal` query parameter. Both values are natural-language text, so URL-encode them:

```
GET https://docs.maniac.ai/api-reference/rest-api.md?ask=<question>&goal=<endgoal>
```

- `ask` is the immediate question: it should be specific, self-contained, and written in natural language.
- `goal` is optional and describes the broader end goal you are ultimately trying to accomplish on behalf of the user. GitBook uses it to tailor the answer towards what is most useful for that goal.

The response will contain a direct answer to the question and relevant excerpts and sources from the documentation.

Use this mechanism when the answer is not explicitly present in the current page, you need clarification or additional context, or you want to retrieve related documentation sections.
