Skip to main content

Documentation Index

Fetch the complete documentation index at: https://docs.simplismart.ai/llms.txt

Use this file to discover all available pages before exploring further.

Manage deployments using the client.deployments attribute or convenience methods.

create_deployment

Creates a deployment for a model repo.
Use env for model repo UUID and organization ID (e.g. ORG_ID); do not hardcode secrets.
import os
from dotenv import load_dotenv
load_dotenv()

from simplismart import DeploymentCreate, Simplismart

client = Simplismart()
deployment = client.create_deployment(
    DeploymentCreate(
        model_repo=os.getenv("MODEL_REPO_ID", "model-repo-uuid"),
        org=os.getenv("ORG_ID"),
        gpu_id="nvidia-h100",
        name="vision-private-deploy",
        min_pod_replicas=1,
        max_pod_replicas=2,
        autoscale_config={"targets": [{"metric": "gpu", "target": 80}]},
        env_variables={"KEY": "value"},
        healthcheck={"path": "/", "port": 8000},
        ports={"http": {"port": 8000}},
        metrics_path=["/v1/chat/completions"],
        fast_scaleup=True,
        deployment_tag="v1.0",
    )
)

DeploymentCreate

| Parameter | Type | Description | Required |
| --- | --- | --- | --- |
| model_repo | str | Model repository UUID | Yes |
| org | str | Organization UUID (org_id) | Yes |
| gpu_id | str | GPU type identifier. Examples: nvidia-h100, nvidia-a10, nvidia-l4 | Yes |
| name | str | Deployment name (3-60 chars) | Yes |
| min_pod_replicas | int | Minimum pod replicas (≥ 1) | Yes |
| max_pod_replicas | int | Maximum pod replicas (≥ 1) | Yes |
| autoscale_config | AutoscaleConfig | Autoscaling configuration | Yes |
| env_variables | dict \| None | Environment variables | No |
| deployment_custom_configuration | dict \| None | Custom deployment config | No |
| healthcheck | dict \| None | Health check configuration | No |
| ports | dict \| None | Port mappings | No |
| metrics_path | list \| None | Metrics paths | No |
| persistent_volume_claims | dict \| list \| None | PVC configurations | No |
| fast_scaleup | bool \| None | Enable fast scale up | No |
| deployment_tag | str \| None | Deployment tag label | No |

AutoscaleConfig

autoscale_config = {
    "targets": [
        {
            "metric": "gpu",      # Required
            "target": 80,          # Required (number)
            "percentile": 95      # Optional, only for latency metric
        }
    ]
}
| Metric Option | Description |
| --- | --- |
| concurrency | Number of concurrent requests |
| cpu | CPU utilization percentage |
| gpu | GPU utilization percentage |
| gram | GPU memory utilization |
| latency | Request latency (supports percentiles 50, 75, 90, 95) |
| ram | RAM utilization |
| throughput | Requests per second |
The percentile field is only supported when metric is set to latency.

list_deployments

Lists deployments with optional filtering.
import os
from dotenv import load_dotenv
load_dotenv()

from simplismart import Simplismart

client = Simplismart()
deployments = client.list_deployments(
    model_repo_id=os.getenv("MODEL_REPO_ID"),  # Optional
    status="DEPLOYED",
    offset=0,
    count=20,
)
print(deployments)
Expected output — list of deployment summary objects:
[
  {
    "deployment_id": "deployment-uuid",
    "deployment_name": "speechbrain-v3",
    "model_repo_id": "model-repo-uuid",
    "model_repo_name": "speechbrain",
    "model_type": "unknown",
    "accelerator_type": ["nvidia-l40s"],
    "accelerator_count": 1,
    "status": "DEPLOYED"
  }
]

Deployment Status Options

| Value | Description |
| --- | --- |
| DEPLOYED | Deployment is running |
| PENDING | Deployment is being created |
| FAILED | Deployment failed |
| STOPPED | Deployment is stopped |
| DELETED | Deployment has been deleted |

list_model_deployments

Lists all model deployments for an organization.
deployments = client.list_model_deployments(org_id="org-uuid")

get_model_deployment

Gets deployment details by ID. Set DEPLOYMENT_ID in env or use an id from list_deployments.
deployment = client.get_model_deployment(deployment_id=os.getenv("DEPLOYMENT_ID", "deployment-uuid"))

print(deployment)
Expected output — deployment object with uuid, name, status, model_repo, org, autoscale_config, healthcheck, ports, min_pod_replicas, max_pod_replicas, etc.

get_deployment

Get deployment details by ID.
deployment = client.get_deployment(deployment_id=os.getenv("DEPLOYMENT_ID", "deployment-uuid"))

update_deployment

Updates deployment configuration.
updated = client.update_deployment(
    deployment_id=os.getenv("DEPLOYMENT_ID", "deployment-uuid"),
    payload={
        "min_pod_replicas": 1,
        "max_pod_replicas": 2,
        "autoscale_config": {"targets": [{"metric": "gpu", "target": 80}]},
    },
)

stop_deployment

Stops a running deployment.
result = client.stop_deployment(deployment_id=os.getenv("DEPLOYMENT_ID", "deployment-uuid"))

start_deployment

Starts a stopped deployment.
result = client.start_deployment(deployment_id=os.getenv("DEPLOYMENT_ID", "deployment-uuid"))

restart_deployment

Restarts a deployment.
result = client.restart_deployment(
    deployment_id=os.getenv("DEPLOYMENT_ID", "deployment-uuid"),
)

fetch_deployment_health

Gets deployment health status.
health = client.fetch_deployment_health(deployment_id=os.getenv("DEPLOYMENT_ID", "deployment-uuid"))

print(health)
Expected output
{
  "data": "Healthy",
  "messages": [
    {
      "message": "Ready to use, the model is running and available for inference.",
      "severity": "info"
    }
  ],
  "pods": { "ready": 1, "not_ready": 0 }
}

update_deployment_autoscaling

Updates deployment autoscaling configuration.
result = client.update_deployment_autoscaling(
    deployment_id=os.getenv("DEPLOYMENT_ID", "deployment-uuid"),
    min_replicas=1,
    max_replicas=3,
)

delete_deployment

Deletes a deployment.
result = client.delete_deployment(
    deployment_id=os.getenv("DEPLOYMENT_ID", "deployment-uuid"),
)

Error Handling

The SDK raises SimplismartError for all API errors.
from simplismart import Simplismart, SimplismartError

client = Simplismart()
try:
    deployment = client.get_deployment(deployment_id="00000000-0000-0000-0000-000000000000")
except SimplismartError as e:
    print("Status:", e.status_code)
    print("Message:", e)
    print("Payload:", e.payload)
Expected output (for an invalid or missing deployment), matching the print statements above:
Status: 404
Message: No ModelDeployment matches the given query. (status=404)
Payload: {'detail': 'No ModelDeployment matches the given query.'}

SimplismartError Attributes

| Attribute | Type | Description |
| --- | --- | --- |
| status_code | int | HTTP status code |
| payload | dict | Full error response payload |
| message | str | Error message from backend |

BYOC Deployment

Create a BYOC deployment with a payload (cluster, nodegroup, etc.). See Bring your own compute and Deploy on imported cluster.
import json
from simplismart import Simplismart

client = Simplismart()

with open("byoc-create.json") as f:
    payload = json.load(f)

deployment = client.create_byoc_deployment(payload=payload)