Skip to main content

Documentation Index

Fetch the complete documentation index at: https://docs.simplismart.ai/llms.txt

Use this file to discover all available pages before exploring further.

Manage model repositories using the client.model_repos attribute or convenience methods.

list_model_repos

Lists model repositories with optional filtering. Run with SIMPLISMART_PG_TOKEN set as an environment variable (e.g. in .env).
# List model repositories with optional filters.
# Requires SIMPLISMART_PG_TOKEN in the environment (e.g. loaded from .env).
from dotenv import load_dotenv
load_dotenv()

from simplismart import ModelRepoListParams, Simplismart

client = Simplismart()
repos = client.list_model_repos(
    ModelRepoListParams(
        offset=0,                           # pagination offset (default: 0)
        count=5,                            # page size (default: 5, max: 20)
        status="SUCCESS",                   # one of SUCCESS, FAILED, DELETED, PROGRESSING
        name="vision",                      # optional filter (contains match)
        model_type="BYOM",                  # optional filter
        created_by="user@example.com",      # optional filter (creator email)
    )
)
print(repos)
Expected output
{
  "limit": 5,
  "offset": 0,
  "count": 10,
  "results": [
    {
      "uuid": "model-repo-uuid",
      "name": "whisper-nemo-diarization",
      "source_type": "docker_hub",
      "source_url": "simplismart/MODEL-NAME:latest",
      "is_byom": true,
      "accelerator": null,
      "runtime_gpus": 1,
      "byom": {
        "image": "simplismart/MODEL-NAME:latest",
        "registry": "simplismart/REGISTRY-NAME",
        "tag": "latest"
      },
      "secrets": {
        "source_secret": {
          "uuid": "secret-uuid",
          "name": "SECRET-NAME"
        }
      },
      "status": "SUCCESS",
      "model_type": "byom",
      "env": {},
      "created_at": "2026-03-02T11:52:16.925151Z",
      "updated_at": "2026-03-02T11:52:16.925162Z",
      "org_id": "org-uuid",
      "healthcheck": {
        "path": "/health",
        "port": 8000,
        "periodSeconds": 10,
        "timeoutSeconds": 5,
        "initialDelaySeconds": 30
      },
      "ports": { "http": { "port": 8000 } },
      "metrics_path": [],
      "deployment_custom_configuration": { "command": [] }
    }
  ]
}

ModelRepoListParams

ParameterTypeDescriptionOptions
offsetintPagination offset (default: 0)≥ 0
countintPage size (default: 5, max: 20)0-20
model_idstr | NoneFilter by specific model repo UUID-
namestr | NoneFilter by name (contains match)-
statusstr | NoneFilter by statusSUCCESS, FAILED, DELETED, PROGRESSING
model_typestr | NoneFilter by model type-
created_bystr | NoneFilter by creator email-

get_model_repo

Gets a specific model repository by ID. Set MODEL_REPO_ID in env, or use a UUID from list_model_repos.
import os
from dotenv import load_dotenv
load_dotenv()

from simplismart import Simplismart

client = Simplismart()

# Fetch one repository by UUID; falls back to a placeholder when
# MODEL_REPO_ID is not set in the environment.
repo_uuid = os.getenv("MODEL_REPO_ID", "model-repo-uuid")
repo = client.get_model_repo(model_id=repo_uuid)
print(repo)
Expected output
{
  "uuid": "f58265ce-cfc4-4d32-8b4f-848f06c5e181",
  "name": "Tp-8-mdhv-llama",
  "source_type": "docker_hub",
  "source_url": "madhavbohra09/llama-3.2-1b:latest",
  "is_byom": true,
  "accelerator": null,
  "runtime_gpus": 8,
  "byom": {
    "image": "madhavbohra09/llama-3.2-1b:latest",
    "registry": "madhavbohra09/llama-3.2-1b",
    "tag": "latest"
  },
  "secrets": {
    "source_secret": {
      "uuid": "0b8af8a8-d149-4262-b5b4-804ee6b98311",
      "name": "docker creds"
    }
  },
  "status": "SUCCESS",
  "model_type": "byom",
  "env": {},
  "created_at": "2026-03-03T13:14:28.714649Z",
  "updated_at": "2026-03-03T13:14:28.714660Z",
  "org_id": "0bf00b43-430a-4ca3-a8b3-b13cc8dc6d4f",
  "is_public": false,
  "healthcheck": {
    "path": "/health",
    "port": 8000,
    "periodSeconds": 10,
    "timeoutSeconds": 5,
    "initialDelaySeconds": 30
  },
  "ports": {
    "http": {
      "port": 8000
    }
  },
  "metrics_path": [],
  "deployment_custom_configuration": {
    "command": []
  }
}

create_model_repo

Bring your own container-based models from Docker Hub, Depot or NVIDIA NGC registry. Use environment variables for credentials (e.g. SOURCE_SECRET_ID); do not hardcode secrets.
import os
from dotenv import load_dotenv
load_dotenv()

from simplismart import ModelRepoCreate, Simplismart

client = Simplismart()

# Registry coordinates come from the environment, with placeholders
# as fallbacks; the secret UUID is never hardcoded.
registry = os.getenv("REGISTRY_PATH", "your-docker-org/your-model")
tag = os.getenv("DOCKER_TAG", "latest")

params = ModelRepoCreate(
    name="vision-container-demo",
    source_type="docker_hub",
    runtime_gpus=1,
    source_secret=os.getenv("SOURCE_SECRET_ID"),
    registry_path=registry,
    docker_tag=tag,
    env={"EXAMPLE_KEY": "value"},
    healthcheck={"path": "/", "port": 8000},
    ports={"http": {"port": 8000}},
    metrics_path=["/v1/chat/completions"],
    deployment_custom_configuration={"command": ["python", "-m", "server"]},
)
repo = client.create_model_repo(params)

ModelRepoCreate

ParameterTypeDescriptionRequired
namestrModel repo name (1-255 chars)Yes
source_typestrRegistry source type. Options: docker_hub, depot, nvidiadockersecretYes
runtime_gpusintNumber of GPUs (≥ 0; typically 0 or 1 for BYOM)Yes
source_secretstr | NoneSecret UUID for registry authenticationConditional*
registry_pathstr | NoneRegistry path/repo name (max 255)Conditional*
docker_tagstr | NoneImage tag (max 255)Conditional*
envdict | NoneEnvironment variablesNo
healthcheckdict | NoneHealth check configurationNo
portsdict | NonePort mappingsNo
metrics_pathlist | NoneList of metrics pathsNo
deployment_custom_configurationdict | list | NoneCustom deployment configurationNo
*Required when source_type is docker_hub, depot, or nvidiadockersecret.

Source Type Options

ValueDescription
docker_hubDocker Hub registry
depotDepot registry
nvidiadockersecretNVIDIA NGC registry
Expected output
{'uuid': 'e20dd190-b0ed-4db5-a471-a580696b99bd', 'name': 'TEST-vision-container-demo', 'source_type': 'docker_hub', 'source_url': 'madhavbohra09/llama-3.2-1b:latest', 'is_byom': True, 'accelerator': None, 'runtime_gpus': 1, 'byom': {'image': 'madhavbohra09/llama-3.2-1b:latest', 'registry': 'madhavbohra09/llama-3.2-1b', 'tag': 'latest'}, 'secrets': {'source_secret': {'uuid': '0b8af8a8-d149-4262-b5b4-804ee6b98311', 'name': 'docker creds'}}, 'status': 'SUCCESS', 'model_type': 'byom', 'env': {'EXAMPLE_KEY': 'value'}, 'created_at': '2026-03-03T15:34:50.901581Z', 'updated_at': '2026-03-03T15:34:50.901592Z', 'org_id': '0bf00b43-430a-4ca3-a8b3-b13cc8dc6d4f', 'is_public': False, 'healthcheck': {'path': '/', 'port': 8000, 'initialDelaySeconds': 30, 'periodSeconds': 10, 'timeoutSeconds': 5}, 'ports': {'http': {'port': 8000}}, 'metrics_path': [], 'deployment_custom_configuration': {'command': []}}

create_model_repo_private_compile

Creates a private compile model repository: the platform compiles the model from a source (e.g. Hugging Face) using your model config, optimisation config, and pipeline config.
import json

from simplismart import ModelRepoCompileCreate, Simplismart

client = Simplismart()

# Load configs (e.g. from examples/private-compile-sample/)

def load_json_file(path):
    """Read *path* and return its parsed JSON content.

    JSON files are UTF-8 by specification, so the encoding is pinned
    explicitly instead of relying on the platform's locale default.
    """
    with open(path, "r", encoding="utf-8") as f:
        return json.load(f)


# Read the three config files first (same order as the request arguments).
model_cfg = load_json_file("model_config.json")
optimisation_cfg = load_json_file("optimisation_config.json")
pipeline_cfg = load_json_file("pipeline_config.json")

repo = client.create_model_repo_private_compile(
    ModelRepoCompileCreate(
        name="my-llama-repo",
        description="Llama model - private compile",
        source_type="huggingface",
        source_url="meta-llama/Llama-3.2-1B-Instruct",
        model_class="LlamaForCausalLM",
        accelerator_type="nvidia-h100",
        accelerator_count=0,
        cloud_account="your-cloud-account-uuid",
        model_config_data=model_cfg,
        optimisation_config=optimisation_cfg,
        pipeline_config=pipeline_cfg,
    )
)

ModelRepoCompileCreate

ParameterTypeDescriptionRequired
namestrModel repo nameYes
source_typestrSource type, e.g. huggingfaceYes
source_urlstrSource path/URL (e.g. HF repo id)Yes
modestrCompilation mode (default: public_hf). e.g. public_hf, private_hf, aws, gcp, public_url, simplismartYes
model_classstrModel class (e.g. LlamaForCausalLM)Yes
accelerator_typestrAccelerator type (e.g. nvidia-h100)Yes
org_idstr | NoneOrganization UUID (alias: org); optional if inferred from tokenNo
accelerator_countint | NoneAccelerator count (default: 0)No
cloud_accountstr | NoneCloud account UUIDNo
source_secretstr | NoneSecret UUID for source accessNo
lora_secretstr | NoneLoRA secret UUIDNo
model_config_datadict | NoneModel config JSON (alias: model_config); see belowNo
optimisation_configdict | NoneOptimisation config JSON (see below)No
pipeline_configdict | NonePipeline config JSON (see below)No
envdict | NoneEnvironment variablesNo
output_metadatadict | NoneOutput metadataNo
additional_detailsdict | NoneAdditional detailsNo
tagsdict | NoneTags objectNo
taskslist | NoneList of tasksNo
model_familystr | NoneModel familyNo
descriptionstr | NoneDescriptionNo
short_descriptionstr | NoneShort descriptionNo
dropdown_descriptionstr | NoneDropdown descriptionNo
processing_modestr | NoneOne of: SYNC, ASYNC, REALTIME_ASYNCNo
machine_typestr | NoneMachine typeNo
regionstr | NoneRegionNo
resource_groupstr | NoneResource groupNo
use_simplismart_infrastructurebool | NoneUse Simplismart infrastructureNo

Config files (private compile)

Example configs are in the SDK repo under examples/private-compile-sample/:
  • model_config.json — Model architecture and tokenizer options (e.g. architectures, hidden_size, max_position_embeddings, torch_dtype). Must match the model you are compiling.
Example (Llama-style):
{
  "architectures": ["LlamaForCausalLM"],
  "hidden_size": 2048,
  "intermediate_size": 8192,
  "max_position_embeddings": 131072,
  "model_type": "llama",
  "num_attention_heads": 32,
  "num_hidden_layers": 16,
  "num_key_value_heads": 8,
  "torch_dtype": "bfloat16",
  "vocab_size": 128256
}
  • optimisation_config.json — Backend, warmups, and optimisations (e.g. quantization, tensor_parallel_size, optimisations.dit_optimisation, backend). Example
{
  "model_type": "llm",
  "quantization": "float16",
  "tensor_parallel_size": 1,
  "warmups": { "enabled": true, "iterations": 5, "sample_input_data": [] },
  "backend": { "name": "auto", "version": "latest" },
  "optimisations": {
    "dit_optimisation": {
      "enabled": true,
      "attention_backend": { "type": "auto" },
      "compilation": { "enabled": false, "mode": "auto", "fullgraph": true, "dynamic": true }
    }
  }
}
  • pipeline_config.json — Pipeline type and options (e.g. type, loras, quantized_model_path, enable_model_caching, mode).
{
  "type": "llm",
  "loras": [],
  "lora_repo": { "type": "", "path": "", "ownership": "", "secret": { "type": "" } },
  "quantized_model_path": { "type": "", "path": "", "ownership": "", "secret": { "type": "" } },
  "extra_params": {},
  "enable_model_caching": true,
  "mode": "chat"
}
For a detailed example, check out the full Python example in the SDK repo: simplismart-python/examples/private-compile-sample/.

delete_model_repo

Deletes a model repository.
# Remove the repository identified by MODEL_REPO_ID (placeholder fallback).
result = client.delete_model_repo(
    model_id=os.getenv("MODEL_REPO_ID", "model-repo-uuid"),
)
# Returns: True on success