Chat templates
Top K
Top P
Temperature
Maximum response length in tokens
Progress
Console Output
JSON Example
API Call (Text)
{
  "text": "A dialog, where User interacts with a helpful, kind, obedient, honest and very reasonable assistant called Steve.\nUser: Hello, Steve.\nSteve: How can I assist you today?\nUser: What is your favourite movie?\nSteve:",
  "top_k": 40,
  "top_p": 0.9,
  "temperature": 0.8,
  "client_session_auth_key": "e1b12673-3beb-42eb-bd2c-1747a11dd084",
  "wait_for_result": false
}
Progress
{
  "success": true,
  "job_id": "JID43",
  "ep_version": 0,
  "job_state": "processing",
  "progress": {
    "job_id": "JID43",
    "start_time": 1708257097.326644,
    "start_time_compute": 1708257097.3285873,
    "progress": 3,
    "progress_data": {
      "text": "Oh, ",
      "num_generated_tokens": 3
    },
    "estimate": 1.2,
    "queue_position": 0,
    "num_workers_online": 1
  }
}
Result
{
  "success": true,
  "job_id": "JID43",
  "ep_version": 0,
  "job_result": {
    "success": true,
    "job_id": "JID43",
    "ep_version": 0,
    "text": "Oh, that's a tough one! I have so many favourite movies, but if I had to choose just one, I would say \"Toy Story\". It's such a classic and it always makes me laugh.\n",
    "num_generated_tokens": 52,
    "model_name": "llama-3-70b-instruct",
    "compute_duration": 1.8,
    "total_duration": 1.8,
    "auth": "neo06_NVIDIA A100-PCIE-40GB_0",
    "worker_interface_version": "AIME-API-Worker-Interface 0.8.0"
  },
  "job_state": "done",
  "progress": {
    "job_id": "JID43",
    "start_time": 1708257097.326644,
    "start_time_compute": 1708257097.3285873,
    "progress": 45,
    "progress_data": {
      "text": "Oh, that's a tough one! I have so many favourite movies, but if I had to choose just one, I would say \"Toy Story\". It's such a classic and it ",
      "num_generated_tokens": 45
    },
    "estimate": 0,
    "queue_position": 0,
    "num_workers_online": 1
  }
}