SeamlessM4T v2 is a foundational multilingual and multitask model by Meta that delivers state-of-the-art results for translation and transcription across speech and text. Its architecture features a non-autoregressive text-to-unit decoder that improves consistency between text and speech output.
Progress
Console Output
Text Output
JSON Example
API Call (Text)
{
"src_lang":"deu",
"tgt_lang":"eng",
"generate_audio":true,
"text_input":"Text to be translated...",
"client_session_auth_key":"2e561c1b-...-2af93ebaa47a",
"wait_for_result":false
}
API Call (Audio)
{
"src_lang":"deu",
"tgt_lang":"eng",
"generate_audio":true,
"audio_input":"data:application/octet-stream;base64,T2dnUwAC...F1Ad",
"client_session_auth_key":"8a59bdb2-...-7880d2ff2bc2",
"wait_for_result":false
}
Progress
{
"success":true,
"job_id":"JID85",
"ep_version":0,
"job_state":"processing",
"progress":{
"progress":0,
"estimate":0.54,
"queue_position":0,
"num_workers_online":1
}
}
Result
{
"success":true,
"job_id":"JID85",
"ep_version":0,
"job_result":{
"success":true,
"job_id":"JID85",
"ep_version":0,
"text_output":"Translated text...",
"audio_output":"data:audio/wav;base64,UklGR.../2X/cf8=",
"task":"S2ST",
"model_name":"seamlessM4T_v2_large",
"compute_duration":0.2,
"total_duration":0.5,
"auth":"instance_06_NVIDIA GeForce RTX 3090 Ti_0",
"worker_interface_version":"AIME-API-Worker-Interface 0.3.8"
},
"job_state":"done",
"progress":{
"progress":0,
"estimate":0.54,
"queue_position":0,
"num_workers_online":1
}
}