| from transformers import pipeline |
| import os |
| import gradio as gr |
| import torch |
| from IPython.display import Audio as IPythonAudio |
| from gtts import gTTS |
| import IPython.display as ipd |
|
|
| |
# Speech recognition: English audio -> English text.
asr = pipeline(
    "automatic-speech-recognition",
    model="distil-whisper/distil-small.en",
)

# Machine translation; source/target languages are chosen per call.
translator = pipeline(
    "translation",
    model="facebook/nllb-200-distilled-600M",
    torch_dtype=torch.bfloat16,
)

# Text-to-speech used to narrate the translated text.
pipe = pipeline(
    task="text-to-speech",
    model="suno/bark-small",
    torch_dtype=torch.bfloat16,
)
|
|
| |
demo = gr.Blocks()


def transcribe_speech(filepath):
    """Turn English speech into narrated Hindi speech.

    The audio file is transcribed with ``asr``, the transcript is translated
    from English (``eng_Latn``) to Hindi (``hin_Deva``) with ``translator``,
    and the translation is synthesised with the text-to-speech ``pipe``.

    Args:
        filepath: Path to the recorded or uploaded audio file, or ``None``
            when nothing was submitted.

    Returns:
        A ``(sampling_rate, waveform)`` tuple suitable for a Gradio audio
        output, or ``None`` when there is nothing to narrate.
    """
    if filepath is None:
        gr.Warning("No audio found, please retry.")
        # None leaves the audio output empty; an empty string would be
        # interpreted by the audio component as a (non-existent) file path.
        return None

    english_text = asr(filepath)["text"]
    if not english_text.strip():
        # Silence or unintelligible audio: nothing to translate or narrate.
        gr.Warning("No speech detected, please retry.")
        return None

    output = translator(
        english_text,
        src_lang="eng_Latn",
        tgt_lang="hin_Deva",
    )
    narrated_text = pipe(output[0]["translation_text"])

    # The text-to-speech pipeline yields a dict with "audio" and
    # "sampling_rate"; the Gradio audio output needs (rate, samples) instead.
    # squeeze() drops a leading channel axis of size 1 if one is present
    # (assumes mono output -- TODO confirm for other TTS models).
    return narrated_text["sampling_rate"], narrated_text["audio"].squeeze()
| |
def _audio_interface(source):
    """Build a translation interface whose audio input comes from ``source``."""
    return gr.Interface(
        fn=transcribe_speech,
        inputs=gr.Audio(sources=source, type="filepath"),
        outputs="audio",
        allow_flagging="never",
    )


# One interface per input method; both run the same processing function.
mic_transcribe = _audio_interface("microphone")
file_transcribe = _audio_interface("upload")
# Expose the two interfaces as tabs inside the shared Blocks container.
tab_interfaces = [mic_transcribe, file_transcribe]
tab_labels = ["Transcribe Microphone", "Transcribe Audio File"]

with demo:
    gr.TabbedInterface(interface_list=tab_interfaces, tab_names=tab_labels)

# share=True requests a public share link in addition to the local server.
demo.launch(share=True)
# NOTE(review): close() runs as soon as launch() returns. In a plain script
# launch() presumably blocks until shutdown, but in a notebook it may return
# immediately and this would stop the app -- confirm the intended environment.
demo.close()