"""Gradio app: Tamil automatic speech recognition (Whisper) with mic and file-upload tabs."""

import gradio as gr
import numpy as np
import torch
from transformers import pipeline

# Use the first GPU when available; transformers pipelines accept a device string.
device = "cuda:0" if torch.cuda.is_available() else "cpu"

# chunk_length_s lets the pipeline transcribe audio longer than Whisper's 30 s window.
transcriber = pipeline(
    "automatic-speech-recognition",
    model="mahimairaja/whisper-base-tamil",
    chunk_length_s=15,
    device=device,
)

# Force Tamil transcription so the model skips language auto-detection.
transcriber.model.config.forced_decoder_ids = (
    transcriber.tokenizer.get_decoder_prompt_ids(language="ta", task="transcribe")
)


def transcribe(audio):
    """Run the ASR pipeline on *audio* (a filepath from gr.Audio) and return the text.

    Parameters
    ----------
    audio : str
        Path to the recorded or uploaded audio file (``type="filepath"``).

    Returns
    -------
    str
        The transcribed Tamil text.
    """
    return transcriber(audio)["text"]


TITLE = "ASR for ALL - Democratizing Tamil"

demo = gr.Blocks()

mic_transcribe = gr.Interface(
    fn=transcribe,
    # `sources` expects a list of source names in Gradio 4.x.
    inputs=gr.Audio(sources=["microphone"], type="filepath"),
    outputs="text",
    title=TITLE,
)

file_transcribe = gr.Interface(
    fn=transcribe,
    inputs=gr.Audio(sources=["upload"], type="filepath"),
    outputs="text",
    examples=[
        "assets/tamil-audio-01.mp3",
        "assets/tamil-audio-02.mp3",
        "assets/tamil-audio-03.mp3",
        "assets/tamil-audio-04.mp3",
    ],
    title=TITLE,
)

# TabbedInterface is itself a Blocks; rendering it inside `demo` mounts both tabs.
with demo:
    gr.TabbedInterface(
        [mic_transcribe, file_transcribe],
        ["Real Time Transcription", "Audio File", ],
    )

demo.launch()