"""Gradio demo that transcribes an uploaded audio file with OpenAI Whisper."""

import logging

import gradio as gr
import numpy as np
import soundfile as sf
import torch
from transformers import WhisperForConditionalGeneration, WhisperProcessor

logger = logging.getLogger(__name__)

# Load Whisper model and processor from Hugging Face.
processor = WhisperProcessor.from_pretrained("openai/whisper-base")
model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-base").to(
    "cuda" if torch.cuda.is_available() else "cpu"
)

# The feature extractor rejects audio whose sampling rate differs from the
# rate the model was trained on, so every input is converted to this rate.
TARGET_SAMPLING_RATE = processor.feature_extractor.sampling_rate


def _to_mono(audio):
    """Average the channels of a ``(frames, channels)`` array into 1-D mono."""
    return audio.mean(axis=1)


def _resample(audio, orig_rate, target_rate):
    """Resample 1-D ``audio`` from ``orig_rate`` to ``target_rate`` (Hz).

    Uses linear interpolation, which needs only NumPy. It applies no
    anti-aliasing filter, so it is adequate for speech but not a
    high-fidelity resampler.
    """
    if orig_rate == target_rate or audio.size == 0:
        return audio
    n_target = max(1, int(round(audio.shape[0] * target_rate / orig_rate)))
    src_times = np.arange(audio.shape[0]) / orig_rate
    dst_times = np.arange(n_target) / target_rate
    return np.interp(dst_times, src_times, audio).astype(np.float32)


def transcribe(audio_path):
    """Transcribe the audio file at ``audio_path`` with Whisper.

    The file is decoded with soundfile, downmixed to mono, and resampled to
    the rate the Whisper feature extractor expects before being passed to
    the model.

    Args:
        audio_path: Path of the uploaded audio file, or ``None`` when the
            user submitted without providing one.

    Returns:
        The transcription text, or a string starting with ``"Error: "``
        describing what went wrong. Errors are returned rather than raised
        so they are shown in the Gradio text output.

    NOTE(review): the Whisper feature extractor pads/truncates its input to
    a fixed-length window (30 s for the stock checkpoints -- confirm), so
    longer recordings are presumably only partially transcribed.
    """
    if not audio_path:
        return "Error: no audio file provided."

    try:
        # always_2d gives a uniform (frames, channels) layout for mono and
        # multi-channel files; float32 matches what the model consumes.
        audio, sampling_rate = sf.read(audio_path, dtype="float32", always_2d=True)
        if audio.size == 0:
            return "Error: audio file contains no samples."

        audio = _resample(_to_mono(audio), sampling_rate, TARGET_SAMPLING_RATE)

        inputs = processor(
            audio, sampling_rate=TARGET_SAMPLING_RATE, return_tensors="pt"
        ).input_features

        # Move to the device the model lives on.
        inputs = inputs.to(model.device)

        predicted_ids = model.generate(inputs)
        return processor.decode(predicted_ids[0], skip_special_tokens=True)
    except Exception as e:  # UI boundary: report the failure instead of crashing.
        logger.exception("Transcription failed for %r", audio_path)
        return f"Error: {e}"


# Create a Gradio interface.
iface = gr.Interface(
    fn=transcribe,
    inputs=gr.Audio(type="filepath"),
    outputs="text",
    title="Whisper Transcription",
    description="Upload an audio file and get the transcription using Whisper model.",
)

if __name__ == "__main__":
    iface.launch()