# Steph1949's picture
# Update app.py
# 9346450 verified
import gradio as gr
import torch
from transformers import WhisperProcessor, WhisperForConditionalGeneration
import soundfile as sf
# Whisper checkpoint on the Hugging Face Hub, shared by the processor and the model.
_CHECKPOINT = "openai/whisper-base"
# Run on the GPU when CUDA is available, otherwise on the CPU.
_DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# The processor and model are loaded once at import time and reused by every request.
processor = WhisperProcessor.from_pretrained(_CHECKPOINT)
model = WhisperForConditionalGeneration.from_pretrained(_CHECKPOINT)
model = model.to(_DEVICE)
def _prepare_waveform(audio, source_rate, target_rate):
    """Return *audio* as a 1-D float32 NumPy array sampled at *target_rate*.

    Multi-channel input is averaged down to mono, and the signal is
    resampled when the file's rate differs from the rate the model expects.
    """
    waveform = torch.as_tensor(audio, dtype=torch.float32)
    if waveform.ndim > 1:
        # soundfile returns (frames, channels) for multi-channel files;
        # the feature extractor needs a single 1-D channel.
        waveform = waveform.mean(dim=1)
    if source_rate != target_rate:
        target_len = max(1, round(waveform.shape[0] * target_rate / source_rate))
        # Linear interpolation keeps this free of extra dependencies. It has
        # no anti-aliasing filter, so swap in a dedicated resampler if
        # transcription quality on high-rate recordings matters.
        waveform = torch.nn.functional.interpolate(
            waveform[None, None, :],
            size=target_len,
            mode="linear",
            align_corners=False,
        )[0, 0]
    return waveform.numpy()


def transcribe(audio_path):
    """Transcribe the audio file at *audio_path* with the loaded Whisper model.

    Args:
        audio_path: Path to an audio file readable by ``soundfile``. Gradio
            passes ``None`` when the user submits without providing audio.

    Returns:
        The decoded transcription, or a string starting with ``"Error: "``
        describing what went wrong. Failures are reported as text rather
        than raised so the message is shown in the UI's output box.

    NOTE(review): the Whisper feature extractor is understood to pad or
    truncate input to a fixed 30-second window, so longer recordings would
    only be partially transcribed -- confirm against the transformers docs.
    """
    if not audio_path:
        return "Error: no audio file was provided"
    try:
        audio, sampling_rate = sf.read(audio_path)
        if len(audio) == 0:
            return "Error: audio file contains no samples"

        # The feature extractor rejects audio whose sampling rate differs
        # from the one the model was trained on, so convert up front.
        target_rate = processor.feature_extractor.sampling_rate
        waveform = _prepare_waveform(audio, sampling_rate, target_rate)

        features = processor(
            waveform, sampling_rate=target_rate, return_tensors="pt"
        ).input_features
        # Inputs must live on the same device as the model weights.
        features = features.to(model.device)

        predicted_ids = model.generate(features)
        return processor.decode(predicted_ids[0], skip_special_tokens=True)
    except Exception as e:
        # UI boundary: surface any failure to the user as text.
        return f"Error: {e}"
# Audio input component; Gradio passes the upload to `transcribe` as a file path.
_audio_input = gr.Audio(type="filepath")

# Single-function web UI: audio in, transcription text out.
iface = gr.Interface(
    fn=transcribe,
    inputs=_audio_input,
    outputs="text",
    title="Whisper Transcription",
    description="Upload an audio file and get the transcription using Whisper model.",
)

# Launch the app only when this file is executed as a script.
if __name__ == "__main__":
    iface.launch()