# Captured from a Hugging Face Spaces page (Space status at capture time: Sleeping).
# NOTE(review): `spaces` is deliberately kept ahead of `torch`; on ZeroGPU
# hardware it presumably has to be imported before CUDA is touched -- confirm.
import spaces
import gradio as gr
import torch
from transformers import pipeline
import librosa
# Model configuration.
model_id = "kotoba-tech/kotoba-whisper-v1.0"

# Pick precision, device and attention backend from a single CUDA check:
# float16 + first GPU + SDPA attention when CUDA is available, otherwise
# float32 on the CPU with no extra model arguments.
if torch.cuda.is_available():
    torch_dtype = torch.float16
    device = "cuda:0"
    model_kwargs = {"attn_implementation": "sdpa"}
else:
    torch_dtype = torch.float32
    device = "cpu"
    model_kwargs = {}

# Generation options: transcribe (not translate) Japanese speech.
generate_kwargs = {"language": "japanese", "task": "transcribe"}
# Load the speech-recognition pipeline once, at import time.
pipe = pipeline(
    "automatic-speech-recognition",
    model=model_id,
    torch_dtype=torch_dtype,
    device=device,
    model_kwargs=model_kwargs,
    # Enable chunked long-form inference so recordings longer than Whisper's
    # 30-second input window are transcribed in full. Clips of 30 s or less
    # still fit in a single chunk.
    chunk_length_s=30,
)
def transcribe(audio):
    """Transcribe an uploaded audio file with the module-level pipeline.

    Args:
        audio: Path to the audio file supplied by the Gradio audio input
            (configured with ``type="filepath"``), or ``None``/empty when
            the form is submitted without a file.

    Returns:
        The recognised text, or an empty string when no audio was given.
    """
    # NOTE(review): this file imports ``spaces`` but never applies
    # ``@spaces.GPU`` -- confirm whether the Space runs on ZeroGPU hardware.
    if not audio:
        # Nothing was uploaded; return early instead of failing in librosa.
        return ""

    # Decode and resample in one step. The waveform is handed to the pipeline
    # as a bare array without rate metadata, so it is loaded at the same
    # 16 kHz the two-step load/resample targeted.
    target_sr = 16000
    waveform, _ = librosa.load(audio, sr=target_sr)

    # Run inference.
    result = pipe(waveform, generate_kwargs=generate_kwargs)
    return result["text"]
# HTML banner rendered under the title: the centred KotobaTranscriber logo.
description = """
<p align="center">
<img src="https://huggingface.co/datasets/MakiAi/IconAssets/resolve/main/KotobaTranscriber.png" width="70%">
<br>
</p>
"""

# Gradio theme identifier passed to the interface.
theme = "JohnSmith9982/small_and_pretty"
# Define the Gradio interface: one audio upload in, transcribed text out.
iface = gr.Interface(
    fn=transcribe,
    # The audio component hands the callback a file path rather than samples.
    inputs=gr.Audio(type="filepath", label="Upload Audio (MP3 or MP4)"),
    outputs="text",
    title="KotobaTranscriber",
    description=description,
    theme=theme,
)
# Launch the app, listening on all network interfaces at port 7860.
# NOTE(review): share=True asks Gradio for a public share link; Gradio reports
# this as unsupported on Hugging Face Spaces -- confirm it is still wanted.
iface.launch(server_name="0.0.0.0", server_port=7860, share=True)