|
import functools
import os
import tempfile

import gradio as gr
import whisper
from pydub import AudioSegment
|
|
|
def convert_6ch_wav_to_stereo(input_file_path, output_file_path):
    """Downmix a six-channel WAV file to stereo and write the result as WAV.

    If the input does not have exactly six channels it is re-exported to
    ``output_file_path`` without any mixing. Otherwise channels 0/1 are used
    as front left/right, channel 2 as center and channels 4/5 as back
    left/right. Channel 3 is not used (presumably the LFE channel of a 5.1
    layout -- confirm against the recordings).

    Args:
        input_file_path: WAV file to read.
        output_file_path: Where the (possibly downmixed) WAV file is written.
    """
    sound = AudioSegment.from_file(input_file_path, format="wav")

    if sound.channels != 6:
        sound.export(output_file_path, format="wav")
        return

    # Split once and index the result; every split_to_mono() call re-splits
    # the whole recording.
    mono_channels = sound.split_to_mono()
    front_left = mono_channels[0]
    front_right = mono_channels[1]
    # Subtracting a number from an AudioSegment lowers its gain by that many
    # dB, so center and back channels are mixed in 6 dB quieter than the fronts.
    center = mono_channels[2] - 6
    back_left = mono_channels[4] - 6
    back_right = mono_channels[5] - 6

    stereo_left = front_left.overlay(center).overlay(back_left)
    stereo_right = front_right.overlay(center).overlay(back_right)

    stereo_sound = AudioSegment.from_mono_audiosegments(stereo_left, stereo_right)
    stereo_sound.export(output_file_path, format="wav")
|
|
|
|
|
@functools.lru_cache(maxsize=1)
def _load_whisper_model():
    """Load the Whisper "medium" model on the CPU once and reuse it afterwards."""
    return whisper.load_model("medium", device="cpu")


def judge_command(file_path):
    """Transcribe the English speech in a WAV file and return the text.

    The input is first passed through ``convert_6ch_wav_to_stereo`` and the
    converted copy is what gets transcribed. The copy lives in a per-call
    temporary directory, so simultaneous calls do not overwrite each other's
    intermediate file and nothing is left behind in the working directory.

    Args:
        file_path: WAV file to transcribe.

    Returns:
        The ``'text'`` entry of the Whisper transcription result.
    """
    with tempfile.TemporaryDirectory() as work_dir:
        out_path = os.path.join(work_dir, "out.wav")
        convert_6ch_wav_to_stereo(file_path, out_path)
        # The model is cached by _load_whisper_model, so only the first call
        # pays the loading cost. Transcribe before the directory is removed.
        result = _load_whisper_model().transcribe(out_path, language="en")

    text_result = result['text']
    print(text_result)
    return text_result
|
|
|
|
|
def handle_audio_transcription(file_path):
    """Run the transcription for the UI and report the outcome.

    Args:
        file_path: The uploaded file as handed over by the UI, or a falsy
            value (e.g. ``None``) when nothing was uploaded.

    Returns:
        A ``(message, text_result)`` tuple. ``message`` is a status or error
        description; ``text_result`` is the transcription, or ``""`` when no
        transcription was produced.
    """
    # Nothing uploaded: say so directly instead of surfacing whatever error
    # the audio loader would raise for a missing path.
    if not file_path:
        return "Please upload a '.wav' file first.", ""

    try:
        text_result = judge_command(file_path)
    except Exception as e:
        # UI boundary: show the failure in the log instead of raising.
        # Some exceptions have an empty str(); fall back to repr() so the
        # log is never blank after a failure.
        return str(e) or repr(e), ""

    return "Transcription successful!", text_result
|
|
|
# Introductory text shown at the top of the page. Kept flush-left so the
# Markdown lines carry no leading whitespace.
_INTRO_MARKDOWN = '''
This space transcribes the spoken words from an audio file to text.
## How to use this Space?
- Upload a '.wav' file.
- The transcription of the audio will be shown after you click the transcribe button.
## Examples
- You can get the test examples from our [Roop Dataset Repo.](https://huggingface.co/datasets/SJTU-TES/WAV2COM)
'''

# Page layout: upload widget, status log, transcription output and the
# button that triggers the transcription.
with gr.Blocks() as audio_transcription_page:
    gr.Markdown(_INTRO_MARKDOWN)

    with gr.Row():
        with gr.Column():
            audio_file = gr.File(
                file_types=[".wav"],
                label="Upload a '.wav' file",
            )
            info = gr.Textbox(
                value="",
                label="Log",
                placeholder="Transcription results will appear here...",
            )
            transcription = gr.Textbox(
                value="",
                label="Transcription",
            )
            transcribe_button = gr.Button("Transcribe")

    # handle_audio_transcription returns (message, text_result); each value
    # needs its own output component, otherwise the whole tuple is rendered
    # as a single string in the log box.
    transcribe_button.click(
        handle_audio_transcription,
        [audio_file],
        [info, transcription],
    )


if __name__ == "__main__":
    audio_transcription_page.launch(debug=True)
|
|