WAV2COM / app.py
faiimea's picture
Update app.py
c487de7 verified
import logging
import os
import tempfile

import gradio as gr
import whisper
from pydub import AudioSegment
def convert_6ch_wav_to_stereo(input_file_path, output_file_path):
    """Down-mix a 6-channel WAV file to stereo and write the result as WAV.

    Input that does not have exactly six channels is re-exported unchanged,
    so ``output_file_path`` exists after every successful call.

    Args:
        input_file_path: Source WAV file, passed to ``AudioSegment.from_file``.
        output_file_path: Destination of the exported WAV file.
    """
    sound = AudioSegment.from_file(input_file_path, format="wav")
    if sound.channels != 6:
        sound.export(output_file_path, format="wav")
        return

    # Split once and reuse the result; every split_to_mono() call re-slices
    # the whole recording, so calling it per channel is wasted work.
    # NOTE(review): assumes the channel order FL, FR, C, LFE, BL, BR - confirm
    # against the recording setup. The fourth channel is not used in the mix.
    front_left, front_right, center, _unused, back_left, back_right = (
        sound.split_to_mono()
    )

    # Lower the centre and rear channels by 6 dB before mixing them into the
    # front pair (subtracting a number from an AudioSegment reduces its gain).
    center = center - 6
    back_left = back_left - 6
    back_right = back_right - 6

    stereo_left = front_left.overlay(center).overlay(back_left)
    stereo_right = front_right.overlay(center).overlay(back_right)

    stereo_sound = AudioSegment.from_mono_audiosegments(stereo_left, stereo_right)
    stereo_sound.export(output_file_path, format="wav")
# Whisper model shared by all requests; loaded lazily on first use because
# loading it is far more expensive than a single transcription setup.
_WHISPER_MODEL = None


def _get_whisper_model():
    """Return the shared Whisper "medium" CPU model, loading it on first use."""
    global _WHISPER_MODEL
    if _WHISPER_MODEL is None:
        _WHISPER_MODEL = whisper.load_model("medium", device="cpu")
    return _WHISPER_MODEL


def judge_command(file_path):
    """Transcribe the English speech in a WAV file and return the text.

    The input is first passed through ``convert_6ch_wav_to_stereo``; the
    converted audio is written to a unique temporary file (removed afterwards)
    so that concurrent requests cannot overwrite each other's audio.

    Args:
        file_path: The uploaded WAV file to transcribe.

    Returns:
        The ``'text'`` field of the Whisper transcription result.
    """
    whisper_model = _get_whisper_model()

    # mkstemp gives a unique path; close the descriptor right away because the
    # converter reopens the file by name.
    fd, out_path = tempfile.mkstemp(suffix=".wav")
    os.close(fd)
    try:
        convert_6ch_wav_to_stereo(file_path, out_path)
        result = whisper_model.transcribe(out_path, language="en")
    finally:
        os.remove(out_path)

    text_result = result['text']
    print(text_result)
    return text_result
def handle_audio_transcription(file_path):
    """Run a transcription for the UI and report the outcome.

    Args:
        file_path: The uploaded file as delivered by the upload component,
            or ``None`` when nothing has been uploaded.

    Returns:
        A ``(message, text_result)`` tuple. On success ``message`` is
        ``"Transcription successful!"`` and ``text_result`` is the transcript;
        on failure ``message`` describes the problem and ``text_result`` is
        an empty string.
    """
    # Nothing uploaded yet: answer immediately instead of loading the model
    # only to fail with an obscure library error.
    if file_path is None:
        return "No file uploaded. Please upload a '.wav' file.", ""

    try:
        text_result = judge_command(file_path)
    except Exception as e:
        # UI boundary: show the error to the user, but keep the traceback in
        # the server log so the failure can be diagnosed.
        logging.exception("Transcription failed for %r", file_path)
        return str(e), ""

    return "Transcription successful!", text_result
# Page layout: instructions, an upload widget, a status log, the transcript,
# and a button that triggers the transcription.
with gr.Blocks() as audio_transcription_page:
    gr.Markdown(
        '''
This space transcribes the spoken words from an audio file to text.
## How to use this Space?
- Upload a '.wav' file.
- The transcription of the audio will be shown after you click the transcribe button.
## Examples
- You can get the test examples from our [Roop Dataset Repo.](https://huggingface.co/datasets/SJTU-TES/WAV2COM)
'''
    )
    with gr.Row():
        with gr.Column():
            audio_file = gr.File(
                file_types=[".wav"],
                label="Upload a '.wav' file",
            )
            # Status line: success message or the error text.
            info = gr.Textbox(
                value="",
                label="Log",
            )
            # The transcript itself gets a dedicated box.
            transcription = gr.Textbox(
                value="",
                label="Transcription",
                placeholder="Transcription results will appear here...",
            )
            transcribe_button = gr.Button("Transcribe")

    # handle_audio_transcription returns (message, text_result); wire one
    # output component per returned value so each lands in its own box.
    transcribe_button.click(
        handle_audio_transcription,
        [audio_file],
        [info, transcription],
    )

if __name__ == "__main__":
    audio_transcription_page.launch(debug=True)