Spaces:

SJTU-TES
/

WAV2COM

Sleeping

App Files Files Community

WAV2COM / app.py

faiimea

Update app.py

c487de7 verified 7 months ago

raw

history blame contribute delete

2.49 kB

	import whisper
	from pydub import AudioSegment
	import gradio as gr

	def convert_6ch_wav_to_stereo(input_file_path, output_file_path):
	    """Write a stereo WAV version of a 6-channel WAV file.

	    If the input does not have exactly 6 channels it is re-exported as WAV
	    without any downmixing.

	    Args:
	        input_file_path: Path of the WAV file to read.
	        output_file_path: Path the resulting WAV file is written to.

	    Returns:
	        None. The result is written to ``output_file_path``.
	    """
	    sound = AudioSegment.from_file(input_file_path, format="wav")
	    if sound.channels != 6:
	        # Nothing to downmix: pass the audio through unchanged.
	        sound.export(output_file_path, format="wav")
	        return

	    # Split once and reuse the result; every split_to_mono() call walks the
	    # whole recording, so calling it per channel repeats that work.
	    channels = sound.split_to_mono()
	    front_left = channels[0]
	    front_right = channels[1]
	    # Channel index 3 is deliberately not mixed in
	    # (presumably the LFE channel of a 5.1 layout -- TODO confirm).
	    # Subtracting a number from an AudioSegment lowers its gain by that many
	    # dB, so centre and rear channels are attenuated by 6 dB before mixing.
	    center = channels[2] - 6
	    back_left = channels[4] - 6
	    back_right = channels[5] - 6

	    stereo_left = front_left.overlay(center).overlay(back_left)
	    stereo_right = front_right.overlay(center).overlay(back_right)
	    stereo_sound = AudioSegment.from_mono_audiosegments(stereo_left, stereo_right)
	    stereo_sound.export(output_file_path, format="wav")


	# Loaded Whisper models keyed by (name, device), so the model is read from
	# disk only once per process instead of once per request.
	_WHISPER_MODELS = {}


	def _get_whisper_model(name="medium", device="cpu"):
	    """Return the Whisper model for (name, device), loading it on first use."""
	    key = (name, device)
	    if key not in _WHISPER_MODELS:
	        _WHISPER_MODELS[key] = whisper.load_model(name, device=device)
	    return _WHISPER_MODELS[key]


	def judge_command(file_path):
	    """Transcribe the English speech in a WAV file and return the text.

	    The input is first passed through ``convert_6ch_wav_to_stereo`` and the
	    converted file is then transcribed with the Whisper "medium" model on CPU.

	    Args:
	        file_path: Path of the WAV file to transcribe.

	    Returns:
	        The ``'text'`` entry of the Whisper transcription result.
	    """
	    import os
	    import tempfile

	    whisper_model = _get_whisper_model("medium", device="cpu")

	    # Use a private scratch directory per call: a fixed './out.wav' would be
	    # shared by overlapping requests and left behind in the working directory.
	    with tempfile.TemporaryDirectory() as work_dir:
	        out_path = os.path.join(work_dir, "out.wav")
	        convert_6ch_wav_to_stereo(file_path, out_path)
	        result = whisper_model.transcribe(out_path, language="en")

	    text_result = result['text']
	    print(text_result)
	    return text_result


	def handle_audio_transcription(file_path):
	    """Transcribe ``file_path`` and report the outcome as a pair of strings.

	    Args:
	        file_path: Value forwarded unchanged to ``judge_command``.

	    Returns:
	        A ``(message, text_result)`` tuple. On success the message is
	        "Transcription successful!" and the text is the transcription; if
	        ``judge_command`` raises, the message is the exception text and the
	        transcription is an empty string.
	    """
	    try:
	        transcript = judge_command(file_path)
	    except Exception as err:
	        # Report the failure as a message instead of propagating it.
	        return str(err), ""
	    return "Transcription successful!", transcript

	# Build the Gradio page: instructions, a WAV upload, a status log, the
	# transcription output and the button that triggers the transcription.
	with gr.Blocks() as audio_transcription_page:

	    gr.Markdown(
	        '''
	This space transcribes the spoken words from an audio file to text.
	## How to use this Space?
	- Upload a '.wav' file.
	- The transcription of the audio will be shown after you click the transcribe button.
	## Examples
	- You can get the test examples from our [Roop Dataset Repo.](https://huggingface.co/datasets/SJTU-TES/WAV2COM)
	'''
	    )

	    with gr.Row():
	        with gr.Column():
	            audio_file = gr.File(
	                file_types=[".wav"],
	                label="Upload a '.wav' file",
	            )
	            # Status line: success message or the error text.
	            info = gr.Textbox(
	                value="",
	                label="Log",
	            )
	            # Separate box for the transcribed text itself.
	            transcription = gr.Textbox(
	                value="",
	                label="Transcription",
	                placeholder="Transcription results will appear here...",
	            )
	            transcribe_button = gr.Button("Transcribe")

	    # handle_audio_transcription returns (message, text_result), so two output
	    # components are wired; with a single output the whole tuple would be
	    # rendered as one string in the log box.
	    transcribe_button.click(
	        handle_audio_transcription,
	        [audio_file],
	        [info, transcription],
	    )

	# Start the web server only when the file is executed as a script.
	if __name__ == "__main__":
	    audio_transcription_page.launch(debug=True)