import gc  # used to free up memory between runs
import random

import gradio as gr
import spaces
import torch
from audiosr import build_model, super_resolution


@spaces.GPU(duration=180)
def inference(audio_file, model_name, guidance_scale, ddim_steps, seed):
    # Build the AudioSR model for the selected checkpoint.
    audiosr = build_model(model_name=model_name)

    if torch.cuda.is_available():
        torch.cuda.empty_cache()  # empty the CUDA cache
    gc.collect()

    # A seed of 0 means "pick a random seed".
    if seed == 0:
        seed = random.randint(1, 2**32 - 1)

    waveform = super_resolution(
        audiosr,
        audio_file,
        seed,
        guidance_scale=guidance_scale,
        ddim_steps=ddim_steps,
    )

    # Release GPU memory held by the model before returning.
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
    gc.collect()

    # AudioSR outputs audio at 48 kHz.
    return (48000, waveform)


iface = gr.Interface(
    fn=inference,
    inputs=[
        gr.Audio(type="filepath", label="Input Audio"),
        gr.Dropdown(["basic", "speech"], value="basic", label="Model"),
        gr.Slider(
            1, 10, value=3.5, step=0.1, label="Guidance Scale",
            info="Guidance scale (large => better quality and relevance to the input; small => better diversity)",
        ),
        gr.Slider(
            1, 100, value=50, step=1, label="DDIM Steps",
            info="Number of sampling steps for DDIM",
        ),
        gr.Number(
            value=42, precision=0, label="Seed",
            info="Changing this value (any integer) leads to a different generation result; use 0 for a random seed.",
        ),
    ],
    outputs=gr.Audio(type="numpy", label="Output Audio"),
    title="AudioSR",
    description="Audio Super Resolution with AudioSR",
)

iface.launch(share=False)