Spaces:
Running
Running
Kit-Lemonfoot
committed on
Commit
•
12c4d09
1
Parent(s):
61f8fb4
Properly restricted audio length on Spaces. (The space isn't built for song covers. Take that shit to Colab or local.)
Browse files
app.py
CHANGED
@@ -28,6 +28,7 @@ from config import Config
|
|
28 |
config = Config()
|
29 |
logging.getLogger("numba").setLevel(logging.WARNING)
|
30 |
limitation = os.getenv("SYSTEM") == "spaces"
|
|
|
31 |
|
32 |
audio_mode = []
|
33 |
f0method_mode = ["pm", "crepe", "harvest"]
|
@@ -50,7 +51,7 @@ def infer(name, path, index, vc_audio_mode, vc_input, vc_upload, tts_text, tts_v
|
|
50 |
return "Please upload an audio file.", None
|
51 |
sampling_rate, audio = vc_upload
|
52 |
duration = audio.shape[0] / sampling_rate
|
53 |
-
if duration >
|
54 |
return "Too long! Please upload an audio file that is less than 1 minute.", None
|
55 |
audio = (audio / np.iinfo(audio.dtype).max).astype(np.float32)
|
56 |
if len(audio.shape) > 1:
|
@@ -58,12 +59,15 @@ def infer(name, path, index, vc_audio_mode, vc_input, vc_upload, tts_text, tts_v
|
|
58 |
if sampling_rate != 16000:
|
59 |
audio = librosa.resample(audio, orig_sr=sampling_rate, target_sr=16000)
|
60 |
elif vc_audio_mode == "TTS Audio":
|
61 |
-
if len(tts_text) >
|
62 |
return "Text is too long.", None
|
63 |
if tts_text is None or tts_voice is None:
|
64 |
return "You need to enter text and select a voice.", None
|
65 |
asyncio.run(edge_tts.Communicate(tts_text, "-".join(tts_voice.split('-')[:-1])).save("tts.mp3"))
|
66 |
audio, sr = librosa.load("tts.mp3", sr=16000, mono=True)
|
|
|
|
|
|
|
67 |
vc_input = "tts.mp3"
|
68 |
times = [0, 0, 0]
|
69 |
f0_up_key = int(f0_up_key)
|
@@ -379,7 +383,7 @@ if __name__ == '__main__':
|
|
379 |
vc_inst_preview = gr.Audio(label="Instrumental Preview", visible=False)
|
380 |
vc_audio_preview = gr.Audio(label="Audio Preview", visible=False)
|
381 |
# TTS
|
382 |
-
tts_text = gr.Textbox(visible=True, label="TTS text", info="Text to speech input", interactive=True)
|
383 |
tts_voice = gr.Dropdown(label="Edge-tts speaker", choices=voices, visible=True, allow_custom_value=False, value="en-US-AnaNeural-Female", interactive=True)
|
384 |
with gr.Column():
|
385 |
vc_transform0 = gr.Number(label="Transpose", value=0, info='Type "12" to change from male to female voice. Type "-12" to change female to male voice')
|
|
|
28 |
config = Config()
|
29 |
logging.getLogger("numba").setLevel(logging.WARNING)
|
30 |
limitation = os.getenv("SYSTEM") == "spaces"
|
31 |
+
#limitation=True
|
32 |
|
33 |
audio_mode = []
|
34 |
f0method_mode = ["pm", "crepe", "harvest"]
|
|
|
51 |
return "Please upload an audio file.", None
|
52 |
sampling_rate, audio = vc_upload
|
53 |
duration = audio.shape[0] / sampling_rate
|
54 |
+
if duration > 60 and limitation:
|
55 |
return "Too long! Please upload an audio file that is less than 1 minute.", None
|
56 |
audio = (audio / np.iinfo(audio.dtype).max).astype(np.float32)
|
57 |
if len(audio.shape) > 1:
|
|
|
59 |
if sampling_rate != 16000:
|
60 |
audio = librosa.resample(audio, orig_sr=sampling_rate, target_sr=16000)
|
61 |
elif vc_audio_mode == "TTS Audio":
|
62 |
+
if len(tts_text) > 250 and limitation:
|
63 |
return "Text is too long.", None
|
64 |
if tts_text is None or tts_voice is None:
|
65 |
return "You need to enter text and select a voice.", None
|
66 |
asyncio.run(edge_tts.Communicate(tts_text, "-".join(tts_voice.split('-')[:-1])).save("tts.mp3"))
|
67 |
audio, sr = librosa.load("tts.mp3", sr=16000, mono=True)
|
68 |
+
duration = audio.shape[0] / sr
|
69 |
+
if duration > 30 and limitation:
|
70 |
+
return "Your text generated an audio that was too long.", None
|
71 |
vc_input = "tts.mp3"
|
72 |
times = [0, 0, 0]
|
73 |
f0_up_key = int(f0_up_key)
|
|
|
383 |
vc_inst_preview = gr.Audio(label="Instrumental Preview", visible=False)
|
384 |
vc_audio_preview = gr.Audio(label="Audio Preview", visible=False)
|
385 |
# TTS
|
386 |
+
tts_text = gr.Textbox(visible=True, label="TTS text", info="Text to speech input (There is a limit of 250 characters)", interactive=True)
|
387 |
tts_voice = gr.Dropdown(label="Edge-tts speaker", choices=voices, visible=True, allow_custom_value=False, value="en-US-AnaNeural-Female", interactive=True)
|
388 |
with gr.Column():
|
389 |
vc_transform0 = gr.Number(label="Transpose", value=0, info='Type "12" to change from male to female voice. Type "-12" to change female to male voice')
|