Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -65,12 +65,9 @@ def prepare_dataset(example):
|
|
65 |
|
66 |
return example
|
67 |
|
68 |
-
processed_example = prepare_dataset(dataset[
|
69 |
speaker_embeddings = torch.tensor(processed_example["speaker_embeddings"]).unsqueeze(0)
|
70 |
|
71 |
-
|
72 |
-
|
73 |
-
# etc.
|
74 |
# embeddings_dataset = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation")
|
75 |
# speaker_embeddings = torch.tensor(embeddings_dataset[7306]["xvector"]).unsqueeze(0)
|
76 |
|
@@ -96,7 +93,9 @@ def speech_to_speech_translation(audio):
|
|
96 |
title = "Demo STST - Multilingual to Català Speech"
|
97 |
description = """
|
98 |
Demo for cascaded speech-to-speech translation (STST), mapping from source speech in any language to target speech in Català. Demo uses OpenAI's [Whisper Base](https://huggingface.co/openai/whisper-base) model for speech translation to català, and Microsoft's
|
99 |
-
[SpeechT5 TTS](https://huggingface.co/microsoft/speecht5_tts) model for text-to-speech fine-tuned on [projecte-aina/openslr-slr69-ca-trimmed-denoised](https://huggingface.co/datasets/projecte-aina/openslr-slr69-ca-trimmed-denoised).
|
|
|
|
|
100 |
|
101 |
![Cascaded STST](https://huggingface.co/datasets/huggingface-course/audio-course-images/resolve/main/s2st_cascaded.png "Diagram of cascaded speech to speech translation")
|
102 |
"""
|
|
|
65 |
|
66 |
return example
|
67 |
|
68 |
+
processed_example = prepare_dataset(dataset[1])
|
69 |
speaker_embeddings = torch.tensor(processed_example["speaker_embeddings"]).unsqueeze(0)
|
70 |
|
|
|
|
|
|
|
71 |
# embeddings_dataset = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation")
|
72 |
# speaker_embeddings = torch.tensor(embeddings_dataset[7306]["xvector"]).unsqueeze(0)
|
73 |
|
|
|
93 |
title = "Demo STST - Multilingual to Català Speech"
|
94 |
description = """
|
95 |
Demo for cascaded speech-to-speech translation (STST), mapping from source speech in any language to target speech in Català. Demo uses OpenAI's [Whisper Base](https://huggingface.co/openai/whisper-base) model for speech translation to català, and Microsoft's
|
96 |
+
[SpeechT5 TTS](https://huggingface.co/microsoft/speecht5_tts) model for text-to-speech fine-tuned on [projecte-aina/openslr-slr69-ca-trimmed-denoised](https://huggingface.co/datasets/projecte-aina/openslr-slr69-ca-trimmed-denoised).
|
97 |
+
|
98 |
+
This demo can be improved by updating it to use the [projecte-aina/tts-ca-coqui-vits-multispeaker](https://huggingface.co/projecte-aina/tts-ca-coqui-vits-multispeaker) model.
|
99 |
|
100 |
![Cascaded STST](https://huggingface.co/datasets/huggingface-course/audio-course-images/resolve/main/s2st_cascaded.png "Diagram of cascaded speech to speech translation")
|
101 |
"""
|