update
Browse files- .gitignore +1 -1
- CITATION.cff +10 -0
- README.md +27 -0
- api.py +1 -1
- requirements.txt +3 -1
- samples_generator.py +51 -0
- tortoise_v2_examples.html +1 -0
- utils/audio.py +1 -1
.gitignore
CHANGED
@@ -20,7 +20,6 @@ parts/
|
|
20 |
sdist/
|
21 |
var/
|
22 |
wheels/
|
23 |
-
results/*
|
24 |
pip-wheel-metadata/
|
25 |
share/python-wheels/
|
26 |
*.egg-info/
|
@@ -130,3 +129,4 @@ dmypy.json
|
|
130 |
.pyre/
|
131 |
|
132 |
.idea/*
|
|
|
|
20 |
sdist/
|
21 |
var/
|
22 |
wheels/
|
|
|
23 |
pip-wheel-metadata/
|
24 |
share/python-wheels/
|
25 |
*.egg-info/
|
|
|
129 |
.pyre/
|
130 |
|
131 |
.idea/*
|
132 |
+
.models/*
|
CITATION.cff
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
cff-version: 1.3.0
|
2 |
+
message: "If you use this software, please cite it as below."
|
3 |
+
authors:
|
4 |
+
- family-names: "Betker"
|
5 |
+
given-names: "James"
|
6 |
+
orcid: "https://orcid.org/0000-0003-3259-4862"
|
7 |
+
title: "TorToiSe text-to-speech"
|
8 |
+
version: 2.0
|
9 |
+
date-released: 2022-04-28
|
10 |
+
url: "https://github.com/neonbjb/tortoise-tts"
|
README.md
CHANGED
@@ -132,6 +132,21 @@ utterances of a specific string of text. The impact of community involvement in
|
|
132 |
GPT-3 or CLIP) has really surprised me. If you find something neat that you can do with Tortoise that isn't documented here,
|
133 |
please report it to me! I would be glad to publish it to this page.
|
134 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
135 |
## Model architecture
|
136 |
|
137 |
Tortoise TTS is inspired by OpenAI's DALLE, applied to speech data and using a better decoder. It is made up of 5 separate
|
@@ -186,6 +201,18 @@ Imagine what a TTS model trained at or near GPT-3 or DALLE scale could achieve.
|
|
186 |
If you are an ethical organization with computational resources to spare interested in seeing what this model could do
|
187 |
if properly scaled out, please reach out to me! I would love to collaborate on this.
|
188 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
189 |
## Notice
|
190 |
|
191 |
Tortoise was built entirely by me using my own hardware. My employer was not involved in any facet of Tortoise's development.
|
|
|
132 |
GPT-3 or CLIP) has really surprised me. If you find something neat that you can do with Tortoise that isn't documented here,
|
133 |
please report it to me! I would be glad to publish it to this page.
|
134 |
|
135 |
+
## Tortoise-detect
|
136 |
+
|
137 |
+
Out of concern that this model might be misused, I've built a classifier that estimates the likelihood that an audio clip
|
138 |
+
came from Tortoise.
|
139 |
+
|
140 |
+
This classifier can be run on any computer. Usage is as follows:
|
141 |
+
|
142 |
+
```commandline
|
143 |
+
python is_this_from_tortoise.py --clip=<path_to_suspicious_audio_file>
|
144 |
+
```
|
145 |
+
|
146 |
+
This model has 100% accuracy on the contents of the results/ and voices/ folders in this repo. Still, treat this classifier
|
147 |
+
as a "strong signal". Classifiers can be fooled and it is likewise not impossible for this classifier to exhibit false
|
148 |
+
positives.
|
149 |
+
|
150 |
## Model architecture
|
151 |
|
152 |
Tortoise TTS is inspired by OpenAI's DALLE, applied to speech data and using a better decoder. It is made up of 5 separate
|
|
|
201 |
If you are an ethical organization with computational resources to spare interested in seeing what this model could do
|
202 |
if properly scaled out, please reach out to me! I would love to collaborate on this.
|
203 |
|
204 |
+
## Acknowledgements
|
205 |
+
|
206 |
+
This project has garnered more praise than I expected. I am standing on the shoulders of giants, though, and I want to
|
207 |
+
credit a few of the amazing folks in the community that have helped make this happen:
|
208 |
+
|
209 |
+
- Hugging Face, who wrote the GPT model and the generate API used by Tortoise, and who hosts the model weights.
|
210 |
+
- [Ramesh et al](https://arxiv.org/pdf/2102.12092.pdf) who authored the DALLE paper, which is the inspiration behind Tortoise.
|
211 |
+
- [Nichol and Dhariwal](https://arxiv.org/pdf/2102.09672.pdf) who authored (a revision of) the code that drives the diffusion model.
|
212 |
+
- [Jang et al](https://arxiv.org/pdf/2106.07889.pdf) who developed and open-sourced univnet, the vocoder this repo uses.
|
213 |
+
- [lucidrains](https://github.com/lucidrains) who writes awesome open source pytorch models, many of which are used here.
|
214 |
+
- [Patrick von Platen](https://huggingface.co/patrickvonplaten) whose guides on setting up wav2vec were invaluable to building my dataset.
|
215 |
+
|
216 |
## Notice
|
217 |
|
218 |
Tortoise was built entirely by me using my own hardware. My employer was not involved in any facet of Tortoise's development.
|
api.py
CHANGED
@@ -154,7 +154,7 @@ def classify_audio_clip(clip):
|
|
154 |
:param clip: torch tensor containing audio waveform data (get it from load_audio)
|
155 |
:return: True if the clip was classified as coming from Tortoise and false if it was classified as real.
|
156 |
"""
|
157 |
-
download_models(['classifier'])
|
158 |
classifier = AudioMiniEncoderWithClassifierHead(2, spec_dim=1, embedding_dim=512, depth=5, downsample_factor=4,
|
159 |
resnet_blocks=2, attn_blocks=4, num_attn_heads=4, base_channels=32,
|
160 |
dropout=0, kernel_size=5, distribute_zero_label=False)
|
|
|
154 |
:param clip: torch tensor containing audio waveform data (get it from load_audio)
|
155 |
:return: True if the clip was classified as coming from Tortoise and false if it was classified as real.
|
156 |
"""
|
157 |
+
download_models(['classifier.pth'])
|
158 |
classifier = AudioMiniEncoderWithClassifierHead(2, spec_dim=1, embedding_dim=512, depth=5, downsample_factor=4,
|
159 |
resnet_blocks=2, attn_blocks=4, num_attn_heads=4, base_channels=32,
|
160 |
dropout=0, kernel_size=5, distribute_zero_label=False)
|
requirements.txt
CHANGED
@@ -7,4 +7,6 @@ inflect
|
|
7 |
progressbar
|
8 |
einops
|
9 |
unidecode
|
10 |
-
entmax
|
|
|
|
|
|
7 |
progressbar
|
8 |
einops
|
9 |
unidecode
|
10 |
+
entmax
|
11 |
+
scipy
|
12 |
+
librosa
|
samples_generator.py
ADDED
@@ -0,0 +1,51 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
|
3 |
+
# This script builds the sample webpage.
|
4 |
+
|
5 |
+
if __name__ == '__main__':
|
6 |
+
result = "<html><head><title>These words were never spoken.</title></head><body><h1>Handpicked results</h1>"
|
7 |
+
for fv in os.listdir('results/favorites'):
|
8 |
+
url = f'https://github.com/neonbjb/tortoise-tts/raw/main/results/favorites/{fv}'
|
9 |
+
result = result + f'<audio controls="" style="width: 600px;"><source src="{url}" type="audio/mp3"></audio><br>\n'
|
10 |
+
|
11 |
+
result = result + "<h1>Handpicked longform result:<h1>"
|
12 |
+
url = f'https://github.com/neonbjb/tortoise-tts/raw/main/results/favorite_riding_hood.mp3'
|
13 |
+
result = result + f'<audio controls="" style="width: 600px;"><source src="{url}" type="audio/mp3"></audio><br>\n'
|
14 |
+
|
15 |
+
result = result + "<h1>Compared to Tacotron2 (with the LJSpeech voice):</h1><table><th>Tacotron2+Waveglow</th><th>TorToiSe</th>"
|
16 |
+
for k in range(2,5,1):
|
17 |
+
url1 = f'https://github.com/neonbjb/tortoise-tts/raw/main/results/tacotron_comparison/{k}-tacotron2.mp3'
|
18 |
+
url2 = f'https://github.com/neonbjb/tortoise-tts/raw/main/results/tacotron_comparison/{k}-tortoise.mp3'
|
19 |
+
result = result + f'<tr><td><audio controls="" style="width: 300px;"><source src="{url1}" type="audio/mp3"></audio><br>\n</td>' \
|
20 |
+
f'<td><audio controls="" style="width: 300px;"><source src="{url2}" type="audio/mp3"></audio><br>\n</td></tr>'
|
21 |
+
result = result + "</table>"
|
22 |
+
|
23 |
+
result = result + "<h1>Various spoken texts for all voices:<h1>"
|
24 |
+
voices = ['angie', 'daniel', 'deniro', 'emma', 'freeman', 'geralt', 'halle', 'jlaw', 'lj', 'myself',
|
25 |
+
'pat', 'snakes', 'tom', 'train_atkins', 'train_dotrice', 'train_kennard', 'weaver', 'william']
|
26 |
+
lines = ['<table><th>text</th>' + ''.join([f'<th>{v}</th>' for v in voices])]
|
27 |
+
line = f'<tr><td>reference clip</td>'
|
28 |
+
for v in voices:
|
29 |
+
url = f'https://github.com/neonbjb/tortoise-tts/raw/main/voices/{v}/1.wav'
|
30 |
+
line = line + f'<td><audio controls="" style="width: 150px;"><source src="{url}" type="audio/mp3"></audio></td>'
|
31 |
+
line = line + "</tr>"
|
32 |
+
lines.append(line)
|
33 |
+
for txt in os.listdir('results/various/'):
|
34 |
+
if 'desktop' in txt:
|
35 |
+
continue
|
36 |
+
line = f'<tr><td>{txt}</td>'
|
37 |
+
for v in voices:
|
38 |
+
url = f'https://github.com/neonbjb/tortoise-tts/raw/main/results/various/{txt}/{v}.mp3'
|
39 |
+
line = line + f'<td><audio controls="" style="width: 150px;"><source src="{url}" type="audio/mp3"></audio></td>'
|
40 |
+
line = line + "</tr>"
|
41 |
+
lines.append(line)
|
42 |
+
result = result + '\n'.join(lines) + "</table>"
|
43 |
+
|
44 |
+
result = result + "<h1>Longform result for all voices:</h1>"
|
45 |
+
for lf in os.listdir('results/riding_hood'):
|
46 |
+
url = f'https://github.com/neonbjb/tortoise-tts/raw/main/results/riding_hood/{lf}'
|
47 |
+
result = result + f'<audio controls="" style="width: 600px;"><source src="{url}" type="audio/mp3"></audio><br>\n'
|
48 |
+
|
49 |
+
result = result + "</body></html>"
|
50 |
+
with open('result.html', 'w', encoding='utf-8') as f:
|
51 |
+
f.write(result)
|
tortoise_v2_examples.html
CHANGED
@@ -24,6 +24,7 @@
|
|
24 |
</td></tr><tr><td><audio controls="" style="width: 300px;"><source src="https://github.com/neonbjb/tortoise-tts/raw/main/results/tacotron_comparison/4-tacotron2.mp3" type="audio/mp3"></audio><br>
|
25 |
</td><td><audio controls="" style="width: 300px;"><source src="https://github.com/neonbjb/tortoise-tts/raw/main/results/tacotron_comparison/4-tortoise.mp3" type="audio/mp3"></audio><br>
|
26 |
</td></tr></table><h1>Various spoken texts for all voices:<h1><table><th>text</th><th>angie</th><th>daniel</th><th>deniro</th><th>emma</th><th>freeman</th><th>geralt</th><th>halle</th><th>jlaw</th><th>lj</th><th>myself</th><th>pat</th><th>snakes</th><th>tom</th><th>train_atkins</th><th>train_dotrice</th><th>train_kennard</th><th>weaver</th><th>william</th>
|
|
|
27 |
<tr><td>autoregressive_ml</td><td><audio controls="" style="width: 150px;"><source src="https://github.com/neonbjb/tortoise-tts/raw/main/results/various/autoregressive_ml/angie.mp3" type="audio/mp3"></audio></td><td><audio controls="" style="width: 150px;"><source src="https://github.com/neonbjb/tortoise-tts/raw/main/results/various/autoregressive_ml/daniel.mp3" type="audio/mp3"></audio></td><td><audio controls="" style="width: 150px;"><source src="https://github.com/neonbjb/tortoise-tts/raw/main/results/various/autoregressive_ml/deniro.mp3" type="audio/mp3"></audio></td><td><audio controls="" style="width: 150px;"><source src="https://github.com/neonbjb/tortoise-tts/raw/main/results/various/autoregressive_ml/emma.mp3" type="audio/mp3"></audio></td><td><audio controls="" style="width: 150px;"><source src="https://github.com/neonbjb/tortoise-tts/raw/main/results/various/autoregressive_ml/freeman.mp3" type="audio/mp3"></audio></td><td><audio controls="" style="width: 150px;"><source src="https://github.com/neonbjb/tortoise-tts/raw/main/results/various/autoregressive_ml/geralt.mp3" type="audio/mp3"></audio></td><td><audio controls="" style="width: 150px;"><source src="https://github.com/neonbjb/tortoise-tts/raw/main/results/various/autoregressive_ml/halle.mp3" type="audio/mp3"></audio></td><td><audio controls="" style="width: 150px;"><source src="https://github.com/neonbjb/tortoise-tts/raw/main/results/various/autoregressive_ml/jlaw.mp3" type="audio/mp3"></audio></td><td><audio controls="" style="width: 150px;"><source src="https://github.com/neonbjb/tortoise-tts/raw/main/results/various/autoregressive_ml/lj.mp3" type="audio/mp3"></audio></td><td><audio controls="" style="width: 150px;"><source src="https://github.com/neonbjb/tortoise-tts/raw/main/results/various/autoregressive_ml/myself.mp3" type="audio/mp3"></audio></td><td><audio controls="" style="width: 150px;"><source 
src="https://github.com/neonbjb/tortoise-tts/raw/main/results/various/autoregressive_ml/pat.mp3" type="audio/mp3"></audio></td><td><audio controls="" style="width: 150px;"><source src="https://github.com/neonbjb/tortoise-tts/raw/main/results/various/autoregressive_ml/snakes.mp3" type="audio/mp3"></audio></td><td><audio controls="" style="width: 150px;"><source src="https://github.com/neonbjb/tortoise-tts/raw/main/results/various/autoregressive_ml/tom.mp3" type="audio/mp3"></audio></td><td><audio controls="" style="width: 150px;"><source src="https://github.com/neonbjb/tortoise-tts/raw/main/results/various/autoregressive_ml/train_atkins.mp3" type="audio/mp3"></audio></td><td><audio controls="" style="width: 150px;"><source src="https://github.com/neonbjb/tortoise-tts/raw/main/results/various/autoregressive_ml/train_dotrice.mp3" type="audio/mp3"></audio></td><td><audio controls="" style="width: 150px;"><source src="https://github.com/neonbjb/tortoise-tts/raw/main/results/various/autoregressive_ml/train_kennard.mp3" type="audio/mp3"></audio></td><td><audio controls="" style="width: 150px;"><source src="https://github.com/neonbjb/tortoise-tts/raw/main/results/various/autoregressive_ml/weaver.mp3" type="audio/mp3"></audio></td><td><audio controls="" style="width: 150px;"><source src="https://github.com/neonbjb/tortoise-tts/raw/main/results/various/autoregressive_ml/william.mp3" type="audio/mp3"></audio></td></tr>
|
28 |
<tr><td>bengio_it_needs_to_know_what_is_bad</td><td><audio controls="" style="width: 150px;"><source src="https://github.com/neonbjb/tortoise-tts/raw/main/results/various/bengio_it_needs_to_know_what_is_bad/angie.mp3" type="audio/mp3"></audio></td><td><audio controls="" style="width: 150px;"><source src="https://github.com/neonbjb/tortoise-tts/raw/main/results/various/bengio_it_needs_to_know_what_is_bad/daniel.mp3" type="audio/mp3"></audio></td><td><audio controls="" style="width: 150px;"><source src="https://github.com/neonbjb/tortoise-tts/raw/main/results/various/bengio_it_needs_to_know_what_is_bad/deniro.mp3" type="audio/mp3"></audio></td><td><audio controls="" style="width: 150px;"><source src="https://github.com/neonbjb/tortoise-tts/raw/main/results/various/bengio_it_needs_to_know_what_is_bad/emma.mp3" type="audio/mp3"></audio></td><td><audio controls="" style="width: 150px;"><source src="https://github.com/neonbjb/tortoise-tts/raw/main/results/various/bengio_it_needs_to_know_what_is_bad/freeman.mp3" type="audio/mp3"></audio></td><td><audio controls="" style="width: 150px;"><source src="https://github.com/neonbjb/tortoise-tts/raw/main/results/various/bengio_it_needs_to_know_what_is_bad/geralt.mp3" type="audio/mp3"></audio></td><td><audio controls="" style="width: 150px;"><source src="https://github.com/neonbjb/tortoise-tts/raw/main/results/various/bengio_it_needs_to_know_what_is_bad/halle.mp3" type="audio/mp3"></audio></td><td><audio controls="" style="width: 150px;"><source src="https://github.com/neonbjb/tortoise-tts/raw/main/results/various/bengio_it_needs_to_know_what_is_bad/jlaw.mp3" type="audio/mp3"></audio></td><td><audio controls="" style="width: 150px;"><source src="https://github.com/neonbjb/tortoise-tts/raw/main/results/various/bengio_it_needs_to_know_what_is_bad/lj.mp3" type="audio/mp3"></audio></td><td><audio controls="" style="width: 150px;"><source 
src="https://github.com/neonbjb/tortoise-tts/raw/main/results/various/bengio_it_needs_to_know_what_is_bad/myself.mp3" type="audio/mp3"></audio></td><td><audio controls="" style="width: 150px;"><source src="https://github.com/neonbjb/tortoise-tts/raw/main/results/various/bengio_it_needs_to_know_what_is_bad/pat.mp3" type="audio/mp3"></audio></td><td><audio controls="" style="width: 150px;"><source src="https://github.com/neonbjb/tortoise-tts/raw/main/results/various/bengio_it_needs_to_know_what_is_bad/snakes.mp3" type="audio/mp3"></audio></td><td><audio controls="" style="width: 150px;"><source src="https://github.com/neonbjb/tortoise-tts/raw/main/results/various/bengio_it_needs_to_know_what_is_bad/tom.mp3" type="audio/mp3"></audio></td><td><audio controls="" style="width: 150px;"><source src="https://github.com/neonbjb/tortoise-tts/raw/main/results/various/bengio_it_needs_to_know_what_is_bad/train_atkins.mp3" type="audio/mp3"></audio></td><td><audio controls="" style="width: 150px;"><source src="https://github.com/neonbjb/tortoise-tts/raw/main/results/various/bengio_it_needs_to_know_what_is_bad/train_dotrice.mp3" type="audio/mp3"></audio></td><td><audio controls="" style="width: 150px;"><source src="https://github.com/neonbjb/tortoise-tts/raw/main/results/various/bengio_it_needs_to_know_what_is_bad/train_kennard.mp3" type="audio/mp3"></audio></td><td><audio controls="" style="width: 150px;"><source src="https://github.com/neonbjb/tortoise-tts/raw/main/results/various/bengio_it_needs_to_know_what_is_bad/weaver.mp3" type="audio/mp3"></audio></td><td><audio controls="" style="width: 150px;"><source src="https://github.com/neonbjb/tortoise-tts/raw/main/results/various/bengio_it_needs_to_know_what_is_bad/william.mp3" type="audio/mp3"></audio></td></tr>
|
29 |
<tr><td>dickinson_stop_for_death</td><td><audio controls="" style="width: 150px;"><source src="https://github.com/neonbjb/tortoise-tts/raw/main/results/various/dickinson_stop_for_death/angie.mp3" type="audio/mp3"></audio></td><td><audio controls="" style="width: 150px;"><source src="https://github.com/neonbjb/tortoise-tts/raw/main/results/various/dickinson_stop_for_death/daniel.mp3" type="audio/mp3"></audio></td><td><audio controls="" style="width: 150px;"><source src="https://github.com/neonbjb/tortoise-tts/raw/main/results/various/dickinson_stop_for_death/deniro.mp3" type="audio/mp3"></audio></td><td><audio controls="" style="width: 150px;"><source src="https://github.com/neonbjb/tortoise-tts/raw/main/results/various/dickinson_stop_for_death/emma.mp3" type="audio/mp3"></audio></td><td><audio controls="" style="width: 150px;"><source src="https://github.com/neonbjb/tortoise-tts/raw/main/results/various/dickinson_stop_for_death/freeman.mp3" type="audio/mp3"></audio></td><td><audio controls="" style="width: 150px;"><source src="https://github.com/neonbjb/tortoise-tts/raw/main/results/various/dickinson_stop_for_death/geralt.mp3" type="audio/mp3"></audio></td><td><audio controls="" style="width: 150px;"><source src="https://github.com/neonbjb/tortoise-tts/raw/main/results/various/dickinson_stop_for_death/halle.mp3" type="audio/mp3"></audio></td><td><audio controls="" style="width: 150px;"><source src="https://github.com/neonbjb/tortoise-tts/raw/main/results/various/dickinson_stop_for_death/jlaw.mp3" type="audio/mp3"></audio></td><td><audio controls="" style="width: 150px;"><source src="https://github.com/neonbjb/tortoise-tts/raw/main/results/various/dickinson_stop_for_death/lj.mp3" type="audio/mp3"></audio></td><td><audio controls="" style="width: 150px;"><source src="https://github.com/neonbjb/tortoise-tts/raw/main/results/various/dickinson_stop_for_death/myself.mp3" type="audio/mp3"></audio></td><td><audio controls="" style="width: 150px;"><source 
src="https://github.com/neonbjb/tortoise-tts/raw/main/results/various/dickinson_stop_for_death/pat.mp3" type="audio/mp3"></audio></td><td><audio controls="" style="width: 150px;"><source src="https://github.com/neonbjb/tortoise-tts/raw/main/results/various/dickinson_stop_for_death/snakes.mp3" type="audio/mp3"></audio></td><td><audio controls="" style="width: 150px;"><source src="https://github.com/neonbjb/tortoise-tts/raw/main/results/various/dickinson_stop_for_death/tom.mp3" type="audio/mp3"></audio></td><td><audio controls="" style="width: 150px;"><source src="https://github.com/neonbjb/tortoise-tts/raw/main/results/various/dickinson_stop_for_death/train_atkins.mp3" type="audio/mp3"></audio></td><td><audio controls="" style="width: 150px;"><source src="https://github.com/neonbjb/tortoise-tts/raw/main/results/various/dickinson_stop_for_death/train_dotrice.mp3" type="audio/mp3"></audio></td><td><audio controls="" style="width: 150px;"><source src="https://github.com/neonbjb/tortoise-tts/raw/main/results/various/dickinson_stop_for_death/train_kennard.mp3" type="audio/mp3"></audio></td><td><audio controls="" style="width: 150px;"><source src="https://github.com/neonbjb/tortoise-tts/raw/main/results/various/dickinson_stop_for_death/weaver.mp3" type="audio/mp3"></audio></td><td><audio controls="" style="width: 150px;"><source src="https://github.com/neonbjb/tortoise-tts/raw/main/results/various/dickinson_stop_for_death/william.mp3" type="audio/mp3"></audio></td></tr>
|
|
|
24 |
</td></tr><tr><td><audio controls="" style="width: 300px;"><source src="https://github.com/neonbjb/tortoise-tts/raw/main/results/tacotron_comparison/4-tacotron2.mp3" type="audio/mp3"></audio><br>
|
25 |
</td><td><audio controls="" style="width: 300px;"><source src="https://github.com/neonbjb/tortoise-tts/raw/main/results/tacotron_comparison/4-tortoise.mp3" type="audio/mp3"></audio><br>
|
26 |
</td></tr></table><h1>Various spoken texts for all voices:<h1><table><th>text</th><th>angie</th><th>daniel</th><th>deniro</th><th>emma</th><th>freeman</th><th>geralt</th><th>halle</th><th>jlaw</th><th>lj</th><th>myself</th><th>pat</th><th>snakes</th><th>tom</th><th>train_atkins</th><th>train_dotrice</th><th>train_kennard</th><th>weaver</th><th>william</th>
|
27 |
+
<tr><td>reference clip</td><td><audio controls="" style="width: 150px;"><source src="https://github.com/neonbjb/tortoise-tts/raw/main/voices/angie/1.wav" type="audio/mp3"></audio></td><td><audio controls="" style="width: 150px;"><source src="https://github.com/neonbjb/tortoise-tts/raw/main/voices/daniel/1.wav" type="audio/mp3"></audio></td><td><audio controls="" style="width: 150px;"><source src="https://github.com/neonbjb/tortoise-tts/raw/main/voices/deniro/1.wav" type="audio/mp3"></audio></td><td><audio controls="" style="width: 150px;"><source src="https://github.com/neonbjb/tortoise-tts/raw/main/voices/emma/1.wav" type="audio/mp3"></audio></td><td><audio controls="" style="width: 150px;"><source src="https://github.com/neonbjb/tortoise-tts/raw/main/voices/freeman/1.wav" type="audio/mp3"></audio></td><td><audio controls="" style="width: 150px;"><source src="https://github.com/neonbjb/tortoise-tts/raw/main/voices/geralt/1.wav" type="audio/mp3"></audio></td><td><audio controls="" style="width: 150px;"><source src="https://github.com/neonbjb/tortoise-tts/raw/main/voices/halle/1.wav" type="audio/mp3"></audio></td><td><audio controls="" style="width: 150px;"><source src="https://github.com/neonbjb/tortoise-tts/raw/main/voices/jlaw/1.wav" type="audio/mp3"></audio></td><td><audio controls="" style="width: 150px;"><source src="https://github.com/neonbjb/tortoise-tts/raw/main/voices/lj/1.wav" type="audio/mp3"></audio></td><td><audio controls="" style="width: 150px;"><source src="https://github.com/neonbjb/tortoise-tts/raw/main/voices/myself/1.wav" type="audio/mp3"></audio></td><td><audio controls="" style="width: 150px;"><source src="https://github.com/neonbjb/tortoise-tts/raw/main/voices/pat/1.wav" type="audio/mp3"></audio></td><td><audio controls="" style="width: 150px;"><source src="https://github.com/neonbjb/tortoise-tts/raw/main/voices/snakes/1.wav" type="audio/mp3"></audio></td><td><audio controls="" style="width: 150px;"><source 
src="https://github.com/neonbjb/tortoise-tts/raw/main/voices/tom/1.wav" type="audio/mp3"></audio></td><td><audio controls="" style="width: 150px;"><source src="https://github.com/neonbjb/tortoise-tts/raw/main/voices/train_atkins/1.wav" type="audio/mp3"></audio></td><td><audio controls="" style="width: 150px;"><source src="https://github.com/neonbjb/tortoise-tts/raw/main/voices/train_dotrice/1.wav" type="audio/mp3"></audio></td><td><audio controls="" style="width: 150px;"><source src="https://github.com/neonbjb/tortoise-tts/raw/main/voices/train_kennard/1.wav" type="audio/mp3"></audio></td><td><audio controls="" style="width: 150px;"><source src="https://github.com/neonbjb/tortoise-tts/raw/main/voices/weaver/1.wav" type="audio/mp3"></audio></td><td><audio controls="" style="width: 150px;"><source src="https://github.com/neonbjb/tortoise-tts/raw/main/voices/william/1.wav" type="audio/mp3"></audio></td></tr>
|
28 |
<tr><td>autoregressive_ml</td><td><audio controls="" style="width: 150px;"><source src="https://github.com/neonbjb/tortoise-tts/raw/main/results/various/autoregressive_ml/angie.mp3" type="audio/mp3"></audio></td><td><audio controls="" style="width: 150px;"><source src="https://github.com/neonbjb/tortoise-tts/raw/main/results/various/autoregressive_ml/daniel.mp3" type="audio/mp3"></audio></td><td><audio controls="" style="width: 150px;"><source src="https://github.com/neonbjb/tortoise-tts/raw/main/results/various/autoregressive_ml/deniro.mp3" type="audio/mp3"></audio></td><td><audio controls="" style="width: 150px;"><source src="https://github.com/neonbjb/tortoise-tts/raw/main/results/various/autoregressive_ml/emma.mp3" type="audio/mp3"></audio></td><td><audio controls="" style="width: 150px;"><source src="https://github.com/neonbjb/tortoise-tts/raw/main/results/various/autoregressive_ml/freeman.mp3" type="audio/mp3"></audio></td><td><audio controls="" style="width: 150px;"><source src="https://github.com/neonbjb/tortoise-tts/raw/main/results/various/autoregressive_ml/geralt.mp3" type="audio/mp3"></audio></td><td><audio controls="" style="width: 150px;"><source src="https://github.com/neonbjb/tortoise-tts/raw/main/results/various/autoregressive_ml/halle.mp3" type="audio/mp3"></audio></td><td><audio controls="" style="width: 150px;"><source src="https://github.com/neonbjb/tortoise-tts/raw/main/results/various/autoregressive_ml/jlaw.mp3" type="audio/mp3"></audio></td><td><audio controls="" style="width: 150px;"><source src="https://github.com/neonbjb/tortoise-tts/raw/main/results/various/autoregressive_ml/lj.mp3" type="audio/mp3"></audio></td><td><audio controls="" style="width: 150px;"><source src="https://github.com/neonbjb/tortoise-tts/raw/main/results/various/autoregressive_ml/myself.mp3" type="audio/mp3"></audio></td><td><audio controls="" style="width: 150px;"><source 
src="https://github.com/neonbjb/tortoise-tts/raw/main/results/various/autoregressive_ml/pat.mp3" type="audio/mp3"></audio></td><td><audio controls="" style="width: 150px;"><source src="https://github.com/neonbjb/tortoise-tts/raw/main/results/various/autoregressive_ml/snakes.mp3" type="audio/mp3"></audio></td><td><audio controls="" style="width: 150px;"><source src="https://github.com/neonbjb/tortoise-tts/raw/main/results/various/autoregressive_ml/tom.mp3" type="audio/mp3"></audio></td><td><audio controls="" style="width: 150px;"><source src="https://github.com/neonbjb/tortoise-tts/raw/main/results/various/autoregressive_ml/train_atkins.mp3" type="audio/mp3"></audio></td><td><audio controls="" style="width: 150px;"><source src="https://github.com/neonbjb/tortoise-tts/raw/main/results/various/autoregressive_ml/train_dotrice.mp3" type="audio/mp3"></audio></td><td><audio controls="" style="width: 150px;"><source src="https://github.com/neonbjb/tortoise-tts/raw/main/results/various/autoregressive_ml/train_kennard.mp3" type="audio/mp3"></audio></td><td><audio controls="" style="width: 150px;"><source src="https://github.com/neonbjb/tortoise-tts/raw/main/results/various/autoregressive_ml/weaver.mp3" type="audio/mp3"></audio></td><td><audio controls="" style="width: 150px;"><source src="https://github.com/neonbjb/tortoise-tts/raw/main/results/various/autoregressive_ml/william.mp3" type="audio/mp3"></audio></td></tr>
|
29 |
<tr><td>bengio_it_needs_to_know_what_is_bad</td><td><audio controls="" style="width: 150px;"><source src="https://github.com/neonbjb/tortoise-tts/raw/main/results/various/bengio_it_needs_to_know_what_is_bad/angie.mp3" type="audio/mp3"></audio></td><td><audio controls="" style="width: 150px;"><source src="https://github.com/neonbjb/tortoise-tts/raw/main/results/various/bengio_it_needs_to_know_what_is_bad/daniel.mp3" type="audio/mp3"></audio></td><td><audio controls="" style="width: 150px;"><source src="https://github.com/neonbjb/tortoise-tts/raw/main/results/various/bengio_it_needs_to_know_what_is_bad/deniro.mp3" type="audio/mp3"></audio></td><td><audio controls="" style="width: 150px;"><source src="https://github.com/neonbjb/tortoise-tts/raw/main/results/various/bengio_it_needs_to_know_what_is_bad/emma.mp3" type="audio/mp3"></audio></td><td><audio controls="" style="width: 150px;"><source src="https://github.com/neonbjb/tortoise-tts/raw/main/results/various/bengio_it_needs_to_know_what_is_bad/freeman.mp3" type="audio/mp3"></audio></td><td><audio controls="" style="width: 150px;"><source src="https://github.com/neonbjb/tortoise-tts/raw/main/results/various/bengio_it_needs_to_know_what_is_bad/geralt.mp3" type="audio/mp3"></audio></td><td><audio controls="" style="width: 150px;"><source src="https://github.com/neonbjb/tortoise-tts/raw/main/results/various/bengio_it_needs_to_know_what_is_bad/halle.mp3" type="audio/mp3"></audio></td><td><audio controls="" style="width: 150px;"><source src="https://github.com/neonbjb/tortoise-tts/raw/main/results/various/bengio_it_needs_to_know_what_is_bad/jlaw.mp3" type="audio/mp3"></audio></td><td><audio controls="" style="width: 150px;"><source src="https://github.com/neonbjb/tortoise-tts/raw/main/results/various/bengio_it_needs_to_know_what_is_bad/lj.mp3" type="audio/mp3"></audio></td><td><audio controls="" style="width: 150px;"><source 
src="https://github.com/neonbjb/tortoise-tts/raw/main/results/various/bengio_it_needs_to_know_what_is_bad/myself.mp3" type="audio/mp3"></audio></td><td><audio controls="" style="width: 150px;"><source src="https://github.com/neonbjb/tortoise-tts/raw/main/results/various/bengio_it_needs_to_know_what_is_bad/pat.mp3" type="audio/mp3"></audio></td><td><audio controls="" style="width: 150px;"><source src="https://github.com/neonbjb/tortoise-tts/raw/main/results/various/bengio_it_needs_to_know_what_is_bad/snakes.mp3" type="audio/mp3"></audio></td><td><audio controls="" style="width: 150px;"><source src="https://github.com/neonbjb/tortoise-tts/raw/main/results/various/bengio_it_needs_to_know_what_is_bad/tom.mp3" type="audio/mp3"></audio></td><td><audio controls="" style="width: 150px;"><source src="https://github.com/neonbjb/tortoise-tts/raw/main/results/various/bengio_it_needs_to_know_what_is_bad/train_atkins.mp3" type="audio/mp3"></audio></td><td><audio controls="" style="width: 150px;"><source src="https://github.com/neonbjb/tortoise-tts/raw/main/results/various/bengio_it_needs_to_know_what_is_bad/train_dotrice.mp3" type="audio/mp3"></audio></td><td><audio controls="" style="width: 150px;"><source src="https://github.com/neonbjb/tortoise-tts/raw/main/results/various/bengio_it_needs_to_know_what_is_bad/train_kennard.mp3" type="audio/mp3"></audio></td><td><audio controls="" style="width: 150px;"><source src="https://github.com/neonbjb/tortoise-tts/raw/main/results/various/bengio_it_needs_to_know_what_is_bad/weaver.mp3" type="audio/mp3"></audio></td><td><audio controls="" style="width: 150px;"><source src="https://github.com/neonbjb/tortoise-tts/raw/main/results/various/bengio_it_needs_to_know_what_is_bad/william.mp3" type="audio/mp3"></audio></td></tr>
|
30 |
<tr><td>dickinson_stop_for_death</td><td><audio controls="" style="width: 150px;"><source src="https://github.com/neonbjb/tortoise-tts/raw/main/results/various/dickinson_stop_for_death/angie.mp3" type="audio/mp3"></audio></td><td><audio controls="" style="width: 150px;"><source src="https://github.com/neonbjb/tortoise-tts/raw/main/results/various/dickinson_stop_for_death/daniel.mp3" type="audio/mp3"></audio></td><td><audio controls="" style="width: 150px;"><source src="https://github.com/neonbjb/tortoise-tts/raw/main/results/various/dickinson_stop_for_death/deniro.mp3" type="audio/mp3"></audio></td><td><audio controls="" style="width: 150px;"><source src="https://github.com/neonbjb/tortoise-tts/raw/main/results/various/dickinson_stop_for_death/emma.mp3" type="audio/mp3"></audio></td><td><audio controls="" style="width: 150px;"><source src="https://github.com/neonbjb/tortoise-tts/raw/main/results/various/dickinson_stop_for_death/freeman.mp3" type="audio/mp3"></audio></td><td><audio controls="" style="width: 150px;"><source src="https://github.com/neonbjb/tortoise-tts/raw/main/results/various/dickinson_stop_for_death/geralt.mp3" type="audio/mp3"></audio></td><td><audio controls="" style="width: 150px;"><source src="https://github.com/neonbjb/tortoise-tts/raw/main/results/various/dickinson_stop_for_death/halle.mp3" type="audio/mp3"></audio></td><td><audio controls="" style="width: 150px;"><source src="https://github.com/neonbjb/tortoise-tts/raw/main/results/various/dickinson_stop_for_death/jlaw.mp3" type="audio/mp3"></audio></td><td><audio controls="" style="width: 150px;"><source src="https://github.com/neonbjb/tortoise-tts/raw/main/results/various/dickinson_stop_for_death/lj.mp3" type="audio/mp3"></audio></td><td><audio controls="" style="width: 150px;"><source src="https://github.com/neonbjb/tortoise-tts/raw/main/results/various/dickinson_stop_for_death/myself.mp3" type="audio/mp3"></audio></td><td><audio controls="" style="width: 150px;"><source 
src="https://github.com/neonbjb/tortoise-tts/raw/main/results/various/dickinson_stop_for_death/pat.mp3" type="audio/mp3"></audio></td><td><audio controls="" style="width: 150px;"><source src="https://github.com/neonbjb/tortoise-tts/raw/main/results/various/dickinson_stop_for_death/snakes.mp3" type="audio/mp3"></audio></td><td><audio controls="" style="width: 150px;"><source src="https://github.com/neonbjb/tortoise-tts/raw/main/results/various/dickinson_stop_for_death/tom.mp3" type="audio/mp3"></audio></td><td><audio controls="" style="width: 150px;"><source src="https://github.com/neonbjb/tortoise-tts/raw/main/results/various/dickinson_stop_for_death/train_atkins.mp3" type="audio/mp3"></audio></td><td><audio controls="" style="width: 150px;"><source src="https://github.com/neonbjb/tortoise-tts/raw/main/results/various/dickinson_stop_for_death/train_dotrice.mp3" type="audio/mp3"></audio></td><td><audio controls="" style="width: 150px;"><source src="https://github.com/neonbjb/tortoise-tts/raw/main/results/various/dickinson_stop_for_death/train_kennard.mp3" type="audio/mp3"></audio></td><td><audio controls="" style="width: 150px;"><source src="https://github.com/neonbjb/tortoise-tts/raw/main/results/various/dickinson_stop_for_death/weaver.mp3" type="audio/mp3"></audio></td><td><audio controls="" style="width: 150px;"><source src="https://github.com/neonbjb/tortoise-tts/raw/main/results/various/dickinson_stop_for_death/william.mp3" type="audio/mp3"></audio></td></tr>
|
utils/audio.py
CHANGED
@@ -87,7 +87,7 @@ def get_voices():
|
|
87 |
for sub in subs:
|
88 |
subj = os.path.join('voices', sub)
|
89 |
if os.path.isdir(subj):
|
90 |
-
voices[sub] = glob(f'{subj}/*.wav')
|
91 |
return voices
|
92 |
|
93 |
|
|
|
87 |
for sub in subs:
|
88 |
subj = os.path.join('voices', sub)
|
89 |
if os.path.isdir(subj):
|
90 |
+
voices[sub] = list(glob(f'{subj}/*.wav')) + list(glob(f'{subj}/*.mp3'))
|
91 |
return voices
|
92 |
|
93 |
|