Spaces:
Runtime error
Runtime error
File size: 2,126 Bytes
614861a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 |
import base64
from abc import ABC, abstractmethod
from gtts import gTTS
from io import BytesIO
import numpy as np
class ExpressiveModel(ABC):
    """
    Interface for models that synthesize speech from text and an emotion label.

    Concrete subclasses must implement both ``load`` and ``synthesize``;
    the class cannot be instantiated until they do.
    """

    @abstractmethod
    def load(self):
        """
        Prepare the model for use.

        No behaviour is defined here; each subclass supplies its own.
        """
        pass

    @abstractmethod
    def synthesize(self, text: str, emotion: str) -> np.ndarray:
        """
        Synthesize audio with emotion
        :param text: (str) text to synthesize
        :param emotion: (str) neutral | happy | ...
        :return: np.array
        """
        pass
class StyleTransferModel(ABC):
    """
    Interface for models that synthesize speech from text and a reference audio.

    Concrete subclasses must implement both ``load`` and ``synthesize``;
    the class cannot be instantiated until they do.
    """

    @abstractmethod
    def load(self):
        """
        Prepare the model for use.

        No behaviour is defined here; each subclass supplies its own.
        """
        pass

    @abstractmethod
    def synthesize(self, text: str, ref_audio: np.ndarray) -> np.ndarray:
        """
        Synthesize audio with reference audio
        :param text: (str) text to synthesize
        :param ref_audio: (np.array) reference audio
        :return: np.array
        """
        pass
class TTSService:
    """
    Text-to-speech service returning audio as a base64 string.

    Get input text (str) and, for ``transfer``, a reference audio (np.array).
    Synthesize the audio with gTTS.
    Convert the audio bytes to base64 (UTF-8 str).
    """

    @staticmethod
    def _speak_to_base64(text: str) -> str:
        """
        Render text with gTTS and return the audio bytes as base64.
        :param text: (str) text to speak
        :return: (str) base64-encoded audio, decoded as UTF-8
        """
        # In-memory buffer: no temporary file is written to disk, and the
        # buffer is released as soon as the bytes have been encoded.
        with BytesIO() as audio_buffer:
            gTTS(text).write_to_fp(audio_buffer)
            return base64.b64encode(audio_buffer.getvalue()).decode('utf-8')

    @staticmethod
    def synthesis(text: str) -> str:
        """
        Synthesize speech for the given text.
        :param text: (str) text to speak
        :return: (str) base64-encoded audio
        """
        return TTSService._speak_to_base64(text)

    @staticmethod
    def transfer(input_text: str, ref_audio: np.ndarray) -> str:
        """
        Synthesize speech for the given text with a reference audio.

        NOTE: this is a placeholder. ``ref_audio`` is accepted but not used
        yet, so the result is the same as ``synthesis(input_text)``.
        :param input_text: (str) text to speak
        :param ref_audio: (np.array) reference audio, currently ignored
        :return: (str) base64-encoded audio
        """
        # TODO: process ref_audio and encode the model's np.array output
        # instead of falling back to plain gTTS.
        return TTSService._speak_to_base64(input_text)