Spaces:
Runtime error
Runtime error
StephaneBah
committed on
Commit
•
dc7109c
1
Parent(s):
3b6ee3b
init2
Browse files
app.py
CHANGED
@@ -1,4 +1,49 @@
|
|
1 |
import streamlit as st
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2 |
|
3 |
-
x = st.slider('Select a value')
|
4 |
-
st.write(x, 'squared is', x * x)
|
|
|
1 |
import streamlit as st
|
2 |
+
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
|
3 |
+
from diffusers import DiffusionPipeline
|
4 |
+
import torch
|
5 |
+
import accelerate
|
6 |
+
|
7 |
+
# Load the models and tokenizers.
# Everything below runs at import time, so the app only starts once all three
# models have been downloaded and instantiated.
translation_model_name = "google/madlad400-3b-mt"
translation_model = AutoModelForSeq2SeqLM.from_pretrained(translation_model_name)
translation_tokenizer = AutoTokenizer.from_pretrained(translation_model_name)

# Only the hub id is kept here; the ASR pipeline below loads the model itself.
transcription_model = "chrisjay/fonxlsr"

diffusion_model_name = "stabilityai/stable-diffusion-xl-base-1.0"
# Moving the pipeline to "cuda" unconditionally crashes on hosts without a GPU,
# so pick the device at runtime. Half precision is kept for the GPU path only;
# the CPU path falls back to float32.
_device = "cuda" if torch.cuda.is_available() else "cpu"
_dtype = torch.float16 if _device == "cuda" else torch.float32
diffusion_pipeline = DiffusionPipeline.from_pretrained(
    diffusion_model_name,
    torch_dtype=_dtype,
)
diffusion_pipeline = diffusion_pipeline.to(_device)

# Define the translation and transcription pipelines with accelerate.
# NOTE(review): the translation model is passed as an already-instantiated
# object, so `device_map="auto"` may have no effect on its placement -- confirm.
translation_pipeline = pipeline(
    "translation",
    model=translation_model,
    tokenizer=translation_tokenizer,
    device_map="auto",
)
transcription_pipeline = pipeline(
    "automatic-speech-recognition",
    model=transcription_model,
    device_map="auto",
)
|
21 |
+
|
22 |
+
# Define the function for transcribing and translating audio in Fon
|
23 |
+
def transcribe_and_translate_audio_fon(audio_path, num_images=1):
    """Generate images from a Fon speech recording.

    The recording is transcribed with ``transcription_pipeline``, the
    transcription is translated to French with ``translation_pipeline``, and
    the French text is used as the prompt for ``diffusion_pipeline``.

    Args:
        audio_path: Audio input for the speech-recognition pipeline. A
            file-like object (anything with a ``read`` method, such as a
            Streamlit upload) is read into bytes first; any other value is
            passed through unchanged.
        num_images: Number of images to generate for the prompt.

    Returns:
        The ``"images"`` entry of the diffusion pipeline output.
    """
    # The speech-recognition pipeline does not take file objects, so hand it
    # the raw bytes when the caller passes an open or uploaded file.
    if hasattr(audio_path, "read"):
        audio_path = audio_path.read()

    # Transcribe the audio to Fon using the transcription pipeline
    transcription_fon = transcription_pipeline(audio_path)["text"]

    # Translate the Fon transcription to French. MADLAD-400 selects the target
    # language from a "<2xx>" token prepended to the input text. The previous
    # `source_lang=` / `target_lang=` keywords are not pipeline parameters and
    # would be forwarded to `generate()` as unknown arguments.
    translation_result = translation_pipeline(f"<2fr> {transcription_fon}")
    translation_fr = translation_result[0]["translation_text"]

    # Use the French sentence as the text-to-image prompt.
    images = diffusion_pipeline(translation_fr, num_images_per_prompt=num_images)["images"]

    return images
|
34 |
+
|
35 |
+
# Create a Streamlit app
st.title("Fon Audio to Image Translation")

# Upload audio file
audio_file = st.file_uploader("Upload an audio file", type=["wav"])

# Transcribe, translate and generate images.
# `st.file_uploader` returns None until a file is chosen, then an in-memory
# upload object. The speech-recognition pipeline does not accept that object,
# so pass the raw bytes of the upload instead.
if audio_file is not None:
    images = transcribe_and_translate_audio_fon(audio_file.getvalue())
    # Only one image is requested by default, so show the first one.
    st.image(images[0])
|
45 |
+
|
46 |
+
|
47 |
+
# Use Accelerate to distribute the computation across available GPUs
|
48 |
+
#images = accelerate.launch(transcribe_and_translate_and_generate, audio_file="Fongbe_Speech_Dataset/Fongbe_Speech_Dataset/fongbe_speech_audio_files/wav/64_fongbe_6b36d45b77344caeb1c8d773303c9dcb_for_validation_2022-03-11-23-50-13.wav", num_images=2)
|
49 |
|
|
|
|