Spaces:
Build error
Build error
juancopi81
committed on
Commit
•
8400511
1
Parent(s):
16a0358
Support for longer videos
Browse files
- app.py +26 -2
- textprocessor.py +3 -3
- utils.py +5 -1
app.py
CHANGED
@@ -1,5 +1,6 @@
|
|
1 |
import gradio as gr
|
2 |
from typing import Any
|
|
|
3 |
|
4 |
import torch
|
5 |
from transformers import pipeline
|
@@ -14,6 +15,9 @@ from textprocessor import TextProcessor
|
|
14 |
from videocreator import VideoCreator
|
15 |
from share_btn import community_icon_html, loading_icon_html, share_js
|
16 |
|
|
|
|
|
|
|
17 |
spanish_transcribe_model = "juancopi81/whisper-medium-es"
|
18 |
languages = {"Spanish": "es", "English": "en"}
|
19 |
|
@@ -82,8 +86,28 @@ def datapipeline(url: str,
|
|
82 |
return "Language not supported"
|
83 |
transcribed_text = audio_transcriber.transcribe(audio_path_file)
|
84 |
print("Audio transcription ready!")
|
85 |
-
|
86 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
87 |
print("Scenes ready")
|
88 |
video = video_creator.create_video(json_scenes, video_styles)
|
89 |
print("Video at", video)
|
|
|
1 |
import gradio as gr
|
2 |
from typing import Any
|
3 |
+
import math
|
4 |
|
5 |
import torch
|
6 |
from transformers import pipeline
|
|
|
15 |
from videocreator import VideoCreator
|
16 |
from share_btn import community_icon_html, loading_icon_html, share_js
|
17 |
|
18 |
+
MAX_NUM_WORDS = 20000
|
19 |
+
MAX_CHUNK_LENGTH = 600
|
20 |
+
|
21 |
spanish_transcribe_model = "juancopi81/whisper-medium-es"
|
22 |
languages = {"Spanish": "es", "English": "en"}
|
23 |
|
|
|
86 |
return "Language not supported"
|
87 |
transcribed_text = audio_transcriber.transcribe(audio_path_file)
|
88 |
print("Audio transcription ready!")
|
89 |
+
# Get total number of words in text
|
90 |
+
num_words_transcription = len(transcribed_text.split())
|
91 |
+
|
92 |
+
if num_words_transcription > MAX_NUM_WORDS:
|
93 |
+
print("to add return here")
|
94 |
+
|
95 |
+
if num_words_transcription > MAX_CHUNK_LENGTH:
|
96 |
+
num_chunks = math.ceil(num_words_transcription / MAX_CHUNK_LENGTH)
|
97 |
+
num_words_per_chunk = num_words_transcription // num_chunks
|
98 |
+
chunks = utils.splitter(num_words_per_chunk, transcribed_text)
|
99 |
+
json_scenes = {}
|
100 |
+
for chunk in chunks:
|
101 |
+
if len(chunk.split()) > 50:
|
102 |
+
max_key = max(json_scenes.keys(), default=0)
|
103 |
+
chunk_scenes = text_processor.get_json_scenes(chunk,
|
104 |
+
summary_language)
|
105 |
+
chunk_scenes = {k+max_key: v for k, v in chunk_scenes.items()}
|
106 |
+
json_scenes.update(chunk_scenes)
|
107 |
+
else:
|
108 |
+
json_scenes = text_processor.get_json_scenes(transcribed_text,
|
109 |
+
summary_language)
|
110 |
+
|
111 |
print("Scenes ready")
|
112 |
video = video_creator.create_video(json_scenes, video_styles)
|
113 |
print("Video at", video)
|
textprocessor.py
CHANGED
@@ -9,10 +9,10 @@ You are a creator of illustrated books building a series of scenes for your book
|
|
9 |
Your boss asked you to write a summary and illustrations of this text:
|
10 |
$TRANSCRIPTION
|
11 |
You have to write the summary using a maximum of 7 scenes in a JSON object following these instructions:
|
12 |
-
[{Scene": int, "Summary": $SUMMARY_LANGUAGE str, "Illustration": English str}, ...] where:
|
13 |
"Scene": The number of the scene.
|
14 |
-
"Summary": $SUMMARY_LANGUAGE string with a summary of the scene. It should be in $SUMMARY_LANGUAGE, and it should be less than
|
15 |
-
"Illustration": English string with a
|
16 |
Just answer with the JSON object:
|
17 |
"""
|
18 |
|
|
|
9 |
Your boss asked you to write a summary and illustrations of this text:
|
10 |
$TRANSCRIPTION
|
11 |
You have to write the summary using a maximum of 7 scenes in a JSON object following these instructions:
|
12 |
+
[{"Scene": int, "Summary": "$SUMMARY_LANGUAGE str", "Illustration": "English str"}, ...] where:
|
13 |
"Scene": The number of the scene.
|
14 |
+
"Summary": $SUMMARY_LANGUAGE string with a summary of the scene. It should be in $SUMMARY_LANGUAGE, and it should be less than 50 words. Readers should understand it without looking at the illustration.
|
15 |
+
"Illustration": English string with a description of an illustration for this scene in less than 20 words. It should represent the scene, but it should be understandable without the context of the scene.
|
16 |
Just answer with the JSON object:
|
17 |
"""
|
18 |
|
utils.py
CHANGED
@@ -3,4 +3,8 @@ def is_google_colab():
|
|
3 |
import google.colab
|
4 |
return True
|
5 |
except:
|
6 |
-
return False
|
|
|
|
|
|
|
|
|
|
3 |
import google.colab
|
4 |
return True
|
5 |
except:
|
6 |
+
return False
|
7 |
+
|
8 |
+
def splitter(n, s):
    """Lazily split ``s`` into chunks of at most ``n`` words each.

    Words are whatever ``str.split()`` yields, i.e. runs of non-whitespace
    characters; the original whitespace between words is not preserved and
    every chunk is re-joined with single spaces.

    Args:
        n: Maximum number of words per chunk. Must be greater than zero.
        s: Text to split.

    Returns:
        A generator of strings. Every chunk holds exactly ``n`` words except
        possibly the last one, which holds the remainder. An empty or
        whitespace-only ``s`` produces no chunks.

    Raises:
        ValueError: If ``n`` is not positive.
    """
    # Validate eagerly (before building the generator): a zero step makes
    # range() raise ValueError anyway, while a negative step would yield an
    # empty range and silently discard the whole text.
    if n <= 0:
        raise ValueError(f"n must be a positive integer, got {n!r}")
    words = s.split()
    return (
        " ".join(words[start:start + n])
        for start in range(0, len(words), n)
    )
|