whisper-demo-es-medium

Build error

App Files Files Community

juancopi81 committed on Jan 19, 2023

Commit

8400511

•

1 Parent(s): 16a0358

Support for longer videos

Browse files

Files changed (3) hide show

app.py +26 -2
textprocessor.py +3 -3
utils.py +5 -1

app.py CHANGED Viewed

@@ -1,5 +1,6 @@
 import gradio as gr
 from typing import Any
 import torch
 from transformers import pipeline
@@ -14,6 +15,9 @@ from textprocessor import TextProcessor
 from videocreator import VideoCreator
 from share_btn import community_icon_html, loading_icon_html, share_js
 spanish_transcribe_model = "juancopi81/whisper-medium-es"
 languages = {"Spanish": "es", "English": "en"}
@@ -82,8 +86,28 @@ def datapipeline(url: str,
         return "Language not supported"
     transcribed_text = audio_transcriber.transcribe(audio_path_file)
     print("Audio transcription ready!")
-    json_scenes = text_processor.get_json_scenes(transcribed_text,
-                                                 summary_language)
     print("Scenes ready")
     video = video_creator.create_video(json_scenes, video_styles)
     print("Video at", video)

 import gradio as gr
 from typing import Any
+import math
 import torch
 from transformers import pipeline
 from videocreator import VideoCreator
 from share_btn import community_icon_html, loading_icon_html, share_js
+MAX_NUM_WORDS = 20000
+MAX_CHUNK_LENGTH = 600
 spanish_transcribe_model = "juancopi81/whisper-medium-es"
 languages = {"Spanish": "es", "English": "en"}
         return "Language not supported"
     transcribed_text = audio_transcriber.transcribe(audio_path_file)
     print("Audio transcription ready!")
+    # Get total number of words in text
+    num_words_transcription = len(transcribed_text.split())
+    if num_words_transcription > MAX_NUM_WORDS:
+        print("to add return here")
+    if num_words_transcription > MAX_CHUNK_LENGTH:
+        num_chunks = math.ceil(num_words_transcription / MAX_CHUNK_LENGTH)
+        num_words_per_chunk = num_words_transcription // num_chunks
+        chunks = utils.splitter(num_words_per_chunk, transcribed_text)
+        json_scenes = {}
+        for chunk in chunks:
+            if len(chunk.split()) > 50:
+                max_key = max(json_scenes.keys(), default=0)
+                chunk_scenes = text_processor.get_json_scenes(chunk,
+                                                              summary_language)
+                chunk_scenes = {k+max_key: v for k, v in chunk_scenes.items()}
+                json_scenes.update(chunk_scenes)
+    else:
+        json_scenes = text_processor.get_json_scenes(transcribed_text,
+                                                     summary_language)
     print("Scenes ready")
     video = video_creator.create_video(json_scenes, video_styles)
     print("Video at", video)

textprocessor.py CHANGED Viewed

@@ -9,10 +9,10 @@ You are a creator of illustrated books building a series of scenes for your book
 Your boss asked you to write a summary and illustrations of this text:
 $TRANSCRIPTION
 You have to write the summary using a maximum of 7 scenes in a JSON object following these instructions:
-[{Scene": int, "Summary": $SUMMARY_LANGUAGE str, "Illustration": English str}, ...] where:
 "Scene": The number of the scene.
-"Summary": $SUMMARY_LANGUAGE string with a summary of the scene. It should be in $SUMMARY_LANGUAGE, and it should be less than 30 words. Readers should understand it without looking at the illustration.
-"Illustration": English string with a detailed English description of an illustration for this scene. It must be written in English and in less than 20 words. It should include many details and an artistic style for the image that matches the text.
 Just answer with the JSON object:
 """

 Your boss asked you to write a summary and illustrations of this text:
 $TRANSCRIPTION
 You have to write the summary using a maximum of 7 scenes in a JSON object following these instructions:
+[{"Scene": int, "Summary": "$SUMMARY_LANGUAGE str", "Illustration": "English str"}, ...] where:
 "Scene": The number of the scene.
+"Summary": $SUMMARY_LANGUAGE string with a summary of the scene. It should be in $SUMMARY_LANGUAGE, and it should be less than 50 words. Readers should understand it without looking at the illustration.
+"Illustration": English string with a description of an illustration for this scene in less than 20 words. It should represent the scene, but it should be understandable without the context of the scene.
 Just answer with the JSON object:
 """

utils.py CHANGED Viewed

@@ -3,4 +3,8 @@ def is_google_colab():
         import google.colab
         return True
     except:
-        return False

         import google.colab
         return True
     except:
+        return False
+def splitter(n, s):
+    pieces = s.split()
+    return (" ".join(pieces[i:i+n]) for i in range(0, len(pieces), n))