Spaces:

jhj0517
/

Whisper-WebUI

Running

App Files Community

jhj0517 committed on Jun 3, 2023

Commit

9390f92

•

2 Parent(s): f282b7c 67cc6b1

Merge pull request #15 from damho1104/mitigate-cuda-out-of-memory

Browse files

Files changed (3) hide show

modules/base_interface.py +20 -0
modules/nllb_inference.py +60 -53
modules/whisper_Inference.py +12 -11

modules/base_interface.py ADDED Viewed

	@@ -0,0 +1,20 @@

+import os
+import torch
+from typing import List
+class BaseInterface:
+    def __init__(self):
+        pass
+    @staticmethod
+    def release_cuda_memory():
+        torch.cuda.empty_cache()
+        torch.cuda.reset_max_memory_allocated()
+    @staticmethod
+    def remove_input_files(file_paths: List[str]):
+        for file_path in file_paths:
+            if not os.path.exists(file_path):
+                continue
+            os.remove(file_path)

modules/nllb_inference.py CHANGED Viewed

@@ -1,3 +1,4 @@
 from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
 import gradio as gr
 import torch
@@ -10,8 +11,9 @@ DEFAULT_MODEL_SIZE = "facebook/nllb-200-1.3B"
 NLLB_MODELS = ["facebook/nllb-200-3.3B", "facebook/nllb-200-1.3B", "facebook/nllb-200-distilled-600M"]
-class NLLBInference:
     def __init__(self):
         self.default_model_size = DEFAULT_MODEL_SIZE
         self.current_model_size = None
         self.model = None
@@ -29,69 +31,74 @@ class NLLBInference:
     def translate_file(self, fileobjs
                        , model_size, src_lang, tgt_lang,
                        progress=gr.Progress()):
-        if model_size != self.current_model_size or self.model is None:
-            print("\nInitializing NLLB Model..\n")
-            progress(0, desc="Initializing NLLB Model..")
-            self.current_model_size = model_size
-            self.model = AutoModelForSeq2SeqLM.from_pretrained(pretrained_model_name_or_path=model_size,
-                                                               cache_dir="models/NLLB")
-            self.tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name_or_path=model_size,
-                                                           cache_dir=f"models/NLLB/tokenizers")
-        src_lang = NLLB_AVAILABLE_LANGS[src_lang]
-        tgt_lang = NLLB_AVAILABLE_LANGS[tgt_lang]
-        self.pipeline = pipeline("translation",
-                                 model=self.model,
-                                 tokenizer=self.tokenizer,
-                                 src_lang=src_lang,
-                                 tgt_lang=tgt_lang,
-                                 device=self.device)
-        files_info = {}
-        for fileobj in fileobjs:
-            file_path = fileobj.name
-            file_name, file_ext = os.path.splitext(os.path.basename(fileobj.orig_name))
-            if file_ext == ".srt":
-                parsed_dicts = parse_srt(file_path=file_path)
-                total_progress = len(parsed_dicts)
-                for index, dic in enumerate(parsed_dicts):
-                    progress(index / total_progress, desc="Translating..")
-                    translated_text = self.translate_text(dic["sentence"])
-                    dic["sentence"] = translated_text
-                subtitle = get_serialized_srt(parsed_dicts)
-                timestamp = datetime.now().strftime("%m%d%H%M%S")
-                file_name = file_name[:-9]
-                output_path = f"outputs/translations/{file_name}-{timestamp}"
-                write_file(subtitle, f"{output_path}.srt")
-            elif file_ext == ".vtt":
-                parsed_dicts = parse_vtt(file_path=file_path)
-                total_progress = len(parsed_dicts)
-                for index, dic in enumerate(parsed_dicts):
-                    progress(index / total_progress, desc="Translating..")
-                    translated_text = self.translate_text(dic["sentence"])
-                    dic["sentence"] = translated_text
-                subtitle = get_serialized_vtt(parsed_dicts)
-                timestamp = datetime.now().strftime("%m%d%H%M%S")
-                file_name = file_name[:-9]
-                output_path = f"outputs/translations/{file_name}-{timestamp}"
-                write_file(subtitle, f"{output_path}.vtt")
-            files_info[file_name] = subtitle
-        total_result = ''
-        for file_name, subtitle in files_info.items():
-            total_result += '------------------------------------\n'
-            total_result += f'{file_name}\n\n'
-            total_result += f'{subtitle}'
-        return f"Done! Subtitle is in the outputs/translation folder.\n\n{total_result}"
 NLLB_AVAILABLE_LANGS = {

+from .base_interface import BaseInterface
 from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
 import gradio as gr
 import torch
 NLLB_MODELS = ["facebook/nllb-200-3.3B", "facebook/nllb-200-1.3B", "facebook/nllb-200-distilled-600M"]
+class NLLBInference(BaseInterface):
     def __init__(self):
+        super().__init__()
         self.default_model_size = DEFAULT_MODEL_SIZE
         self.current_model_size = None
         self.model = None
     def translate_file(self, fileobjs
                        , model_size, src_lang, tgt_lang,
                        progress=gr.Progress()):
+        try:
+            if model_size != self.current_model_size or self.model is None:
+                print("\nInitializing NLLB Model..\n")
+                progress(0, desc="Initializing NLLB Model..")
+                self.current_model_size = model_size
+                self.model = AutoModelForSeq2SeqLM.from_pretrained(pretrained_model_name_or_path=model_size,
+                                                                   cache_dir="models/NLLB")
+                self.tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name_or_path=model_size,
+                                                               cache_dir=f"models/NLLB/tokenizers")
+            src_lang = NLLB_AVAILABLE_LANGS[src_lang]
+            tgt_lang = NLLB_AVAILABLE_LANGS[tgt_lang]
+            self.pipeline = pipeline("translation",
+                                     model=self.model,
+                                     tokenizer=self.tokenizer,
+                                     src_lang=src_lang,
+                                     tgt_lang=tgt_lang,
+                                     device=self.device)
+            files_info = {}
+            for fileobj in fileobjs:
+                file_path = fileobj.name
+                file_name, file_ext = os.path.splitext(os.path.basename(fileobj.orig_name))
+                if file_ext == ".srt":
+                    parsed_dicts = parse_srt(file_path=file_path)
+                    total_progress = len(parsed_dicts)
+                    for index, dic in enumerate(parsed_dicts):
+                        progress(index / total_progress, desc="Translating..")
+                        translated_text = self.translate_text(dic["sentence"])
+                        dic["sentence"] = translated_text
+                    subtitle = get_serialized_srt(parsed_dicts)
+                    timestamp = datetime.now().strftime("%m%d%H%M%S")
+                    file_name = file_name[:-9]
+                    output_path = f"outputs/translations/{file_name}-{timestamp}"
+                    write_file(subtitle, f"{output_path}.srt")
+                elif file_ext == ".vtt":
+                    parsed_dicts = parse_vtt(file_path=file_path)
+                    total_progress = len(parsed_dicts)
+                    for index, dic in enumerate(parsed_dicts):
+                        progress(index / total_progress, desc="Translating..")
+                        translated_text = self.translate_text(dic["sentence"])
+                        dic["sentence"] = translated_text
+                    subtitle = get_serialized_vtt(parsed_dicts)
+                    timestamp = datetime.now().strftime("%m%d%H%M%S")
+                    file_name = file_name[:-9]
+                    output_path = f"outputs/translations/{file_name}-{timestamp}"
+                    write_file(subtitle, f"{output_path}.vtt")
+                files_info[file_name] = subtitle
+            total_result = ''
+            for file_name, subtitle in files_info.items():
+                total_result += '------------------------------------\n'
+                total_result += f'{file_name}\n\n'
+                total_result += f'{subtitle}'
+            return f"Done! Subtitle is in the outputs/translation folder.\n\n{total_result}"
+        except Exception as e:
+            return f"Error: {str(e)}"
+        finally:
+            self.release_cuda_memory()
+            self.remove_input_files([fileobj.name for fileobj in fileobjs])
 NLLB_AVAILABLE_LANGS = {

modules/whisper_Inference.py CHANGED Viewed

@@ -1,4 +1,5 @@
 import whisper
 from modules.subtitle_manager import get_srt, get_vtt, write_file, safe_filename
 from modules.youtube_manager import get_ytdata, get_ytaudio
 import gradio as gr
@@ -8,8 +9,9 @@ from datetime import datetime
 DEFAULT_MODEL_SIZE = "large-v2"
-class WhisperInference:
     def __init__(self):
         self.current_model_size = None
         self.model = None
         self.available_models = whisper.available_models()
@@ -71,11 +73,10 @@ class WhisperInference:
             return f"Done! Subtitle is in the outputs folder.\n\n{total_result}"
         except Exception as e:
-            return str(e)
         finally:
-            for fileobj in fileobjs:
-                if os.path.exists(fileobj.name):
-                    os.remove(fileobj.name)
     def transcribe_youtube(self, youtubelink,
                            model_size, lang, subformat, istranslate,
@@ -120,12 +121,12 @@ class WhisperInference:
             return f"Done! Subtitle file is in the outputs folder.\n\n{subtitle}"
         except Exception as e:
-            return str(e)
         finally:
             yt = get_ytdata(youtubelink)
             file_path = get_ytaudio(yt)
-            if os.path.exists(file_path):
-                os.remove(file_path)
     def transcribe_mic(self, micaudio,
                        model_size, lang, subformat, istranslate,
@@ -167,7 +168,7 @@ class WhisperInference:
             return f"Done! Subtitle file is in the outputs folder.\n\n{subtitle}"
         except Exception as e:
-            print(str(e))
         finally:
-            if os.path.exists(micaudio):
-                os.remove(micaudio)

 import whisper
+from .base_interface import BaseInterface
 from modules.subtitle_manager import get_srt, get_vtt, write_file, safe_filename
 from modules.youtube_manager import get_ytdata, get_ytaudio
 import gradio as gr
 DEFAULT_MODEL_SIZE = "large-v2"
+class WhisperInference(BaseInterface):
     def __init__(self):
+        super().__init__()
         self.current_model_size = None
         self.model = None
         self.available_models = whisper.available_models()
             return f"Done! Subtitle is in the outputs folder.\n\n{total_result}"
         except Exception as e:
+            return f"Error: {str(e)}"
         finally:
+            self.release_cuda_memory()
+            self.remove_input_files([fileobj.name for fileobj in fileobjs])
     def transcribe_youtube(self, youtubelink,
                            model_size, lang, subformat, istranslate,
             return f"Done! Subtitle file is in the outputs folder.\n\n{subtitle}"
         except Exception as e:
+            return f"Error: {str(e)}"
         finally:
             yt = get_ytdata(youtubelink)
             file_path = get_ytaudio(yt)
+            self.release_cuda_memory()
+            self.remove_input_files([file_path])
     def transcribe_mic(self, micaudio,
                        model_size, lang, subformat, istranslate,
             return f"Done! Subtitle file is in the outputs folder.\n\n{subtitle}"
         except Exception as e:
+            return f"Error: {str(e)}"
         finally:
+            self.release_cuda_memory()
+            self.remove_input_files([micaudio])