jhj0517 committed
Commit b554e58
Parents: 1298c67, b4b9205

Merge pull request #67 from aierlma/master


Added the large-v3 option to the app and the inference modules, making it easier to run the repository directly

app.py CHANGED
@@ -30,7 +30,7 @@ class App:
 
     @staticmethod
     def on_change_models(model_size: str):
-        translatable_model = ["large", "large-v1", "large-v2"]
+        translatable_model = ["large", "large-v1", "large-v2", "large-v3"]
         if model_size not in translatable_model:
             return gr.Checkbox.update(visible=False, value=False, interactive=False)
         else:
@@ -46,7 +46,7 @@ class App:
             with gr.Row():
                 input_file = gr.Files(type="file", label="Upload File here")
             with gr.Row():
-                dd_model = gr.Dropdown(choices=self.whisper_inf.available_models, value="large-v2",
+                dd_model = gr.Dropdown(choices=self.whisper_inf.available_models, value="large-v3",
                                        label="Model")
                 dd_lang = gr.Dropdown(choices=["Automatic Detection"] + self.whisper_inf.available_langs,
                                       value="Automatic Detection", label="Language")
@@ -84,7 +84,7 @@ class App:
                 tb_title = gr.Label(label="Youtube Title")
                 tb_description = gr.Textbox(label="Youtube Description", max_lines=15)
             with gr.Row():
-                dd_model = gr.Dropdown(choices=self.whisper_inf.available_models, value="large-v2",
+                dd_model = gr.Dropdown(choices=self.whisper_inf.available_models, value="large-v3",
                                        label="Model")
                 dd_lang = gr.Dropdown(choices=["Automatic Detection"] + self.whisper_inf.available_langs,
                                       value="Automatic Detection", label="Language")
@@ -119,7 +119,7 @@ class App:
             with gr.Row():
                 mic_input = gr.Microphone(label="Record with Mic", type="filepath", interactive=True)
             with gr.Row():
-                dd_model = gr.Dropdown(choices=self.whisper_inf.available_models, value="large-v2",
+                dd_model = gr.Dropdown(choices=self.whisper_inf.available_models, value="large-v3",
                                        label="Model")
                 dd_lang = gr.Dropdown(choices=["Automatic Detection"] + self.whisper_inf.available_langs,
                                       value="Automatic Detection", label="Language")
modules/faster_whisper_inference.py CHANGED
@@ -24,7 +24,7 @@ class FasterWhisperInference(BaseInterface):
         self.model = None
         self.available_models = whisper.available_models()
         self.available_langs = sorted(list(whisper.tokenizer.LANGUAGES.values()))
-        self.translatable_models = ["large", "large-v1", "large-v2"]
+        self.translatable_models = ["large", "large-v1", "large-v2", "large-v3"]
         self.device = "cuda" if torch.cuda.is_available() else "cpu"
         self.available_compute_types = ctranslate2.get_supported_compute_types("cuda") if self.device == "cuda" else ctranslate2.get_supported_compute_types("cpu")
         self.current_compute_type = "float16" if self.device == "cuda" else "float32"
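One caveat worth noting: available_models() here comes from the openai-whisper package, so "large-v3" only appears among the dropdown choices if the installed whisper release knows about it. A small sanity check, as a sketch (not part of the diff):

import whisper

# "large-v3" is only listed by sufficiently recent openai-whisper releases
# (v20231106 and later); older installs will fail this check.
if "large-v3" not in whisper.available_models():
    raise RuntimeError("Installed openai-whisper does not list 'large-v3'; "
                       "try: pip install -U openai-whisper")

This matters doubly in this module, since FasterWhisperInference reuses openai-whisper's model list while running inference through CTranslate2.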
modules/whisper_Inference.py CHANGED
@@ -11,7 +11,7 @@ from .base_interface import BaseInterface
 from modules.subtitle_manager import get_srt, get_vtt, get_txt, write_file, safe_filename
 from modules.youtube_manager import get_ytdata, get_ytaudio
 
-DEFAULT_MODEL_SIZE = "large-v2"
+DEFAULT_MODEL_SIZE = "large-v3"
 
 
 class WhisperInference(BaseInterface):
@@ -327,7 +327,7 @@ class WhisperInference(BaseInterface):
         if lang == "Automatic Detection":
             lang = None
 
-        translatable_model = ["large", "large-v1", "large-v2"]
+        translatable_model = ["large", "large-v1", "large-v2", "large-v3"]
         segments_result = self.model.transcribe(audio=audio,
                                                 language=lang,
                                                 verbose=False,
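The truncated transcribe call above is where translatable_model actually takes effect: the translate task is only requested when the current model is in the list. A condensed sketch of that pattern with openai-whisper follows; the function name run_whisper, the istranslate flag, and the task-selection line are assumptions based on the surrounding code, since the diff cuts off before them.

import whisper

TRANSLATABLE_MODELS = ["large", "large-v1", "large-v2", "large-v3"]

def run_whisper(model_size: str, audio: str, lang=None, istranslate=False):
    model = whisper.load_model(model_size)
    # openai-whisper translates only X -> English; this app additionally
    # restricts the task to the large checkpoints listed above.
    task = "translate" if istranslate and model_size in TRANSLATABLE_MODELS else "transcribe"
    return model.transcribe(audio=audio, language=lang, verbose=False, task=task)

result = run_whisper("large-v3", "sample.wav", istranslate=True)
print(result["text"])

With "large-v3" added to the list, the new checkpoint passes this gate the same way the earlier large models do.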