whisper-large-v2

Running on T4

App Files Community

Add support for file size limits in audio and YouTube transcription, and use yt_dlp for video downloads

by alamin655 - opened May 1, 2023

base: refs/heads/main

←

from: refs/pr/6

Discussion Files changed

+16

-22

Files changed (1) hide show

app.py +16 -22

app.py CHANGED Viewed

@@ -1,8 +1,8 @@
 import torch
 import gradio as gr
-import pytube as pt
 from transformers import pipeline
 MODEL_NAME = "openai/whisper-large-v2"
 BATCH_SIZE = 8
@@ -35,7 +35,7 @@ def transcribe(microphone, file_upload, task):
     elif (microphone is None) and (file_upload is None):
         raise gr.Error("You have to either use the microphone or upload an audio file")
-    file_size_mb = os.stat(inputs).st_size / (1024 * 1024)
     if file_size_mb > FILE_LIMIT_MB:
         raise gr.Error(
                 f"File size exceeds file size limit. Got file of size {file_size_mb:.2f}MB for a limit of {FILE_LIMIT_MB}MB."
@@ -59,25 +59,19 @@ def _return_yt_html_embed(yt_url):
     return HTML_str
-def yt_transcribe(yt_url, task, max_filesize=75.0):
-    yt = pt.YouTube(yt_url)
-    html_embed_str = _return_yt_html_embed(yt_url)
-    for attempt in range(YT_ATTEMPT_LIMIT):
         try:
-            yt = pytube.YouTube(yt_url)
-            stream = yt.streams.filter(only_audio=True)[0]
-            break
-        except KeyError:
-            if attempt + 1 == YT_ATTEMPT_LIMIT:
-                raise gr.Error("An error occurred while loading the YouTube video. Please try again.")
-    if stream.filesize_mb > max_filesize:
-        raise gr.Error(f"Maximum YouTube file size is {max_filesize}MB, got {stream.filesize_mb:.2f}MB.")
     pipe.model.config.forced_decoder_ids = [[2, transcribe_token_id if task=="transcribe" else translate_token_id]]
-    text = pipe("audio.mp3", batch_size=BATCH_SIZE)["text"]
     return html_embed_str, text
@@ -120,8 +114,8 @@ yt_transcribe = gr.Interface(
     allow_flagging="never",
 )
 with demo:
     gr.TabbedInterface([mf_transcribe, yt_transcribe], ["Transcribe Audio", "Transcribe YouTube"])
-demo.launch(enable_queue=True)

 import torch
 import gradio as gr
+import yt_dlp
 from transformers import pipeline
+import os
 MODEL_NAME = "openai/whisper-large-v2"
 BATCH_SIZE = 8
     elif (microphone is None) and (file_upload is None):
         raise gr.Error("You have to either use the microphone or upload an audio file")
+    file_size_mb = os.stat(file_upload).st_size / (1024 * 1024)
     if file_size_mb > FILE_LIMIT_MB:
         raise gr.Error(
                 f"File size exceeds file size limit. Got file of size {file_size_mb:.2f}MB for a limit of {FILE_LIMIT_MB}MB."
     return HTML_str
+def yt_transcribe(yt_url, task, max_filesize=FILE_LIMIT_MB):
+    with yt_dlp.YoutubeDL({'format': 'bestaudio/best'}) as ydl:
         try:
+            info_dict = ydl.extract_info(yt_url, download=True)
+            a = ydl.prepare_filename(info_dict)
+        except Exception as e:
+            raise gr.Error(f"Error downloading YouTube video: {str(e)}")
+    html_embed_str = _return_yt_html_embed(yt_url)
+    if os.stat(a).st_size / (1024 * 1024) > max_filesize:
+        raise gr.Error(f"Maximum YouTube file size is {max_filesize}MB, got {os.stat(a).st_size / (1024 * 1024):.2f}MB.")
     pipe.model.config.forced_decoder_ids = [[2, transcribe_token_id if task=="transcribe" else translate_token_id]]
+    text = pipe(a, batch_size=BATCH_SIZE)["text"]
+    os.remove(a)
     return html_embed_str, text
     allow_flagging="never",
 )
 with demo:
     gr.TabbedInterface([mf_transcribe, yt_transcribe], ["Transcribe Audio", "Transcribe YouTube"])
+demo.launch(enable_queue=True)