Spaces:
Running
on
T4
Running
on
T4
Add support for file size limits in audio and YouTube transcription, and use yt_dlp for video downloads
#6
by
alamin655
- opened
app.py
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
import torch
|
2 |
-
|
3 |
import gradio as gr
|
4 |
-
import
|
5 |
from transformers import pipeline
|
|
|
6 |
|
7 |
MODEL_NAME = "openai/whisper-large-v2"
|
8 |
BATCH_SIZE = 8
|
@@ -35,7 +35,7 @@ def transcribe(microphone, file_upload, task):
|
|
35 |
elif (microphone is None) and (file_upload is None):
|
36 |
raise gr.Error("You have to either use the microphone or upload an audio file")
|
37 |
|
38 |
-
file_size_mb = os.stat(
|
39 |
if file_size_mb > FILE_LIMIT_MB:
|
40 |
raise gr.Error(
|
41 |
f"File size exceeds file size limit. Got file of size {file_size_mb:.2f}MB for a limit of {FILE_LIMIT_MB}MB."
|
@@ -59,25 +59,19 @@ def _return_yt_html_embed(yt_url):
|
|
59 |
return HTML_str
|
60 |
|
61 |
|
62 |
-
def yt_transcribe(yt_url, task, max_filesize=
|
63 |
-
|
64 |
-
html_embed_str = _return_yt_html_embed(yt_url)
|
65 |
-
for attempt in range(YT_ATTEMPT_LIMIT):
|
66 |
try:
|
67 |
-
|
68 |
-
|
69 |
-
|
70 |
-
|
71 |
-
|
72 |
-
|
73 |
-
|
74 |
-
if stream.filesize_mb > max_filesize:
|
75 |
-
raise gr.Error(f"Maximum YouTube file size is {max_filesize}MB, got {stream.filesize_mb:.2f}MB.")
|
76 |
-
|
77 |
pipe.model.config.forced_decoder_ids = [[2, transcribe_token_id if task=="transcribe" else translate_token_id]]
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
return html_embed_str, text
|
82 |
|
83 |
|
@@ -120,8 +114,8 @@ yt_transcribe = gr.Interface(
|
|
120 |
allow_flagging="never",
|
121 |
)
|
122 |
|
|
|
123 |
with demo:
|
124 |
gr.TabbedInterface([mf_transcribe, yt_transcribe], ["Transcribe Audio", "Transcribe YouTube"])
|
125 |
|
126 |
-
demo.launch(enable_queue=True)
|
127 |
-
|
|
|
1 |
import torch
|
|
|
2 |
import gradio as gr
|
3 |
+
import yt_dlp
|
4 |
from transformers import pipeline
|
5 |
+
import os
|
6 |
|
7 |
MODEL_NAME = "openai/whisper-large-v2"
|
8 |
BATCH_SIZE = 8
|
|
|
35 |
elif (microphone is None) and (file_upload is None):
|
36 |
raise gr.Error("You have to either use the microphone or upload an audio file")
|
37 |
|
38 |
+
file_size_mb = os.stat(file_upload).st_size / (1024 * 1024)
|
39 |
if file_size_mb > FILE_LIMIT_MB:
|
40 |
raise gr.Error(
|
41 |
f"File size exceeds file size limit. Got file of size {file_size_mb:.2f}MB for a limit of {FILE_LIMIT_MB}MB."
|
|
|
59 |
return HTML_str
|
60 |
|
61 |
|
62 |
+
def yt_transcribe(yt_url, task, max_filesize=FILE_LIMIT_MB):
    """Download the audio of a YouTube video and run it through the ASR pipeline.

    Args:
        yt_url: URL of the video to fetch.
        task: "transcribe" selects the transcribe token; any other value
            selects the translate token.
        max_filesize: Largest accepted download, in MB (1 MB = 1024 * 1024 bytes).

    Returns:
        A tuple ``(html_embed_str, text)``: the HTML embed snippet for the
        video and the text produced by the pipeline.

    Raises:
        gr.Error: If the download fails or the downloaded file is larger
            than ``max_filesize``.
    """
    with yt_dlp.YoutubeDL({'format': 'bestaudio/best'}) as ydl:
        try:
            info_dict = ydl.extract_info(yt_url, download=True)
            audio_path = ydl.prepare_filename(info_dict)
        except Exception as e:
            # Surface any downloader failure as a UI error, keeping the cause chained.
            raise gr.Error(f"Error downloading YouTube video: {str(e)}") from e

    try:
        html_embed_str = _return_yt_html_embed(yt_url)

        # Compute the size once; it is used for both the check and the message.
        file_size_mb = os.stat(audio_path).st_size / (1024 * 1024)
        if file_size_mb > max_filesize:
            raise gr.Error(f"Maximum YouTube file size is {max_filesize}MB, got {file_size_mb:.2f}MB.")

        pipe.model.config.forced_decoder_ids = [[2, transcribe_token_id if task=="transcribe" else translate_token_id]]
        text = pipe(audio_path, batch_size=BATCH_SIZE)["text"]
    finally:
        # Always delete the download, including when the size check or the
        # pipeline raises, so rejected or failed requests do not fill the disk.
        if os.path.exists(audio_path):
            os.remove(audio_path)

    return html_embed_str, text
|
76 |
|
77 |
|
|
|
114 |
allow_flagging="never",
|
115 |
)
|
116 |
|
117 |
+
|
118 |
with demo:
|
119 |
gr.TabbedInterface([mf_transcribe, yt_transcribe], ["Transcribe Audio", "Transcribe YouTube"])
|
120 |
|
121 |
+
demo.launch(enable_queue=True)
|
|