# Spaces:
# Sleeping
# Sleeping
# File size: 2,678 Bytes
# 093a866 ba9fae4 093a866 ba9fae4 093a866 ba9fae4 093a866 ba9fae4 13bd0f6 ba9fae4 093a866 ba9fae4 093a866 ba9fae4 093a866 ba9fae4 093a866 ba9fae4 093a866 ba9fae4 |
# 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 |
import json
import re
from typing import Optional

import yt_dlp
def getVttUrlFromSubtitles(item, lang='en', vttType="vtt"):
    """Return the URL of the subtitle track with extension *vttType*.

    Args:
        item: Mapping of language code to a list of track dicts; each
            track dict is read for its ``"ext"`` and ``"url"`` keys.
        lang: Preferred language code. When it has no tracks, ``'en'`` is
            tried next, then the first language present in *item*.
        vttType: Track extension to look for (``"vtt"`` by default).

    Returns:
        The ``"url"`` value of the first matching track, or ``None`` when
        *item* is empty or the chosen language has no track of that type.
    """
    if not item:
        return None

    # Language fallback chain: requested -> English -> first available.
    if item.get(lang):
        key = lang
    elif item.get('en'):
        key = 'en'
    else:
        key = next(iter(item))

    # ``or []`` guards against a language whose track list is None/empty.
    for track in item[key] or []:
        if track.get("ext") == vttType:
            return track.get("url")
    return None
def getSubtitle(url: str, lang: Optional[str] = 'en', vttType="vtt") -> Optional[str]:
    """Fetch the subtitle text for a video without downloading the media.

    Manually provided subtitles (``"subtitles"``) are tried first, then
    ``"automatic_captions"``. A source is skipped when it is missing or when
    no track URL can be resolved for it, so a miss in the first source still
    falls through to the second instead of opening a ``None`` URL.

    Args:
        url: Video URL handed to ``yt_dlp.YoutubeDL.extract_info``.
        lang: Preferred subtitle language; ``None`` requests no specific
            language in the yt-dlp options.
        vttType: Track extension to select (``"vtt"`` by default).

    Returns:
        The decoded subtitle document, or ``None`` when no matching track
        is available.
    """
    ydl_opts = {
        "writesubtitles": True,
        "allsubtitles": True,
        "subtitleslangs": [lang] if lang else [],
        "skip_download": True,
    }
    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        info_dict = ydl.extract_info(url, download=False)
        if not info_dict:
            return None
        for source in ("subtitles", "automatic_captions"):
            tracks = info_dict.get(source)
            if not tracks:
                continue
            subtitle_url = getVttUrlFromSubtitles(tracks, lang, vttType)
            if not subtitle_url:
                # No track of the requested type here; try the next source.
                continue
            with ydl.urlopen(subtitle_url) as subtitle:
                return subtitle.read().decode()
    return None
def fetchSubtitleUrls(url: str) -> Optional[dict]:
    """Return the subtitle-track mapping yt-dlp reports for *url*.

    The ``"subtitles"`` mapping is returned unless it is empty or its only
    entry is ``"live_chat"``; in those cases ``"automatic_captions"`` is
    returned instead when present.

    Args:
        url: Video URL handed to ``yt_dlp.YoutubeDL.extract_info``.

    Returns:
        The selected mapping from the extracted info dict, or ``None`` when
        neither source has usable entries.
    """
    ydl_opts = {
        "writesubtitles": True,
        "allsubtitles": True,
        "skip_download": True,
    }
    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        info_dict = ydl.extract_info(url, download=False)
        subtitles = info_dict.get("subtitles")
        # A lone "live_chat" entry does not count as real subtitles.
        only_live_chat = bool(subtitles) and len(subtitles) == 1 and "live_chat" in subtitles
        if subtitles and not only_live_chat:
            return subtitles
        automatic = info_dict.get("automatic_captions")
        if automatic:
            return automatic
    return None
def get_subtitle(url, lang='en'):
    """Download a video's subtitle file and return its text without tags.

    yt-dlp is run with ``skip_download`` and the output template
    ``%(id)s.%(ext)s``, then the file ``<id>.<lang>.vtt`` is read from the
    current working directory and every ``<...>`` tag is removed from it.

    Args:
        url: Video URL handed to ``yt_dlp.YoutubeDL.extract_info``.
        lang: Subtitle language code; ``None`` is treated as ``'en'``.

    Returns:
        The subtitle text with markup tags stripped, or ``None`` when the
        video id is missing or any step fails (the error is printed).
    """
    if lang is None:
        lang = 'en'
    # Download subtitles if available
    ydl_opts = {
        'writesubtitles': True,
        'outtmpl': '%(id)s.%(ext)s',
        'subtitleslangs': [lang],
        'skip_download': True,
    }
    try:
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            info_dict = ydl.extract_info(url, download=True)
            video_id = info_dict.get("id", None)
            if video_id is None:
                return None
            subtitle_file = f"{video_id}.{lang}.vtt"
            with open(subtitle_file, 'r', encoding="utf-8") as f:
                subtitle_content = f.read()
            # Strip inline tags such as <c> and per-word timestamp markers.
            return re.sub(r"<[^>]+>", "", subtitle_content)
    except Exception as error:
        # Best-effort helper: report the failure and signal "no subtitle".
        print(error)
        return None