ytdlp_subtitle_dev / fetchYoutubeSubtitle.py
lanbogao's picture
Fix item.keys not list.
4fb8d8a
raw
history blame
2.69 kB
import json
import re
from typing import Optional

import yt_dlp
def getVttUrlFromSubtitles(item, lang='en', vttType="vtt"):
    """Return the URL of the subtitle track matching a language and format.

    Args:
        item: Mapping of language code to a list of track dicts; each track
            dict is read for its "ext" and "url" entries.
        lang: Preferred language code. If it is not a key of ``item``, 'en'
            is used when present, otherwise the first language in ``item``.
        vttType: Value the track's "ext" entry must equal (default "vtt").

    Returns:
        The "url" of the first matching track, or None when ``item`` is
        empty or the chosen language has no track with the requested format.
    """
    if not item:
        # Nothing to choose from; avoids indexing into an empty key list.
        return None

    if lang in item:
        key = lang
    elif 'en' in item:
        key = 'en'
    else:
        key = next(iter(item))

    # Look the tracks up on the mapping itself (a keys() view is not
    # subscriptable), and tolerate a language whose track list is None.
    for track in item[key] or ():
        if track.get("ext") == vttType:
            return track.get("url")
    return None
async def fetchSubtitle(url: str, lang: Optional[str] = 'en', vttType="vtt") -> Optional[str]:
    """Fetch the subtitle text for a video without downloading the media.

    The "subtitles" entry of the extracted info is tried first, then
    "automatic_captions". A source is skipped when it is missing/empty or
    when it yields no track in the requested format, so a "subtitles" entry
    without a usable track no longer prevents the automatic-caption fallback.

    Args:
        url: Video URL handed to yt-dlp.
        lang: Preferred subtitle language; a falsy value requests no
            specific language in the yt-dlp options.
        vttType: Subtitle format ("ext" value) to look for.

    Returns:
        The decoded subtitle document, or None when no matching track exists.

    Note:
        ``extract_info`` and ``urlopen`` are called without awaiting, so this
        coroutine runs them synchronously.
    """
    ydl_opts = {
        "writesubtitles": True,
        "allsubtitles": True,
        "subtitleslangs": [lang] if lang else [],
        "skip_download": True,
    }

    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        info_dict = ydl.extract_info(url, download=False)
        if not info_dict:
            return None

        # Uploaded subtitles take priority over automatic captions.
        for source in ("subtitles", "automatic_captions"):
            tracks = info_dict.get(source)
            if not tracks:
                continue
            subtitle_url = getVttUrlFromSubtitles(tracks, lang, vttType)
            if subtitle_url is None:
                # No track in the requested format here; try the next source
                # instead of passing None to urlopen.
                continue
            with ydl.urlopen(subtitle_url) as subtitle:
                return subtitle.read().decode()
    return None
async def fetchSubtitleUrls(url: str) -> Optional[dict]:
    """Return the subtitle-track mapping yt-dlp reports for a video.

    The "subtitles" entry of the extracted info is returned unless it is
    empty or its only key is "live_chat"; in those cases the
    "automatic_captions" entry is returned if it is non-empty.

    Args:
        url: Video URL handed to yt-dlp.

    Returns:
        The selected entry of the extracted info exactly as yt-dlp provides
        it, or None when neither entry is usable.
    """
    ydl_opts = {
        "writesubtitles": True,
        "allsubtitles": True,
        "skip_download": True,
    }

    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        info_dict = ydl.extract_info(url, download=False)
        if not info_dict:
            return None

        subtitles = info_dict.get("subtitles")
        if subtitles:
            only_live_chat = len(subtitles) == 1 and "live_chat" in subtitles
            if not only_live_chat:
                return subtitles

        auto_captions = info_dict.get("automatic_captions")
        if auto_captions:
            return auto_captions
    return None
def get_subtitle(url, lang='en'):
    """Download a video's subtitle file and return its text without tags.

    yt-dlp is asked to write the subtitle for ``lang`` to
    "<video id>.<lang>.vtt" in the current working directory (the media
    itself is skipped). That file is then read and every "<...>" span is
    removed from its content.

    Args:
        url: Video URL handed to yt-dlp.
        lang: Subtitle language code; None is treated as 'en'.

    Returns:
        The subtitle file content with "<...>" tags stripped, or None when
        the video id is missing or any step fails (the error is printed).
    """
    if lang is None:
        lang = 'en'

    # Download subtitles if available
    ydl_opts = {
        'writesubtitles': True,
        'outtmpl': '%(id)s.%(ext)s',
        'subtitleslangs': [lang],
        'skip_download': True,
    }

    try:
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            info_dict = ydl.extract_info(url, download=True)
            video_id = info_dict.get("id", None)
            if video_id is None:
                return None

            subtitle_file = f"{video_id}.{lang}.vtt"
            with open(subtitle_file, 'r', encoding='utf-8') as f:
                subtitle_content = f.read()
            return re.sub(r"<[^>]+>", "", subtitle_content)
    except Exception as error:
        # Best-effort helper: report the failure (extraction error, missing
        # subtitle file, ...) and signal it to the caller with None.
        print(error)
        return None