import json
import re
from typing import Optional

import yt_dlp


def getVttUrlFromSubtitles(item, lang='en', vttType="vtt"):
    """Return the URL of a subtitle track in the wanted language and format.

    Args:
        item: Mapping of language code to a list of track dicts; each track
            dict is read for its "ext" and "url" keys.
        lang: Preferred language code. When it has no tracks, 'en' is tried,
            and after that the first language present in ``item``.
        vttType: Value the track's "ext" field must equal.

    Returns:
        The "url" of the first matching track, or None when ``item`` is
        empty or the selected language has no track in that format.
    """
    if not item:
        return None

    # Pick the language bucket: requested -> English -> whatever comes first.
    if item.get(lang):
        key = lang
    elif item.get('en'):
        key = 'en'
    else:
        key = next(iter(item))

    # `or []` guards against a language key whose value is None/empty.
    for track in item[key] or []:
        if track.get("ext") == vttType:
            return track.get("url")
    return None


def getSubtitle(url: str, lang: Optional[str] = 'en', vttType="vtt") -> Optional[str]:
    """Fetch subtitle text for a video without downloading the media.

    Uploaded subtitles are tried first, then automatic captions. A source is
    skipped when it has no track in the requested format, so a missing URL
    is never handed to ``urlopen``.

    Args:
        url: Video URL passed to ``YoutubeDL.extract_info``.
        lang: Preferred subtitle language; a falsy value requests no
            specific language from yt-dlp and falls back as described in
            ``getVttUrlFromSubtitles``.
        vttType: Subtitle file extension to look for.

    Returns:
        The decoded subtitle payload, or None when no suitable track exists.
    """
    ydl_opts = {
        "writesubtitles": True,
        "allsubtitles": True,
        "subtitleslangs": [lang] if lang else [],
        "skip_download": True,
    }
    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        info_dict = ydl.extract_info(url, download=False)
        if not info_dict:
            return None
        for source in ("subtitles", "automatic_captions"):
            tracks = info_dict.get(source)
            if not tracks:
                continue
            subtitle_url = getVttUrlFromSubtitles(tracks, lang, vttType)
            if subtitle_url is None:
                # No track in this format here; try the next source.
                continue
            with ydl.urlopen(subtitle_url) as subtitle:
                return subtitle.read().decode()
    return None


def fetchSubtitleUrls(url: str) -> Optional[dict]:
    """Return the subtitle track mapping that yt-dlp reports for a video.

    Uploaded subtitles are preferred unless their only entry is
    "live_chat"; in that case, or when there are none, automatic captions
    are returned instead.

    Args:
        url: Video URL passed to ``YoutubeDL.extract_info``.

    Returns:
        The "subtitles" or "automatic_captions" mapping from the extracted
        info, or None when neither is usable.
    """
    ydl_opts = {
        "writesubtitles": True,
        "allsubtitles": True,
        "skip_download": True,
    }
    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        info_dict = ydl.extract_info(url, download=False)
        if not info_dict:
            return None
        subtitles = info_dict.get("subtitles")
        if subtitles:
            only_live_chat = len(subtitles) == 1 and "live_chat" in subtitles
            if not only_live_chat:
                return subtitles
        captions = info_dict.get("automatic_captions")
        if captions:
            return captions
    return None


def get_subtitle(url, lang='en'):
    """Download a video's subtitle file and return its text without tags.

    yt-dlp is asked to write the subtitle to the working directory using the
    '%(id)s.%(ext)s' template; the file ``<id>.<lang>.vtt`` is then read and
    every ``<...>`` tag is removed from its content.

    Args:
        url: Video URL passed to ``YoutubeDL.extract_info``.
        lang: Subtitle language code; None is treated as 'en'.

    Returns:
        The subtitle text with tags stripped, or None when the video id is
        missing or any step fails (the error is printed).
    """
    if lang is None:
        lang = 'en'

    # Download subtitles if available
    ydl_opts = {
        'writesubtitles': True,
        'outtmpl': '%(id)s.%(ext)s',
        'subtitleslangs': [lang],
        'skip_download': True,
    }

    try:
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            info_dict = ydl.extract_info(url, download=True)
            video_id = info_dict.get("id", None)
            if video_id is None:
                return None

            subtitle_file = f"{video_id}.{lang}.vtt"
            with open(subtitle_file, 'r', encoding='utf-8') as f:
                subtitle_content = f.read()
            # Drop inline markup such as <c> and timestamp tags.
            subtitle_content = re.sub(r"<[^>]+>", "", subtitle_content)
            return subtitle_content
    except Exception as error:
        # Deliberate best-effort: report the failure and signal "no subtitle".
        print(error)
        return None