lanbogao committed on
Commit
c997914
1 Parent(s): d030b89

Add try/except to fetchSubtitle and fetchSubtitleUrls.

Browse files
Files changed (1) hide show
  1. fetchYoutubeSubtitle.py +38 -31
fetchYoutubeSubtitle.py CHANGED
@@ -24,28 +24,21 @@ def getUrlFromSubtitles(item, lang='en', subType="vtt"):
24
  return None
25
 
26
  l = lang if lang in langs else ('en' if 'en' in langs else list(langs)[0] )
27
- print("getUrlFromSubtitles l: %s, item: %s" % (l, item))
 
28
 
29
  for subtitle in item[l]:
30
- print("getUrlFromSubtitles subtitle: %s" % subtitle)
31
  if l != "live_chat" and subType =="xml":
32
  return subtitle.get("url").replace("fmt="+subtitle.get("ext"),"")
33
  if subtitle.get("ext") == subType:
34
  return subtitle.get("url")
35
  return None
36
 
37
- async def fetchSubtitle(url: str, lang: Optional[str] = 'en', subType: Optional[str] = "vtt") -> Optional[str]:
38
- if subType == "srt":
39
- subtitle = await fetchSubtitlebyType(url, lang, subType, True)
40
- if subtitle:
41
- return subtitle
42
- subtitle = await fetchSubtitlebyType(url, lang, "xml", True)
43
- print(subtitle)
44
- return xml_caption_to_srt(subtitle)
45
- else:
46
- return await fetchSubtitlebyType(url, lang, subType, True)
47
 
48
- async def fetchSubtitlebyType(url: str, lang: Optional[str] = 'en', subType="vtt", decode: bool = False) -> Optional[str]:
49
  ydl_opts = {
50
  "writesubtitles": True,
51
  "allsubtitles": True,
@@ -54,16 +47,25 @@ async def fetchSubtitlebyType(url: str, lang: Optional[str] = 'en', subType="vtt
54
  "socket_timeout": 20
55
  }
56
 
57
- with yt_dlp.YoutubeDL(ydl_opts) as ydl:
58
- info_dict = ydl.extract_info(url, download=False)
59
- for subtitle_item in ["subtitles", "automatic_captions"]: # "requested_subtitles" item is dict
60
- if info_dict.get(subtitle_item) :
61
- subtitle_url = getUrlFromSubtitles(info_dict.get(subtitle_item), lang, subType)
62
- if subtitle_url:
63
- with ydl.urlopen(subtitle_url) as subtitle:
64
- return subtitle.read().decode() if decode else subtitle.read()
65
 
66
- return None
 
 
 
 
 
 
 
 
 
 
67
 
68
  def float_to_srt_time_format(d: float) -> str:
69
  """Convert decimal durations into proper srt format.
@@ -109,13 +111,18 @@ async def fetchSubtitleUrls(url: str) -> json:
109
  "allsubtitles": True,
110
  "skip_download": True,
111
  }
 
 
 
 
 
112
 
113
- with yt_dlp.YoutubeDL(ydl_opts) as ydl:
114
- info_dict = ydl.extract_info(url, download=False)
115
- if info_dict.get("subtitles"):
116
- langs = info_dict.get("subtitles").keys()
117
- if not (len(langs) == 1 and "live_chat" in langs):
118
- return info_dict.get("subtitles")
119
- if info_dict.get("automatic_captions"):
120
- return info_dict.get("automatic_captions")
121
- return None
 
24
  return None
25
 
26
  l = lang if lang in langs else ('en' if 'en' in langs else list(langs)[0] )
27
+ if l is None:
28
+ return
29
 
30
  for subtitle in item[l]:
31
+ # print("getUrlFromSubtitles subtitle: %s" % subtitle)
32
  if l != "live_chat" and subType =="xml":
33
  return subtitle.get("url").replace("fmt="+subtitle.get("ext"),"")
34
  if subtitle.get("ext") == subType:
35
  return subtitle.get("url")
36
  return None
37
 
38
+ async def fetchSubtitle(url: str, lang: Optional[str] = 'en', subType: Optional[str] = "vtt") -> dict:
39
+ return await fetchSubtitlebyType(url, lang, subType)
 
 
 
 
 
 
 
 
40
 
41
+ async def fetchSubtitlebyType(url: str, lang: Optional[str] = 'en', subType="vtt") -> dict:
42
  ydl_opts = {
43
  "writesubtitles": True,
44
  "allsubtitles": True,
 
47
  "socket_timeout": 20
48
  }
49
 
50
+ title = "unknow"
51
+ try:
52
+ with yt_dlp.YoutubeDL(ydl_opts) as ydl:
53
+ info_dict = ydl.extract_info(url, download=False)
54
+ title = info_dict.get("title", "unknow")
55
+ if info_dict.get("extractor") == "youtube" and subType == "srt":
56
+ subType = "xml"
 
57
 
58
+ for subtitle_item in ["subtitles", "automatic_captions"]: # "requested_subtitles" item is dict
59
+ if info_dict.get(subtitle_item):
60
+ subtitle_url = getUrlFromSubtitles(info_dict.get(subtitle_item), lang, subType)
61
+ if subtitle_url:
62
+ with ydl.urlopen(subtitle_url) as response:
63
+ subtitle = xml_caption_to_srt(response.read().decode()) if subType == "xml" else response.read().decode()
64
+ print("url{}, title:{} len(subtitle): {}".format(url, title, len(subtitle)))
65
+ return {"title": title, "subtitle": subtitle}
66
+ except Exception as e:
67
+ return {"error": str(e)}
68
+ return {"title": title,"error": "No subtitles"}
69
 
70
  def float_to_srt_time_format(d: float) -> str:
71
  """Convert decimal durations into proper srt format.
 
111
  "allsubtitles": True,
112
  "skip_download": True,
113
  }
114
+ title = "unknow"
115
+ try:
116
+ with yt_dlp.YoutubeDL(ydl_opts) as ydl:
117
+ info_dict = ydl.extract_info(url, download=False)
118
+ title = info_dict.get("title", "unknow")
119
 
120
+ if info_dict.get("subtitles"):
121
+ langs = info_dict.get("subtitles").keys()
122
+ if not (len(langs) == 1 and "live_chat" in langs):
123
+ return {"title": info_dict.get("title", "unknow"), "subtitles": info_dict.get("subtitles")}
124
+ if info_dict.get("automatic_captions"):
125
+ return {"title": info_dict.get("title", "unknow"), "subtitles": info_dict.get("automatic_captions")}
126
+ except Exception as e:
127
+ return {"error": str(e)}
128
+ return {"title": title,"error": "No subtitles"}