lanbogao committed on
Commit
4868530
1 Parent(s): 0806569

Return None if the XML contains no text. This handles videos such as https://youtu.be/LI0mzC6sl7w, which has no subtitles and whose automatic_captions are gibberish in every format except XML (where the text part is empty).

Browse files
Files changed (1) hide show
  1. fetchYoutubeSubtitle.py +4 -1
fetchYoutubeSubtitle.py CHANGED
@@ -227,6 +227,8 @@ def xml_caption_to_srt(xml_captions: str) -> str:
227
  caption = unescape(
228
  text.replace("\n", " ").replace(" ", " "),
229
  )
 
 
230
  try:
231
  duration = float(child.attrib["dur"])
232
  except KeyError:
@@ -241,7 +243,8 @@ def xml_caption_to_srt(xml_captions: str) -> str:
241
  text=caption,
242
  )
243
  segments.append(line)
244
- return "\n".join(segments).strip()
 
245
 
246
 
247
  async def fetchSubtitleUrls(url: str, proxy: Optional[str] = None) -> json:
 
227
  caption = unescape(
228
  text.replace("\n", " ").replace(" ", " "),
229
  )
230
+ if len(caption) == 0:
231
+ continue
232
  try:
233
  duration = float(child.attrib["dur"])
234
  except KeyError:
 
243
  text=caption,
244
  )
245
  segments.append(line)
246
+ # return None if no text in xml
247
+ return "\n".join(segments).strip() if len(segments) > 0 else None
248
 
249
 
250
  async def fetchSubtitleUrls(url: str, proxy: Optional[str] = None) -> json: