CASLL committed on
Commit
ca883a2
1 Parent(s): 6234a4c

Create summarizer.py

Browse files
Files changed (1) hide show
  1. summarizer.py +36 -0
summarizer.py ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from youtube_transcript_api import YouTubeTranscriptApi
2
+ from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
3
+
4
def _extract_video_id(link):
    """Return the YouTube video id found in *link*, or None if there is none.

    Recognised shapes are ``...watch?v=<id>`` (with any additional query
    parameters, in any order), ``youtu.be/<id>`` and the path forms
    ``/embed/<id>``, ``/shorts/<id>``, ``/live/<id>`` and ``/v/<id>``.
    A missing URL scheme is tolerated.
    """
    from urllib.parse import parse_qs, urlparse

    if not isinstance(link, str) or not link.strip():
        return None

    link = link.strip()
    if "://" not in link:
        # urlparse only fills in the host when a scheme is present.
        link = "https://" + link

    try:
        parsed = urlparse(link)
        host = (parsed.hostname or "").lower()
    except ValueError:
        # urlparse rejects some malformed hosts (e.g. a broken IPv6 literal).
        return None

    segments = [part for part in parsed.path.split("/") if part]

    # Short links carry the id as the first path segment.
    if host == "youtu.be" or host.endswith(".youtu.be"):
        return segments[0] if segments else None

    # Regular watch links carry the id in the "v" query parameter.
    ids = parse_qs(parsed.query).get("v")
    if ids and ids[0]:
        return ids[0]

    if len(segments) >= 2 and segments[0] in ("embed", "shorts", "live", "v"):
        return segments[1]

    return None


def Summarizer(link, model):
    """Summarize the transcript of a YouTube video.

    Args:
        link: URL of the video. The video id is extracted from it.
        model: Name of the summarization model to use; one of
            ``"Pegasus"``, ``"mT5"`` or ``"BART"``.

    Returns:
        The generated summary as a string. This function does not raise for
        expected failures; it returns a human-readable message instead:
        the "Transcript is not available" message when no video id can be
        extracted from *link* or when fetching/summarizing fails, and an
        "Unsupported model" message when *model* is not a known name.
    """
    import logging

    unavailable = "TranscriptsDisabled: Transcript is not available \nTry another video"
    checkpoints = {
        "Pegasus": "google/pegasus-large",
        "mT5": "csebuetnlp/mT5_multilingual_XLSum",
        "BART": "sshleifer/distilbart-cnn-12-6",
    }

    # Validate both inputs before doing any network or model work.
    video_id = _extract_video_id(link)
    if video_id is None:
        return unavailable

    checkpoint = checkpoints.get(model) if isinstance(model, str) else None
    if checkpoint is None:
        return f"Unsupported model: {model!r}. Choose one of: {', '.join(checkpoints)}"

    try:
        transcript = YouTubeTranscriptApi.get_transcript(video_id)
        transcript_text = " ".join(entry["text"] for entry in transcript)

        tokenizer = AutoTokenizer.from_pretrained(checkpoint)
        # Kept under a separate name so the `model` argument is not shadowed.
        seq2seq_model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)

        # The transcript is truncated to at most 1024 tokens before generation.
        inputs = tokenizer(
            transcript_text,
            max_length=1024,
            truncation=True,
            return_tensors="pt",
        )

        summary_ids = seq2seq_model.generate(inputs["input_ids"])
        summary = tokenizer.batch_decode(
            summary_ids,
            skip_special_tokens=True,
            clean_up_tokenization_spaces=False,
        )
        return summary[0]
    except Exception:
        # Callers expect a string, so keep the catch-all at this boundary,
        # but record the real cause instead of discarding it.
        logging.getLogger(__name__).exception(
            "Summarization failed for video id %r", video_id
        )
        return unavailable