aka7774 committed on
Commit
3d5f3dc
1 Parent(s): 648f446

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +36 -0
  2. requirements.txt +2 -0
app.py ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import gradio as gr
3
+
4
+ from duckduckgo_search import DDGS
5
+ from urllib.request import urlopen
6
+ from bs4 import BeautifulSoup
7
+
8
+ def fn(text):
9
+ with DDGS() as ddgs:
10
+ results = [r for r in ddgs.text(text, max_results=5)]
11
+
12
+ url = results[0]['href']
13
+
14
+ html = urlopen(url).read()
15
+ soup = BeautifulSoup(html, features="html.parser")
16
+
17
+ # kill all script and style elements
18
+ for script in soup(["script", "style"]):
19
+ script.extract() # rip it out
20
+
21
+ # get text
22
+ text = soup.get_text()
23
+
24
+ # break into lines and remove leading and trailing space on each
25
+ lines = (line.strip() for line in text.splitlines())
26
+ # break multi-headlines into a line each
27
+ chunks = (phrase.strip() for line in lines for phrase in line.split("  "))
28
+ # drop blank lines
29
+ text = '\n'.join(chunk for chunk in chunks if chunk)
30
+
31
+ return text, results
32
+
33
+ gr.Interface(
34
+ fn=fn,
35
+ inputs="text",
36
+ outputs=["text","text"]).launch()
requirements.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ duckduckgo_search
2
+ beautifulsoup4