Upload 2 files
Browse files- app.py +36 -0
- requirements.txt +2 -0
app.py
ADDED
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import gradio as gr
|
3 |
+
|
4 |
+
from duckduckgo_search import DDGS
|
5 |
+
from urllib.request import urlopen
|
6 |
+
from bs4 import BeautifulSoup
|
7 |
+
|
8 |
+
def fn(text):
    """Search DuckDuckGo for *text* and return the top hit's visible page text.

    Args:
        text: The search query string.

    Returns:
        A ``(page_text, results)`` tuple. ``results`` is the list of entries
        returned by the search (at most five are requested). ``page_text``
        is the text of the first result's page with ``<script>``/``<style>``
        content removed, surrounding whitespace trimmed and blank pieces
        dropped, one piece per line. If the search returns nothing,
        ``page_text`` is a short notice and ``results`` is empty.

    Errors raised while searching or fetching the page (for example by
    ``urlopen``) propagate to the caller.
    """
    with DDGS() as ddgs:
        results = list(ddgs.text(text, max_results=5))

    # Guard against an empty result set instead of failing on results[0].
    if not results:
        return "No search results found.", results

    url = results[0]['href']

    # Close the HTTP response deterministically, and bound the wait so an
    # unresponsive host cannot block the request forever.
    with urlopen(url, timeout=10) as response:
        html = response.read()
    soup = BeautifulSoup(html, features="html.parser")

    # Script and style elements carry no readable text; remove them.
    for element in soup(["script", "style"]):
        element.extract()

    raw_text = soup.get_text()

    # Trim leading and trailing whitespace on every line.
    lines = (line.strip() for line in raw_text.splitlines())
    # Break multi-headlines (phrases separated by a run of two spaces) into
    # a line each; splitting on a single space would emit one word per line.
    chunks = (phrase.strip() for line in lines for phrase in line.split("  "))
    # Drop the blank pieces and join the rest, one per line.
    page_text = '\n'.join(chunk for chunk in chunks if chunk)

    return page_text, results
|
32 |
+
|
33 |
+
# Wrap fn in a UI with one text input and two text outputs
# (page text, raw search results), then start serving it.
gr.Interface(fn=fn, inputs="text", outputs=["text", "text"]).launch()
|
requirements.txt
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
duckduckgo_search
|
2 |
+
beautifulsoup4
|