|
import os |
|
|
|
from duckduckgo_search import DDGS |
|
from urllib.request import urlopen |
|
from bs4 import BeautifulSoup |
|
|
|
def run(text):
    """Search for *text* and fetch the page text of the first result.

    Args:
        text: The search query handed to ``ddg``.

    Returns:
        A ``(page_text, results)`` tuple. ``results`` is the value returned
        by ``ddg`` for the query; ``page_text`` is what ``bs4`` returns for
        the ``'href'`` entry of the first result. If the search produced no
        results, ``page_text`` is an empty string.
    """
    results = ddg(text)
    if not results:
        # Nothing to download: indexing results[0] would raise IndexError.
        return "", results

    top_url = results[0]['href']
    # Keep the query parameter intact; the page text gets its own name.
    page_text = bs4(top_url)
    return page_text, results
|
|
|
def ddg(text, max_results=5):
    """Run a DuckDuckGo text search and collect the hits into a list.

    Args:
        text: The search query passed to ``DDGS.text``.
        max_results: Forwarded to ``DDGS.text`` as ``max_results``
            (defaults to 5).

    Returns:
        A list holding every item produced by ``DDGS.text`` for the query.
    """
    with DDGS() as session:
        hits = list(session.text(text, max_results=max_results))
    return hits
|
|
|
def bs4(url):
    """Download *url* and return the text content of the page.

    The response body is parsed with BeautifulSoup's ``html.parser``,
    ``<script>`` and ``<style>`` elements are removed, and the remaining
    text is split into stripped, non-empty pieces joined by newlines.

    Args:
        url: Address passed to ``urllib.request.urlopen``.

    Returns:
        A string with one non-empty piece of page text per line.
    """
    # Use the response as a context manager so the connection is closed
    # even if reading fails, instead of leaking the open handle.
    with urlopen(url) as response:
        html = response.read()

    soup = BeautifulSoup(html, features="html.parser")

    # Remove script and style elements so their contents are not part of
    # the extracted text.
    for element in soup(["script", "style"]):
        element.extract()

    text = soup.get_text()

    # Strip leading/trailing whitespace from every line.
    lines = (line.strip() for line in text.splitlines())

    # Break each line into pieces on the separator below.
    # NOTE(review): the separator is a single space in this copy of the
    # file, which puts every word on its own line. The widely used form of
    # this recipe splits on two spaces; the paste may have collapsed
    # whitespace -- confirm against the real source before changing it.
    chunks = (phrase.strip() for line in lines for phrase in line.split(" "))

    # Drop empty pieces and join the rest, one per line.
    return '\n'.join(chunk for chunk in chunks if chunk)
|
|