ksvmuralidhar
committed on
Commit
•
868467a
1
Parent(s):
6cd4a90
Update scraper.py
Browse files- scraper.py +3 -3
scraper.py
CHANGED
@@ -6,7 +6,7 @@ import logging
|
|
6 |
import os
|
7 |
|
8 |
|
9 |
-
|
10 |
try:
|
11 |
driver = None
|
12 |
logging.warning(f"Initiated Scraping {url}")
|
@@ -41,13 +41,13 @@ async def get_text(url, n_words=15):
|
|
41 |
return "", err_msg
|
42 |
|
43 |
|
44 |
-
|
45 |
scraped_text = ""
|
46 |
scrape_error = ""
|
47 |
try:
|
48 |
n_tries = 1
|
49 |
while (n_tries <= max_retries) and (scraped_text == ""):
|
50 |
-
scraped_text, scrape_error =
|
51 |
n_tries += 1
|
52 |
return scraped_text, scrape_error
|
53 |
except Exception as e:
|
|
|
6 |
import os
|
7 |
|
8 |
|
9 |
+
def get_text(url, n_words=15):
|
10 |
try:
|
11 |
driver = None
|
12 |
logging.warning(f"Initiated Scraping {url}")
|
|
|
41 |
return "", err_msg
|
42 |
|
43 |
|
44 |
+
def scrape_text(url, n_words=15,max_retries=2):
|
45 |
scraped_text = ""
|
46 |
scrape_error = ""
|
47 |
try:
|
48 |
n_tries = 1
|
49 |
while (n_tries <= max_retries) and (scraped_text == ""):
|
50 |
+
scraped_text, scrape_error = get_text(url=url, n_words=n_words)
|
51 |
n_tries += 1
|
52 |
return scraped_text, scrape_error
|
53 |
except Exception as e:
|