From f7fe0a457eb92611731ba93959f3fca0ceb16528 Mon Sep 17 00:00:00 2001
From: Étienne Loks
Date: Fri, 9 Aug 2019 17:06:00 +0200
Subject: Add a timeout to requests.get

---
 commcrawler/scrapy.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/commcrawler/scrapy.py b/commcrawler/scrapy.py
index 9ff25c9..7147949 100644
--- a/commcrawler/scrapy.py
+++ b/commcrawler/scrapy.py
@@ -338,7 +338,8 @@ def launch_crawl(crawl_item, excluded_domains=None):
     response, verify_ssl = None, True
     while response is None:
         try:
-            response = requests.get(target.url, verify=verify_ssl)
+            response = requests.get(target.url, verify=verify_ssl,
+                                    timeout=20)
         except requests.exceptions.SSLError:
             if not verify_ssl:  # new error on SSL
                 response = "Try..."  # scrapy is more permissive
--
cgit v1.2.3
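For context, the loop this hunk touches retries the request without certificate verification after an SSLError; with the new timeout argument, a server that never answers now raises requests.exceptions.Timeout instead of blocking the crawl indefinitely. A minimal, self-contained sketch of that pattern follows; the fetch_with_ssl_fallback helper is hypothetical, and only the timeout=20 value and the verify fallback come from the diff (the real code stores a placeholder response rather than re-raising):

    import requests

    def fetch_with_ssl_fallback(url, timeout=20):
        """Fetch a URL, retrying once without certificate verification on SSLError."""
        verify_ssl = True
        response = None
        while response is None:
            try:
                # timeout= bounds both connect and read; without it a silent
                # server can hang the caller forever.
                response = requests.get(url, verify=verify_ssl, timeout=timeout)
            except requests.exceptions.SSLError:
                if not verify_ssl:
                    # Already retried without verification: give up.
                    raise
                verify_ssl = False
            except requests.exceptions.Timeout:
                # Raised once the 20-second budget is exceeded.
                raise
        return response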