From bcef92c9c57305daa9494e6d86af98b9503af447 Mon Sep 17 00:00:00 2001 From: Étienne Loks Date: Fri, 9 Aug 2019 15:40:07 +0200 Subject: Manage bad ssl certificate --- commcrawler/scrapy.py | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) (limited to 'commcrawler/scrapy.py') diff --git a/commcrawler/scrapy.py b/commcrawler/scrapy.py index 67c9ee3..7e076d6 100644 --- a/commcrawler/scrapy.py +++ b/commcrawler/scrapy.py @@ -320,7 +320,18 @@ def launch_crawl(crawl_item, excluded_domains=None): crawl_item.status = "P" crawl_item.save() for target in crawl_item.targets.all(): - response = requests.get(target.url) + result_dct = { + "crawl_id": crawl_item.pk, + "target_id": target.pk, + } + try: + response = requests.get(target.url) + except requests.exceptions.SSLError: + result, __ = models.CrawlResult.objects.get_or_create( + **result_dct) + result.bad_ssl = True + result.save() + redirect = None url = target.url if response.history: @@ -328,11 +339,8 @@ def launch_crawl(crawl_item, excluded_domains=None): redirect = url domain = get_domain(url) if domain in excluded_domains: - dct = { - "crawl_id": crawl_item.pk, - "target_id": target.pk, - } - result, __ = models.CrawlResult.objects.get_or_create(**dct) + result, __ = models.CrawlResult.objects.get_or_create( + **result_dct) result.redirection = redirect result.is_online = False result.status = "F" -- cgit v1.2.3