diff options
author | Étienne Loks <etienne.loks@iggdrasil.net> | 2019-08-09 15:40:07 +0200 |
---|---|---|
committer | Étienne Loks <etienne.loks@iggdrasil.net> | 2019-08-09 15:40:07 +0200 |
commit | bcef92c9c57305daa9494e6d86af98b9503af447 (patch) | |
tree | 94eb167fa66c9a6385cd4eb2f0a7a6ed491ad422 /commcrawler/scrapy.py | |
parent | d4b05619f3313f0d980b707d739bed2fe89b5a77 (diff) | |
download | Comm-on-net-bcef92c9c57305daa9494e6d86af98b9503af447.tar.bz2 Comm-on-net-bcef92c9c57305daa9494e6d86af98b9503af447.zip |
Manage bad SSL certificates
Diffstat (limited to 'commcrawler/scrapy.py')
-rw-r--r-- | commcrawler/scrapy.py | 20 |
1 file changed, 14 insertions, 6 deletions
```diff
diff --git a/commcrawler/scrapy.py b/commcrawler/scrapy.py
index 67c9ee3..7e076d6 100644
--- a/commcrawler/scrapy.py
+++ b/commcrawler/scrapy.py
@@ -320,7 +320,18 @@ def launch_crawl(crawl_item, excluded_domains=None):
     crawl_item.status = "P"
     crawl_item.save()
     for target in crawl_item.targets.all():
-        response = requests.get(target.url)
+        result_dct = {
+            "crawl_id": crawl_item.pk,
+            "target_id": target.pk,
+        }
+        try:
+            response = requests.get(target.url)
+        except requests.exceptions.SSLError:
+            result, __ = models.CrawlResult.objects.get_or_create(
+                **result_dct)
+            result.bad_ssl = True
+            result.save()
+
         redirect = None
         url = target.url
         if response.history:
@@ -328,11 +339,8 @@ def launch_crawl(crawl_item, excluded_domains=None):
             redirect = url
         domain = get_domain(url)
         if domain in excluded_domains:
-            dct = {
-                "crawl_id": crawl_item.pk,
-                "target_id": target.pk,
-            }
-            result, __ = models.CrawlResult.objects.get_or_create(**dct)
+            result, __ = models.CrawlResult.objects.get_or_create(
+                **result_dct)
             result.redirection = redirect
             result.is_online = False
             result.status = "F"
```