diff options
author | Étienne Loks <etienne.loks@iggdrasil.net> | 2019-08-09 15:48:34 +0200 |
---|---|---|
committer | Étienne Loks <etienne.loks@iggdrasil.net> | 2019-08-09 15:48:34 +0200 |
commit | 125f96e865d0ed6504ad90e4800389c1c56a4aba (patch) | |
tree | fedacc5dd23ea1880f45e3e9e89127e524532679 | |
parent | 5c6b2e1c6409b21a85dfdd0bf4101310214bfc21 (diff) | |
download | Comm-on-net-125f96e865d0ed6504ad90e4800389c1c56a4aba.tar.bz2 Comm-on-net-125f96e865d0ed6504ad90e4800389c1c56a4aba.zip |
Manage other requests exceptions
-rw-r--r-- | commcrawler/management/commands/launch_crawl.py | 1 | ||||
-rw-r--r-- | commcrawler/scrapy.py | 25 |
2 files changed, 18 insertions, 8 deletions
```diff
diff --git a/commcrawler/management/commands/launch_crawl.py b/commcrawler/management/commands/launch_crawl.py
index 883c035..ea49d81 100644
--- a/commcrawler/management/commands/launch_crawl.py
+++ b/commcrawler/management/commands/launch_crawl.py
@@ -1,4 +1,3 @@
-import csv
 import sys
 
 from django.core.management.base import BaseCommand
diff --git a/commcrawler/scrapy.py b/commcrawler/scrapy.py
index 7e076d6..30c1fd3 100644
--- a/commcrawler/scrapy.py
+++ b/commcrawler/scrapy.py
@@ -324,13 +324,24 @@ def launch_crawl(crawl_item, excluded_domains=None):
             "crawl_id": crawl_item.pk,
             "target_id": target.pk,
         }
-        try:
-            response = requests.get(target.url)
-        except requests.exceptions.SSLError:
-            result, __ = models.CrawlResult.objects.get_or_create(
-                **result_dct)
-            result.bad_ssl = True
-            result.save()
+        response, verify_ssl = None, True
+        while not response:
+            try:
+                response = requests.get(target.url, verify=verify_ssl)
+            except requests.exceptions.SSLError:
+                verify_ssl = False
+                result, __ = models.CrawlResult.objects.get_or_create(
+                    **result_dct)
+                result.bad_ssl = True
+                result.save()
+            except requests.exceptions.RequestException:
+                result, __ = models.CrawlResult.objects.get_or_create(
+                    **result_dct)
+                result.is_online = False
+                result.save()
+                break
+        if not response:
+            continue
 
         redirect = None
         url = target.url
```