summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Étienne Loks <etienne.loks@iggdrasil.net> 2019-08-09 15:48:34 +0200
committer: Étienne Loks <etienne.loks@iggdrasil.net> 2019-08-09 15:48:34 +0200
commit: 125f96e865d0ed6504ad90e4800389c1c56a4aba (patch)
tree: fedacc5dd23ea1880f45e3e9e89127e524532679
parent: 5c6b2e1c6409b21a85dfdd0bf4101310214bfc21 (diff)
download: Comm-on-net-125f96e865d0ed6504ad90e4800389c1c56a4aba.tar.bz2
download: Comm-on-net-125f96e865d0ed6504ad90e4800389c1c56a4aba.zip
Manage other requests exceptions
-rw-r--r-- commcrawler/management/commands/launch_crawl.py | 1
-rw-r--r-- commcrawler/scrapy.py | 25
2 files changed, 18 insertions, 8 deletions
diff --git a/commcrawler/management/commands/launch_crawl.py b/commcrawler/management/commands/launch_crawl.py
index 883c035..ea49d81 100644
--- a/commcrawler/management/commands/launch_crawl.py
+++ b/commcrawler/management/commands/launch_crawl.py
@@ -1,4 +1,3 @@
-import csv
import sys
from django.core.management.base import BaseCommand
diff --git a/commcrawler/scrapy.py b/commcrawler/scrapy.py
index 7e076d6..30c1fd3 100644
--- a/commcrawler/scrapy.py
+++ b/commcrawler/scrapy.py
@@ -324,13 +324,24 @@ def launch_crawl(crawl_item, excluded_domains=None):
"crawl_id": crawl_item.pk,
"target_id": target.pk,
}
- try:
- response = requests.get(target.url)
- except requests.exceptions.SSLError:
- result, __ = models.CrawlResult.objects.get_or_create(
- **result_dct)
- result.bad_ssl = True
- result.save()
+ response, verify_ssl = None, True
+ while not response:
+ try:
+ response = requests.get(target.url, verify=verify_ssl)
+ except requests.exceptions.SSLError:
+ verify_ssl = False
+ result, __ = models.CrawlResult.objects.get_or_create(
+ **result_dct)
+ result.bad_ssl = True
+ result.save()
+ except requests.exceptions.RequestException:
+ result, __ = models.CrawlResult.objects.get_or_create(
+ **result_dct)
+ result.is_online = False
+ result.save()
+ break
+ if not response:
+ continue
redirect = None
url = target.url