summary | refs | log | tree | commit | diff
path: root/commcrawler/scrapy.py
diff options
context:
space:
mode:
author Étienne Loks <etienne.loks@iggdrasil.net> 2019-08-09 15:40:07 +0200
committer Étienne Loks <etienne.loks@iggdrasil.net> 2019-08-09 15:40:07 +0200
commit bcef92c9c57305daa9494e6d86af98b9503af447 (patch)
tree 94eb167fa66c9a6385cd4eb2f0a7a6ed491ad422 /commcrawler/scrapy.py
parent d4b05619f3313f0d980b707d739bed2fe89b5a77 (diff)
download Comm-on-net-bcef92c9c57305daa9494e6d86af98b9503af447.tar.bz2
Comm-on-net-bcef92c9c57305daa9494e6d86af98b9503af447.zip
Manage bad ssl certificate
Diffstat (limited to 'commcrawler/scrapy.py')
-rw-r--r-- commcrawler/scrapy.py 20
1 files changed, 14 insertions, 6 deletions
diff --git a/commcrawler/scrapy.py b/commcrawler/scrapy.py
index 67c9ee3..7e076d6 100644
--- a/commcrawler/scrapy.py
+++ b/commcrawler/scrapy.py
@@ -320,7 +320,18 @@ def launch_crawl(crawl_item, excluded_domains=None):
crawl_item.status = "P"
crawl_item.save()
for target in crawl_item.targets.all():
- response = requests.get(target.url)
+ result_dct = {
+ "crawl_id": crawl_item.pk,
+ "target_id": target.pk,
+ }
+ try:
+ response = requests.get(target.url)
+ except requests.exceptions.SSLError:
+ result, __ = models.CrawlResult.objects.get_or_create(
+ **result_dct)
+ result.bad_ssl = True
+ result.save()
+
redirect = None
url = target.url
if response.history:
@@ -328,11 +339,8 @@ def launch_crawl(crawl_item, excluded_domains=None):
redirect = url
domain = get_domain(url)
if domain in excluded_domains:
- dct = {
- "crawl_id": crawl_item.pk,
- "target_id": target.pk,
- }
- result, __ = models.CrawlResult.objects.get_or_create(**dct)
+ result, __ = models.CrawlResult.objects.get_or_create(
+ **result_dct)
result.redirection = redirect
result.is_online = False
result.status = "F"