summaryrefslogtreecommitdiff
path: root/commcrawler
diff options
context:
space:
mode:
Diffstat (limited to 'commcrawler')
-rw-r--r--commcrawler/management/commands/launch_crawl.py1
-rw-r--r--commcrawler/scrapy.py25
2 files changed, 18 insertions, 8 deletions
diff --git a/commcrawler/management/commands/launch_crawl.py b/commcrawler/management/commands/launch_crawl.py
index 883c035..ea49d81 100644
--- a/commcrawler/management/commands/launch_crawl.py
+++ b/commcrawler/management/commands/launch_crawl.py
@@ -1,4 +1,3 @@
-import csv
import sys
from django.core.management.base import BaseCommand
diff --git a/commcrawler/scrapy.py b/commcrawler/scrapy.py
index 7e076d6..30c1fd3 100644
--- a/commcrawler/scrapy.py
+++ b/commcrawler/scrapy.py
@@ -324,13 +324,24 @@ def launch_crawl(crawl_item, excluded_domains=None):
"crawl_id": crawl_item.pk,
"target_id": target.pk,
}
- try:
- response = requests.get(target.url)
- except requests.exceptions.SSLError:
- result, __ = models.CrawlResult.objects.get_or_create(
- **result_dct)
- result.bad_ssl = True
- result.save()
+ response, verify_ssl = None, True
+ while not response:
+ try:
+ response = requests.get(target.url, verify=verify_ssl)
+ except requests.exceptions.SSLError:
+ verify_ssl = False
+ result, __ = models.CrawlResult.objects.get_or_create(
+ **result_dct)
+ result.bad_ssl = True
+ result.save()
+ except requests.exceptions.RequestException:
+ result, __ = models.CrawlResult.objects.get_or_create(
+ **result_dct)
+ result.is_online = False
+ result.save()
+ break
+ if not response:
+ continue
redirect = None
url = target.url