summary refs log tree commit diff
path: root/commcrawler/scrapy.py
diff options
context:
space:
mode:
Diffstat (limited to 'commcrawler/scrapy.py')
-rw-r--r-- commcrawler/scrapy.py 20
1 files changed, 14 insertions, 6 deletions
diff --git a/commcrawler/scrapy.py b/commcrawler/scrapy.py
index 67c9ee3..7e076d6 100644
--- a/commcrawler/scrapy.py
+++ b/commcrawler/scrapy.py
@@ -320,7 +320,18 @@ def launch_crawl(crawl_item, excluded_domains=None):
crawl_item.status = "P"
crawl_item.save()
for target in crawl_item.targets.all():
- response = requests.get(target.url)
+ result_dct = {
+ "crawl_id": crawl_item.pk,
+ "target_id": target.pk,
+ }
+ try:
+ response = requests.get(target.url)
+ except requests.exceptions.SSLError:
+ result, __ = models.CrawlResult.objects.get_or_create(
+ **result_dct)
+ result.bad_ssl = True
+ result.save()
+
redirect = None
url = target.url
if response.history:
@@ -328,11 +339,8 @@ def launch_crawl(crawl_item, excluded_domains=None):
redirect = url
domain = get_domain(url)
if domain in excluded_domains:
- dct = {
- "crawl_id": crawl_item.pk,
- "target_id": target.pk,
- }
- result, __ = models.CrawlResult.objects.get_or_create(**dct)
+ result, __ = models.CrawlResult.objects.get_or_create(
+ **result_dct)
result.redirection = redirect
result.is_online = False
result.status = "F"