diff options
Diffstat (limited to 'commcrawler/scrapy.py')
-rw-r--r-- | commcrawler/scrapy.py | 20 |
1 files changed, 14 insertions, 6 deletions
diff --git a/commcrawler/scrapy.py b/commcrawler/scrapy.py index 67c9ee3..7e076d6 100644 --- a/commcrawler/scrapy.py +++ b/commcrawler/scrapy.py @@ -320,7 +320,18 @@ def launch_crawl(crawl_item, excluded_domains=None): crawl_item.status = "P" crawl_item.save() for target in crawl_item.targets.all(): - response = requests.get(target.url) + result_dct = { + "crawl_id": crawl_item.pk, + "target_id": target.pk, + } + try: + response = requests.get(target.url) + except requests.exceptions.SSLError: + result, __ = models.CrawlResult.objects.get_or_create( + **result_dct) + result.bad_ssl = True + result.save() + redirect = None url = target.url if response.history: @@ -328,11 +339,8 @@ def launch_crawl(crawl_item, excluded_domains=None): redirect = url domain = get_domain(url) if domain in excluded_domains: - dct = { - "crawl_id": crawl_item.pk, - "target_id": target.pk, - } - result, __ = models.CrawlResult.objects.get_or_create(**dct) + result, __ = models.CrawlResult.objects.get_or_create( + **result_dct) result.redirection = redirect result.is_online = False result.status = "F" |