From b2b06f6ca9e1128fb355848cd5dd7db0e7ba9b0f Mon Sep 17 00:00:00 2001
From: Étienne Loks <etienne.loks@iggdrasil.net>
Date: Mon, 12 Aug 2019 15:49:58 +0200
Subject: Do not process if no URL is reached

---
 commcrawler/scrapy.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

(limited to 'commcrawler/scrapy.py')

diff --git a/commcrawler/scrapy.py b/commcrawler/scrapy.py
index 213f28d..40fc3b7 100644
--- a/commcrawler/scrapy.py
+++ b/commcrawler/scrapy.py
@@ -335,6 +335,7 @@ def launch_crawl(crawl_item, excluded_domains=None):
         process = CrawlerProcess(settings=scrap_settings)
         idx = 0
         current_idx = page * 50
+        has_url_to_process = False
         while current_idx < total and idx < 50:
             target = models.Target.objects.filter(pk=targets[current_idx]['id'])
             idx += 1
@@ -373,8 +374,9 @@ def launch_crawl(crawl_item, excluded_domains=None):
             else:
                 url = target.url
 
+            has_url_to_process = True
             redirect = None
-            if getattr(response, 'history', None):
+            if response and getattr(response, 'history', None):
                 url = response.url
                 redirect = url
                 domain = get_domain(url)
@@ -392,7 +394,8 @@ def launch_crawl(crawl_item, excluded_domains=None):
                     redirect
                 )
             )
-        process.start()
+        if has_url_to_process:
+            process.start()
     crawl_item.crawl_ended = timezone.now()
     crawl_item.status = "M"
     crawl_item.save()
-- 
cgit v1.2.3