summary refs log tree commit diff
path: root/commcrawler/scrapy.py
diff options
context:
space:
mode:
Diffstat (limited to 'commcrawler/scrapy.py')
-rw-r--r-- commcrawler/scrapy.py 7
1 files changed, 5 insertions, 2 deletions
diff --git a/commcrawler/scrapy.py b/commcrawler/scrapy.py
index 213f28d..40fc3b7 100644
--- a/commcrawler/scrapy.py
+++ b/commcrawler/scrapy.py
@@ -335,6 +335,7 @@ def launch_crawl(crawl_item, excluded_domains=None):
process = CrawlerProcess(settings=scrap_settings)
idx = 0
current_idx = page * 50
+ has_url_to_process = False
while current_idx < total and idx < 50:
target = models.Target.objects.filter(pk=targets[current_idx]['id'])
idx += 1
@@ -373,8 +374,9 @@ def launch_crawl(crawl_item, excluded_domains=None):
else:
url = target.url
+ has_url_to_process = True
redirect = None
- if getattr(response, 'history', None):
+ if response and getattr(response, 'history', None):
url = response.url
redirect = url
domain = get_domain(url)
@@ -392,7 +394,8 @@ def launch_crawl(crawl_item, excluded_domains=None):
redirect
)
)
- process.start()
+ if has_url_to_process:
+ process.start()
crawl_item.crawl_ended = timezone.now()
crawl_item.status = "M"
crawl_item.save()