summary | refs | log | tree | commit | diff
path: root/commcrawler/scrapy.py
diff options
context:
space:
mode:
author: Étienne Loks <etienne.loks@iggdrasil.net> 2019-08-20 11:52:02 +0200
committer: Étienne Loks <etienne.loks@iggdrasil.net> 2019-08-20 11:52:02 +0200
commit: a465ac1fa4271e26bdc659aff32bb9ebeaae7922 (patch)
tree: ef091e8bae6c6c69677e0b869e907c552824c40d /commcrawler/scrapy.py
parent: 01d72fa7c94359049e2a7beb068167cb7f047805 (diff)
download: Comm-on-net-a465ac1fa4271e26bdc659aff32bb9ebeaae7922.tar.bz2
download: Comm-on-net-a465ac1fa4271e26bdc659aff32bb9ebeaae7922.zip
Slice crawls - add a crontab example
Diffstat (limited to 'commcrawler/scrapy.py')
-rw-r--r-- commcrawler/scrapy.py | 5
1 file changed, 5 insertions, 0 deletions
diff --git a/commcrawler/scrapy.py b/commcrawler/scrapy.py
index 490142c..6af1c0b 100644
--- a/commcrawler/scrapy.py
+++ b/commcrawler/scrapy.py
@@ -435,6 +435,11 @@ def launch_crawl(crawl_item, excluded_domains=None):
process.start(stop_after_crawl=ONLY_FIRST_PAGE)
page += 1
crawl_item.crawl_ended = timezone.now()
+ if ONLY_FIRST_PAGE and page <= page_number:
+ crawl_item.status = "A"
+ crawl_item.save()
+ return
+
crawl_item.status = "M"
crawl_item.save()
launch_match(crawl_item)