From a465ac1fa4271e26bdc659aff32bb9ebeaae7922 Mon Sep 17 00:00:00 2001
From: Étienne Loks <etienne.loks@iggdrasil.net>
Date: Tue, 20 Aug 2019 11:52:02 +0200
Subject: Slice crawls - add a crontab example

---
 commcrawler/models.py | 2 +-
 commcrawler/scrapy.py | 5 +++++
 2 files changed, 6 insertions(+), 1 deletion(-)

(limited to 'commcrawler')

diff --git a/commcrawler/models.py b/commcrawler/models.py
index 22e5602..ef99c4f 100644
--- a/commcrawler/models.py
+++ b/commcrawler/models.py
@@ -73,7 +73,7 @@ class Crawl(models.Model):
         todo = self.target_nb
         if todo == 0:
             return "-"
-        if self.status == "P":
+        if self.status in ("P", "A"):
             done = self.results.filter(status__in=("T", "F")).count()
             percent = int(done / todo * 100)
             return "{} % ({}/{})".format(percent, done, todo)
diff --git a/commcrawler/scrapy.py b/commcrawler/scrapy.py
index 490142c..6af1c0b 100644
--- a/commcrawler/scrapy.py
+++ b/commcrawler/scrapy.py
@@ -435,6 +435,11 @@ def launch_crawl(crawl_item, excluded_domains=None):
             process.start(stop_after_crawl=ONLY_FIRST_PAGE)
         page += 1
     crawl_item.crawl_ended = timezone.now()
+    if ONLY_FIRST_PAGE and page <= page_number:
+        crawl_item.status = "A"
+        crawl_item.save()
+        return
+
     crawl_item.status = "M"
     crawl_item.save()
     launch_match(crawl_item)
-- 
cgit v1.2.3