From a465ac1fa4271e26bdc659aff32bb9ebeaae7922 Mon Sep 17 00:00:00 2001 From: Étienne Loks Date: Tue, 20 Aug 2019 11:52:02 +0200 Subject: Slice crawls - add a crontab example --- commcrawler/models.py | 2 +- commcrawler/scrapy.py | 5 +++++ 2 files changed, 6 insertions(+), 1 deletion(-) (limited to 'commcrawler') diff --git a/commcrawler/models.py b/commcrawler/models.py index 22e5602..ef99c4f 100644 --- a/commcrawler/models.py +++ b/commcrawler/models.py @@ -73,7 +73,7 @@ class Crawl(models.Model): todo = self.target_nb if todo == 0: return "-" - if self.status == "P": + if self.status in ("P", "A"): done = self.results.filter(status__in=("T", "F")).count() percent = int(done / todo * 100) return "{} % ({}/{})".format(percent, done, todo) diff --git a/commcrawler/scrapy.py b/commcrawler/scrapy.py index 490142c..6af1c0b 100644 --- a/commcrawler/scrapy.py +++ b/commcrawler/scrapy.py @@ -435,6 +435,11 @@ def launch_crawl(crawl_item, excluded_domains=None): process.start(stop_after_crawl=ONLY_FIRST_PAGE) page += 1 crawl_item.crawl_ended = timezone.now() + if ONLY_FIRST_PAGE and page <= page_number: + crawl_item.status = "A" + crawl_item.save() + return + crawl_item.status = "M" crawl_item.save() launch_match(crawl_item) -- cgit v1.2.3