summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Étienne Loks <etienne.loks@iggdrasil.net> 2019-08-21 19:50:35 +0200
committer: Étienne Loks <etienne.loks@iggdrasil.net> 2019-08-21 19:50:35 +0200
commit: 6cc1a867e443406818ee30b02961ccc5a340f958 (patch)
tree: 707594f12ab3576f7312d83096573f6148faa518
parent: 57ba98588f8b5234bf64adfc88e2038f845d33d5 (diff)
download: Comm-on-net-6cc1a867e443406818ee30b02961ccc5a340f958.tar.bz2
Comm-on-net-6cc1a867e443406818ee30b02961ccc5a340f958.zip
Scrap: process post-process
-rw-r--r-- commcrawler/scrapy.py 4
1 file changed, 3 insertions, 1 deletion
diff --git a/commcrawler/scrapy.py b/commcrawler/scrapy.py
index ff8e83a..1280642 100644
--- a/commcrawler/scrapy.py
+++ b/commcrawler/scrapy.py
@@ -367,7 +367,9 @@ def launch_crawl(crawl_item, excluded_domains=None):
targets = q.values("id")
page = 0
page_number = total // NUMBER_PER_PAGE
- while page <= page_number and not (ONLY_FIRST_PAGE and page):
+ has_url_to_process = True
+ while page <= page_number and not (ONLY_FIRST_PAGE and page) and \
+ has_url_to_process:
process = CrawlerProcess(settings=scrap_settings)
idx, delta = 0, 0
current_idx = page * NUMBER_PER_PAGE