diff options
author | Étienne Loks <etienne.loks@iggdrasil.net> | 2019-08-21 19:50:35 +0200 |
---|---|---|
committer | Étienne Loks <etienne.loks@iggdrasil.net> | 2019-08-21 19:50:35 +0200 |
commit | 6cc1a867e443406818ee30b02961ccc5a340f958 (patch) | |
tree | 707594f12ab3576f7312d83096573f6148faa518 | |
parent | 57ba98588f8b5234bf64adfc88e2038f845d33d5 (diff) | |
download | Comm-on-net-6cc1a867e443406818ee30b02961ccc5a340f958.tar.bz2 Comm-on-net-6cc1a867e443406818ee30b02961ccc5a340f958.zip |
Scrap: process post-process
-rw-r--r-- | commcrawler/scrapy.py | 4 |
1 file changed, 3 insertions, 1 deletion
```diff
diff --git a/commcrawler/scrapy.py b/commcrawler/scrapy.py
index ff8e83a..1280642 100644
--- a/commcrawler/scrapy.py
+++ b/commcrawler/scrapy.py
@@ -367,7 +367,9 @@ def launch_crawl(crawl_item, excluded_domains=None):
     targets = q.values("id")
     page = 0
     page_number = total // NUMBER_PER_PAGE
-    while page <= page_number and not (ONLY_FIRST_PAGE and page):
+    has_url_to_process = True
+    while page <= page_number and not (ONLY_FIRST_PAGE and page) and \
+            has_url_to_process:
         process = CrawlerProcess(settings=scrap_settings)
         idx, delta = 0, 0
         current_idx = page * NUMBER_PER_PAGE
```