diff options
author | Étienne Loks <etienne.loks@iggdrasil.net> | 2019-08-12 16:15:23 +0200 |
---|---|---|
committer | Étienne Loks <etienne.loks@iggdrasil.net> | 2019-08-12 16:15:23 +0200 |
commit | fdb3fdae3e75951467b36ee693313ffbf659c200 (patch) | |
tree | 6c5d41fd1239be16f0e57a5ba59c9868aec53069 /commcrawler | |
parent | da3690b72bfb2b8cef54c7e36bc0666913681c2c (diff) | |
download | Comm-on-net-fdb3fdae3e75951467b36ee693313ffbf659c200.tar.bz2 Comm-on-net-fdb3fdae3e75951467b36ee693313ffbf659c200.zip |
Increment page number
Diffstat (limited to 'commcrawler')
-rw-r--r-- | commcrawler/scrapy.py | 5 |
1 file changed, 3 insertions, 2 deletions
```diff
diff --git a/commcrawler/scrapy.py b/commcrawler/scrapy.py
index 8b91ac6..bc17225 100644
--- a/commcrawler/scrapy.py
+++ b/commcrawler/scrapy.py
@@ -331,7 +331,7 @@ def launch_crawl(crawl_item, excluded_domains=None):
     NUMBER_PER_PAGE = 250
     page = 0
     page_number = total // NUMBER_PER_PAGE
-    while page >= page_number:
+    while page <= page_number:
         process = CrawlerProcess(settings=scrap_settings)
         idx = 0
         current_idx = page * NUMBER_PER_PAGE
@@ -396,7 +396,8 @@ def launch_crawl(crawl_item, excluded_domains=None):
                     )
                 )
         if has_url_to_process:
-            process.start()
+            process.start(stop_after_crawl=False)
+        page += 1
     crawl_item.crawl_ended = timezone.now()
     crawl_item.status = "M"
     crawl_item.save()
```