summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Étienne Loks <etienne.loks@iggdrasil.net> 2019-08-12 16:15:23 +0200
committer: Étienne Loks <etienne.loks@iggdrasil.net> 2019-08-12 16:15:23 +0200
commit: fdb3fdae3e75951467b36ee693313ffbf659c200 (patch)
tree: 6c5d41fd1239be16f0e57a5ba59c9868aec53069
parent: da3690b72bfb2b8cef54c7e36bc0666913681c2c (diff)
download: Comm-on-net-fdb3fdae3e75951467b36ee693313ffbf659c200.tar.bz2
download: Comm-on-net-fdb3fdae3e75951467b36ee693313ffbf659c200.zip
Increment page number
-rw-r--r-- commcrawler/scrapy.py 5
1 file changed, 3 insertions, 2 deletions
diff --git a/commcrawler/scrapy.py b/commcrawler/scrapy.py
index 8b91ac6..bc17225 100644
--- a/commcrawler/scrapy.py
+++ b/commcrawler/scrapy.py
@@ -331,7 +331,7 @@ def launch_crawl(crawl_item, excluded_domains=None):
NUMBER_PER_PAGE = 250
page = 0
page_number = total // NUMBER_PER_PAGE
- while page >= page_number:
+ while page <= page_number:
process = CrawlerProcess(settings=scrap_settings)
idx = 0
current_idx = page * NUMBER_PER_PAGE
@@ -396,7 +396,8 @@ def launch_crawl(crawl_item, excluded_domains=None):
)
)
if has_url_to_process:
- process.start()
+ process.start(stop_after_crawl=False)
+ page += 1
crawl_item.crawl_ended = timezone.now()
crawl_item.status = "M"
crawl_item.save()