diff options
author | Étienne Loks <etienne.loks@iggdrasil.net> | 2019-08-12 16:10:22 +0200 |
---|---|---|
committer | Étienne Loks <etienne.loks@iggdrasil.net> | 2019-08-12 16:10:22 +0200 |
commit | da3690b72bfb2b8cef54c7e36bc0666913681c2c (patch) | |
tree | 73deb08f44bff16c35140357d3bf3048fe5311e2 | |
parent | c9e95983bccccec0d7c9b901078d11494eacbe32 (diff) | |
download | Comm-on-net-da3690b72bfb2b8cef54c7e36bc0666913681c2c.tar.bz2 Comm-on-net-da3690b72bfb2b8cef54c7e36bc0666913681c2c.zip |
NUMBER_PER_PAGE
-rw-r--r-- | commcrawler/scrapy.py | 9 |
1 file changed, 5 insertions, 4 deletions
```diff
diff --git a/commcrawler/scrapy.py b/commcrawler/scrapy.py
index 8c0d48b..8b91ac6 100644
--- a/commcrawler/scrapy.py
+++ b/commcrawler/scrapy.py
@@ -328,17 +328,18 @@ def launch_crawl(crawl_item, excluded_domains=None):
     # slice
     total = q.count()
     targets = q.values("id")
+    NUMBER_PER_PAGE = 250
     page = 0
-    page_number = total // 50
+    page_number = total // NUMBER_PER_PAGE
     while page >= page_number:
         process = CrawlerProcess(settings=scrap_settings)
         idx = 0
-        current_idx = page * 50
+        current_idx = page * NUMBER_PER_PAGE
         has_url_to_process = False
-        while current_idx < total and idx < 50:
+        while current_idx < total and idx < NUMBER_PER_PAGE:
             target = models.Target.objects.filter(pk=targets[current_idx]['id'])
             idx += 1
-            current_idx = idx + page * 50
+            current_idx = idx + page * NUMBER_PER_PAGE
             if not target.count():  # target has disappear
                 continue
             target = target.all()[0]
```