NUMBER_PER_PAGE

author: Étienne Loks <etienne.loks@iggdrasil.net> 2019-08-12 16:10:22 +0200
committer: Étienne Loks <etienne.loks@iggdrasil.net> 2019-08-12 16:10:22 +0200
commit: da3690b72bfb2b8cef54c7e36bc0666913681c2c (patch)
tree: 73deb08f44bff16c35140357d3bf3048fe5311e2
parent: c9e95983bccccec0d7c9b901078d11494eacbe32 (diff)
download: Comm-on-net-da3690b72bfb2b8cef54c7e36bc0666913681c2c.tar.bz2
Comm-on-net-da3690b72bfb2b8cef54c7e36bc0666913681c2c.zip
1 file changed, 5 insertions, 4 deletions
diff --git a/commcrawler/scrapy.py b/commcrawler/scrapy.py
index 8c0d48b..8b91ac6 100644
--- a/commcrawler/scrapy.py
+++ b/commcrawler/scrapy.py
@@ -328,17 +328,18 @@ def launch_crawl(crawl_item, excluded_domains=None):
     # slice
     total = q.count()
     targets = q.values("id")
+    NUMBER_PER_PAGE = 250
     page = 0
-    page_number = total // 50
+    page_number = total // NUMBER_PER_PAGE
     while page >= page_number:
         process = CrawlerProcess(settings=scrap_settings)
         idx = 0
-        current_idx = page * 50
+        current_idx = page * NUMBER_PER_PAGE
         has_url_to_process = False
-        while current_idx < total and idx < 50:
+        while current_idx < total and idx < NUMBER_PER_PAGE:
             target = models.Target.objects.filter(pk=targets[current_idx]['id'])
             idx += 1
-            current_idx = idx + page * 50
+            current_idx = idx + page * NUMBER_PER_PAGE
             if not target.count():  # target has disappear
                 continue
             target = target.all()[0]
author	Étienne Loks <etienne.loks@iggdrasil.net>	2019-08-12 16:10:22 +0200
committer	Étienne Loks <etienne.loks@iggdrasil.net>	2019-08-12 16:10:22 +0200
commit	da3690b72bfb2b8cef54c7e36bc0666913681c2c (patch)
tree	73deb08f44bff16c35140357d3bf3048fe5311e2
parent	c9e95983bccccec0d7c9b901078d11494eacbe32 (diff)
download	Comm-on-net-da3690b72bfb2b8cef54c7e36bc0666913681c2c.tar.bz2 Comm-on-net-da3690b72bfb2b8cef54c7e36bc0666913681c2c.zip