summary | refs | log | tree | commit | diff
path: root/commcrawler/scrapy.py
diff options
context:
space:
mode:
Diffstat (limited to 'commcrawler/scrapy.py')
-rw-r--r-- commcrawler/scrapy.py 9
1 files changed, 5 insertions, 4 deletions
diff --git a/commcrawler/scrapy.py b/commcrawler/scrapy.py
index 8c0d48b..8b91ac6 100644
--- a/commcrawler/scrapy.py
+++ b/commcrawler/scrapy.py
@@ -328,17 +328,18 @@ def launch_crawl(crawl_item, excluded_domains=None):
# slice
total = q.count()
targets = q.values("id")
+ NUMBER_PER_PAGE = 250
page = 0
- page_number = total // 50
+ page_number = total // NUMBER_PER_PAGE
while page >= page_number:
process = CrawlerProcess(settings=scrap_settings)
idx = 0
- current_idx = page * 50
+ current_idx = page * NUMBER_PER_PAGE
has_url_to_process = False
- while current_idx < total and idx < 50:
+ while current_idx < total and idx < NUMBER_PER_PAGE:
target = models.Target.objects.filter(pk=targets[current_idx]['id'])
idx += 1
- current_idx = idx + page * 50
+ current_idx = idx + page * NUMBER_PER_PAGE
if not target.count(): # target has disappear
continue
target = target.all()[0]