summary | refs | log | tree | commit | diff
path: root/commcrawler/scrapy.py
diff options
context:
space:
mode:
Diffstat (limited to 'commcrawler/scrapy.py')
-rw-r--r-- commcrawler/scrapy.py | 5
1 files changed, 3 insertions, 2 deletions
diff --git a/commcrawler/scrapy.py b/commcrawler/scrapy.py
index 8b91ac6..bc17225 100644
--- a/commcrawler/scrapy.py
+++ b/commcrawler/scrapy.py
@@ -331,7 +331,7 @@ def launch_crawl(crawl_item, excluded_domains=None):
NUMBER_PER_PAGE = 250
page = 0
page_number = total // NUMBER_PER_PAGE
- while page >= page_number:
+ while page <= page_number:
process = CrawlerProcess(settings=scrap_settings)
idx = 0
current_idx = page * NUMBER_PER_PAGE
@@ -396,7 +396,8 @@ def launch_crawl(crawl_item, excluded_domains=None):
)
)
if has_url_to_process:
- process.start()
+ process.start(stop_after_crawl=False)
+ page += 1
crawl_item.crawl_ended = timezone.now()
crawl_item.status = "M"
crawl_item.save()