summary | refs | log | tree | commit | diff
path: root/commcrawler/scrapy.py
diff options
context:
space:
mode:
author Étienne Loks <etienne.loks@iggdrasil.net> 2019-08-12 15:41:06 +0200
committer Étienne Loks <etienne.loks@iggdrasil.net> 2019-08-12 15:41:06 +0200
commit c37796e9fa54ccc4f9a41635644c724854ede06d (patch)
tree 48ebf954fb4823203768b2dd23fae56c7a0d5e6d /commcrawler/scrapy.py
parent 6673b347d16f53012e8929da67f57d317a71781d (diff)
download Comm-on-net-c37796e9fa54ccc4f9a41635644c724854ede06d.tar.bz2
Comm-on-net-c37796e9fa54ccc4f9a41635644c724854ede06d.zip
Recreate a Crawl process on each iteration
Diffstat (limited to 'commcrawler/scrapy.py')
-rw-r--r-- commcrawler/scrapy.py 2
1 files changed, 1 insertions, 1 deletions
diff --git a/commcrawler/scrapy.py b/commcrawler/scrapy.py
index 1c26834..213f28d 100644
--- a/commcrawler/scrapy.py
+++ b/commcrawler/scrapy.py
@@ -318,7 +318,6 @@ def update_db_result(result_dct, values):
def launch_crawl(crawl_item, excluded_domains=None):
scrap_settings = settings.SCRAPPY_SETTINGS.copy()
- process = CrawlerProcess(settings=scrap_settings)
crawl_item.started = timezone.now()
crawl_item.pre_crawl_ended = None
crawl_item.crawl_ended = None
@@ -333,6 +332,7 @@ def launch_crawl(crawl_item, excluded_domains=None):
page = 0
page_number = total // 50
while page >= page_number:
+ process = CrawlerProcess(settings=scrap_settings)
idx = 0
current_idx = page * 50
while current_idx < total and idx < 50: