diff options
author | Étienne Loks <etienne.loks@iggdrasil.net> | 2019-08-12 15:41:06 +0200 |
---|---|---|
committer | Étienne Loks <etienne.loks@iggdrasil.net> | 2019-08-12 15:41:06 +0200 |
commit | c37796e9fa54ccc4f9a41635644c724854ede06d (patch) | |
tree | 48ebf954fb4823203768b2dd23fae56c7a0d5e6d /commcrawler | |
parent | 6673b347d16f53012e8929da67f57d317a71781d (diff) | |
download | Comm-on-net-c37796e9fa54ccc4f9a41635644c724854ede06d.tar.bz2 Comm-on-net-c37796e9fa54ccc4f9a41635644c724854ede06d.zip |
Recreate a Crawl process on each iteration
Diffstat (limited to 'commcrawler')
-rw-r--r-- | commcrawler/scrapy.py | 2 |
1 file changed, 1 insertion, 1 deletion
diff --git a/commcrawler/scrapy.py b/commcrawler/scrapy.py
index 1c26834..213f28d 100644
--- a/commcrawler/scrapy.py
+++ b/commcrawler/scrapy.py
@@ -318,7 +318,6 @@ def update_db_result(result_dct, values):

 def launch_crawl(crawl_item, excluded_domains=None):
     scrap_settings = settings.SCRAPPY_SETTINGS.copy()
-    process = CrawlerProcess(settings=scrap_settings)
     crawl_item.started = timezone.now()
     crawl_item.pre_crawl_ended = None
     crawl_item.crawl_ended = None
@@ -333,6 +332,7 @@ def launch_crawl(crawl_item, excluded_domains=None):
     page = 0
     page_number = total // 50
     while page >= page_number:
+        process = CrawlerProcess(settings=scrap_settings)
         idx = 0
         current_idx = page * 50
         while current_idx < total and idx < 50: