author     Étienne Loks <etienne.loks@iggdrasil.net>   2019-08-12 17:40:48 +0200
committer  Étienne Loks <etienne.loks@iggdrasil.net>   2019-08-12 17:40:48 +0200
commit     391cb54e98dc2c1661a4e0ae13739ed710297d02 (patch)
tree       f63a6c8186303c2e4f7ef050540171562c54a7af
parent     257a3dfd311a984414d84e8a846be025b219219a (diff)
Settings
-rw-r--r--  commcrawler/scrapy.py | 7 +++----
-rw-r--r--  commonnet/settings.py | 1 +
2 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/commcrawler/scrapy.py b/commcrawler/scrapy.py
index 5fbeb43..8c78e61 100644
--- a/commcrawler/scrapy.py
+++ b/commcrawler/scrapy.py
@@ -33,6 +33,9 @@ CALENDAR_KEYS = ["agenda", "calendar"]
 MAX_LINKS = None # if None no max
 TIMEOUT = datetime.timedelta(minutes=settings.CRAWL_TIMEOUT)
+NUMBER_PER_PAGE = settings.NUMBER_PER_SESSION
+ONLY_FIRST_PAGE = True
+
 
 
 class DefaultSpider:
     name = None
@@ -317,10 +320,6 @@ def update_db_result(result_dct, values):
     result.save()
 
 
-NUMBER_PER_PAGE = 250
-ONLY_FIRST_PAGE = True
-
-
 def launch_crawl(crawl_item, excluded_domains=None):
     scrap_settings = settings.SCRAPPY_SETTINGS.copy()
     crawl_item.started = timezone.now()
diff --git a/commonnet/settings.py b/commonnet/settings.py
index d50aff9..626055a 100644
--- a/commonnet/settings.py
+++ b/commonnet/settings.py
@@ -106,6 +106,7 @@ STATIC_URL = '/static/'
 STATIC_ROOT = os.path.join(BASE_DIR, "collected_static")
 
 CRAWL_TIMEOUT = 30 # timeout for each website crawl in minutes
+NUMBER_PER_SESSION = 5000
 
 try:
     from .local_settings import *
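
The change moves the hard-coded NUMBER_PER_PAGE = 250 out of commcrawler/scrapy.py and derives it from a new project setting, NUMBER_PER_SESSION, which defaults to 5000 in commonnet/settings.py. It follows the same pattern as CRAWL_TIMEOUT just above it: the crawler reads the value through the settings object, and the trailing "from .local_settings import *" in settings.py lets each deployment override the default without modifying the repository. A minimal sketch of such an override, assuming a local_settings.py file next to settings.py (the file name comes from the import visible in the diff; the values below are illustrative, not part of this commit):

# commonnet/local_settings.py -- illustrative per-deployment override, not part of this commit.
# Loaded at the end of commonnet/settings.py via "from .local_settings import *",
# so any name defined here replaces the project default above it.

NUMBER_PER_SESSION = 500  # crawl fewer pages per session on a small test instance (default 5000)
CRAWL_TIMEOUT = 10        # per-site crawl timeout in minutes (default 30)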