diff options
author | Étienne Loks <etienne.loks@iggdrasil.net> | 2019-08-12 17:40:48 +0200 |
---|---|---|
committer | Étienne Loks <etienne.loks@iggdrasil.net> | 2019-08-12 17:40:48 +0200 |
commit | 391cb54e98dc2c1661a4e0ae13739ed710297d02 (patch) | |
tree | f63a6c8186303c2e4f7ef050540171562c54a7af /commcrawler/scrapy.py | |
parent | 257a3dfd311a984414d84e8a846be025b219219a (diff) | |
download | Comm-on-net-391cb54e98dc2c1661a4e0ae13739ed710297d02.tar.bz2 Comm-on-net-391cb54e98dc2c1661a4e0ae13739ed710297d02.zip |
Settings
Diffstat (limited to 'commcrawler/scrapy.py')
-rw-r--r-- | commcrawler/scrapy.py | 7 |
1 files changed, 3 insertions, 4 deletions
```diff
diff --git a/commcrawler/scrapy.py b/commcrawler/scrapy.py
index 5fbeb43..8c78e61 100644
--- a/commcrawler/scrapy.py
+++ b/commcrawler/scrapy.py
@@ -33,6 +33,9 @@ CALENDAR_KEYS = ["agenda", "calendar"]
 MAX_LINKS = None # if None no max
 TIMEOUT = datetime.timedelta(minutes=settings.CRAWL_TIMEOUT)
 
+NUMBER_PER_PAGE = settings.NUMBER_PER_SESSION
+ONLY_FIRST_PAGE = True
+
 
 class DefaultSpider:
     name = None
@@ -317,10 +320,6 @@ def update_db_result(result_dct, values):
     result.save()
 
 
-NUMBER_PER_PAGE = 250
-ONLY_FIRST_PAGE = True
-
-
 def launch_crawl(crawl_item, excluded_domains=None):
     scrap_settings = settings.SCRAPPY_SETTINGS.copy()
     crawl_item.started = timezone.now()
```