diff options
author | Étienne Loks <etienne.loks@iggdrasil.net> | 2019-08-12 17:29:47 +0200 |
---|---|---|
committer | Étienne Loks <etienne.loks@iggdrasil.net> | 2019-08-12 17:29:47 +0200 |
commit | 257a3dfd311a984414d84e8a846be025b219219a (patch) | |
tree | 89d630f085bed32c691404d5d9be8abdca847ffa | |
parent | 06dbe5e389add5c424b09c21c93fdd7d575fc57b (diff) | |
download | Comm-on-net-257a3dfd311a984414d84e8a846be025b219219a.tar.bz2 Comm-on-net-257a3dfd311a984414d84e8a846be025b219219a.zip |
Stop process on only first page
-rw-r--r-- | commcrawler/scrapy.py | 3 |
1 file changed, 2 insertions, 1 deletion
```diff
diff --git a/commcrawler/scrapy.py b/commcrawler/scrapy.py
index e20dcdc..5fbeb43 100644
--- a/commcrawler/scrapy.py
+++ b/commcrawler/scrapy.py
@@ -28,6 +28,7 @@ AUDIO_EXTS = (".aac", ".flac", ".m4a", ".mp3", ".ogg", ".oga", ".opus", ".wma", ".webm")
 OFFICE_EXTS = (".csv", ".doc", ".docx", ".odt", ".rtf", ".ods", ".xls", ".xlsx")
+CALENDAR_KEYS = ["agenda", "calendar"]
 MAX_LINKS = None # if None no max
 TIMEOUT = datetime.timedelta(minutes=settings.CRAWL_TIMEOUT)
@@ -400,7 +401,7 @@ def launch_crawl(crawl_item, excluded_domains=None):
 )
 )
 if has_url_to_process:
-    process.start(stop_after_crawl=False)
+    process.start(stop_after_crawl=ONLY_FIRST_PAGE)
 page += 1
 crawl_item.crawl_ended = timezone.now()
 crawl_item.status = "M"
```