From 257a3dfd311a984414d84e8a846be025b219219a Mon Sep 17 00:00:00 2001 From: Étienne Loks Date: Mon, 12 Aug 2019 17:29:47 +0200 Subject: Stop process on only first page --- commcrawler/scrapy.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/commcrawler/scrapy.py b/commcrawler/scrapy.py index e20dcdc..5fbeb43 100644 --- a/commcrawler/scrapy.py +++ b/commcrawler/scrapy.py @@ -28,6 +28,7 @@ AUDIO_EXTS = (".aac", ".flac", ".m4a", ".mp3", ".ogg", ".oga", ".opus", ".wma", ".webm") OFFICE_EXTS = (".csv", ".doc", ".docx", ".odt", ".rtf", ".ods", ".xls", ".xlsx") +CALENDAR_KEYS = ["agenda", "calendar"] MAX_LINKS = None # if None no max TIMEOUT = datetime.timedelta(minutes=settings.CRAWL_TIMEOUT) @@ -400,7 +401,7 @@ def launch_crawl(crawl_item, excluded_domains=None): ) ) if has_url_to_process: - process.start(stop_after_crawl=False) + process.start(stop_after_crawl=ONLY_FIRST_PAGE) page += 1 crawl_item.crawl_ended = timezone.now() crawl_item.status = "M" -- cgit v1.2.3