diff options
Diffstat (limited to 'commcrawler/scrapy.py')
-rw-r--r-- | commcrawler/scrapy.py | 3 |
1 files changed, 2 insertions, 1 deletions
diff --git a/commcrawler/scrapy.py b/commcrawler/scrapy.py
index e20dcdc..5fbeb43 100644
--- a/commcrawler/scrapy.py
+++ b/commcrawler/scrapy.py
@@ -28,6 +28,7 @@ AUDIO_EXTS = (".aac", ".flac", ".m4a", ".mp3", ".ogg", ".oga", ".opus",
               ".wma", ".webm")
 OFFICE_EXTS = (".csv", ".doc", ".docx", ".odt", ".rtf", ".ods", ".xls", ".xlsx")
 
+CALENDAR_KEYS = ["agenda", "calendar"]
 MAX_LINKS = None  # if None no max
 TIMEOUT = datetime.timedelta(minutes=settings.CRAWL_TIMEOUT)
 
@@ -400,7 +401,7 @@ def launch_crawl(crawl_item, excluded_domains=None):
                     )
                 )
         if has_url_to_process:
-            process.start(stop_after_crawl=False)
+            process.start(stop_after_crawl=ONLY_FIRST_PAGE)
         page += 1
     crawl_item.crawl_ended = timezone.now()
     crawl_item.status = "M"