1 files changed, 88 insertions, 0 deletions
diff --git a/commonnet/scrapy_setting.py b/commonnet/scrapy_setting.py
new file mode 100644
index 0000000..6330705
--- /dev/null
+++ b/commonnet/scrapy_setting.py
@@ -0,0 +1,88 @@
+# -*- coding: utf-8 -*-
+
+SCRAPPY_SETTINGS = {
+    # Scrapy settings for the commonnet scraper, expressed as a dict of setting name -> value.
+    # NOTE(review): the constant is spelled "SCRAPPY" (sic); renaming it would break importers.
+    # For simplicity, this dict contains only settings considered important or
+    # commonly used. More settings are described in the documentation:
+    #
+    #     https://doc.scrapy.org/en/latest/topics/settings.html
+    #     https://doc.scrapy.org/en/latest/topics/downloader-middleware.html
+    #     https://doc.scrapy.org/en/latest/topics/spider-middleware.html
+    "BOT_NAME": 'commonnet-scraper',
+
+    # Obey robots.txt rules
+    "ROBOTSTXT_OBEY": True,
+
+    # Configure maximum concurrent requests performed by Scrapy (default: 16)
+    "CONCURRENT_REQUESTS": 16,
+
+    # Configure a delay (in seconds) for requests for the same website (default: 0)
+    # See https://doc.scrapy.org/en/latest/topics/settings.html#download-delay
+    # See also the AutoThrottle settings below (AUTOTHROTTLE_ENABLED is True in this dict)
+    "DOWNLOAD_DELAY": 3,
+
+    # Per-domain cap; tune with CPU usage. NOTE(review): 100 is above CONCURRENT_REQUESTS (16) - confirm intent
+    "CONCURRENT_REQUESTS_PER_DOMAIN": 100,
+
+    # Disable Telnet Console (enabled by default)
+    "TELNETCONSOLE_ENABLED": False,
+
+    # Override the default request headers (disabled example, written as a dict entry):
+    #"DEFAULT_REQUEST_HEADERS": {
+    #   'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
+    #   'Accept-Language': 'en',
+    #},
+
+    # Enable or disable spider middlewares (disabled example)
+    # See https://doc.scrapy.org/en/latest/topics/spider-middleware.html
+    #"SPIDER_MIDDLEWARES": {
+    #    'scraper.middlewares.ScraperSpiderMiddleware': 543,
+    #},
+
+    # Enable or disable downloader middlewares (disabled example)
+    # See https://doc.scrapy.org/en/latest/topics/downloader-middleware.html
+    #"DOWNLOADER_MIDDLEWARES": {
+    #    'scraper.middlewares.ScraperDownloaderMiddleware': 543,
+    #},
+
+    # Enable or disable extensions (disabled example)
+    # See https://doc.scrapy.org/en/latest/topics/extensions.html
+    #"EXTENSIONS": {
+    #    'scrapy.extensions.telnet.TelnetConsole': None,
+    #},
+
+    # Configure item pipelines (disabled example)
+    # See https://doc.scrapy.org/en/latest/topics/item-pipeline.html
+    #"ITEM_PIPELINES": {
+    #    'scraper.pipelines.ScraperPipeline': 300,
+    #},
+
+    # Enable and configure the AutoThrottle extension (disabled by default)
+    # See https://doc.scrapy.org/en/latest/topics/autothrottle.html
+    "AUTOTHROTTLE_ENABLED": True,
+
+    # The initial download delay
+    #"AUTOTHROTTLE_START_DELAY": 5,
+    # The maximum download delay to be set in case of high latencies
+    #"AUTOTHROTTLE_MAX_DELAY": 60,
+    # The average number of requests Scrapy should be sending in parallel to
+    # each remote server
+    #"AUTOTHROTTLE_TARGET_CONCURRENCY": 1.0,
+    # Enable showing throttling stats for every response received:
+    #"AUTOTHROTTLE_DEBUG": False,
+
+    # Priority queue intended for crawling many different domains in parallel
+    "SCHEDULER_PRIORITY_QUEUE": 'scrapy.pqueues.DownloaderAwarePriorityQueue',
+
+    # Enable and configure HTTP caching (disabled by default; entries below are disabled examples)
+    # See https://doc.scrapy.org/en/latest/topics/downloader-middleware.html#httpcache-middleware-settings
+    #"HTTPCACHE_ENABLED": True,
+    #"HTTPCACHE_EXPIRATION_SECS": 0,
+    #"HTTPCACHE_DIR": 'httpcache',
+    #"HTTPCACHE_IGNORE_HTTP_CODES": [],
+    #"HTTPCACHE_STORAGE": 'scrapy.extensions.httpcache.FilesystemCacheStorage',
+
+    "COOKIES_ENABLED": False,  # cookie handling switched off
+}
+