diff options
author | Étienne Loks <etienne.loks@iggdrasil.net> | 2019-08-09 17:02:18 +0200 |
---|---|---|
committer | Étienne Loks <etienne.loks@iggdrasil.net> | 2019-08-09 17:02:18 +0200 |
commit | 176917b75c3a71e9ecf955b9fdb6f9d8b1a47c7c (patch) | |
tree | 70cb05d7715458162192e9a748d28bd4785e07a9 | |
parent | e8cbb1cd58853e28ea756ed2ccd77363c54e04d2 (diff) | |
download | Comm-on-net-176917b75c3a71e9ecf955b9fdb6f9d8b1a47c7c.tar.bz2 Comm-on-net-176917b75c3a71e9ecf955b9fdb6f9d8b1a47c7c.zip |
Add download delay after bad certificate
-rw-r--r-- | commcrawler/scrapy.py | 9 |
1 file changed, 5 insertions, 4 deletions
```diff
diff --git a/commcrawler/scrapy.py b/commcrawler/scrapy.py
index e3782d6..9ff25c9 100644
--- a/commcrawler/scrapy.py
+++ b/commcrawler/scrapy.py
@@ -1,4 +1,6 @@
 import datetime
+import time
+from random import randint
 
 import requests
 import scrapy
@@ -13,10 +15,6 @@ from django.utils import timezone
 from . import models
 from .utils import clean_url, append_to_results, get_domain
 
-"""
-CrawlLink
-"""
-
 FACEBOOK_DOMAINS = ("facebook.com", "facebook.net", "fbcdn.net")
 TWITTER_DOMAINS = ("twitter.com", "twimg.com", "twttr.net", "twttr.com",
                    "abs.twimg.com")
@@ -347,6 +345,9 @@ def launch_crawl(crawl_item, excluded_domains=None):
                 else:
                     update_db_result(result_dct, {"bad_ssl": True})
                     verify_ssl = False
+                    time.sleep(
+                        settings.SCRAPPY_SETTINGS["DOWNLOAD_DELAY"] - 1
+                        + randint(0, 20)/10)
             except requests.exceptions.RequestException:
                 update_db_result(result_dct, {"is_online": False,
                                               "status": "F"})
```