summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Étienne Loks <etienne.loks@iggdrasil.net> 2019-08-09 17:02:18 +0200
committer: Étienne Loks <etienne.loks@iggdrasil.net> 2019-08-09 17:02:18 +0200
commit: 176917b75c3a71e9ecf955b9fdb6f9d8b1a47c7c (patch)
tree: 70cb05d7715458162192e9a748d28bd4785e07a9
parent: e8cbb1cd58853e28ea756ed2ccd77363c54e04d2 (diff)
download: Comm-on-net-176917b75c3a71e9ecf955b9fdb6f9d8b1a47c7c.tar.bz2
download: Comm-on-net-176917b75c3a71e9ecf955b9fdb6f9d8b1a47c7c.zip
Add download delay after bad certificate
-rw-r--r-- commcrawler/scrapy.py 9
1 file changed, 5 insertions, 4 deletions
diff --git a/commcrawler/scrapy.py b/commcrawler/scrapy.py
index e3782d6..9ff25c9 100644
--- a/commcrawler/scrapy.py
+++ b/commcrawler/scrapy.py
@@ -1,4 +1,6 @@
import datetime
+import time
+from random import randint
import requests
import scrapy
@@ -13,10 +15,6 @@ from django.utils import timezone
from . import models
from .utils import clean_url, append_to_results, get_domain
-"""
-CrawlLink
-"""
-
FACEBOOK_DOMAINS = ("facebook.com", "facebook.net", "fbcdn.net")
TWITTER_DOMAINS = ("twitter.com", "twimg.com", "twttr.net", "twttr.com",
"abs.twimg.com")
@@ -347,6 +345,9 @@ def launch_crawl(crawl_item, excluded_domains=None):
else:
update_db_result(result_dct, {"bad_ssl": True})
verify_ssl = False
+ time.sleep(
+ settings.SCRAPPY_SETTINGS["DOWNLOAD_DELAY"] - 1
+ + randint(0, 20)/10)
except requests.exceptions.RequestException:
update_db_result(result_dct, {"is_online": False,
"status": "F"})