summary | refs | log | tree | commit | diff
path: root/commcrawler/scrapy.py
diff options
context:
space:
mode:
Diffstat (limited to 'commcrawler/scrapy.py')
-rw-r--r-- commcrawler/scrapy.py 9
1 files changed, 5 insertions, 4 deletions
diff --git a/commcrawler/scrapy.py b/commcrawler/scrapy.py
index e3782d6..9ff25c9 100644
--- a/commcrawler/scrapy.py
+++ b/commcrawler/scrapy.py
@@ -1,4 +1,6 @@
import datetime
+import time
+from random import randint
import requests
import scrapy
@@ -13,10 +15,6 @@ from django.utils import timezone
from . import models
from .utils import clean_url, append_to_results, get_domain
-"""
-CrawlLink
-"""
-
FACEBOOK_DOMAINS = ("facebook.com", "facebook.net", "fbcdn.net")
TWITTER_DOMAINS = ("twitter.com", "twimg.com", "twttr.net", "twttr.com",
"abs.twimg.com")
@@ -347,6 +345,9 @@ def launch_crawl(crawl_item, excluded_domains=None):
else:
update_db_result(result_dct, {"bad_ssl": True})
verify_ssl = False
+ time.sleep(
+ settings.SCRAPPY_SETTINGS["DOWNLOAD_DELAY"] - 1
+ + randint(0, 20)/10)
except requests.exceptions.RequestException:
update_db_result(result_dct, {"is_online": False,
"status": "F"})