diff options
Diffstat (limited to 'commcrawler/scrapy.py')
-rw-r--r-- | commcrawler/scrapy.py | 9 |
1 file changed, 5 insertions, 4 deletions
diff --git a/commcrawler/scrapy.py b/commcrawler/scrapy.py
index e3782d6..9ff25c9 100644
--- a/commcrawler/scrapy.py
+++ b/commcrawler/scrapy.py
@@ -1,4 +1,6 @@
 import datetime
+import time
+from random import randint
 
 import requests
 import scrapy
@@ -13,10 +15,6 @@ from django.utils import timezone
 from . import models
 from .utils import clean_url, append_to_results, get_domain
 
-"""
-CrawlLink
-"""
-
 FACEBOOK_DOMAINS = ("facebook.com", "facebook.net", "fbcdn.net")
 TWITTER_DOMAINS = ("twitter.com", "twimg.com", "twttr.net", "twttr.com",
                    "abs.twimg.com")
@@ -347,6 +345,9 @@ def launch_crawl(crawl_item, excluded_domains=None):
                 else:
                     update_db_result(result_dct, {"bad_ssl": True})
                     verify_ssl = False
+                time.sleep(
+                    settings.SCRAPPY_SETTINGS["DOWNLOAD_DELAY"] - 1
+                    + randint(0, 20)/10)
             except requests.exceptions.RequestException:
                 update_db_result(result_dct, {"is_online": False,
                                               "status": "F"})