summary refs log tree commit diff
path: root/commcrawler/scrapy.py
diff options
context:
space:
mode:
Diffstat (limited to 'commcrawler/scrapy.py')
-rw-r--r-- commcrawler/scrapy.py 8
1 files changed, 4 insertions, 4 deletions
diff --git a/commcrawler/scrapy.py b/commcrawler/scrapy.py
index bdd28c3..767827a 100644
--- a/commcrawler/scrapy.py
+++ b/commcrawler/scrapy.py
@@ -5,7 +5,7 @@ import requests
import scrapy
from scrapy.crawler import CrawlerProcess
-from scrapy.exceptions import NotSupported
+from scrapy.exceptions import NotSupported, CloseSpider
from scrapy.linkextractors import LinkExtractor
from django.conf import settings
@@ -129,15 +129,15 @@ class DefaultSpider:
pk=self.crawl_result.pk)
result.status = "T"
result.save()
- self.is_timeout = True
- return True
+ self.is_timeout = True
+ raise CloseSpider('timeout')
def parse(self, response):
result = {
"url": response.url,
}
if self.is_timeout or self.timeout():
- return []
+ raise CloseSpider('timeout')
for domain in self.excluded_domains:
if domain in response.url:
result["is_online"] = False