summary | refs | log | tree | commit | diff
path: root/commcrawler/scrapy.py
diff options
context:
space:
mode:
author: Étienne Loks <etienne.loks@iggdrasil.net> 2019-08-09 16:09:58 +0200
committer: Étienne Loks <etienne.loks@iggdrasil.net> 2019-08-09 16:09:58 +0200
commit: e8068395d642fa36d7f6c53fe8088beabe7c2a31 (patch)
tree: 337c1e1f89653741a3b9266503a3aae8dc9f127b /commcrawler/scrapy.py
parent: 0f26c668bcc86d1a4cfc91f1b8154055409e8aab (diff)
download: Comm-on-net-e8068395d642fa36d7f6c53fe8088beabe7c2a31.tar.bz2
Comm-on-net-e8068395d642fa36d7f6c53fe8088beabe7c2a31.zip
Display pre-crawl progression
Diffstat (limited to 'commcrawler/scrapy.py')
-rw-r--r-- commcrawler/scrapy.py 10
1 files changed, 9 insertions, 1 deletions
diff --git a/commcrawler/scrapy.py b/commcrawler/scrapy.py
index a430f0e..39e3a3e 100644
--- a/commcrawler/scrapy.py
+++ b/commcrawler/scrapy.py
@@ -324,10 +324,15 @@ def launch_crawl(crawl_item, excluded_domains=None):
scrap_settings = settings.SCRAPPY_SETTINGS.copy()
process = CrawlerProcess(settings=scrap_settings)
crawl_item.started = timezone.now()
+ crawl_item.pre_crawl_ended = None
+ crawl_item.crawl_ended = None
crawl_item.ended = None
- crawl_item.status = "P"
+ crawl_item.progression = 0
+ crawl_item.status = "W"
crawl_item.save()
for target in crawl_item.targets.all():
+ crawl_item.progression += 1
+ crawl_item.save()
result_dct = {
"crawl_id": crawl_item.pk,
"target_id": target.pk,
@@ -370,6 +375,9 @@ def launch_crawl(crawl_item, excluded_domains=None):
redirect
)
)
+ crawl_item.pre_crawl_ended = timezone.now()
+ crawl_item.status = "P"
+ crawl_item.save()
process.start()
crawl_item.crawl_ended = timezone.now()
crawl_item.status = "M"