From bcacb3dcae815230c106cd773130f7b0ea5f720d Mon Sep 17 00:00:00 2001
From: Étienne Loks
Date: Fri, 9 Aug 2019 00:15:26 +0200
Subject: Display progress

---
Reviewer note (not part of the commit; text here is ignored by git am):
line breaks and indentation were reconstructed from a whitespace-collapsed
copy of this patch. Hunk line counts match the @@ headers and the diffstat,
but the exact column alignment of continuation lines is assumed -- confirm
against the repository before applying.

 commcrawler/admin.py  |  4 ++--
 commcrawler/models.py | 14 +++++++++++++-
 commcrawler/scrapy.py |  7 +++++++
 3 files changed, 22 insertions(+), 3 deletions(-)

diff --git a/commcrawler/admin.py b/commcrawler/admin.py
index 71a44a5..fcd1a1b 100644
--- a/commcrawler/admin.py
+++ b/commcrawler/admin.py
@@ -16,9 +16,9 @@ from commcrawler import models
 class CrawlAdmin(admin.ModelAdmin):
     model = models.Crawl
     list_display = ("name", "status", "target_nb", "created", "started",
-                    "ended")
+                    "ended", "progress")
     list_filter = ("status",)
-    readonly_fields = ("status", "created", "started", "ended")
+    readonly_fields = ("created", "started", "ended")
     form = make_ajax_form(model, {'targets': 'target'})
 
 
diff --git a/commcrawler/models.py b/commcrawler/models.py
index e715408..f526fb5 100644
--- a/commcrawler/models.py
+++ b/commcrawler/models.py
@@ -50,6 +50,17 @@ class Crawl(models.Model):
     def target_nb(self):
         return self.targets.count()
 
+    @property
+    def progress(self):
+        if self.status == "P":
+            todo = self.target_nb
+            if todo == 0:
+                return "-"
+            done = self.results.filter(status__in=("T", "F")).count()
+            percent = int(done / todo * 100)
+            return "{} % ({}/{})".format(percent, done, todo)
+        return "-"
+
 
 class CrawlResult(models.Model):
     STATUS = (
@@ -57,7 +68,8 @@ class CrawlResult(models.Model):
         ('T', _("Time out")),
         ('F', _("Finished"))
     )
-    crawl = models.ForeignKey(Crawl, verbose_name=_("Crawl"))
+    crawl = models.ForeignKey(Crawl, verbose_name=_("Crawl"),
+                              related_name="results")
     target = models.ForeignKey(Target, verbose_name=_("Target"))
     started = models.DateTimeField(
         verbose_name=_("Start date"), default=datetime.datetime.now)
diff --git a/commcrawler/scrapy.py b/commcrawler/scrapy.py
index d24c3c2..1fabafe 100644
--- a/commcrawler/scrapy.py
+++ b/commcrawler/scrapy.py
@@ -298,6 +298,10 @@ def create_spider(name, urls, crawl, target, excluded_domains=None):
 def launch_crawl(crawl_item, excluded_domains=None):
     scrap_settings = settings.SCRAPPY_SETTINGS.copy()
     process = CrawlerProcess(settings=scrap_settings)
+    crawl_item.started = timezone.now()
+    crawl_item.ended = None
+    crawl_item.status = "P"
+    crawl_item.save()
     for target in crawl_item.targets.all():
         process.crawl(
             create_spider(
@@ -308,3 +312,6 @@ def launch_crawl(crawl_item, excluded_domains=None):
             )
         )
     process.start()
+    crawl_item.ended = timezone.now()
+    crawl_item.status = "F"
+    crawl_item.save()
-- 
cgit v1.2.3