summary | refs | log | tree | commit | diff
path: root/commcrawler
diff options
context:
space:
mode:
author: Étienne Loks <etienne.loks@iggdrasil.net> 2019-08-09 00:15:26 +0200
committer: Étienne Loks <etienne.loks@iggdrasil.net> 2019-08-09 00:15:26 +0200
commit: bcacb3dcae815230c106cd773130f7b0ea5f720d (patch)
tree: 3dcb99ff067740d04689f7baa40a58b9289b98d9 /commcrawler
parent: 2d946cafdf247245bb58c41dbf4e8de699f950ba (diff)
download: Comm-on-net-bcacb3dcae815230c106cd773130f7b0ea5f720d.tar.bz2
download: Comm-on-net-bcacb3dcae815230c106cd773130f7b0ea5f720d.zip
Display progress
Diffstat (limited to 'commcrawler')
-rw-r--r-- commcrawler/admin.py 4
-rw-r--r-- commcrawler/models.py 14
-rw-r--r-- commcrawler/scrapy.py 7
3 files changed, 22 insertions, 3 deletions
diff --git a/commcrawler/admin.py b/commcrawler/admin.py
index 71a44a5..fcd1a1b 100644
--- a/commcrawler/admin.py
+++ b/commcrawler/admin.py
@@ -16,9 +16,9 @@ from commcrawler import models
class CrawlAdmin(admin.ModelAdmin):
model = models.Crawl
list_display = ("name", "status", "target_nb", "created", "started",
- "ended")
+ "ended", "progress")
list_filter = ("status",)
- readonly_fields = ("status", "created", "started", "ended")
+ readonly_fields = ("created", "started", "ended")
form = make_ajax_form(model, {'targets': 'target'})
diff --git a/commcrawler/models.py b/commcrawler/models.py
index e715408..f526fb5 100644
--- a/commcrawler/models.py
+++ b/commcrawler/models.py
@@ -50,6 +50,17 @@ class Crawl(models.Model):
def target_nb(self):
return self.targets.count()
+ @property
+ def progress(self):
+ if self.status == "P":
+ todo = self.target_nb
+ if todo == 0:
+ return "-"
+ done = self.results.filter(status__in=("T", "F")).count()
+ percent = int(done / todo * 100)
+ return "{} % ({}/{})".format(percent, done, todo)
+ return "-"
+
class CrawlResult(models.Model):
STATUS = (
@@ -57,7 +68,8 @@ class CrawlResult(models.Model):
('T', _("Time out")),
('F', _("Finished"))
)
- crawl = models.ForeignKey(Crawl, verbose_name=_("Crawl"))
+ crawl = models.ForeignKey(Crawl, verbose_name=_("Crawl"),
+ related_name="results")
target = models.ForeignKey(Target, verbose_name=_("Target"))
started = models.DateTimeField(
verbose_name=_("Start date"), default=datetime.datetime.now)
diff --git a/commcrawler/scrapy.py b/commcrawler/scrapy.py
index d24c3c2..1fabafe 100644
--- a/commcrawler/scrapy.py
+++ b/commcrawler/scrapy.py
@@ -298,6 +298,10 @@ def create_spider(name, urls, crawl, target, excluded_domains=None):
def launch_crawl(crawl_item, excluded_domains=None):
scrap_settings = settings.SCRAPPY_SETTINGS.copy()
process = CrawlerProcess(settings=scrap_settings)
+ crawl_item.started = timezone.now()
+ crawl_item.ended = None
+ crawl_item.status = "P"
+ crawl_item.save()
for target in crawl_item.targets.all():
process.crawl(
create_spider(
@@ -308,3 +312,6 @@ def launch_crawl(crawl_item, excluded_domains=None):
)
)
process.start()
+ crawl_item.ended = timezone.now()
+ crawl_item.status = "F"
+ crawl_item.save()