author     Étienne Loks <etienne.loks@iggdrasil.net>   2019-08-09 16:09:58 +0200
committer  Étienne Loks <etienne.loks@iggdrasil.net>   2019-08-09 16:09:58 +0200
commit     e8068395d642fa36d7f6c53fe8088beabe7c2a31 (patch)
tree       337c1e1f89653741a3b9266503a3aae8dc9f127b
parent     0f26c668bcc86d1a4cfc91f1b8154055409e8aab (diff)
Display pre-crawl progression
-rw-r--r--  commcrawler/admin.py                                |  5
-rw-r--r--  commcrawler/migrations/0003_auto_20190809_1607.py   | 25
-rw-r--r--  commcrawler/models.py                               |  5
-rw-r--r--  commcrawler/scrapy.py                               | 10
4 files changed, 41 insertions(+), 4 deletions(-)
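
Taken together, the four hunks below add a distinct pre-crawl phase to the Crawl lifecycle: a new "W" status, a pre_crawl_ended timestamp, and a progression counter advanced once per target while spiders are being scheduled. The resulting status flow (codes from models.py below) is:

    C (Created) -> A (Planned) -> W (Pre-crawl in progress)
      -> P (Crawl in progress) -> M (Match link in progress) -> F (Finished)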
diff --git a/commcrawler/admin.py b/commcrawler/admin.py
index 46c5aab..6b06228 100644
--- a/commcrawler/admin.py
+++ b/commcrawler/admin.py
@@ -16,9 +16,10 @@ from commcrawler import models
class CrawlAdmin(admin.ModelAdmin):
model = models.Crawl
list_display = ("name", "status", "target_nb", "created", "started",
- "crawl_ended", "ended", "progress")
+ "pre_crawl_ended", "crawl_ended", "ended", "progress")
list_filter = ("status",)
- exclude = ("progression","created", "started", "crawl_ended", "ended")
+ exclude = ("progression", "created", "started", "pre_crawl_ended",
+ "crawl_ended", "ended")
readonly_fields = ()
form = make_ajax_form(model, {'targets': 'target'})
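
For reference, the ModelAdmin reads as follows once the hunk is applied; the imports are assumed from the top of admin.py, which is not shown in the hunk (make_ajax_form would come from the django-ajax-selects package):

    from ajax_select import make_ajax_form   # assumed import, not in the hunk
    from django.contrib import admin

    from commcrawler import models


    class CrawlAdmin(admin.ModelAdmin):
        model = models.Crawl
        list_display = ("name", "status", "target_nb", "created", "started",
                        "pre_crawl_ended", "crawl_ended", "ended", "progress")
        list_filter = ("status",)
        exclude = ("progression", "created", "started", "pre_crawl_ended",
                   "crawl_ended", "ended")
        readonly_fields = ()
        form = make_ajax_form(model, {'targets': 'target'})

Note that progress is a model method rather than a field, so it can appear in list_display but not in exclude; excluding the raw tracking fields keeps the edit form limited to user-editable data while the changelist still shows the full timeline.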
diff --git a/commcrawler/migrations/0003_auto_20190809_1607.py b/commcrawler/migrations/0003_auto_20190809_1607.py
new file mode 100644
index 0000000..3018fc2
--- /dev/null
+++ b/commcrawler/migrations/0003_auto_20190809_1607.py
@@ -0,0 +1,25 @@
+# -*- coding: utf-8 -*-
+# Generated by Django 1.11 on 2019-08-09 14:07
+from __future__ import unicode_literals
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+
+ dependencies = [
+ ('commcrawler', '0002_crawlresult_bad_ssl'),
+ ]
+
+ operations = [
+ migrations.AddField(
+ model_name='crawl',
+ name='pre_crawl_ended',
+ field=models.DateTimeField(blank=True, null=True, verbose_name='Pre-crawl end'),
+ ),
+ migrations.AlterField(
+ model_name='crawl',
+ name='status',
+ field=models.CharField(choices=[('C', 'Created'), ('A', 'Planned'), ('W', 'Pre-crawl in progress'), ('P', 'Crawl in progress'), ('M', 'Match link in progress'), ('F', 'Finished')], default='C', max_length=1, verbose_name='Status'),
+ ),
+ ]
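
Because the new field is declared blank=True, null=True, AddField needs no default and is safe on existing rows. A hypothetical ORM check, assuming the migration has been applied (get_status_display is standard Django for fields with choices):

    # After applying migration 0003, the new state is usable:
    from commcrawler.models import Crawl

    crawl = Crawl.objects.first()
    crawl.status = "W"                  # new choice added by this migration
    crawl.pre_crawl_ended = None        # new nullable DateTimeField
    crawl.save()
    print(crawl.get_status_display())   # -> "Pre-crawl in progress"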
diff --git a/commcrawler/models.py b/commcrawler/models.py
index da578bb..ce3deb0 100644
--- a/commcrawler/models.py
+++ b/commcrawler/models.py
@@ -28,6 +28,7 @@ class Crawl(models.Model):
STATUS = (
('C', _("Created")),
('A', _("Planned")),
+ ('W', _("Pre-crawl in progress")),
('P', _("Crawl in progress")),
('M', _("Match link in progress")),
('F', _("Finished"))
@@ -37,6 +38,8 @@ class Crawl(models.Model):
verbose_name=_("Creation"), default=datetime.datetime.now)
started = models.DateTimeField(
verbose_name=_("Started"), blank=True, null=True)
+ pre_crawl_ended = models.DateTimeField(
+ verbose_name=_("Pre-crawl end"), blank=True, null=True)
crawl_ended = models.DateTimeField(
verbose_name=_("Crawl end"), blank=True, null=True)
ended = models.DateTimeField(
@@ -69,7 +72,7 @@ class Crawl(models.Model):
done = self.results.filter(status__in=("T", "F")).count()
percent = int(done / todo * 100)
return "{} % ({}/{})".format(percent, done, todo)
- if self.status == "M":
+ if self.status in ("W", "M"):
done = self.progression or 0
percent = int(done / todo * 100)
return "{} % ({}/{})".format(percent, done, todo)
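
The progress hunk routes the pre-crawl phase through the same path as the match-link phase: both read the progression counter, while the crawl phase counts finished results. A minimal sketch of the changed branch, assuming todo is the total target count computed earlier in progress():

    if self.status in ("W", "M"):         # pre-crawl or match-link phase
        done = self.progression or 0      # counter advanced by launch_crawl()
        percent = int(done / todo * 100)  # e.g. 41 of 164 targets -> 25
        return "{} % ({}/{})".format(percent, done, todo)

So with 164 targets and progression at 41, the admin column reads "25 % (41/164)".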
diff --git a/commcrawler/scrapy.py b/commcrawler/scrapy.py
index a430f0e..39e3a3e 100644
--- a/commcrawler/scrapy.py
+++ b/commcrawler/scrapy.py
@@ -324,10 +324,15 @@ def launch_crawl(crawl_item, excluded_domains=None):
scrap_settings = settings.SCRAPPY_SETTINGS.copy()
process = CrawlerProcess(settings=scrap_settings)
crawl_item.started = timezone.now()
+ crawl_item.pre_crawl_ended = None
+ crawl_item.crawl_ended = None
crawl_item.ended = None
- crawl_item.status = "P"
+ crawl_item.progression = 0
+ crawl_item.status = "W"
crawl_item.save()
for target in crawl_item.targets.all():
+ crawl_item.progression += 1
+ crawl_item.save()
result_dct = {
"crawl_id": crawl_item.pk,
"target_id": target.pk,
@@ -370,6 +375,9 @@ def launch_crawl(crawl_item, excluded_domains=None):
redirect
)
)
+ crawl_item.pre_crawl_ended = timezone.now()
+ crawl_item.status = "P"
+ crawl_item.save()
process.start()
crawl_item.crawl_ended = timezone.now()
crawl_item.status = "M"
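
In launch_crawl the timestamps are now reset up front, the crawl sits in "W" while the target loop schedules spiders, and each iteration persists the counter so the admin progress column updates live. Saving the whole row once per target rewrites every column; if only the counter needs persisting, Django's update_fields narrows the UPDATE (a sketch of an alternative, not what the patch does):

    for target in crawl_item.targets.all():
        crawl_item.progression += 1
        # Write only the progression column instead of the full row:
        crawl_item.save(update_fields=["progression"])

Once the loop ends, pre_crawl_ended is stamped and the status moves to "P" before process.start(), which blocks until every scheduled spider finishes; the existing code then records crawl_ended and enters the "M" phase.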