summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorÉtienne Loks <etienne.loks@iggdrasil.net>2019-07-31 10:01:14 +0200
committerÉtienne Loks <etienne@peacefrogs.net>2019-07-31 10:01:14 +0200
commit345a41be0318da980422ee74a67c363008d4de8d (patch)
tree39e98ec4a30718ebd677f5e6be197d008546d65d
parent56a33989278a8fe2985f0d36d3c589136c1ec30d (diff)
downloadComm-on-net-345a41be0318da980422ee74a67c363008d4de8d.tar.bz2
Comm-on-net-345a41be0318da980422ee74a67c363008d4de8d.zip
Models: crawl results
-rw-r--r--commcrawler/admin.py27
-rw-r--r--commcrawler/migrations/0001_initial.py85
-rw-r--r--commcrawler/models.py74
3 files changed, 179 insertions, 7 deletions
diff --git a/commcrawler/admin.py b/commcrawler/admin.py
index f4df5e2..7ff1eed 100644
--- a/commcrawler/admin.py
+++ b/commcrawler/admin.py
@@ -122,3 +122,30 @@ class CrawlAdmin(admin.ModelAdmin):
admin_site.register(models.Crawl, CrawlAdmin)
+
+
+class CrawlResultAdmin(admin.ModelAdmin):
+    """Admin changelist options for ``models.CrawlResult``."""
+    # Must name the same model that is handed to ``admin_site.register``
+    # below; pointing at ``models.Crawl`` here was a copy/paste slip.
+    model = models.CrawlResult
+    # Changelist columns.
+    list_display = ("target", "crawl", "is_online")
+    # Sidebar filter on the parent crawl.
+    list_filter = ("crawl",)
+
+
+admin_site.register(models.CrawlResult, CrawlResultAdmin)
+
+
+class CrawlLinkAdmin(admin.ModelAdmin):
+    """Admin changelist options for ``models.CrawlLink``."""
+    # Must name the same model that is handed to ``admin_site.register``
+    # below; pointing at ``models.Crawl`` here was a copy/paste slip.
+    model = models.CrawlLink
+    # Changelist columns: the owning crawl result and the stored URL.
+    list_display = ("result", "link")
+
+
+admin_site.register(models.CrawlLink, CrawlLinkAdmin)
+
+
+class CrawlRelationAdmin(admin.ModelAdmin):
+    """Admin changelist options for ``models.CrawlRelation``."""
+    model = models.CrawlRelation
+    # Free-text search follows both foreign keys to the target ``name``.
+    search_fields = [
+        "source__name",
+        "destination__name",
+    ]
+    # Sidebar filter on the parent crawl.
+    list_filter = (
+        "crawl",
+    )
+    # Changelist columns.
+    list_display = (
+        "crawl",
+        "source",
+        "destination",
+        "number",
+    )
+
+
+admin_site.register(models.CrawlRelation, CrawlRelationAdmin)
diff --git a/commcrawler/migrations/0001_initial.py b/commcrawler/migrations/0001_initial.py
index 2fbb9fe..1468584 100644
--- a/commcrawler/migrations/0001_initial.py
+++ b/commcrawler/migrations/0001_initial.py
@@ -1,5 +1,5 @@
# -*- coding: utf-8 -*-
-# Generated by Django 1.11 on 2019-07-30 15:47
+# Generated by Django 1.11 on 2019-07-31 07:48
from __future__ import unicode_literals
import datetime
@@ -24,9 +24,9 @@ class Migration(migrations.Migration):
('reference', models.CharField(blank=True, help_text='For instance, INSEE code for towns', max_length=100, null=True, verbose_name='Reference')),
],
options={
+ 'verbose_name': 'Area',
'verbose_name_plural': 'Areas',
'ordering': ('name',),
- 'verbose_name': 'Area',
},
),
migrations.CreateModel(
@@ -36,9 +36,9 @@ class Migration(migrations.Migration):
('name', models.CharField(max_length=200, verbose_name='Name')),
],
options={
+ 'verbose_name': 'Area type',
'verbose_name_plural': 'Area types',
'ordering': ('name',),
- 'verbose_name': 'Area type',
},
),
migrations.CreateModel(
@@ -52,9 +52,60 @@ class Migration(migrations.Migration):
('status', models.CharField(choices=[('C', 'Created'), ('P', 'In progress'), ('F', 'Finished')], default='C', max_length=1)),
],
options={
+ 'verbose_name': 'Crawl',
'verbose_name_plural': 'Crawls',
'ordering': ('created', 'name'),
- 'verbose_name': 'Crawl',
+ },
+ ),
+ migrations.CreateModel(
+ name='CrawlLink',
+ fields=[
+ ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
+ ('link', models.URLField(verbose_name='Link')),
+ ],
+ options={
+ 'verbose_name': 'Crawl link',
+ 'verbose_name_plural': 'Crawl links',
+ },
+ ),
+ migrations.CreateModel(
+ name='CrawlRelation',
+ fields=[
+ ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
+ ('number', models.IntegerField(default=1, verbose_name='Number')),
+ ('crawl', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='commcrawler.Crawl', verbose_name='Crawl')),
+ ],
+ options={
+ 'verbose_name': 'Crawl relation',
+ 'verbose_name_plural': 'Crawl relations',
+ },
+ ),
+ migrations.CreateModel(
+ name='CrawlResult',
+ fields=[
+ ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
+ ('nb_external_link', models.IntegerField(default=0, verbose_name='Number of external links')),
+ ('nb_internal_link', models.IntegerField(default=0, verbose_name='Number of internal links')),
+ ('nb_images', models.IntegerField(default=0, verbose_name='Number of images')),
+ ('nb_facebook', models.IntegerField(default=0, verbose_name='Number of Facebook links')),
+ ('nb_twitter', models.IntegerField(default=0, verbose_name='Number of Twitter links')),
+ ('nb_instagram', models.IntegerField(default=0, verbose_name='Number of Instagram links')),
+ ('nb_youtube', models.IntegerField(default=0, verbose_name='Number of Youtube links')),
+ ('nb_dailymotion', models.IntegerField(default=0, verbose_name='Number of Dailymotion links')),
+ ('nb_vimeo', models.IntegerField(default=0, verbose_name='Number of Vimeo links')),
+ ('nb_video', models.IntegerField(default=0, verbose_name='Number of videos')),
+ ('nb_audio', models.IntegerField(default=0, verbose_name='Number of audios')),
+ ('nb_internal_pdf', models.IntegerField(default=0, verbose_name='Number of internal PDF')),
+ ('nb_external_pdf', models.IntegerField(default=0, verbose_name='Number of external PDF')),
+ ('nb_internal_office', models.IntegerField(default=0, verbose_name='Number of internal PDF')),
+ ('nb_external_office', models.IntegerField(default=0, verbose_name='Number of external PDF')),
+ ('is_online', models.BooleanField(default=False, verbose_name='Website is online')),
+ ('redirection', models.URLField(blank=True, null=True, verbose_name='Redirection')),
+ ('crawl', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='commcrawler.Crawl', verbose_name='Crawl')),
+ ],
+ options={
+ 'verbose_name': 'Crawl result',
+ 'verbose_name_plural': 'Crawl results',
},
),
migrations.CreateModel(
@@ -66,9 +117,9 @@ class Migration(migrations.Migration):
('area', models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.SET_NULL, to='commcrawler.Area', verbose_name='Area')),
],
options={
+ 'verbose_name': 'Organization',
'verbose_name_plural': 'Organizations',
'ordering': ('name',),
- 'verbose_name': 'Organization',
},
),
migrations.CreateModel(
@@ -79,9 +130,9 @@ class Migration(migrations.Migration):
('parent', models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.CASCADE, to='commcrawler.OrganizationType', verbose_name='Parent')),
],
options={
+ 'verbose_name': 'Organization type',
'verbose_name_plural': 'Organization types',
'ordering': ('parent__name', 'name'),
- 'verbose_name': 'Organization type',
},
),
migrations.CreateModel(
@@ -93,9 +144,9 @@ class Migration(migrations.Migration):
('organization', models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.SET_NULL, to='commcrawler.Organization', verbose_name='Organization')),
],
options={
+ 'verbose_name': 'Target',
'verbose_name_plural': 'Targets',
'ordering': ('name',),
- 'verbose_name': 'Target',
},
),
migrations.AddField(
@@ -104,6 +155,26 @@ class Migration(migrations.Migration):
field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='commcrawler.OrganizationType', verbose_name='Type'),
),
migrations.AddField(
+ model_name='crawlresult',
+ name='target',
+ field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='commcrawler.Target', verbose_name='Target'),
+ ),
+ migrations.AddField(
+ model_name='crawlrelation',
+ name='destination',
+ field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='relation_destination', to='commcrawler.Target', verbose_name='Destination'),
+ ),
+ migrations.AddField(
+ model_name='crawlrelation',
+ name='source',
+ field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='relation_source', to='commcrawler.Target', verbose_name='Source'),
+ ),
+ migrations.AddField(
+ model_name='crawllink',
+ name='result',
+ field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='commcrawler.CrawlResult', verbose_name='Result'),
+ ),
+ migrations.AddField(
model_name='crawl',
name='targets',
field=models.ManyToManyField(blank=True, to='commcrawler.Target'),
diff --git a/commcrawler/models.py b/commcrawler/models.py
index f62157e..c15ce7e 100644
--- a/commcrawler/models.py
+++ b/commcrawler/models.py
@@ -121,3 +121,77 @@ class Crawl(models.Model):
@property
def target_nb(self):
return self.targets.count()
+
+
+class CrawlResult(models.Model):
+    """Result stored for one target of one crawl.
+
+    Holds a set of integer counters (links, images, social networks,
+    media, PDF and office documents), an online flag and an optional
+    redirection URL.
+    """
+    # ``on_delete`` is spelled out: omitting it is deprecated in Django 1.11
+    # and mandatory from 2.0. CASCADE is what the initial migration records.
+    crawl = models.ForeignKey(
+        Crawl, verbose_name=_("Crawl"), on_delete=models.CASCADE)
+    target = models.ForeignKey(
+        Target, verbose_name=_("Target"), on_delete=models.CASCADE)
+    nb_external_link = models.IntegerField(
+        verbose_name=_("Number of external links"), default=0)
+    nb_internal_link = models.IntegerField(
+        verbose_name=_("Number of internal links"), default=0)
+    nb_images = models.IntegerField(
+        verbose_name=_("Number of images"), default=0)
+    nb_facebook = models.IntegerField(
+        verbose_name=_("Number of Facebook links"), default=0)
+    nb_twitter = models.IntegerField(
+        verbose_name=_("Number of Twitter links"), default=0)
+    nb_instagram = models.IntegerField(
+        verbose_name=_("Number of Instagram links"), default=0)
+    nb_youtube = models.IntegerField(
+        verbose_name=_("Number of Youtube links"), default=0)
+    nb_dailymotion = models.IntegerField(
+        verbose_name=_("Number of Dailymotion links"), default=0)
+    nb_vimeo = models.IntegerField(
+        verbose_name=_("Number of Vimeo links"), default=0)
+    nb_video = models.IntegerField(
+        verbose_name=_("Number of videos"), default=0)
+    nb_audio = models.IntegerField(
+        verbose_name=_("Number of audios"), default=0)
+    nb_internal_pdf = models.IntegerField(
+        verbose_name=_("Number of internal PDF"), default=0)
+    nb_external_pdf = models.IntegerField(
+        verbose_name=_("Number of external PDF"), default=0)
+    # The two office counters used to reuse the PDF labels, which made them
+    # indistinguishable from the PDF counters in the admin and in forms.
+    nb_internal_office = models.IntegerField(
+        verbose_name=_("Number of internal office documents"), default=0)
+    nb_external_office = models.IntegerField(
+        verbose_name=_("Number of external office documents"), default=0)
+    is_online = models.BooleanField(
+        verbose_name=_("Website is online"), default=False)
+    redirection = models.URLField(
+        verbose_name=_("Redirection"), blank=True, null=True)
+
+    class Meta:
+        verbose_name = _("Crawl result")
+        verbose_name_plural = _("Crawl results")
+
+    def __str__(self):
+        """Return ``"<crawl> - <target>"``."""
+        return "{} - {}".format(self.crawl, self.target)
+
+
+class CrawlLink(models.Model):
+    """A single URL attached to a ``CrawlResult``."""
+    # ``on_delete`` is spelled out: omitting it is deprecated in Django 1.11
+    # and mandatory from 2.0. CASCADE is what the initial migration records.
+    result = models.ForeignKey(
+        CrawlResult, verbose_name=_("Result"), on_delete=models.CASCADE)
+    link = models.URLField(verbose_name=_("Link"))
+
+    class Meta:
+        verbose_name = _("Crawl link")
+        verbose_name_plural = _("Crawl links")
+
+    def __str__(self):
+        """Return ``"<result> - <link>"``."""
+        return "{} - {}".format(self.result, self.link)
+
+
+class CrawlRelation(models.Model):
+    """Relation between two targets recorded for a crawl.
+
+    NOTE(review): ``number`` presumably counts the links found from
+    ``source`` to ``destination`` -- confirm against the crawler code.
+    """
+    # ``on_delete`` is spelled out: omitting it is deprecated in Django 1.11
+    # and mandatory from 2.0. CASCADE is what the initial migration records.
+    crawl = models.ForeignKey(
+        Crawl, verbose_name=_("Crawl"), on_delete=models.CASCADE)
+    source = models.ForeignKey(
+        Target, verbose_name=_("Source"),
+        related_name="relation_source", on_delete=models.CASCADE)
+    destination = models.ForeignKey(
+        Target, verbose_name=_("Destination"),
+        related_name="relation_destination", on_delete=models.CASCADE)
+    number = models.IntegerField(verbose_name=_("Number"), default=1)
+
+    class Meta:
+        verbose_name = _("Crawl relation")
+        verbose_name_plural = _("Crawl relations")
+
+    def __str__(self):
+        """Return ``"<crawl> - <source> - <destination>"``."""
+        # Three placeholders for three values: with only two, str.format
+        # silently dropped the destination from the label.
+        return "{} - {} - {}".format(
+            self.crawl, self.source, self.destination)