diff options
-rw-r--r-- | commcrawler/admin.py | 27 | ||||
-rw-r--r-- | commcrawler/migrations/0001_initial.py | 85 | ||||
-rw-r--r-- | commcrawler/models.py | 74 |
3 files changed, 179 insertions, 7 deletions
diff --git a/commcrawler/admin.py b/commcrawler/admin.py
index f4df5e2..7ff1eed 100644
--- a/commcrawler/admin.py
+++ b/commcrawler/admin.py
@@ -122,3 +122,30 @@ class CrawlAdmin(admin.ModelAdmin):
 
 
 admin_site.register(models.Crawl, CrawlAdmin)
+
+
+class CrawlResultAdmin(admin.ModelAdmin):
+    model = models.CrawlResult
+    list_display = ("target", "crawl", "is_online")
+    list_filter = ("crawl",)
+
+
+admin_site.register(models.CrawlResult, CrawlResultAdmin)
+
+
+class CrawlLinkAdmin(admin.ModelAdmin):
+    model = models.CrawlLink
+    list_display = ("result", "link")
+
+
+admin_site.register(models.CrawlLink, CrawlLinkAdmin)
+
+
+class CrawlRelationAdmin(admin.ModelAdmin):
+    model = models.CrawlRelation
+    list_display = ("crawl", "source", "destination", "number")
+    list_filter = ("crawl",)
+    search_fields = ["source__name", "destination__name"]
+
+
+admin_site.register(models.CrawlRelation, CrawlRelationAdmin)
diff --git a/commcrawler/migrations/0001_initial.py b/commcrawler/migrations/0001_initial.py
index 2fbb9fe..1468584 100644
--- a/commcrawler/migrations/0001_initial.py
+++ b/commcrawler/migrations/0001_initial.py
@@ -1,5 +1,5 @@
 # -*- coding: utf-8 -*-
-# Generated by Django 1.11 on 2019-07-30 15:47
+# Generated by Django 1.11 on 2019-07-31 07:48
 from __future__ import unicode_literals
 
 import datetime
@@ -24,9 +24,9 @@ class Migration(migrations.Migration):
                 ('reference', models.CharField(blank=True, help_text='For instance, INSEE code for towns', max_length=100, null=True, verbose_name='Reference')),
             ],
             options={
+                'verbose_name': 'Area',
                 'verbose_name_plural': 'Areas',
                 'ordering': ('name',),
-                'verbose_name': 'Area',
             },
         ),
         migrations.CreateModel(
@@ -36,9 +36,9 @@ class Migration(migrations.Migration):
                 ('name', models.CharField(max_length=200, verbose_name='Name')),
             ],
             options={
+                'verbose_name': 'Area type',
                 'verbose_name_plural': 'Area types',
                 'ordering': ('name',),
-                'verbose_name': 'Area type',
             },
         ),
         migrations.CreateModel(
@@ -52,9 +52,60 @@ class Migration(migrations.Migration):
                 ('status', models.CharField(choices=[('C', 'Created'), ('P', 'In progress'), ('F', 'Finished')], default='C', max_length=1)),
             ],
             options={
+                'verbose_name': 'Crawl',
                 'verbose_name_plural': 'Crawls',
                 'ordering': ('created', 'name'),
-                'verbose_name': 'Crawl',
+            },
+        ),
+        migrations.CreateModel(
+            name='CrawlLink',
+            fields=[
+                ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
+                ('link', models.URLField(verbose_name='Link')),
+            ],
+            options={
+                'verbose_name': 'Crawl link',
+                'verbose_name_plural': 'Crawl links',
+            },
+        ),
+        migrations.CreateModel(
+            name='CrawlRelation',
+            fields=[
+                ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
+                ('number', models.IntegerField(default=1, verbose_name='Number')),
+                ('crawl', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='commcrawler.Crawl', verbose_name='Crawl')),
+            ],
+            options={
+                'verbose_name': 'Crawl relation',
+                'verbose_name_plural': 'Crawl relations',
+            },
+        ),
+        migrations.CreateModel(
+            name='CrawlResult',
+            fields=[
+                ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
+                ('nb_external_link', models.IntegerField(default=0, verbose_name='Number of external links')),
+                ('nb_internal_link', models.IntegerField(default=0, verbose_name='Number of internal links')),
+                ('nb_images', models.IntegerField(default=0, verbose_name='Number of images')),
+                ('nb_facebook', models.IntegerField(default=0, verbose_name='Number of Facebook links')),
+                ('nb_twitter', models.IntegerField(default=0, verbose_name='Number of Twitter links')),
+                ('nb_instagram', models.IntegerField(default=0, verbose_name='Number of Instagram links')),
+                ('nb_youtube', models.IntegerField(default=0, verbose_name='Number of Youtube links')),
+                ('nb_dailymotion', models.IntegerField(default=0, verbose_name='Number of Dailymotion links')),
+                ('nb_vimeo', models.IntegerField(default=0, verbose_name='Number of Vimeo links')),
+                ('nb_video', models.IntegerField(default=0, verbose_name='Number of videos')),
+                ('nb_audio', models.IntegerField(default=0, verbose_name='Number of audios')),
+                ('nb_internal_pdf', models.IntegerField(default=0, verbose_name='Number of internal PDF')),
+                ('nb_external_pdf', models.IntegerField(default=0, verbose_name='Number of external PDF')),
+                ('nb_internal_office', models.IntegerField(default=0, verbose_name='Number of internal office documents')),
+                ('nb_external_office', models.IntegerField(default=0, verbose_name='Number of external office documents')),
+                ('is_online', models.BooleanField(default=False, verbose_name='Website is online')),
+                ('redirection', models.URLField(blank=True, null=True, verbose_name='Redirection')),
+                ('crawl', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='commcrawler.Crawl', verbose_name='Crawl')),
+            ],
+            options={
+                'verbose_name': 'Crawl result',
+                'verbose_name_plural': 'Crawl results',
             },
         ),
         migrations.CreateModel(
@@ -66,9 +117,9 @@ class Migration(migrations.Migration):
                 ('area', models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.SET_NULL, to='commcrawler.Area', verbose_name='Area')),
             ],
             options={
+                'verbose_name': 'Organization',
                 'verbose_name_plural': 'Organizations',
                 'ordering': ('name',),
-                'verbose_name': 'Organization',
             },
         ),
         migrations.CreateModel(
@@ -79,9 +130,9 @@ class Migration(migrations.Migration):
                 ('parent', models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.CASCADE, to='commcrawler.OrganizationType', verbose_name='Parent')),
             ],
             options={
+                'verbose_name': 'Organization type',
                 'verbose_name_plural': 'Organization types',
                 'ordering': ('parent__name', 'name'),
-                'verbose_name': 'Organization type',
             },
         ),
         migrations.CreateModel(
@@ -93,9 +144,9 @@ class Migration(migrations.Migration):
                 ('organization', models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.SET_NULL, to='commcrawler.Organization', verbose_name='Organization')),
             ],
             options={
+                'verbose_name': 'Target',
                 'verbose_name_plural': 'Targets',
                 'ordering': ('name',),
-                'verbose_name': 'Target',
             },
         ),
         migrations.AddField(
@@ -104,6 +155,26 @@ class Migration(migrations.Migration):
             field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='commcrawler.OrganizationType', verbose_name='Type'),
         ),
         migrations.AddField(
+            model_name='crawlresult',
+            name='target',
+            field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='commcrawler.Target', verbose_name='Target'),
+        ),
+        migrations.AddField(
+            model_name='crawlrelation',
+            name='destination',
+            field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='relation_destination', to='commcrawler.Target', verbose_name='Destination'),
+        ),
+        migrations.AddField(
+            model_name='crawlrelation',
+            name='source',
+            field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='relation_source', to='commcrawler.Target', verbose_name='Source'),
+        ),
+        migrations.AddField(
+            model_name='crawllink',
+            name='result',
+            field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='commcrawler.CrawlResult', verbose_name='Result'),
+        ),
+        migrations.AddField(
             model_name='crawl',
             name='targets',
             field=models.ManyToManyField(blank=True, to='commcrawler.Target'),
diff --git a/commcrawler/models.py b/commcrawler/models.py
index f62157e..c15ce7e 100644
--- a/commcrawler/models.py
+++ b/commcrawler/models.py
@@ -121,3 +121,77 @@ class Crawl(models.Model):
     @property
     def target_nb(self):
         return self.targets.count()
+
+
+class CrawlResult(models.Model):
+    crawl = models.ForeignKey(Crawl, verbose_name=_("Crawl"))
+    target = models.ForeignKey(Target, verbose_name=_("Target"))
+    nb_external_link = models.IntegerField(
+        verbose_name=_("Number of external links"), default=0)
+    nb_internal_link = models.IntegerField(
+        verbose_name=_("Number of internal links"), default=0)
+    nb_images = models.IntegerField(
+        verbose_name=_("Number of images"), default=0)
+    nb_facebook = models.IntegerField(
+        verbose_name=_("Number of Facebook links"), default=0)
+    nb_twitter = models.IntegerField(
+        verbose_name=_("Number of Twitter links"), default=0)
+    nb_instagram = models.IntegerField(
+        verbose_name=_("Number of Instagram links"), default=0)
+    nb_youtube = models.IntegerField(
+        verbose_name=_("Number of Youtube links"), default=0)
+    nb_dailymotion = models.IntegerField(
+        verbose_name=_("Number of Dailymotion links"), default=0)
+    nb_vimeo = models.IntegerField(
+        verbose_name=_("Number of Vimeo links"), default=0)
+    nb_video = models.IntegerField(
+        verbose_name=_("Number of videos"), default=0)
+    nb_audio = models.IntegerField(
+        verbose_name=_("Number of audios"), default=0)
+    nb_internal_pdf = models.IntegerField(
+        verbose_name=_("Number of internal PDF"), default=0)
+    nb_external_pdf = models.IntegerField(
+        verbose_name=_("Number of external PDF"), default=0)
+    nb_internal_office = models.IntegerField(
+        verbose_name=_("Number of internal office documents"), default=0)
+    nb_external_office = models.IntegerField(
+        verbose_name=_("Number of external office documents"), default=0)
+    is_online = models.BooleanField(
+        verbose_name=_("Website is online"), default=False)
+    redirection = models.URLField(
+        verbose_name=_("Redirection"), blank=True, null=True)
+
+    class Meta:
+        verbose_name = _("Crawl result")
+        verbose_name_plural = _("Crawl results")
+
+    def __str__(self):
+        return "{} - {}".format(self.crawl, self.target)
+
+
+class CrawlLink(models.Model):
+    result = models.ForeignKey(CrawlResult, verbose_name=_("Result"))
+    link = models.URLField(verbose_name=_("Link"))
+
+    class Meta:
+        verbose_name = _("Crawl link")
+        verbose_name_plural = _("Crawl links")
+
+    def __str__(self):
+        return "{} - {}".format(self.result, self.link)
+
+
+class CrawlRelation(models.Model):
+    crawl = models.ForeignKey(Crawl, verbose_name=_("Crawl"))
+    source = models.ForeignKey(Target, verbose_name=_("Source"),
+                               related_name="relation_source")
+    destination = models.ForeignKey(Target, verbose_name=_("Destination"),
+                                    related_name="relation_destination")
+    number = models.IntegerField(verbose_name=_("Number"), default=1)
+
+    class Meta:
+        verbose_name = _("Crawl relation")
+        verbose_name_plural = _("Crawl relations")
+
+    def __str__(self):
+        return "{} - {} - {}".format(self.crawl, self.source, self.destination)