"""Database models describing crawls, their per-target results and relations."""
import datetime

from django.contrib.postgres.fields import JSONField
from django.contrib.sites.models import _simple_domain_name_validator
from django.db import models
from django.utils.translation import ugettext_lazy as _

from commorganization.models import Target


class ExludedDomains(models.Model):
    """A unique domain name stored as an excluded domain.

    NOTE(review): the class name is misspelled ("Exluded"); it is kept as-is
    because renaming it would change the public name and the database table.
    """

    domain = models.CharField(
        _("Domain name"),
        max_length=100,
        validators=[_simple_domain_name_validator],
        unique=True)

    class Meta:
        verbose_name = _("Excluded domain")
        verbose_name_plural = _("Excluded domains")

    def __str__(self):
        """Return the domain name."""
        return self.domain

    def natural_key(self):
        """Return the domain name as this object's natural key.

        NOTE(review): Django's serialization framework documents natural keys
        as tuples; this returns a bare string. Left unchanged so existing
        callers keep working -- confirm before switching to ``(self.domain,)``.
        """
        return self.domain


class Crawl(models.Model):
    """A named crawl over a set of targets, with status and timing fields."""

    # One-letter status codes stored in ``status``.
    STATUS = (
        ('C', _("Created")),
        ('A', _("Planned")),
        ('P', _("Crawl in progress")),
        ('M', _("Match link in progress")),
        ('F', _("Finished"))
    )

    name = models.CharField(verbose_name=_("Name"), max_length=200,
                            unique=True)
    # NOTE(review): datetime.datetime.now yields naive datetimes; consider
    # django.utils.timezone.now if the project runs with USE_TZ enabled.
    created = models.DateTimeField(
        verbose_name=_("Creation"), default=datetime.datetime.now)
    started = models.DateTimeField(
        verbose_name=_("Started"), blank=True, null=True)
    crawl_ended = models.DateTimeField(
        verbose_name=_("Crawl end"), blank=True, null=True)
    ended = models.DateTimeField(
        verbose_name=_("Ended"), blank=True, null=True)
    status = models.CharField(
        verbose_name=_("Status"), max_length=1, choices=STATUS, default='C')
    targets = models.ManyToManyField(Target, blank=True)
    # Read by ``progress`` as the "done" counter while status is 'M'.
    progression = models.IntegerField(
        verbose_name=_("Progression"), blank=True, null=True)

    class Meta:
        verbose_name = _("Crawl")
        verbose_name_plural = _("Crawls")
        ordering = ("created", "name")

    def __str__(self):
        """Return the crawl name."""
        return self.name

    @property
    def target_nb(self):
        """Return the number of targets attached to this crawl."""
        return self.targets.count()

    @property
    def progress(self):
        """Return a human-readable progress string.

        Returns ``"-"`` when the crawl has no target or when its status is
        neither ``'P'`` nor ``'M'``. Otherwise returns
        ``"<percent> % (<done>/<todo>)"`` where ``todo`` is the target count
        and ``done`` is:

        * status ``'P'``: the number of related results whose status is
          ``'T'`` or ``'F'``;
        * status ``'M'``: ``progression`` (``0`` when unset).
        """
        todo = self.target_nb
        if todo == 0:
            # Nothing to do; also avoids a division by zero below.
            return "-"

        if self.status == "P":
            done = self.results.filter(status__in=("T", "F")).count()
        elif self.status == "M":
            done = self.progression or 0
        else:
            return "-"

        percent = int(done / todo * 100)
        return "{} % ({}/{})".format(percent, done, todo)


class CrawlResult(models.Model):
    """The outcome of one crawl for one target (unique per crawl/target)."""

    # One-letter status codes stored in ``status``.
    STATUS = (
        ('P', _("In progress")),
        ('T', _("Time out")),
        ('F', _("Finished"))
    )

    # on_delete is explicit: CASCADE was the implicit default before
    # Django 2.0, where the argument became mandatory.
    crawl = models.ForeignKey(Crawl, verbose_name=_("Crawl"),
                              related_name="results",
                              on_delete=models.CASCADE)
    target = models.ForeignKey(Target, verbose_name=_("Target"),
                               on_delete=models.CASCADE)
    started = models.DateTimeField(
        verbose_name=_("Start date"), default=datetime.datetime.now)
    duration = models.DurationField(
        verbose_name=_("Duration"), blank=True, null=True)
    status = models.CharField(
        verbose_name=_("Status"), max_length=1, choices=STATUS, default='P')
    # ``default=list`` is a callable, so each row gets its own fresh list.
    crawl_result = JSONField(verbose_name=_("Crawl result"), default=list)

    # Per-category counters, all defaulting to 0.
    nb_external_link = models.IntegerField(
        verbose_name=_("External links"), default=0)
    nb_internal_link = models.IntegerField(
        verbose_name=_("Internal links"), default=0)
    nb_images = models.IntegerField(
        verbose_name=_("Images"), default=0)
    nb_facebook = models.IntegerField(
        verbose_name=_("Facebook links"), default=0)
    nb_twitter = models.IntegerField(
        verbose_name=_("Twitter links"), default=0)
    nb_instagram = models.IntegerField(
        verbose_name=_("Instagram links"), default=0)
    nb_youtube = models.IntegerField(
        verbose_name=_("Youtube links"), default=0)
    nb_dailymotion = models.IntegerField(
        verbose_name=_("Dailymotion links"), default=0)
    nb_vimeo = models.IntegerField(
        verbose_name=_("Vimeo links"), default=0)
    nb_video = models.IntegerField(
        verbose_name=_("Internal videos"), default=0)
    nb_audio = models.IntegerField(
        verbose_name=_("Internal audios"), default=0)
    nb_internal_pdf = models.IntegerField(
        verbose_name=_("Internal PDF"), default=0)
    nb_external_pdf = models.IntegerField(
        verbose_name=_("External PDF"), default=0)
    nb_internal_office = models.IntegerField(
        verbose_name=_("Internal office documents"), default=0)
    nb_external_office = models.IntegerField(
        verbose_name=_("External office documents"), default=0)

    is_online = models.BooleanField(
        verbose_name=_("Website is online"), default=False)
    bad_ssl = models.BooleanField(
        verbose_name=_("Bad SSL certificate"), default=False)
    redirection = models.URLField(
        verbose_name=_("Redirection"), blank=True, null=True)

    class Meta:
        verbose_name = _("Crawl result")
        verbose_name_plural = _("Crawl results")
        unique_together = ("crawl", "target")

    def __str__(self):
        """Return ``"<crawl> - <target>"``."""
        return "{} - {}".format(self.crawl, self.target)


class CrawlRelation(models.Model):
    """A counted source -> destination relation between targets in a crawl."""

    # on_delete is explicit: CASCADE was the implicit default before
    # Django 2.0, where the argument became mandatory.
    crawl = models.ForeignKey(Crawl, verbose_name=_("Crawl"),
                              on_delete=models.CASCADE)
    source = models.ForeignKey(Target, verbose_name=_("Source"),
                               related_name="relation_source",
                               on_delete=models.CASCADE)
    destination = models.ForeignKey(Target, verbose_name=_("Destination"),
                                    related_name="relation_destination",
                                    on_delete=models.CASCADE)
    number = models.IntegerField(verbose_name=_("Number"), default=1)

    class Meta:
        verbose_name = _("Crawl relation")
        verbose_name_plural = _("Crawl relations")
        unique_together = ("crawl", "source", "destination")

    def __str__(self):
        """Return ``"<crawl> - <source> - <destination>"``."""
        # Three placeholders for three arguments: the previous two-placeholder
        # format string silently dropped the destination.
        return "{} - {} - {}".format(
            self.crawl, self.source, self.destination)