summaryrefslogtreecommitdiff
path: root/commcrawler/models.py
blob: 0050ad83549720542e0e6551f7ab9a7940f63ab6 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
import datetime

from django.db import models
from django.utils.translation import ugettext_lazy as _

from commorganization.models import Target


class Crawl(models.Model):
    """A named crawl over a set of targets.

    Keeps the creation, start and end timestamps, a one-letter status code
    and the many-to-many set of targets attached to the crawl.
    """

    # (stored code, translated label) pairs offered as choices for ``status``.
    STATUS = (
        ('C', _("Created")),
        ('P', _("In progress")),
        ('F', _("Finished")),
    )

    name = models.CharField(max_length=200, verbose_name=_("Name"))
    # The callable itself is handed over (not its result), so the default is
    # computed each time a default value is needed.
    # NOTE(review): datetime.datetime.now yields a naive datetime -- confirm
    # this is what the project's time zone settings expect.
    created = models.DateTimeField(
        default=datetime.datetime.now, verbose_name=_("Creation date"))
    started = models.DateTimeField(
        null=True, blank=True, verbose_name=_("Start date"))
    ended = models.DateTimeField(
        null=True, blank=True, verbose_name=_("End date"))
    status = models.CharField(
        choices=STATUS, default='C', max_length=1,
        verbose_name=_("Status"))
    targets = models.ManyToManyField(Target, blank=True)

    class Meta:
        ordering = ("created", "name")
        verbose_name = _("Crawl")
        verbose_name_plural = _("Crawls")

    def __str__(self):
        # A crawl is displayed by its name alone.
        return self.name

    @property
    def target_nb(self):
        """Return how many targets are attached to this crawl."""
        return self.targets.count()


class CrawlResult(models.Model):
    """Counters and status flags recorded for one target within one crawl.

    Every ``nb_*`` field is an integer counter defaulting to 0; ``is_online``
    defaults to False and ``redirection`` is optional.
    """

    # ``on_delete`` is spelled out explicitly: it is a required argument from
    # Django 2.0 on, and CASCADE is what older versions applied implicitly.
    crawl = models.ForeignKey(
        Crawl, verbose_name=_("Crawl"), on_delete=models.CASCADE)
    target = models.ForeignKey(
        Target, verbose_name=_("Target"), on_delete=models.CASCADE)
    nb_external_link = models.IntegerField(
        verbose_name=_("Number of external links"), default=0)
    nb_internal_link = models.IntegerField(
        verbose_name=_("Number of internal links"), default=0)
    nb_images = models.IntegerField(
        verbose_name=_("Number of images"), default=0)
    nb_facebook = models.IntegerField(
        verbose_name=_("Number of Facebook links"), default=0)
    nb_twitter = models.IntegerField(
        verbose_name=_("Number of Twitter links"), default=0)
    nb_instagram = models.IntegerField(
        verbose_name=_("Number of Instagram links"), default=0)
    nb_youtube = models.IntegerField(
        verbose_name=_("Number of Youtube links"), default=0)
    nb_dailymotion = models.IntegerField(
        verbose_name=_("Number of Dailymotion links"), default=0)
    nb_vimeo = models.IntegerField(
        verbose_name=_("Number of Vimeo links"), default=0)
    nb_video = models.IntegerField(
        verbose_name=_("Number of videos"), default=0)
    nb_audio = models.IntegerField(
        verbose_name=_("Number of audios"), default=0)
    nb_internal_pdf = models.IntegerField(
        verbose_name=_("Number of internal PDF"), default=0)
    nb_external_pdf = models.IntegerField(
        verbose_name=_("Number of external PDF"), default=0)
    nb_internal_office = models.IntegerField(
        verbose_name=_("Number of internal office documents"), default=0)
    nb_external_office = models.IntegerField(
        verbose_name=_("Number of external office documents"), default=0)
    is_online = models.BooleanField(
        verbose_name=_("Website is online"), default=False)
    redirection = models.URLField(
        verbose_name=_("Redirection"), blank=True, null=True)

    class Meta:
        verbose_name = _("Crawl result")
        verbose_name_plural = _("Crawl results")

    def __str__(self):
        # Displayed as "<crawl> - <target>".
        return "{} - {}".format(self.crawl, self.target)


class CrawlLink(models.Model):
    """A single URL attached to a crawl result."""

    # ``on_delete`` is spelled out explicitly: it is a required argument from
    # Django 2.0 on, and CASCADE is what older versions applied implicitly.
    result = models.ForeignKey(
        CrawlResult, verbose_name=_("Result"), on_delete=models.CASCADE)
    link = models.URLField(verbose_name=_("Link"))

    class Meta:
        verbose_name = _("Crawl link")
        verbose_name_plural = _("Crawl links")

    def __str__(self):
        # Displayed as "<result> - <link>".
        return "{} - {}".format(self.result, self.link)


class CrawlRelation(models.Model):
    """A counted relation from a source target to a destination target.

    The relation belongs to one crawl; ``number`` is an integer defaulting
    to 1.
    """

    # ``on_delete`` is spelled out explicitly: it is a required argument from
    # Django 2.0 on, and CASCADE is what older versions applied implicitly.
    crawl = models.ForeignKey(
        Crawl, verbose_name=_("Crawl"), on_delete=models.CASCADE)
    # Two foreign keys point at Target, so each needs its own related_name
    # to keep the reverse accessors distinct.
    source = models.ForeignKey(
        Target, verbose_name=_("Source"),
        related_name="relation_source", on_delete=models.CASCADE)
    destination = models.ForeignKey(
        Target, verbose_name=_("Destination"),
        related_name="relation_destination", on_delete=models.CASCADE)
    number = models.IntegerField(verbose_name=_("Number"), default=1)

    class Meta:
        verbose_name = _("Crawl relation")
        verbose_name_plural = _("Crawl relations")

    def __str__(self):
        # Three placeholders for three values: the previous two-placeholder
        # format string silently dropped the destination.
        return "{} - {} - {}".format(
            self.crawl, self.source, self.destination)