summaryrefslogtreecommitdiff
path: root/commcrawler/models.py
blob: f3e3246c9b70ab1057218ceb71148d52c4f5fd93 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
import datetime

from django.db import models
from django.utils.translation import ugettext_lazy as _

from commorganization.models import Target


class ExludedDomains(models.Model):
    """A single URL registered as an excluded domain.

    Each row holds one unique URL. How the exclusion is enforced is not
    visible in this module (presumably the crawler skips these URLs --
    confirm against the crawler code).

    NOTE: the class name is misspelled ("Exluded"). It is kept as-is
    because it is what other modules import.
    """

    url = models.URLField(
        unique=True,
        verbose_name=_("URL"),
    )

    class Meta:
        verbose_name_plural = _("Excluded domains")
        verbose_name = _("Excluded domain")

    def __str__(self):
        """Use the stored URL as the human-readable label."""
        return self.url


class Crawl(models.Model):
    """A named crawl over a set of targets.

    Stores a one-letter status, creation/start/end timestamps and the
    targets attached to the crawl.
    """

    # (stored code, translated label) pairs accepted by ``status``.
    STATUS = (
        ('C', _("Created")),
        ('P', _("In progress")),
        ('F', _("Finished")),
    )

    name = models.CharField(max_length=200, verbose_name=_("Name"))
    # NOTE(review): ``datetime.datetime.now`` returns a naive datetime; if
    # the project enables USE_TZ, ``django.utils.timezone.now`` is the usual
    # choice -- confirm against the settings before changing.
    created = models.DateTimeField(
        default=datetime.datetime.now,
        verbose_name=_("Creation date"),
    )
    # Both timestamps are optional: they may be left empty.
    started = models.DateTimeField(
        null=True,
        blank=True,
        verbose_name=_("Start date"),
    )
    ended = models.DateTimeField(
        null=True,
        blank=True,
        verbose_name=_("End date"),
    )
    status = models.CharField(
        max_length=1,
        choices=STATUS,
        default='C',
        verbose_name=_("Status"),
    )
    targets = models.ManyToManyField(Target, blank=True)

    class Meta:
        ordering = ("created", "name")
        verbose_name_plural = _("Crawls")
        verbose_name = _("Crawl")

    def __str__(self):
        """Use the crawl name as the human-readable label."""
        return self.name

    @property
    def target_nb(self):
        """Return the number of targets attached to this crawl."""
        attached_targets = self.targets
        return attached_targets.count()


class CrawlResult(models.Model):
    """Counters recorded for one target within one crawl.

    Every ``nb_*`` field is an integer counter defaulting to 0.
    ``is_online`` and ``redirection`` record the availability of the
    website and an optional redirection URL.
    """

    # ``on_delete`` is given explicitly: CASCADE is what older Django
    # versions applied implicitly, and Django 2.0+ requires the argument.
    crawl = models.ForeignKey(
        Crawl, verbose_name=_("Crawl"), on_delete=models.CASCADE)
    target = models.ForeignKey(
        Target, verbose_name=_("Target"), on_delete=models.CASCADE)

    # Link counters.
    nb_external_link = models.IntegerField(
        verbose_name=_("Number of external links"), default=0)
    nb_internal_link = models.IntegerField(
        verbose_name=_("Number of internal links"), default=0)
    nb_images = models.IntegerField(
        verbose_name=_("Number of images"), default=0)

    # Social network / video platform link counters.
    nb_facebook = models.IntegerField(
        verbose_name=_("Number of Facebook links"), default=0)
    nb_twitter = models.IntegerField(
        verbose_name=_("Number of Twitter links"), default=0)
    nb_instagram = models.IntegerField(
        verbose_name=_("Number of Instagram links"), default=0)
    nb_youtube = models.IntegerField(
        verbose_name=_("Number of Youtube links"), default=0)
    nb_dailymotion = models.IntegerField(
        verbose_name=_("Number of Dailymotion links"), default=0)
    nb_vimeo = models.IntegerField(
        verbose_name=_("Number of Vimeo links"), default=0)

    # Media counters.
    nb_video = models.IntegerField(
        verbose_name=_("Number of videos"), default=0)
    nb_audio = models.IntegerField(
        verbose_name=_("Number of audios"), default=0)

    # Document counters, split by internal/external origin.
    nb_internal_pdf = models.IntegerField(
        verbose_name=_("Number of internal PDF"), default=0)
    nb_external_pdf = models.IntegerField(
        verbose_name=_("Number of external PDF"), default=0)
    nb_internal_office = models.IntegerField(
        verbose_name=_("Number of internal office documents"), default=0)
    nb_external_office = models.IntegerField(
        verbose_name=_("Number of external office documents"), default=0)

    is_online = models.BooleanField(
        verbose_name=_("Website is online"), default=False)
    # Optional: may be left empty.
    redirection = models.URLField(
        verbose_name=_("Redirection"), blank=True, null=True)

    class Meta:
        verbose_name = _("Crawl result")
        verbose_name_plural = _("Crawl results")

    def __str__(self):
        """Return "<crawl> - <target>" as the human-readable label."""
        return "{} - {}".format(self.crawl, self.target)


class CrawlLink(models.Model):
    """A single link URL attached to a crawl result."""

    # ``on_delete`` is given explicitly: CASCADE is what older Django
    # versions applied implicitly, and Django 2.0+ requires the argument.
    result = models.ForeignKey(
        CrawlResult, verbose_name=_("Result"), on_delete=models.CASCADE)
    link = models.URLField(verbose_name=_("Link"))

    class Meta:
        verbose_name = _("Crawl link")
        verbose_name_plural = _("Crawl links")

    def __str__(self):
        """Return "<result> - <link>" as the human-readable label."""
        return "{} - {}".format(self.result, self.link)


class CrawlRelation(models.Model):
    """A counted relation from a source target to a destination target.

    The relation is scoped to one crawl. ``number`` defaults to 1
    (presumably the number of links found from source to destination --
    confirm against the crawler code).
    """

    # ``on_delete`` is given explicitly: CASCADE is what older Django
    # versions applied implicitly, and Django 2.0+ requires the argument.
    crawl = models.ForeignKey(
        Crawl, verbose_name=_("Crawl"), on_delete=models.CASCADE)
    # Both keys point at Target, so distinct related names are needed for
    # the reverse accessors.
    source = models.ForeignKey(Target, verbose_name=_("Source"),
                               related_name="relation_source",
                               on_delete=models.CASCADE)
    destination = models.ForeignKey(Target, verbose_name=_("Destination"),
                                    related_name="relation_destination",
                                    on_delete=models.CASCADE)
    number = models.IntegerField(verbose_name=_("Number"), default=1)

    class Meta:
        verbose_name = _("Crawl relation")
        verbose_name_plural = _("Crawl relations")

    def __str__(self):
        """Return "<crawl> - <source> - <destination>" as the label."""
        # Three placeholders: the former two-placeholder template silently
        # dropped the destination from the label.
        return "{} - {} - {}".format(
            self.crawl, self.source, self.destination)