1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
|
import datetime
from django.db import models
from django.utils.translation import ugettext_lazy as _
from commorganization.models import Target
class ExludedDomains(models.Model):
    """Excluded domain, identified by a unique URL.

    The class name keeps its existing spelling because other modules
    may reference it.
    """

    # Django accepts the verbose name as the first positional argument.
    url = models.URLField(_("URL"), unique=True)

    class Meta:
        verbose_name = _("Excluded domain")
        verbose_name_plural = _("Excluded domains")

    def __str__(self):
        """Return the stored URL as the display label."""
        return self.url
class Crawl(models.Model):
    """A named crawl associated with a set of targets.

    Stores creation, start and end timestamps, a one-letter status code
    and the targets attached to the crawl.
    """

    # One-letter codes stored in ``status`` with their display labels.
    STATUS = (
        ('C', _("Created")),
        ('P', _("In progress")),
        ('F', _("Finished")),
    )

    name = models.CharField(_("Name"), max_length=200)
    # NOTE(review): datetime.datetime.now yields a naive datetime; if the
    # project enables USE_TZ, django.utils.timezone.now may be preferable.
    created = models.DateTimeField(
        _("Creation date"),
        default=datetime.datetime.now,
    )
    # Start and end dates are optional.
    started = models.DateTimeField(_("Start date"), null=True, blank=True)
    ended = models.DateTimeField(_("End date"), null=True, blank=True)
    status = models.CharField(
        _("Status"),
        choices=STATUS,
        default='C',
        max_length=1,
    )
    targets = models.ManyToManyField(Target, blank=True)

    class Meta:
        verbose_name = _("Crawl")
        verbose_name_plural = _("Crawls")
        ordering = ("created", "name")

    def __str__(self):
        """Return the crawl name as the display label."""
        return self.name

    @property
    def target_nb(self):
        """Return the number of targets attached to this crawl."""
        attached_targets = self.targets
        return attached_targets.count()
class CrawlResult(models.Model):
    """Counters and status recorded for one target within one crawl.

    Every ``nb_*`` field is an integer counter defaulting to 0; the
    verbose name of each field states what is being counted.
    """

    # on_delete is spelled out explicitly: CASCADE was the implicit default
    # while the argument was optional, and Django 2.0+ requires it.
    crawl = models.ForeignKey(
        Crawl, verbose_name=_("Crawl"), on_delete=models.CASCADE)
    target = models.ForeignKey(
        Target, verbose_name=_("Target"), on_delete=models.CASCADE)

    # Link and image counters.
    nb_external_link = models.IntegerField(
        _("Number of external links"), default=0)
    nb_internal_link = models.IntegerField(
        _("Number of internal links"), default=0)
    nb_images = models.IntegerField(
        _("Number of images"), default=0)

    # Social network and video platform link counters.
    nb_facebook = models.IntegerField(
        _("Number of Facebook links"), default=0)
    nb_twitter = models.IntegerField(
        _("Number of Twitter links"), default=0)
    nb_instagram = models.IntegerField(
        _("Number of Instagram links"), default=0)
    nb_youtube = models.IntegerField(
        _("Number of Youtube links"), default=0)
    nb_dailymotion = models.IntegerField(
        _("Number of Dailymotion links"), default=0)
    nb_vimeo = models.IntegerField(
        _("Number of Vimeo links"), default=0)

    # Media counters.
    nb_video = models.IntegerField(
        _("Number of videos"), default=0)
    nb_audio = models.IntegerField(
        _("Number of audios"), default=0)

    # Document counters, split by internal/external.
    nb_internal_pdf = models.IntegerField(
        _("Number of internal PDF"), default=0)
    nb_external_pdf = models.IntegerField(
        _("Number of external PDF"), default=0)
    nb_internal_office = models.IntegerField(
        _("Number of internal office documents"), default=0)
    nb_external_office = models.IntegerField(
        _("Number of external office documents"), default=0)

    is_online = models.BooleanField(
        _("Website is online"), default=False)
    # Optional redirection URL.
    redirection = models.URLField(
        _("Redirection"), blank=True, null=True)

    class Meta:
        verbose_name = _("Crawl result")
        verbose_name_plural = _("Crawl results")

    def __str__(self):
        """Return "<crawl> - <target>" as the display label."""
        return "{} - {}".format(self.crawl, self.target)
class CrawlLink(models.Model):
    """A single link URL attached to a crawl result."""

    # on_delete is spelled out explicitly: CASCADE was the implicit default
    # while the argument was optional, and Django 2.0+ requires it.
    result = models.ForeignKey(
        CrawlResult, verbose_name=_("Result"), on_delete=models.CASCADE)
    link = models.URLField(_("Link"))

    class Meta:
        verbose_name = _("Crawl link")
        verbose_name_plural = _("Crawl links")

    def __str__(self):
        """Return "<result> - <link>" as the display label."""
        return "{} - {}".format(self.result, self.link)
class CrawlRelation(models.Model):
    """Relation between a source and a destination target within a crawl.

    ``number`` is an integer defaulting to 1 (presumably the count of
    links from source to destination -- confirm against the crawler).
    """

    # on_delete is spelled out explicitly: CASCADE was the implicit default
    # while the argument was optional, and Django 2.0+ requires it.
    crawl = models.ForeignKey(
        Crawl, verbose_name=_("Crawl"), on_delete=models.CASCADE)
    # Distinct related_name values are needed because both foreign keys
    # point at the same Target model.
    source = models.ForeignKey(
        Target, verbose_name=_("Source"),
        related_name="relation_source", on_delete=models.CASCADE)
    destination = models.ForeignKey(
        Target, verbose_name=_("Destination"),
        related_name="relation_destination", on_delete=models.CASCADE)
    number = models.IntegerField(_("Number"), default=1)

    class Meta:
        verbose_name = _("Crawl relation")
        verbose_name_plural = _("Crawl relations")

    def __str__(self):
        """Return "<crawl> - <source> - <destination>" as the label."""
        # Three placeholders: the previous two-placeholder template
        # silently dropped the destination argument.
        return "{} - {} - {}".format(
            self.crawl, self.source, self.destination)
|