summary | refs | log | tree | commit | diff
path: root/commcrawler/models.py
diff options
context:
space:
mode:
author: Étienne Loks <etienne.loks@iggdrasil.net> 2019-08-07 03:53:32 +0200
committer: Étienne Loks <etienne.loks@iggdrasil.net> 2019-08-07 03:55:49 +0200
commit: d737e04553f464966f54739ba37f9f06dab44586 (patch)
tree: 2b68891ff1629b55b820312fdd3a17ce91ac5722 /commcrawler/models.py
parent: 9fbd94f70d4b819b45eef720425242c0d69b032d (diff)
download: Comm-on-net-d737e04553f464966f54739ba37f9f06dab44586.tar.bz2
download: Comm-on-net-d737e04553f464966f54739ba37f9f06dab44586.zip
Save crawling results in the database
Diffstat (limited to 'commcrawler/models.py')
-rw-r--r-- commcrawler/models.py 22
1 files changed, 20 insertions, 2 deletions
diff --git a/commcrawler/models.py b/commcrawler/models.py
index f3e3246..9a98b89 100644
--- a/commcrawler/models.py
+++ b/commcrawler/models.py
@@ -1,5 +1,7 @@
import datetime
+from django.contrib.postgres.fields import JSONField
+from django.contrib.sites.models import _simple_domain_name_validator
from django.db import models
from django.utils.translation import ugettext_lazy as _
@@ -7,7 +9,9 @@ from commorganization.models import Target
class ExludedDomains(models.Model):
- url = models.URLField(verbose_name=_("URL"), unique=True)
+ domain = models.CharField(
+ _("Domain name"), max_length=100,
+ validators=[_simple_domain_name_validator], unique=True)
class Meta:
verbose_name = _("Excluded domain")
@@ -22,7 +26,7 @@ class Crawl(models.Model):
('C', _("Created")), ('P', _("In progress")),
('F', _("Finished"))
)
- name = models.CharField(verbose_name=_("Name"), max_length=200)
+ name = models.CharField(verbose_name=_("Name"), max_length=200, unique=True)
created = models.DateTimeField(
verbose_name=_("Creation date"), default=datetime.datetime.now)
started = models.DateTimeField(
@@ -48,8 +52,20 @@ class Crawl(models.Model):
class CrawlResult(models.Model):
+ STATUS = (
+ ('P', _("In progress")),
+ ('F', _("Finished"))
+ )
crawl = models.ForeignKey(Crawl, verbose_name=_("Crawl"))
target = models.ForeignKey(Target, verbose_name=_("Target"))
+ started = models.DateTimeField(
+ verbose_name=_("Start date"), default=datetime.datetime.now)
+ duration = models.DurationField(
+ verbose_name=_("Duration"), blank=True, null=True)
+ status = models.CharField(
+ verbose_name=_("Status"),
+ max_length=1, choices=STATUS, default='P')
+ crawl_result = JSONField(verbose_name=_("Crawl result"), default=list)
nb_external_link = models.IntegerField(
verbose_name=_("Number of external links"), default=0)
nb_internal_link = models.IntegerField(
@@ -88,6 +104,7 @@ class CrawlResult(models.Model):
class Meta:
verbose_name = _("Crawl result")
verbose_name_plural = _("Crawl results")
+ unique_together = ("crawl", "target")
def __str__(self):
    """Return "<crawl> - <target>" as the readable label of this result."""
    crawl_label, target_label = self.crawl, self.target
    return "{} - {}".format(crawl_label, target_label)
@@ -116,6 +133,7 @@ class CrawlRelation(models.Model):
class Meta:
verbose_name = _("Crawl relation")
verbose_name_plural = _("Crawl relations")
+ unique_together = ("crawl", "source", "destination")
def __str__(self):
    """Return "<crawl> - <source> - <destination>" as the readable label.

    The format string needs one placeholder per argument: ``str.format``
    silently ignores surplus positional arguments, so with only two
    placeholders the destination was dropped from the label.
    """
    return "{} - {} - {}".format(self.crawl, self.source, self.destination)