summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Étienne Loks <etienne.loks@iggdrasil.net> 2019-08-10 12:28:09 +0200
committer: Étienne Loks <etienne.loks@iggdrasil.net> 2019-08-10 12:28:09 +0200
commit: 9fb538feb0989df7bcd3538ae178165cc10cc184 (patch)
tree: 78c4372bf149e7a086448c0a2ab752ab6e890d0e
parent: 7436b1bac461a6bf71f4329b49d26ee5740ae3ee (diff)
download: Comm-on-net-9fb538feb0989df7bcd3538ae178165cc10cc184.tar.bz2
download: Comm-on-net-9fb538feb0989df7bcd3538ae178165cc10cc184.zip
Better management of timeout in crawl...
-rw-r--r-- commcrawler/locale/fr/LC_MESSAGES/django.po 92
-rw-r--r-- commcrawler/scrapy.py 8
-rw-r--r-- commorganization/locale/fr/LC_MESSAGES/django.po 2
3 files changed, 55 insertions, 47 deletions
diff --git a/commcrawler/locale/fr/LC_MESSAGES/django.po b/commcrawler/locale/fr/LC_MESSAGES/django.po
index 081304b..2145e0d 100644
--- a/commcrawler/locale/fr/LC_MESSAGES/django.po
+++ b/commcrawler/locale/fr/LC_MESSAGES/django.po
@@ -2,7 +2,7 @@ msgid ""
msgstr ""
"Project-Id-Version: PACKAGE VERSION\n"
"Report-Msgid-Bugs-To: \n"
-"POT-Creation-Date: 2019-08-09 15:40+0200\n"
+"POT-Creation-Date: 2019-08-09 20:06+0200\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
"Language-Team: LANGUAGE <LL@li.org>\n"
@@ -12,7 +12,7 @@ msgstr ""
"Content-Transfer-Encoding: 8bit\n"
"Plural-Forms: nplurals=2; plural=(n > 1);\n"
-#: admin.py:58 models.py:95 models.py:134
+#: admin.py:65 models.py:98 models.py:137
msgid "Crawl result"
msgstr "Résultat d'indexation"
@@ -41,166 +41,174 @@ msgid "Planned"
msgstr "Planifié"
#: models.py:31
+msgid "Pre-crawl in progress"
+msgstr "Pré-indexation en cours"
+
+#: models.py:32
msgid "Crawl in progress"
msgstr "Indexation en cours"
-#: models.py:32
+#: models.py:33
msgid "Match link in progress"
msgstr "Mise en correspondance des liens en cours"
-#: models.py:33 models.py:83
+#: models.py:34 models.py:86
msgid "Finished"
msgstr "Fini"
-#: models.py:35
+#: models.py:36
msgid "Name"
msgstr "Nom"
-#: models.py:37
+#: models.py:38
msgid "Creation"
msgstr "Création"
-#: models.py:39
+#: models.py:40
msgid "Started"
msgstr "Début"
-#: models.py:41
+#: models.py:42
+msgid "Pre-crawl end"
+msgstr "Fin de la pré-indexation"
+
+#: models.py:44
msgid "Crawl end"
msgstr "Fin d'indexation"
-#: models.py:43
+#: models.py:46
msgid "Ended"
msgstr "Fin"
-#: models.py:45 models.py:93
+#: models.py:48 models.py:96
msgid "Status"
msgstr "État"
-#: models.py:49
+#: models.py:52
msgid "Progression"
msgstr "Progression"
-#: models.py:52 models.py:85 models.py:143
+#: models.py:55 models.py:88 models.py:155
msgid "Crawl"
msgstr "Session d'indexation"
-#: models.py:53
+#: models.py:56
msgid "Crawls"
msgstr "Session d'indexation"
-#: models.py:81
+#: models.py:84
msgid "In progress"
msgstr "En cours"
-#: models.py:82
+#: models.py:85
msgid "Time out"
msgstr "Délai expiré"
-#: models.py:87
+#: models.py:90
msgid "Target"
msgstr "Cible"
-#: models.py:89
+#: models.py:92
msgid "Start date"
msgstr "Date de début"
-#: models.py:91
+#: models.py:94
msgid "Duration"
msgstr "Durée"
-#: models.py:97
+#: models.py:100
msgid "External links"
msgstr "Liens externes"
-#: models.py:99
+#: models.py:102
msgid "Internal links"
msgstr "Liens internes"
-#: models.py:101
+#: models.py:104
msgid "Images"
msgstr "Images"
-#: models.py:103
+#: models.py:106
msgid "Facebook links"
msgstr "Liens Facebook"
-#: models.py:105
+#: models.py:108
msgid "Twitter links"
msgstr "Liens Twitter"
-#: models.py:107
+#: models.py:110
msgid "Instagram links"
msgstr "Liens Instagram"
-#: models.py:109
+#: models.py:112
msgid "Youtube links"
msgstr "Liens Youtube"
-#: models.py:111
+#: models.py:114
msgid "Dailymotion links"
msgstr "Liens Dailymotion"
-#: models.py:113
+#: models.py:116
msgid "Vimeo links"
msgstr "Liens Vimeo"
-#: models.py:115
+#: models.py:118
msgid "Internal videos"
msgstr "Vidéos internes"
-#: models.py:117
+#: models.py:120
msgid "Internal audios"
msgstr "Audios internes"
-#: models.py:119
+#: models.py:122
msgid "Internal PDF"
msgstr "PDF internes"
-#: models.py:121
+#: models.py:124
msgid "External PDF"
msgstr "PDF externes"
-#: models.py:123
+#: models.py:126
msgid "Internal office documents"
msgstr "Document office internes"
-#: models.py:125
+#: models.py:128
msgid "External office documents"
msgstr "Document office externes"
-#: models.py:127
+#: models.py:130
msgid "Website is online"
msgstr "Site en ligne"
-#: models.py:129
+#: models.py:132
msgid "Bad SSL certificate"
msgstr "Mauvais certificat SSL"
-#: models.py:131
+#: models.py:134
msgid "Redirection"
msgstr "Redirection"
-#: models.py:135
+#: models.py:138
msgid "Crawl results"
msgstr "Résultats d'indexation"
-#: models.py:144
+#: models.py:156
msgid "Source"
msgstr "Source"
-#: models.py:146
+#: models.py:158
msgid "Destination"
msgstr "Destination"
-#: models.py:148
+#: models.py:160
msgid "Number"
msgstr "Nombre"
-#: models.py:151
+#: models.py:163
msgid "Crawl relation"
msgstr "Indexation - Relation"
-#: models.py:152
+#: models.py:164
msgid "Crawl relations"
msgstr "Indexations - Relations"
diff --git a/commcrawler/scrapy.py b/commcrawler/scrapy.py
index bdd28c3..767827a 100644
--- a/commcrawler/scrapy.py
+++ b/commcrawler/scrapy.py
@@ -5,7 +5,7 @@ import requests
import scrapy
from scrapy.crawler import CrawlerProcess
-from scrapy.exceptions import NotSupported
+from scrapy.exceptions import NotSupported, CloseSpider
from scrapy.linkextractors import LinkExtractor
from django.conf import settings
@@ -129,15 +129,15 @@ class DefaultSpider:
pk=self.crawl_result.pk)
result.status = "T"
result.save()
- self.is_timeout = True
- return True
+ self.is_timeout = True
+ raise CloseSpider('timeout')
def parse(self, response):
result = {
"url": response.url,
}
if self.is_timeout or self.timeout():
- return []
+ raise CloseSpider('timeout')
for domain in self.excluded_domains:
if domain in response.url:
result["is_online"] = False
diff --git a/commorganization/locale/fr/LC_MESSAGES/django.po b/commorganization/locale/fr/LC_MESSAGES/django.po
index e527790..c12a5b0 100644
--- a/commorganization/locale/fr/LC_MESSAGES/django.po
+++ b/commorganization/locale/fr/LC_MESSAGES/django.po
@@ -2,7 +2,7 @@ msgid ""
msgstr ""
"Project-Id-Version: PACKAGE VERSION\n"
"Report-Msgid-Bugs-To: \n"
-"POT-Creation-Date: 2019-08-09 15:40+0200\n"
+"POT-Creation-Date: 2019-08-09 20:06+0200\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
"Language-Team: LANGUAGE <LL@li.org>\n"