summary | refs | log | tree | commit | diff
path: root/commcrawler/scrapy.py
diff options
context:
space:
mode:
author: Étienne Loks <etienne.loks@iggdrasil.net> 2019-08-12 11:42:32 +0200
committer: Étienne Loks <etienne.loks@iggdrasil.net> 2019-08-12 11:42:32 +0200
commit: 8377087d7572729f4e227d40c0a3f32fb5b2720a (patch)
tree: 879fedb15a8e6c48993140feb44eb92c4af87c10 /commcrawler/scrapy.py
parent: 9fb538feb0989df7bcd3538ae178165cc10cc184 (diff)
download: Comm-on-net-8377087d7572729f4e227d40c0a3f32fb5b2720a.tar.bz2
download: Comm-on-net-8377087d7572729f4e227d40c0a3f32fb5b2720a.zip
Fix missing domains
Diffstat (limited to 'commcrawler/scrapy.py')
-rw-r--r-- commcrawler/scrapy.py 3
1 files changed, 2 insertions, 1 deletions
diff --git a/commcrawler/scrapy.py b/commcrawler/scrapy.py
index 767827a..7702ea6 100644
--- a/commcrawler/scrapy.py
+++ b/commcrawler/scrapy.py
@@ -296,7 +296,8 @@ def launch_match(crawl_item):
"pk", "target_id", "target__url").all():
if subresult["pk"] == result["pk"]:
continue
- if get_domain(subresult["target__url"]) in domains:
+ url = subresult["target__url"]
+ if url and get_domain(url) in domains:
rel, created = models.CrawlRelation.objects.get_or_create(
crawl_id=crawl_item.pk, source_id=result["target_id"],
destination_id=subresult["target_id"])