diff options
author | Étienne Loks <etienne.loks@iggdrasil.net> | 2019-08-12 11:42:32 +0200 |
---|---|---|
committer | Étienne Loks <etienne.loks@iggdrasil.net> | 2019-08-12 11:42:32 +0200 |
commit | 8377087d7572729f4e227d40c0a3f32fb5b2720a (patch) | |
tree | 879fedb15a8e6c48993140feb44eb92c4af87c10 | |
parent | 9fb538feb0989df7bcd3538ae178165cc10cc184 (diff) | |
download | Comm-on-net-8377087d7572729f4e227d40c0a3f32fb5b2720a.tar.bz2 Comm-on-net-8377087d7572729f4e227d40c0a3f32fb5b2720a.zip |
Fix missing domains
-rw-r--r-- | commcrawler/scrapy.py | 3 |
1 file changed, 2 insertions, 1 deletion
```diff
diff --git a/commcrawler/scrapy.py b/commcrawler/scrapy.py
index 767827a..7702ea6 100644
--- a/commcrawler/scrapy.py
+++ b/commcrawler/scrapy.py
@@ -296,7 +296,8 @@ def launch_match(crawl_item):
                     "pk", "target_id", "target__url").all():
                 if subresult["pk"] == result["pk"]:
                     continue
-                if get_domain(subresult["target__url"]) in domains:
+                url = subresult["target__url"]
+                if url and get_domain(url) in domains:
                     rel, created = models.CrawlRelation.objects.get_or_create(
                         crawl_id=crawl_item.pk, source_id=result["target_id"],
                         destination_id=subresult["target_id"])
```