summary | refs | log | tree | commit | diff
path: root/commcrawler/utils.py
diff options
context:
space:
mode:
Diffstat (limited to 'commcrawler/utils.py')
-rw-r--r-- commcrawler/utils.py 5
1 file changed, 5 insertions, 0 deletions
diff --git a/commcrawler/utils.py b/commcrawler/utils.py
index 6a49669..c1051dd 100644
--- a/commcrawler/utils.py
+++ b/commcrawler/utils.py
@@ -14,5 +14,10 @@ def clean_url(url):
def get_domain(url):
    """Return the domain and suffix of *url* joined as ``"<domain>.<suffix>"``.

    Only values starting with ``"http"`` or ``"/"`` are treated as real
    sources. Anything else (an empty string, ``None``, ``data:`` or
    ``javascript:`` URIs, ...) yields ``None``.

    :param url: candidate URL string; may be empty or ``None``.
    :return: ``"<domain>.<suffix>"`` built from ``tldextract.extract(url)``,
        or ``None`` when *url* is not a usable source.
    """
    # One startswith call with a tuple covers both accepted prefixes.
    if not url or not url.startswith(("http", "/")):
        return None
    # NOTE(review): a root-relative path such as "/img/a.png" passes the
    # guard above although it names no host -- confirm callers cope with
    # whatever tldextract produces for it.
    ext = tldextract.extract(url)
    return '{}.{}'.format(ext.domain, ext.suffix)