diff options
author | Étienne Loks <etienne.loks@iggdrasil.net> | 2019-08-13 17:21:51 +0200 |
---|---|---|
committer | Étienne Loks <etienne.loks@iggdrasil.net> | 2019-08-13 17:21:51 +0200 |
commit | 01d72fa7c94359049e2a7beb068167cb7f047805 (patch) | |
tree | 7e4eff796c75d8429c5a4d73f4628f7d42c47dc8 /commcrawler/utils.py | |
parent | eb7a9f8c1ed76858c0963a9a7fb4bb896c1a7857 (diff) | |
download | Comm-on-net-01d72fa7c94359049e2a7beb068167cb7f047805.tar.bz2 Comm-on-net-01d72fa7c94359049e2a7beb068167cb7f047805.zip |
Fix domain check
Diffstat (limited to 'commcrawler/utils.py')
-rw-r--r-- | commcrawler/utils.py | 5 |
1 files changed, 5 insertions, 0 deletions
```diff
diff --git a/commcrawler/utils.py b/commcrawler/utils.py
index 6a49669..c1051dd 100644
--- a/commcrawler/utils.py
+++ b/commcrawler/utils.py
@@ -14,5 +14,10 @@ def clean_url(url):
 
 
 def get_domain(url):
+    if not url:
+        return
+    is_a_real_src = url.startswith("http") or url.startswith("/")
+    if not is_a_real_src:
+        return
     ext = tldextract.extract(url)
     return '{}.{}'.format(ext.domain, ext.suffix)
```