from urllib.parse import urldefrag

import tldextract


def append_to_results(results, key, value):
    """Append *value* to the list stored under *key* in *results*.

    The list is created the first time *key* is seen, so callers can group
    values by key without initialising each entry. *results* is modified in
    place and nothing is returned.
    """
    results.setdefault(key, []).append(value)


def clean_url(url):
    """Return *url* with its fragment (the ``#anchor`` part) removed."""
    url, __ = urldefrag(url)  # remove anchors
    return url


def get_domain(url):
    """Return the domain plus suffix of *url*, e.g. ``"example.com"``.

    Returns ``None`` when no domain can be determined:

    * *url* is empty or ``None``;
    * *url* does not start with ``http`` or ``/`` (anything else is not
      treated as a real source);
    * ``tldextract`` finds neither a domain nor a suffix in *url*, as
      happens for a relative path such as ``/img/a.png``.
    """
    if not url:
        return None
    if not url.startswith(("http", "/")):
        return None

    ext = tldextract.extract(url)
    # Join only the parts that are present: formatting "{domain}.{suffix}"
    # unconditionally yields "." for relative paths and a trailing dot for
    # hosts without a suffix (e.g. "localhost.").
    parts = [part for part in (ext.domain, ext.suffix) if part]
    return ".".join(parts) or None