"""Small helpers for grouping results and normalising URLs."""

from urllib.parse import urldefrag

import tldextract


def append_to_results(results, key, value):
    """Append *value* to the list stored under *key* in *results*.

    The list is created on first use of *key*. *results* is modified in
    place and nothing is returned.
    """
    # setdefault performs the "create the list if missing" step and the
    # lookup in a single call.
    results.setdefault(key, []).append(value)


def clean_url(url):
    """Return *url* with its fragment (the ``#anchor`` part) removed."""
    # urldefrag returns a (url, fragment) pair; only the first item is kept.
    return urldefrag(url)[0]


def get_domain(url):
    """Return the domain and public suffix of *url* joined by a dot.

    The split is performed by ``tldextract.extract``; the subdomain is
    discarded. Parts that come back empty are skipped, so a URL for which
    no suffix is found (for instance a bare host name or an IP address)
    yields just the domain without a trailing dot, and an input with
    neither part yields an empty string.
    """
    ext = tldextract.extract(url)
    # Join only the non-empty parts to avoid results such as 'localhost.'.
    return '.'.join(part for part in (ext.domain, ext.suffix) if part)