blob: 6a496694f35ff30fdf1e4f44a0070cb215e70a41 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
|
from urllib.parse import urldefrag
import tldextract
def append_to_results(results, key, value):
    """Append *value* to the list stored under *key* in *results*.

    If *key* is not yet present, an empty list is created for it first.
    The mapping is modified in place; nothing is returned.
    """
    # setdefault returns the existing list, or inserts and returns a new one,
    # so the membership test and the insert happen in a single lookup.
    results.setdefault(key, []).append(value)
def clean_url(url):
    """Return *url* with its fragment (the ``#anchor`` part) removed.

    A URL without a fragment is returned unchanged.
    """
    # urldefrag returns a (url, fragment) named tuple; only the url is needed.
    return urldefrag(url).url
def get_domain(url):
    """Return the domain and public suffix of *url* joined by a dot.

    The value is built from the ``domain`` and ``suffix`` fields of
    ``tldextract.extract(url)``; any subdomain is not included.

    When one of the two fields is empty (for example a host such as
    ``localhost`` that has no suffix), only the non-empty part is returned,
    so the result never carries a stray leading or trailing dot.
    """
    ext = tldextract.extract(url)
    # Skip empty components so 'localhost' does not become 'localhost.'.
    return '.'.join(part for part in (ext.domain, ext.suffix) if part)
|