diff options
author | Étienne Loks <etienne.loks@iggdrasil.net> | 2019-08-05 12:52:31 +0200 |
---|---|---|
committer | Étienne Loks <etienne.loks@iggdrasil.net> | 2019-08-05 12:52:31 +0200 |
commit | 72dfec0c3532941a46f77b3c0a6a49e16e6a2864 (patch) | |
tree | 8fcb33e87b357c796ca8f2e3325298272900745d /commcrawler/management/commands/launch_crawl.py | |
parent | c6b3188e49049cf689658654a1458a3276304782 (diff) | |
download | Comm-on-net-72dfec0c3532941a46f77b3c0a6a49e16e6a2864.tar.bz2 Comm-on-net-72dfec0c3532941a46f77b3c0a6a49e16e6a2864.zip |
Manage excluded domains
Diffstat (limited to 'commcrawler/management/commands/launch_crawl.py')
-rw-r--r-- | commcrawler/management/commands/launch_crawl.py | 6 |
1 file changed, 4 insertions, 2 deletions
diff --git a/commcrawler/management/commands/launch_crawl.py b/commcrawler/management/commands/launch_crawl.py
index 92c3081..1248eeb 100644
--- a/commcrawler/management/commands/launch_crawl.py
+++ b/commcrawler/management/commands/launch_crawl.py
@@ -3,7 +3,7 @@ import sys
 
 from django.core.management.base import BaseCommand
 
-from commcrawler.models import Crawl
+from commcrawler.models import Crawl, ExludedDomains
 from commcrawler.scrapy import launch_crawl
 
 
@@ -57,5 +57,7 @@ class Command(BaseCommand):
             except ValueError:
                 c_id = None
         current_crawl = crawls[c_id]
-        launch_crawl(current_crawl)
+        excluded = [domain.split("://")[1] for domain in
+                    ExludedDomains.objects.all()]
+        launch_crawl(current_crawl, excluded_domains=excluded)