summary | refs | log | tree | commit | diff
path: root/commcrawler/management/commands/launch_crawl.py
diff options
context:
space:
mode:
author: Étienne Loks <etienne.loks@iggdrasil.net> 2019-08-05 12:52:31 +0200
committer: Étienne Loks <etienne.loks@iggdrasil.net> 2019-08-05 12:52:31 +0200
commit: 72dfec0c3532941a46f77b3c0a6a49e16e6a2864 (patch)
tree: 8fcb33e87b357c796ca8f2e3325298272900745d /commcrawler/management/commands/launch_crawl.py
parent: c6b3188e49049cf689658654a1458a3276304782 (diff)
download: Comm-on-net-72dfec0c3532941a46f77b3c0a6a49e16e6a2864.tar.bz2
Comm-on-net-72dfec0c3532941a46f77b3c0a6a49e16e6a2864.zip
Manage excluded domains
Diffstat (limited to 'commcrawler/management/commands/launch_crawl.py')
-rw-r--r-- commcrawler/management/commands/launch_crawl.py 6
1 files changed, 4 insertions, 2 deletions
diff --git a/commcrawler/management/commands/launch_crawl.py b/commcrawler/management/commands/launch_crawl.py
index 92c3081..1248eeb 100644
--- a/commcrawler/management/commands/launch_crawl.py
+++ b/commcrawler/management/commands/launch_crawl.py
@@ -3,7 +3,7 @@ import sys
from django.core.management.base import BaseCommand
-from commcrawler.models import Crawl
+from commcrawler.models import Crawl, ExludedDomains
from commcrawler.scrapy import launch_crawl
@@ -57,5 +57,7 @@ class Command(BaseCommand):
except ValueError:
c_id = None
current_crawl = crawls[c_id]
- launch_crawl(current_crawl)
+ excluded = [domain.split("://")[1] for domain in
+ ExludedDomains.objects.all()]
+ launch_crawl(current_crawl, excluded_domains=excluded)