1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
|
import sys
from django.core.management.base import BaseCommand
from commcrawler.models import Crawl, ExludedDomains
from commcrawler.scrapy import launch_crawl
class Command(BaseCommand):
    """Management command that selects a crawl and launches it.

    Selection modes (mutually exclusive):
      * --crawl-id <id>     launch the crawl with that primary key
      * --first-available   launch the most recently created waiting crawl
      * (no option)         interactive prompt listing waiting crawls

    Only crawls with status "A" (available/waiting) are considered.
    """

    help = 'Launch a crawl'

    def add_arguments(self, parser):
        parser.add_argument('--crawl-id', default=None, type=int,
                            dest="crawl_id")
        parser.add_argument(
            '--first-available', dest="first_available", action='store_true',
            help="Crawl the first available crawler"
        )
        parser.add_argument(
            '--quiet', dest='quiet', action='store_true',
            help='Quiet output')

    def handle(self, *args, **options):
        # NOTE(review): 'quiet' is parsed but never used below — presumably
        # intended to silence output; confirm before wiring it in or removing.
        quiet = options['quiet']
        crawl_id = options['crawl_id']
        first_available = options['first_available']
        if crawl_id and first_available:
            sys.stdout.write('--first-available and --crawl-id are '
                             'incompatible. Exit.\n')
            return

        # Only crawls flagged "A" (waiting) are candidates.
        q = Crawl.objects.filter(status="A")
        # exists() avoids the COUNT(*) query that count() issues.
        if not q.exists():
            sys.stdout.write('No crawl waiting. Exit.\n')
            return

        if first_available:
            # Highest pk == most recently created waiting crawl.
            current_crawl = q.order_by("-pk").first()
        elif crawl_id:
            current_crawl = q.filter(pk=crawl_id).first()
            if current_crawl is None:
                sys.stdout.write('Crawl with this ID does not exist. '
                                 'Exit.\n')
                return
        else:
            current_crawl = self._choose_interactively(q)

        excluded = [domain.domain for domain in ExludedDomains.objects.all()]
        launch_crawl(current_crawl, excluded_domains=excluded)

    @staticmethod
    def _choose_interactively(queryset):
        """Prompt on stdin until the user types the pk of a waiting crawl.

        Returns the chosen Crawl instance. Re-displays the menu after any
        invalid (non-integer or unknown) entry.
        """
        crawls = {c.pk: c for c in queryset.all()}
        available_ids = set(crawls)
        chosen_id = None
        while chosen_id not in available_ids:
            sys.stdout.write('Which crawl to launch (type the number):\n')
            for pk, crawl in crawls.items():
                sys.stdout.write('* {} - {}\n'.format(pk, crawl))
            sys.stdout.flush()
            try:
                chosen_id = int(input("# "))
            except ValueError:
                # Non-integer input: loop and show the menu again.
                chosen_id = None
        return crawls[chosen_id]
|