# Reconstructed from commit 108b5514fe795e3bbf4c76245047f5ea054c3d20
# ("Basic crawling", Étienne Loks, Wed, 31 Jul 2019 17:56:53 +0200).
# That commit adds an empty commcrawler/management/commands/__init__.py
# and this module: commcrawler/management/commands/launch_crawl.py
import csv
import sys

from django.core.management.base import BaseCommand, CommandError

from commcrawler.models import Crawl
from commcrawler.scrapy import launch_crawl


class Command(BaseCommand):
    """Interactively pick one crawl and hand it to ``launch_crawl``.

    Every ``Crawl`` whose ``status`` is ``"C"`` is listed with its primary
    key; the operator types one of the listed keys and that crawl is passed
    to ``commcrawler.scrapy.launch_crawl``.
    """
    help = 'Launch a crawl'

    def add_arguments(self, parser):
        """Register the command line options of the command.

        ``--quiet`` is a boolean flag stored under ``options['quiet']``.
        """
        parser.add_argument(
            '--quiet', dest='quiet', action='store_true',
            help='Quiet output')

    def handle(self, *args, **options):
        """List the selectable crawls, ask for one and launch it.

        Returns without launching anything when no crawl has status "C".

        Raises:
            CommandError: if the input stream is closed before a valid
                crawl number has been typed.
        """
        quiet = options['quiet']

        # Evaluate the queryset once: a separate count() followed by a
        # second fetch costs two queries and the two results may disagree.
        crawls = {
            crawl.pk: crawl for crawl in Crawl.objects.filter(status="C")
        }
        if not crawls:
            # NOTE(review): --quiet only silences this informational
            # message; the menu below is needed to make a choice, so it is
            # always displayed. Confirm this matches the intended meaning
            # of the flag.
            if not quiet:
                self.stdout.write('No crawl waiting. Exit.\n')
            return

        c_id = None
        # Ask again until the typed value is the key of a listed crawl.
        while c_id not in crawls:
            self.stdout.write('Which crawl to launch (type the number):\n')
            for crawl_id, crawl in crawls.items():
                self.stdout.write('* {} - {}\n'.format(crawl_id, crawl))
            # Push the menu out before blocking on the prompt.
            self.stdout.flush()
            try:
                c_id = int(input("# "))
            except ValueError:
                # Not a number: fall through and display the menu again.
                c_id = None
            except EOFError:
                # stdin closed (e.g. non-interactive run): asking again
                # can never succeed, so stop with a clean error.
                raise CommandError(
                    'No crawl selected: input stream closed.') from None

        launch_crawl(crawls[c_id])