# Reconstructed from the patch that creates commcrawler/scrapy.py:
#   commit:  dd2dd640aa649c715a843fa431621fd955ca6767
#   author:  Étienne Loks
#   date:    Wed, 31 Jul 2019 16:33:11 +0200
#   subject: Basic scrapy configuration
"""Basic Scrapy configuration: one generated spider class per target."""

import scrapy
from scrapy.crawler import CrawlerProcess

from django.conf import settings


class DefaultSpider:
    """Base class mixed into every spider built by ``create_spider``.

    It is currently empty and only serves as a common ancestor.
    """


def create_spider(name, urls, target=None):
    """Build a spider class dedicated to a single target.

    Args:
        name: Name of the generated class, also stored as the spider's
            ``name`` attribute.
        urls: Iterable of URLs stored as the spider's ``start_urls``.
        target: Object stored on the class as ``target``; ``crawl`` passes
            the item it is iterating over. Defaults to ``None``.

    Returns:
        A new class whose bases are ``scrapy.Spider`` and ``DefaultSpider``.
    """
    attributes = {
        "name": name,
        # Copy the URLs so that later changes to the caller's list cannot
        # silently alter the class-level ``start_urls``.
        "start_urls": list(urls),
        "target": target,
    }
    # NOTE(review): ``scrapy.Spider`` comes before ``DefaultSpider`` in the
    # bases, so anything ``DefaultSpider`` defines that ``Spider`` defines
    # too would be shadowed -- confirm this order is the intended one.
    return type(name, (scrapy.Spider, DefaultSpider), attributes)


def crawl(crawl_item):
    """Register one spider per target of ``crawl_item`` and start them.

    Args:
        crawl_item: Object exposing ``targets.all()``; every returned target
            must provide ``pk`` and ``url`` attributes.
    """
    # The setting name (spelled ``SCRAPPY_SETTINGS``) is read from the
    # Django settings, which are defined outside this module.
    process = CrawlerProcess(settings=settings.SCRAPPY_SETTINGS)
    for target in crawl_item.targets.all():
        spider_class = create_spider(
            "Target{}".format(target.pk),
            [target.url],
            target,
        )
        process.crawl(spider_class)
    # All spiders are registered first and started together afterwards.
    process.start()