diff options
Diffstat (limited to 'commcrawler/scrapy.py')
-rw-r--r-- | commcrawler/scrapy.py | 26 |
1 files changed, 26 insertions, 0 deletions
# diff --git a/commcrawler/scrapy.py b/commcrawler/scrapy.py
# new file mode 100644, index 0000000..77dafe9
"""Build Scrapy spider classes for crawl targets and run them."""
import scrapy
from scrapy.crawler import CrawlerProcess

from django.conf import settings


class DefaultSpider:
    """Empty base class added to every spider built by ``create_spider``."""


def create_spider(name, urls, target=None):
    """Create a new spider class at runtime and return it.

    The class is named ``name`` and derives from ``scrapy.Spider`` and
    ``DefaultSpider``, in that order. ``name``, ``urls`` (stored as
    ``start_urls``) and ``target`` are set as class attributes.

    Returns the class itself, not an instance.
    """
    # NOTE(review): DefaultSpider comes after scrapy.Spider in the bases, so
    # any attribute both classes define would resolve to Spider's version --
    # confirm this order is intended before adding behaviour to DefaultSpider.
    bases = (scrapy.Spider, DefaultSpider)
    attributes = dict(name=name, start_urls=urls, target=target)
    return type(name, bases, attributes)


def crawl(crawl_item):
    """Schedule one spider per target of ``crawl_item`` and start the process.

    For each object yielded by ``crawl_item.targets.all()`` (presumably a
    Django related manager -- verify against the model), a spider class
    named ``Target<pk>`` is created with the target's ``url`` as its only
    start URL and registered on a single ``CrawlerProcess``. The process is
    started once, after every spider has been registered.
    """
    # The setting is spelled SCRAPPY_SETTINGS here; it has to match the name
    # defined in the Django settings module.
    runner = CrawlerProcess(settings=settings.SCRAPPY_SETTINGS)
    for item_target in crawl_item.targets.all():
        spider_name = "Target{}".format(item_target.pk)
        spider_cls = create_spider(spider_name, [item_target.url], item_target)
        runner.crawl(spider_cls)
    runner.start()