diff options
author | Étienne Loks <etienne.loks@iggdrasil.net> | 2019-07-31 16:33:11 +0200 |
---|---|---|
committer | Étienne Loks <etienne@peacefrogs.net> | 2019-07-31 16:33:25 +0200 |
commit | dd2dd640aa649c715a843fa431621fd955ca6767 (patch) | |
tree | 11a16e5c6c3920ebec8b2c40a426381406da6e35 /commcrawler/scrapy.py | |
parent | 6c6b1417111233b52fc55c792e9353964a60b536 (diff) | |
download | Comm-on-net-dd2dd640aa649c715a843fa431621fd955ca6767.tar.bz2 Comm-on-net-dd2dd640aa649c715a843fa431621fd955ca6767.zip |
Basic scrapy configuration
Diffstat (limited to 'commcrawler/scrapy.py')
-rw-r--r-- | commcrawler/scrapy.py | 26 |
1 files changed, 26 insertions, 0 deletions
diff --git a/commcrawler/scrapy.py b/commcrawler/scrapy.py
new file mode 100644
index 0000000..77dafe9
--- /dev/null
+++ b/commcrawler/scrapy.py
@@ -0,0 +1,26 @@
+import scrapy
+from scrapy.crawler import CrawlerProcess
+
+from django.conf import settings
+
+
+class DefaultSpider:
+    pass
+
+
+def create_spider(name, urls, target=None):
+    return type(
+        name, (scrapy.Spider, DefaultSpider),
+        {"name": name, "start_urls": urls, "target": target}
+    )
+
+
+def crawl(crawl_item):
+    process = CrawlerProcess(settings=settings.SCRAPPY_SETTINGS)
+    for target in crawl_item.targets.all():
+        process.crawl(
+            create_spider("Target{}".format(target.pk),
+                          [target.url],
+                          target)
+        )
+    process.start()