summary | refs | log | tree | commit | diff
path: root/commcrawler/scrapy.py
diff options
context:
space:
mode:
author: Étienne Loks <etienne.loks@iggdrasil.net> 2019-07-31 16:33:11 +0200
committer: Étienne Loks <etienne@peacefrogs.net> 2019-07-31 16:33:25 +0200
commit: dd2dd640aa649c715a843fa431621fd955ca6767 (patch)
tree: 11a16e5c6c3920ebec8b2c40a426381406da6e35 /commcrawler/scrapy.py
parent: 6c6b1417111233b52fc55c792e9353964a60b536 (diff)
download: Comm-on-net-dd2dd640aa649c715a843fa431621fd955ca6767.tar.bz2
Comm-on-net-dd2dd640aa649c715a843fa431621fd955ca6767.zip
Basic scrapy configuration
Diffstat (limited to 'commcrawler/scrapy.py')
-rw-r--r-- commcrawler/scrapy.py 26
1 files changed, 26 insertions, 0 deletions
diff --git a/commcrawler/scrapy.py b/commcrawler/scrapy.py
new file mode 100644
index 0000000..77dafe9
--- /dev/null
+++ b/commcrawler/scrapy.py
@@ -0,0 +1,26 @@
+import scrapy
+from scrapy.crawler import CrawlerProcess
+
+from django.conf import settings
+
+
+class DefaultSpider:
+    """Placeholder class used as an extra base for generated spiders.
+
+    It defines no attributes or methods of its own; ``create_spider``
+    lists it next to ``scrapy.Spider`` when building a spider class.
+    """
+
+
+def create_spider(name, urls, target=None):
+    """Build and return a new spider class called ``name``.
+
+    The class is created dynamically with ``scrapy.Spider`` and
+    ``DefaultSpider`` as bases (in that order) and carries three class
+    attributes: ``name``, ``start_urls`` and ``target``.
+
+    :param name: name of the generated class, also stored as its
+        ``name`` attribute
+    :param urls: value stored as the ``start_urls`` attribute
+    :param target: value stored as the ``target`` attribute
+        (``None`` by default)
+    :return: the newly created class (not an instance of it)
+    """
+    bases = (scrapy.Spider, DefaultSpider)
+    namespace = dict(name=name, start_urls=urls, target=target)
+    return type(name, bases, namespace)
+
+
+def crawl(crawl_item):
+    """Schedule one spider per target of ``crawl_item`` and run them.
+
+    A single ``CrawlerProcess`` is configured from
+    ``settings.SCRAPPY_SETTINGS``. For every object returned by
+    ``crawl_item.targets.all()`` a spider class named ``Target<pk>`` is
+    created, with the target's ``url`` as its only start URL, and handed
+    to the process. ``start()`` is called once, after every spider has
+    been scheduled.
+
+    :param crawl_item: object whose ``targets.all()`` yields items
+        exposing ``pk`` and ``url`` attributes
+    """
+    runner = CrawlerProcess(settings=settings.SCRAPPY_SETTINGS)
+    for item in crawl_item.targets.all():
+        spider_cls = create_spider(
+            "Target{}".format(item.pk), [item.url], item
+        )
+        runner.crawl(spider_cls)
+    runner.start()