summaryrefslogtreecommitdiff
path: root/commcrawler/scrapy.py
blob: 77dafe93e7de5b38b7859d0f3642ef5d3f69a397 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
import scrapy
from scrapy.crawler import CrawlerProcess

from django.conf import settings


class DefaultSpider:
    """Placeholder base class added to every spider built by create_spider.

    It currently defines no attributes or methods of its own.
    """


def create_spider(name, urls, target=None):
    """Dynamically build and return a new spider class.

    Args:
        name: Used both as the Python class name and as the class-level
            ``name`` attribute of the spider.
        urls: Stored on the class as ``start_urls``.
        target: Arbitrary object stored on the class as ``target``
            (``None`` by default).

    Returns:
        A new class deriving from ``scrapy.Spider`` and ``DefaultSpider``.
    """
    # NOTE(review): scrapy.Spider comes first in the MRO, so anything later
    # defined on DefaultSpider would NOT override Spider's attributes --
    # confirm this ordering is intended.
    bases = (scrapy.Spider, DefaultSpider)
    class_attrs = dict(name=name, start_urls=urls, target=target)
    return type(name, bases, class_attrs)


def crawl(crawl_item):
    """Schedule one spider per target of *crawl_item*, then start crawling.

    Args:
        crawl_item: Object exposing ``targets.all()``; each yielded target
            must provide ``pk`` and ``url`` attributes.

    A single ``CrawlerProcess`` configured from
    ``settings.SCRAPPY_SETTINGS`` is shared by all spiders. Each spider class
    is named ``Target<pk>`` and starts from the target's URL only.
    """
    runner = CrawlerProcess(settings=settings.SCRAPPY_SETTINGS)
    for item in crawl_item.targets.all():
        spider_name = "Target{}".format(item.pk)
        spider_cls = create_spider(spider_name, [item.url], item)
        runner.crawl(spider_cls)
    # Started only after every spider has been registered.
    runner.start()