diff options
Diffstat (limited to 'commcrawler/management')
-rw-r--r-- | commcrawler/management/__init__.py | 0 | ||||
-rw-r--r-- | commcrawler/management/commands/__init__.py | 0 | ||||
-rw-r--r-- | commcrawler/management/commands/import_csv_autres.py | 107 | ||||
-rw-r--r-- | commcrawler/management/commands/import_csv_communes.py | 167 |
4 files changed, 274 insertions, 0 deletions
# Reconstructed from a patch that adds four new files under
# commcrawler/management/: two empty package markers
# (management/__init__.py and management/commands/__init__.py) and the two
# management-command modules below. Each "File:" section is its own module.

# ---------------------------------------------------------------------------
# File: commcrawler/management/commands/import_csv_autres.py
# ---------------------------------------------------------------------------
import csv
import sys

from django.core.management.base import BaseCommand

from commcrawler.models import Organization, OrganizationType, Target

# Number of leading CSV rows that are header rows (skipped, not imported).
header_len = 1
# Exact header row the CSV must start with.
expected_header = [
    'Secteur', 'Type', 'Nom', 'URL', 'Localisation siège (département)',
    'Localisation siège (code postal)', 'Localisation siège (commune)']


def _header_matches(row):
    """Return True if *row* equals expected_header, else report and
    return False.

    The mismatch report goes to stdout, as it always has, and is printed
    even in quiet mode.
    """
    if row == expected_header:
        return True
    sys.stdout.write('ERROR: expected header differs '
                     'from the one provided\n')
    sys.stdout.write('* expected header is:\n')
    sys.stdout.write(str(expected_header))
    sys.stdout.write('\n* header provided is:\n')
    sys.stdout.write(str(row) + "\n")
    return False


def _normalize_url(site):
    """Return a URL built from a raw CSV cell, or None when it holds none.

    A cell equal to "0" or containing no dot is treated as "no website".
    Any other value not starting with "http" gets an "http://" prefix.
    """
    site = site.strip()
    if site == "0" or "." not in site:
        return None
    if not site.startswith('http'):
        return "http://" + site
    return site


class Command(BaseCommand):
    """Import organization types, organizations and targets from a CSV."""

    # Was a copy-paste of the communes command's help text.
    help = 'Import depuis CSV autres'

    def add_arguments(self, parser):
        """Declare the positional CSV path and the --quiet flag."""
        parser.add_argument('csv_file')
        parser.add_argument(
            '--quiet', dest='quiet', action='store_true',
            help='Quiet output')

    def handle(self, *args, **options):
        """Read the CSV and create/update the matching database rows.

        Reads ``options['csv_file']`` (path) and ``options['quiet']``
        (suppresses progress and summary output). Stops without importing
        anything if the first row differs from ``expected_header``.
        """
        csv_file = options['csv_file']
        quiet = options['quiet']
        if not quiet:
            sys.stdout.write('* opening file {}\n'.format(csv_file))
        nb_created = 0
        nb_organization_created = 0
        nb_tt_created = 0
        # newline='' is what the csv module requires so that line breaks
        # embedded in quoted fields are parsed correctly.
        with open(csv_file, 'r', newline='') as csvfile:
            reader = csv.reader(csvfile)
            for idx, row in enumerate(reader):
                if idx < header_len:
                    if not idx and not _header_matches(row):
                        return
                    continue
                if not row:
                    # csv.reader yields [] for blank lines (typically a
                    # trailing one); unpacking it would raise ValueError.
                    continue
                sec, sec_tpe, name, site, address = row[0:5]
                name = name.strip()
                address = address.strip()
                # Drop the first space-separated word of the fifth column
                # to build the organization label; keep it whole when it
                # has no space.
                _, sep, rest = address.partition(" ")
                organization_name = rest if sep else address

                if not quiet:
                    sys.stdout.write('-> processing line %d.\r' % (idx + 1))
                    sys.stdout.flush()

                # Two-level type hierarchy: sector, then type within it.
                tpe, c = OrganizationType.objects.get_or_create(
                    name=sec.strip(),
                    parent=None
                )
                if c:
                    nb_tt_created += 1
                tpe, c = OrganizationType.objects.get_or_create(
                    name=sec_tpe.strip(),
                    parent=tpe
                )
                if c:
                    nb_tt_created += 1

                # Existing organizations get their address refreshed.
                organization, c = Organization.objects.update_or_create(
                    organization_type=tpe,
                    name="{} - {}".format(name, organization_name),
                    defaults={"address": address},
                )
                if c:
                    nb_organization_created += 1

                target, created = Target.objects.get_or_create(
                    name=name,
                    organization=organization,
                    url=_normalize_url(site),
                )
                if created:
                    nb_created += 1
        if not quiet:
            sys.stdout.write(
                '\n* {} organization types created.\n'.format(nb_tt_created))
            sys.stdout.write(
                '* {} organizations created.\n'.format(
                    nb_organization_created))
            sys.stdout.write(
                '* {} targets created.\n'.format(nb_created))
        sys.stdout.flush()


# ---------------------------------------------------------------------------
# File: commcrawler/management/commands/import_csv_communes.py
# ---------------------------------------------------------------------------
import csv
import sys

from django.core.management.base import BaseCommand

from commcrawler.models import Area, AreaType, Organization, OrganizationType,\
    Target

# Number of leading CSV rows that are header rows (skipped, not imported).
header_len = 1
# Exact header row the CSV must start with.
expected_header = [
    'DÉPARTEMENT', 'NOM EPCI OU MAIRIE', 'CODE POSTAL', 'COMMUNE',
    'C.C. / C.A. / C.U.', 'SITE INTERNET', 'POPULATION COMMUNAUTAIRE',
    'Type de collectivité', 'Code INSEE']


def _header_matches(row):
    """Return True if *row* equals expected_header, else report and
    return False.

    The mismatch report goes to stdout, as it always has, and is printed
    even in quiet mode.
    """
    if row == expected_header:
        return True
    sys.stdout.write('ERROR: expected header differs '
                     'from the one provided\n')
    sys.stdout.write('* expected header is:\n')
    sys.stdout.write(str(expected_header))
    sys.stdout.write('\n* header provided is:\n')
    sys.stdout.write(str(row) + "\n")
    return False


def _normalize_url(site):
    """Return a URL built from a raw CSV cell, or None when it holds none.

    A cell equal to "0" or containing no dot is treated as "no website".
    Any other value not starting with "http" gets an "http://" prefix.
    """
    site = site.strip()
    if site == "0" or "." not in site:
        return None
    if not site.startswith('http'):
        return "http://" + site
    return site


def _parse_population(raw):
    """Return the population cell as an int, or None if it is not a number.

    All whitespace is removed first, so thousands separators typed as
    plain, no-break or narrow no-break spaces are all accepted.
    """
    try:
        return int("".join(raw.split()))
    except ValueError:
        return None


class Command(BaseCommand):
    """Import areas, organizations and targets from the communes CSV."""

    help = 'Import depuis CSV communes'

    def add_arguments(self, parser):
        """Declare the positional CSV path and the --quiet flag."""
        parser.add_argument('csv_file')
        parser.add_argument(
            '--quiet', dest='quiet', action='store_true',
            help='Quiet output')

    def handle(self, *args, **options):
        """Read the CSV and create/update the matching database rows.

        Reads ``options['csv_file']`` (path) and ``options['quiet']``
        (suppresses progress and summary output). Stops without importing
        anything if the first row differs from ``expected_header``.
        """
        csv_file = options['csv_file']
        quiet = options['quiet']
        if not quiet:
            sys.stdout.write('* opening file {}\n'.format(csv_file))
        nb_created = 0
        nb_organization_created = 0
        nb_area_created = 0
        nb_tt_created = 0
        nb_at_created = 0
        # newline='' is what the csv module requires so that line breaks
        # embedded in quoted fields are parsed correctly.
        with open(csv_file, 'r', newline='') as csvfile:
            reader = csv.reader(csvfile)
            for idx, row in enumerate(reader):
                if idx < header_len:
                    if not idx and not _header_matches(row):
                        return
                    continue
                if not row:
                    # csv.reader yields [] for blank lines (typically a
                    # trailing one); unpacking it would raise ValueError.
                    continue
                dpt, name, code_postal, commune, comcom, site, pop = row[0:7]
                type_coll, insee = row[7:9]
                # Strip once so area, organization and target all key on
                # the same name (the organization used the raw cell).
                name = name.strip()
                commune = commune.strip()
                insee = insee.strip()
                if insee == "NA":
                    insee = ""
                if not quiet:
                    sys.stdout.write('-> processing line %d.\r' % (idx + 1))
                    sys.stdout.flush()
                pop = _parse_population(pop)

                # NOTE(review): unlike import_csv_autres, this lookup does
                # not pin parent=None - confirm a same-named child type
                # cannot make it ambiguous.
                p_tpe, c = OrganizationType.objects.get_or_create(
                    name=type_coll.strip()
                )
                if c:
                    nb_tt_created += 1
                if comcom.strip():
                    tpe, c = OrganizationType.objects.get_or_create(
                        name=comcom.strip(),
                        parent=p_tpe
                    )
                    if c:
                        nb_tt_created += 1
                else:
                    tpe = p_tpe
                # The area type is named after the organization type.
                atpe, c = AreaType.objects.get_or_create(
                    name=str(tpe)
                )
                if c:
                    nb_at_created += 1

                top_area = None
                if dpt.strip():
                    dpt_tpe, c = AreaType.objects.get_or_create(
                        name="Département",
                    )
                    if c:
                        # This is an area type, so it belongs in the area
                        # type counter (it was added to nb_tt_created).
                        nb_at_created += 1
                    top_area, c = Area.objects.get_or_create(
                        name=dpt.strip(),
                        area_type=dpt_tpe
                    )
                    if c:
                        nb_area_created += 1

                # Areas are keyed on the INSEE code when there is one (the
                # commune name is then just an updatable attribute), and
                # on the row name otherwise.
                area_lookup = {"area_type": atpe}
                area_defaults = {"population": pop, "parent": top_area}
                if insee:
                    area_lookup['reference'] = insee
                    area_defaults['name'] = commune
                else:
                    area_lookup['name'] = name

                area, c = Area.objects.update_or_create(
                    defaults=area_defaults, **area_lookup)
                if c:
                    nb_area_created += 1

                # Join only the parts that are present, so a missing
                # postal code or commune leaves no stray space.
                address = " ".join(
                    part for part in (code_postal.strip(), commune) if part)
                organization, c = Organization.objects.update_or_create(
                    organization_type=tpe,
                    name=name,
                    area=area,
                    defaults={"address": address},
                )
                if c:
                    nb_organization_created += 1

                target, created = Target.objects.get_or_create(
                    name=name,
                    organization=organization,
                    url=_normalize_url(site),
                )
                if created:
                    nb_created += 1
        if not quiet:
            sys.stdout.write("\n")
            # Only non-zero counters are reported.
            if nb_at_created:
                sys.stdout.write(
                    '* {} area types created.\n'.format(nb_at_created)
                )
            if nb_area_created:
                sys.stdout.write(
                    '* {} areas created.\n'.format(
                        nb_area_created))
            if nb_tt_created:
                sys.stdout.write(
                    '* {} organization types created.\n'.format(nb_tt_created)
                )
            if nb_organization_created:
                sys.stdout.write(
                    '* {} organizations created.\n'.format(
                        nb_organization_created))
            if nb_created:
                sys.stdout.write(
                    '* {} targets created.\n'.format(nb_created))
        sys.stdout.flush()