diff options
Diffstat (limited to 'commcrawler/management/commands/import_csv_autres.py')
-rw-r--r-- | commcrawler/management/commands/import_csv_autres.py | 107 |
1 files changed, 107 insertions, 0 deletions
"""Django management command importing organizations and targets from a CSV.

Each data row yields a two-level ``OrganizationType`` (sector, then type
under that sector), an ``Organization`` and a ``Target``. Objects that
already exist are reused rather than duplicated.
"""
import csv
import sys

from django.core.management.base import BaseCommand

from commcrawler.models import Organization, OrganizationType, Target

# Number of leading rows treated as header and never imported.
header_len = 1
# Exact first row the CSV must carry; the import aborts on any difference.
expected_header = [
    'Secteur', 'Type', 'Nom', 'URL', 'Localisation siège (département)',
    'Localisation siège (code postal)', 'Localisation siège (commune)']


def _normalize_url(site):
    """Return a usable URL for a raw ``URL`` cell, or ``None`` when absent.

    A cell equal to ``"0"`` or containing no dot is treated as "no site".
    A value that does not start with ``http`` gets an ``http://`` prefix.
    """
    site = site.strip()
    if site == "0" or "." not in site:
        return None
    if not site.startswith('http'):
        return "http://" + site
    return site


class Command(BaseCommand):
    """Import organization types, organizations and targets from a CSV file."""

    help = 'Import depuis CSV communes'

    def add_arguments(self, parser):
        """Declare the positional ``csv_file`` and the ``--quiet`` flag."""
        parser.add_argument('csv_file')
        parser.add_argument(
            '--quiet', dest='quiet', action='store_true',
            help='Quiet output')

    def handle(self, *args, **options):
        """Read the CSV and create the missing database objects.

        The first row must equal ``expected_header``; otherwise an error
        report is written to stdout and nothing is imported. Blank rows are
        skipped. Unless ``--quiet`` is given, progress and a final summary
        of created objects are written to stdout.

        Raises:
            ValueError: if a non-blank data row has fewer than five columns.
        """
        csv_file = options['csv_file']
        quiet = options['quiet']
        if not quiet:
            sys.stdout.write('* opening file {}\n'.format(csv_file))
        nb_created = 0
        nb_organization_created = 0
        nb_tt_created = 0
        # newline='' is what the csv module asks for, so that quoted fields
        # containing line breaks are parsed correctly on every platform.
        with open(csv_file, 'r', newline='') as csvfile:
            reader = csv.reader(csvfile)
            for idx, row in enumerate(reader):
                if idx < header_len:
                    if not idx and expected_header != row:
                        sys.stdout.write('ERROR: expected header differs '
                                         'from the one provided\n')
                        sys.stdout.write('* expected header is:\n')
                        sys.stdout.write(str(expected_header))
                        sys.stdout.write('\n* header provided is:\n')
                        sys.stdout.write(str(row) + "\n")
                        return
                    continue
                if not row:
                    # csv.reader yields [] for a blank line (typically a
                    # trailing one); there is nothing to import from it.
                    continue

                # Only the first five columns are used. Per expected_header
                # the fifth one is the "département" cell, stored as address.
                sec, sec_tpe, name, site, address = row[0:5]
                name = name.strip()
                address = address.strip()
                # Drop everything up to the first space of the address
                # (presumably a department code -- confirm against the data);
                # keep the address whole when it contains no space.
                _, has_space, remainder = address.partition(" ")
                organization_name = remainder if has_space else address

                if not quiet:
                    sys.stdout.write('-> processing line %d.\r' % (idx + 1))
                    sys.stdout.flush()

                # Top-level type (sector), then the actual type below it.
                sector, c = OrganizationType.objects.get_or_create(
                    name=sec.strip(),
                    parent=None
                )
                if c:
                    nb_tt_created += 1
                tpe, c = OrganizationType.objects.get_or_create(
                    name=sec_tpe.strip(),
                    parent=sector
                )
                if c:
                    nb_tt_created += 1

                defaults = {"address": address}
                organization, c = Organization.objects.get_or_create(
                    organization_type=tpe,
                    name="{} - {}".format(name, organization_name),
                    defaults=defaults)
                if c:
                    nb_organization_created += 1
                else:
                    # Existing organization: refresh the non-lookup fields.
                    for field, value in defaults.items():
                        setattr(organization, field, value)
                    organization.save()

                _, c = Target.objects.get_or_create(
                    name=name,
                    organization=organization,
                    url=_normalize_url(site))
                if c:
                    nb_created += 1
        if not quiet:
            sys.stdout.write(
                '\n* {} organization types created.\n'.format(nb_tt_created))
            sys.stdout.write(
                '* {} organizations created.\n'.format(nb_organization_created))
            sys.stdout.write(
                '* {} targets created.\n'.format(nb_created))
            sys.stdout.flush()