path: root/commcrawler/management/commands/import_csv_autres.py
author     Étienne Loks <etienne.loks@iggdrasil.net>  2019-07-30 20:19:11 +0200
committer  Étienne Loks <etienne@peacefrogs.net>      2019-07-30 20:19:11 +0200
commit     56a33989278a8fe2985f0d36d3c589136c1ec30d (patch)
tree       b0cb3356e55b4547a4747e10411a8ca68852b977 /commcrawler/management/commands/import_csv_autres.py
download   Comm-on-net-56a33989278a8fe2985f0d36d3c589136c1ec30d.tar.bz2
           Comm-on-net-56a33989278a8fe2985f0d36d3c589136c1ec30d.zip
First commit
Diffstat (limited to 'commcrawler/management/commands/import_csv_autres.py')
-rw-r--r--  commcrawler/management/commands/import_csv_autres.py  107
1 file changed, 107 insertions, 0 deletions
diff --git a/commcrawler/management/commands/import_csv_autres.py b/commcrawler/management/commands/import_csv_autres.py
new file mode 100644
index 0000000..2dd177c
--- /dev/null
+++ b/commcrawler/management/commands/import_csv_autres.py
@@ -0,0 +1,107 @@
+import csv
+import sys
+
+from django.core.management.base import BaseCommand
+
+from commcrawler.models import Organization, OrganizationType, Target
+
+header_len = 1
+expected_header = [
+    'Secteur', 'Type', 'Nom', 'URL', 'Localisation siège (département)',
+    'Localisation siège (code postal)', 'Localisation siège (commune)']
+
+
+class Command(BaseCommand):
+    help = 'Import depuis CSV communes'
+
+    def add_arguments(self, parser):
+        parser.add_argument('csv_file')
+        parser.add_argument(
+            '--quiet', dest='quiet', action='store_true',
+            help='Quiet output')
+
+    def handle(self, *args, **options):
+        csv_file = options['csv_file']
+        quiet = options['quiet']
+        if not quiet:
+            sys.stdout.write('* opening file {}\n'.format(csv_file))
+        nb_created = 0
+        nb_organization_created = 0
+        nb_tt_created = 0
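+        # Walk through the CSV row by row; the first row must match
+        # expected_header, otherwise the import aborts before touching
+        # the database.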
+        with open(csv_file, 'r') as csvfile:
+            reader = csv.reader(csvfile)
+            for idx, row in enumerate(reader):
+                if idx < header_len:
+                    if not idx:
+                        if expected_header != row:
+                            sys.stdout.write('ERROR: expected header differs '
+                                             'from the one provided\n')
+                            sys.stdout.write('* expected header is:\n')
+                            sys.stdout.write(str(expected_header))
+                            sys.stdout.write('\n* header provided is:\n')
+                            sys.stdout.write(str(row) + "\n")
+                            return
+                    continue
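+                # Only the first five columns are used; the "département"
+                # column doubles as the address, and the text after its
+                # first space (if any) becomes the organization name.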
+                sec, sec_tpe, name, site, address = row[0:5]
+                address = address.strip()
+                if " " in address:
+                    organization_name = " ".join(address.split(" ")[1:])
+                else:
+                    organization_name = address
+
+                if not quiet:
+                    sys.stdout.write('-> processing line %d.\r' % (idx + 1))
+                    sys.stdout.flush()
+
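+                # Build a two-level OrganizationType hierarchy:
+                # the sector as parent, the type as its child.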
+                tpe, c = OrganizationType.objects.get_or_create(
+                    name=sec.strip(),
+                    parent=None
+                )
+                if c:
+                    nb_tt_created += 1
+                tpe, c = OrganizationType.objects.get_or_create(
+                    name=sec_tpe.strip(),
+                    parent=tpe
+                )
+                if c:
+                    nb_tt_created += 1
+
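+                # Fetch or create the Organization; the address is passed
+                # through "defaults" and refreshed on existing records.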
+                organization_values = {
+                    "organization_type": tpe,
+                    "name": "{} - {}".format(name.strip(), organization_name)
+                }
+
+                default = dict()
+                default["address"] = address
+                organization_values["defaults"] = default
+
+                organization, c = Organization.objects.get_or_create(
+                    **organization_values)
+                if c:
+                    nb_organization_created += 1
+                else:
+                    for k in default.keys():
+                        setattr(organization, k, default[k])
+                    organization.save()
+
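+                # Normalise the URL: drop placeholder values ("0" or no dot)
+                # and prepend a scheme when missing, then register the Target.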
+                site = site.strip()
+                if site == "0" or "." not in site:
+                    site = None
+                elif not site.startswith('http'):
+                    site = "http://" + site
+                values = {
+                    "name": name.strip(),
+                    "organization": organization,
+                    "url": site,
+                }
+                target, created = Target.objects.get_or_create(**values)
+                if created:
+                    nb_created += 1
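+        # Report the creation counters unless --quiet was given.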
+        if not quiet:
+            sys.stdout.write(
+                '\n* {} organization types created.\n'.format(nb_tt_created))
+            sys.stdout.write(
+                '* {} organizations created.\n'.format(nb_organization_created))
+            sys.stdout.write(
+                '* {} targets created.\n'.format(nb_created))
+            sys.stdout.flush()
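
For reference, a minimal usage sketch of the new command (not part of this commit). It assumes a configured Django project with the commcrawler app installed and migrated; the file name sample_autres.csv and the row values are invented for illustration.

import csv

from django.core.management import call_command

# Hypothetical sample file matching the header the command expects.
with open('sample_autres.csv', 'w', newline='') as f:
    writer = csv.writer(f)
    writer.writerow([
        'Secteur', 'Type', 'Nom', 'URL', 'Localisation siège (département)',
        'Localisation siège (code postal)', 'Localisation siège (commune)'])
    writer.writerow([
        'Culture', 'Musée', 'Musée exemple', 'www.example.org',
        '44 Loire-Atlantique', '44000', 'Nantes'])

# Equivalent to: python manage.py import_csv_autres sample_autres.csv
call_command('import_csv_autres', 'sample_autres.csv')

With this input the command should create the Culture/Musée OrganizationType pair, one Organization and one Target, then print the counters reported at the end of handle().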