summary refs log tree commit diff
path: root/commcrawler/management/commands
diff options
context:
space:
mode:
Diffstat (limited to 'commcrawler/management/commands')
-rw-r--r-- commcrawler/management/commands/__init__.py 0
-rw-r--r-- commcrawler/management/commands/import_csv_autres.py 107
-rw-r--r-- commcrawler/management/commands/import_csv_communes.py 167
3 files changed, 274 insertions, 0 deletions
diff --git a/commcrawler/management/commands/__init__.py b/commcrawler/management/commands/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/commcrawler/management/commands/__init__.py
diff --git a/commcrawler/management/commands/import_csv_autres.py b/commcrawler/management/commands/import_csv_autres.py
new file mode 100644
index 0000000..2dd177c
--- /dev/null
+++ b/commcrawler/management/commands/import_csv_autres.py
@@ -0,0 +1,107 @@
+import csv
+import sys
+
+from django.core.management.base import BaseCommand
+
+from commcrawler.models import Organization, OrganizationType, Target
+
+# Number of leading CSV rows that belong to the header and hold no data.
+header_len = 1
+# Column titles the first CSV row is compared against, in order.
+expected_header = [
+    'Secteur',
+    'Type',
+    'Nom',
+    'URL',
+    'Localisation siège (département)',
+    'Localisation siège (code postal)',
+    'Localisation siège (commune)',
+]
+
+
+class Command(BaseCommand):
+    """Import organization types, organizations and targets from a CSV.
+
+    The first row must equal ``expected_header``. Every following data row
+    yields (or reuses) a top-level organization type, a child type, an
+    organization and a target.
+    """
+    help = 'Import depuis CSV autres'
+
+    def add_arguments(self, parser):
+        """Declare the positional CSV path and the ``--quiet`` switch."""
+        parser.add_argument('csv_file')
+        parser.add_argument(
+            '--quiet', dest='quiet', action='store_true',
+            help='Quiet output')
+
+    @staticmethod
+    def _clean_url(site):
+        """Return the URL held in a raw CSV cell, or None if it has none.
+
+        A cell equal to "0" or without any dot is treated as "no site";
+        a value not starting with "http" gets an "http://" prefix.
+        """
+        site = site.strip()
+        if site == "0" or "." not in site:
+            return None
+        if not site.startswith('http'):
+            return "http://" + site
+        return site
+
+    def handle(self, *args, **options):
+        """Read the CSV file and create the matching database objects.
+
+        Progress and a creation summary are written to stdout unless
+        ``--quiet`` is set. A header mismatch is always reported and stops
+        the import before any data row is processed.
+        """
+        csv_file = options['csv_file']
+        quiet = options['quiet']
+        if not quiet:
+            sys.stdout.write('* opening file {}\n'.format(csv_file))
+        nb_created = 0
+        nb_organization_created = 0
+        nb_tt_created = 0
+        # newline='' leaves line-ending handling to the csv module, as its
+        # documentation requires for files passed to csv.reader.
+        with open(csv_file, 'r', newline='') as csvfile:
+            reader = csv.reader(csvfile)
+            for idx, row in enumerate(reader):
+                if idx < header_len:
+                    if not idx and expected_header != row:
+                        sys.stdout.write('ERROR: expected header differs '
+                                         'from the one provided\n')
+                        sys.stdout.write('* expected header is:\n')
+                        sys.stdout.write(str(expected_header))
+                        sys.stdout.write('\n* header provided is:\n')
+                        sys.stdout.write(str(row) + "\n")
+                        return
+                    continue
+                # Blank lines (or rows made only of empty cells) hold no
+                # data: skip them instead of failing on the unpacking below.
+                if not any(cell.strip() for cell in row):
+                    continue
+                sec, sec_tpe, name, site, address = row[0:5]
+                name = name.strip()
+                address = address.strip()
+                # Keep what follows the first space of the fifth column
+                # (presumably dropping a leading department number --
+                # TODO confirm); without a space the whole value is kept.
+                _, sep, remainder = address.partition(" ")
+                organization_name = remainder if sep else address
+
+                if not quiet:
+                    sys.stdout.write(
+                        '-> processing line %d.\r' % (idx + 1))
+                    sys.stdout.flush()
+
+                parent_tpe, c = OrganizationType.objects.get_or_create(
+                    name=sec.strip(),
+                    parent=None
+                )
+                if c:
+                    nb_tt_created += 1
+                tpe, c = OrganizationType.objects.get_or_create(
+                    name=sec_tpe.strip(),
+                    parent=parent_tpe
+                )
+                if c:
+                    nb_tt_created += 1
+
+                # Create the organization, or refresh the address of the
+                # one already stored under the same type and name.
+                organization, c = Organization.objects.update_or_create(
+                    organization_type=tpe,
+                    name="{} - {}".format(name, organization_name),
+                    defaults={"address": address},
+                )
+                if c:
+                    nb_organization_created += 1
+
+                target, created = Target.objects.get_or_create(
+                    name=name,
+                    organization=organization,
+                    url=self._clean_url(site),
+                )
+                if created:
+                    nb_created += 1
+        if not quiet:
+            sys.stdout.write(
+                '\n* {} organization types created.\n'.format(
+                    nb_tt_created))
+            sys.stdout.write(
+                '* {} organizations created.\n'.format(
+                    nb_organization_created))
+            sys.stdout.write(
+                '* {} targets created.\n'.format(nb_created))
+        sys.stdout.flush()
diff --git a/commcrawler/management/commands/import_csv_communes.py b/commcrawler/management/commands/import_csv_communes.py
new file mode 100644
index 0000000..4024067
--- /dev/null
+++ b/commcrawler/management/commands/import_csv_communes.py
@@ -0,0 +1,167 @@
+import csv
+import sys
+
+from django.core.management.base import BaseCommand
+
+from commcrawler.models import Area, AreaType, Organization, OrganizationType,\
+ Target
+
+# Number of leading CSV rows that belong to the header and hold no data.
+header_len = 1
+# Column titles the first CSV row is compared against, in order.
+expected_header = [
+    'DÉPARTEMENT',
+    'NOM EPCI OU MAIRIE',
+    'CODE POSTAL',
+    'COMMUNE',
+    'C.C. / C.A. / C.U.',
+    'SITE INTERNET',
+    'POPULATION COMMUNAUTAIRE',
+    'Type de collectivité',
+    'Code INSEE',
+]
+
+
+class Command(BaseCommand):
+    """Import areas, organizations and targets from the communes CSV.
+
+    The first row must equal ``expected_header``. Every following data row
+    yields (or reuses) organization types, area types, an optional parent
+    "Département" area, an area, an organization and a target.
+    """
+    help = 'Import depuis CSV communes'
+
+    def add_arguments(self, parser):
+        """Declare the positional CSV path and the ``--quiet`` switch."""
+        parser.add_argument('csv_file')
+        parser.add_argument(
+            '--quiet', dest='quiet', action='store_true',
+            help='Quiet output')
+
+    @staticmethod
+    def _clean_url(site):
+        """Return the URL held in a raw CSV cell, or None if it has none.
+
+        A cell equal to "0" or without any dot is treated as "no site";
+        a value not starting with "http" gets an "http://" prefix.
+        """
+        site = site.strip()
+        if site == "0" or "." not in site:
+            return None
+        if not site.startswith('http'):
+            return "http://" + site
+        return site
+
+    def handle(self, *args, **options):
+        """Read the CSV file and create the matching database objects.
+
+        Progress and a summary of non-zero creation counts are written to
+        stdout unless ``--quiet`` is set. A header mismatch is always
+        reported and stops the import before any data row is processed.
+        """
+        csv_file = options['csv_file']
+        quiet = options['quiet']
+        if not quiet:
+            sys.stdout.write('* opening file {}\n'.format(csv_file))
+        nb_created = 0
+        nb_organization_created = 0
+        nb_area_created = 0
+        nb_tt_created = 0
+        nb_at_created = 0
+        # newline='' leaves line-ending handling to the csv module, as its
+        # documentation requires for files passed to csv.reader.
+        with open(csv_file, 'r', newline='') as csvfile:
+            reader = csv.reader(csvfile)
+            for idx, row in enumerate(reader):
+                if idx < header_len:
+                    if not idx and expected_header != row:
+                        sys.stdout.write('ERROR: expected header differs '
+                                         'from the one provided\n')
+                        sys.stdout.write('* expected header is:\n')
+                        sys.stdout.write(str(expected_header))
+                        sys.stdout.write('\n* header provided is:\n')
+                        sys.stdout.write(str(row) + "\n")
+                        return
+                    continue
+                # Blank lines (or rows made only of empty cells) hold no
+                # data: skip them instead of failing on the unpacking below.
+                if not any(cell.strip() for cell in row):
+                    continue
+                dpt, name, code_postal, commune, comcom, site, pop = \
+                    row[0:7]
+                type_coll, insee = row[7:9]
+                # Strip once so the organization, the area and the target
+                # are all keyed on the same spelling of the name.
+                name = name.strip()
+                insee = insee.strip()
+                if insee == "NA":
+                    # "NA" is treated like an empty INSEE cell.
+                    insee = ""
+                if not quiet:
+                    sys.stdout.write(
+                        '-> processing line %d.\r' % (idx + 1))
+                    sys.stdout.flush()
+                try:
+                    # Spaces inside the number are removed before parsing.
+                    pop = int(pop.replace(" ", ""))
+                except ValueError:
+                    pop = None
+
+                p_tpe, c = OrganizationType.objects.get_or_create(
+                    name=type_coll.strip()
+                )
+                if c:
+                    nb_tt_created += 1
+                if comcom.strip():
+                    tpe, c = OrganizationType.objects.get_or_create(
+                        name=comcom.strip(),
+                        parent=p_tpe
+                    )
+                    if c:
+                        nb_tt_created += 1
+                else:
+                    tpe = p_tpe
+                # The area type is named after the organization type.
+                atpe, c = AreaType.objects.get_or_create(
+                    name=str(tpe)
+                )
+                if c:
+                    nb_at_created += 1
+
+                top_area = None
+                if dpt.strip():
+                    dpt_tpe, c = AreaType.objects.get_or_create(
+                        name="Département",
+                    )
+                    if c:
+                        # This is an area type, so it is counted with the
+                        # area types and not with the organization types.
+                        nb_at_created += 1
+                    top_area, c = Area.objects.get_or_create(
+                        name=dpt.strip(),
+                        area_type=dpt_tpe
+                    )
+                    if c:
+                        nb_area_created += 1
+
+                # With an INSEE code the area is looked up by reference and
+                # its name is refreshed; otherwise it is looked up by name.
+                area_lookup = {"area_type": atpe}
+                area_defaults = {"population": pop, "parent": top_area}
+                if insee:
+                    area_lookup['reference'] = insee
+                    area_defaults['name'] = commune.strip()
+                else:
+                    area_lookup['name'] = name
+                area, c = Area.objects.update_or_create(
+                    defaults=area_defaults, **area_lookup)
+                if c:
+                    nb_area_created += 1
+
+                address = "{} {}".format(
+                    code_postal.strip(), commune.strip())
+                # Create the organization, or refresh the address of the
+                # one already stored under the same type, name and area.
+                organization, c = Organization.objects.update_or_create(
+                    organization_type=tpe,
+                    name=name,
+                    area=area,
+                    defaults={"address": address},
+                )
+                if c:
+                    nb_organization_created += 1
+
+                target, created = Target.objects.get_or_create(
+                    name=name,
+                    organization=organization,
+                    url=self._clean_url(site),
+                )
+                if created:
+                    nb_created += 1
+        if not quiet:
+            sys.stdout.write("\n")
+            # Only counters that are not zero are reported.
+            if nb_at_created:
+                sys.stdout.write(
+                    '* {} area types created.\n'.format(nb_at_created)
+                )
+            if nb_area_created:
+                sys.stdout.write(
+                    '* {} areas created.\n'.format(
+                        nb_area_created))
+            if nb_tt_created:
+                sys.stdout.write(
+                    '* {} organization types created.\n'.format(
+                        nb_tt_created)
+                )
+            if nb_organization_created:
+                sys.stdout.write(
+                    '* {} organizations created.\n'.format(
+                        nb_organization_created))
+            if nb_created:
+                sys.stdout.write(
+                    '* {} targets created.\n'.format(nb_created))
+        sys.stdout.flush()