summaryrefslogtreecommitdiff
path: root/commcrawler/management/commands/import_csv_autres.py
blob: 2dd177cb32adb14cc636bb6f0e2353fb40a1a347 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
import csv
import sys

from django.core.management.base import BaseCommand

from commcrawler.models import Organization, OrganizationType, Target

# Number of leading CSV rows treated as header rows and never imported.
header_len = 1

# Exact column titles the first CSV row must carry. Kept as a list because it
# is compared directly against the list produced by ``csv.reader``.
expected_header = [
    'Secteur',
    'Type',
    'Nom',
    'URL',
    'Localisation siège (département)',
    'Localisation siège (code postal)',
    'Localisation siège (commune)',
]


class Command(BaseCommand):
    """Import organization types, organizations and targets from a CSV file.

    The first row of the file must equal ``expected_header``. For every
    following data row the command:

    * gets or creates a top-level ``OrganizationType`` named after column 0
      and a child ``OrganizationType`` named after column 1;
    * creates or updates an ``Organization`` of that child type, storing
      column 4 as its ``address``;
    * gets or creates a ``Target`` with the name from column 2, the
      organization and the normalized URL from column 3.
    """
    help = 'Import depuis CSV communes'

    # Number of leading columns actually consumed from each data row.
    _NB_USED_COLUMNS = 5

    def add_arguments(self, parser):
        """Declare the positional ``csv_file`` path and the ``--quiet`` flag."""
        parser.add_argument('csv_file')
        parser.add_argument(
            '--quiet', dest='quiet', action='store_true',
            help='Quiet output')

    @staticmethod
    def _normalize_url(site):
        """Return a usable URL built from the raw CSV cell, or ``None``.

        A cell equal to ``"0"`` or containing no dot is treated as "no site".
        A value that does not start with ``http`` is prefixed with
        ``http://``.
        """
        site = site.strip()
        if site == "0" or "." not in site:
            return None
        if not site.startswith('http'):
            return "http://" + site
        return site

    def handle(self, *args, **options):
        """Read the CSV file given in ``options['csv_file']`` and import it.

        Progress and the final counters are written to stdout unless
        ``options['quiet']`` is set. If the header row differs from
        ``expected_header`` an error is printed and nothing is imported.
        Blank rows are ignored; rows with fewer than five columns are
        reported on stderr and skipped.
        """
        csv_file = options['csv_file']
        quiet = options['quiet']
        if not quiet:
            sys.stdout.write('* opening file {}\n'.format(csv_file))
        nb_created = 0
        nb_organization_created = 0
        nb_tt_created = 0
        # newline='' leaves line-ending handling to the csv module, as its
        # documentation requires, so quoted multi-line fields parse correctly.
        with open(csv_file, 'r', newline='') as csvfile:
            reader = csv.reader(csvfile)
            for idx, row in enumerate(reader):
                if idx < header_len:
                    # Only the very first row is validated as the header.
                    if idx == 0 and expected_header != row:
                        sys.stdout.write('ERROR: expected header differs '
                                         'from the one provided\n')
                        sys.stdout.write('* expected header is:\n')
                        sys.stdout.write(str(expected_header))
                        sys.stdout.write('\n* header provided is:\n')
                        sys.stdout.write(str(row) + "\n")
                        return
                    continue

                # csv.reader yields [] for blank lines; ignore them instead
                # of failing on the unpacking below.
                if not any(cell.strip() for cell in row):
                    continue
                if len(row) < self._NB_USED_COLUMNS:
                    sys.stderr.write(
                        'WARNING: line {} skipped: expected at least {} '
                        'columns, got {}\n'.format(
                            idx + 1, self._NB_USED_COLUMNS, len(row)))
                    continue

                sec, sec_tpe, name, site, address = \
                    row[0:self._NB_USED_COLUMNS]
                name = name.strip()
                address = address.strip()
                # Drop the first space-separated token of the address to
                # build the organization label (presumably a leading
                # department code -- confirm against the data).
                if " " in address:
                    organization_name = " ".join(address.split(" ")[1:])
                else:
                    organization_name = address

                if not quiet:
                    sys.stdout.write('-> processing line %d.\r' % (idx + 1))
                    sys.stdout.flush()

                # Two-level type hierarchy: sector, then type within sector.
                parent_type, created = OrganizationType.objects.get_or_create(
                    name=sec.strip(),
                    parent=None
                )
                if created:
                    nb_tt_created += 1
                tpe, created = OrganizationType.objects.get_or_create(
                    name=sec_tpe.strip(),
                    parent=parent_type
                )
                if created:
                    nb_tt_created += 1

                # Create the organization, or refresh its address when it
                # already exists.
                organization, created = \
                    Organization.objects.update_or_create(
                        organization_type=tpe,
                        name="{} - {}".format(name, organization_name),
                        defaults={"address": address},
                    )
                if created:
                    nb_organization_created += 1

                target, created = Target.objects.get_or_create(
                    name=name,
                    organization=organization,
                    url=self._normalize_url(site),
                )
                if created:
                    nb_created += 1
        if not quiet:
            sys.stdout.write(
                '\n* {} organization types created.\n'.format(nb_tt_created))
            sys.stdout.write(
                '* {} organizations created.\n'.format(nb_organization_created))
            sys.stdout.write(
                '* {} targets created.\n'.format(nb_created))
            sys.stdout.flush()