diff options
Diffstat (limited to 'archaeological_operations/import_from_csv.py')
| -rw-r--r-- | archaeological_operations/import_from_csv.py | 381 | 
1 files changed, 1 insertions, 380 deletions
| diff --git a/archaeological_operations/import_from_csv.py b/archaeological_operations/import_from_csv.py index bd4c1c841..878d95ce4 100644 --- a/archaeological_operations/import_from_csv.py +++ b/archaeological_operations/import_from_csv.py @@ -33,7 +33,7 @@ from django.contrib.auth.models import User  from django.db import transaction  from django.template.defaultfilters import slugify -from archaeological_operations.utils import parse_parcels +from archaeological_operations.utils import *  from ishtar_common.models import Town, Person, PersonType, OrganizationType, \      Organization, SourceType @@ -61,385 +61,6 @@ def utf_8_encoder(unicode_csv_data):      for line in unicode_csv_data:          yield line.encode('utf-8') -ope_types = {} -for k in settings.ISHTAR_OPE_TYPES.keys(): -    ot, created = OperationType.objects.get_or_create( -                             txt_idx=settings.ISHTAR_OPE_TYPES[k][0], -                             defaults={'label':settings.ISHTAR_OPE_TYPES[k][1], -                                       'preventive':k[0]==u'préventive'}) -    ope_types[k] = ot - -def _get_parse_string(trunc_number=None): -    def parse_string(value): -        value = value.strip() -        if value == '#EMPTY': -            value = '' -        value = value.replace('  ', ' ') -        if trunc_number: -            value = value[:trunc_number] -        return value -    return parse_string - -parse_string = _get_parse_string() - -def parse_multivalue(value): -    s1 = re.sub('(.)([A-Z][a-z]+)', r'\1 \2', name) -    s1 = re.sub('([a-z0-9])([A-Z])', r'\1 \2', s1) -    return re.sub('([0-9])([a-z])', r'\1 \2', s1) - -def parse_operationtype(value, preventive, owner): -    value = (preventive.strip(), value.strip()) -    if value not in ope_types: -        return None -    return ope_types[value] - -periods = {} -for k in settings.ISHTAR_PERIODS.keys(): -    periods[k] = Period.objects.get(txt_idx=settings.ISHTAR_PERIODS[k]) -periods_keys = periods.keys() -periods_keys.sort(key=len) -periods_keys.reverse() - -def parse_period(value): -    value = parse_string(value) -    value = value[3:] if value.startswith('EUR') else value -    while value.endswith('-'): -        value = value[:-1] -    value = value[3:] if value.startswith('EUR') else value -    if not value: -        return [periods[u'']] -    period, old_val = [], u'' -    while value and old_val != value: -        old_val = value -        for k in periods_keys: -            if value.startswith(k): -                period.append(periods[k]) -                value = value[len(k):] -                break -    return period - -_REPLACED_PERIOD = [('deuxieme', 'second')] -_REPLACED_PERIOD += [(y, x) for x, y in _REPLACED_PERIOD] -REPLACED_PERIOD_DCT = dict(_REPLACED_PERIOD) - -period_names = {} -for k in settings.ISHTAR_PERIODS.keys(): -    period = Period.objects.get(txt_idx=settings.ISHTAR_PERIODS[k]) -    slug = slugify(period.label) -    period_names[slug] = period -    for k in REPLACED_PERIOD_DCT.keys(): -        if k in slug: -            period_names[slug.replace(k, REPLACED_PERIOD_DCT[k])] = period -period_names_keys = period_names.keys() -period_names_keys.sort(key=len) -period_names_keys.reverse() - -def parse_period_name(value): -    value = parse_string(value) -    if not value: -        return [period_names[u'']] -    period, old_val = [], u'' -    value = slugify(value) -    while value and old_val != value: -        old_val = value -        for k in period_names_keys: -            if value.startswith(k): -                period.append(period_names[k]) -                value = value[len(k):] -                break -    return period - -_CACHED_PERMIT_TYPES = {} -for k in settings.ISHTAR_PERMIT_TYPES: -    txt_idx, label = settings.ISHTAR_PERMIT_TYPES[k] -    permit_type, created = PermitType.objects.get_or_create(txt_idx=txt_idx, -                          defaults={'label':label, -                                    'available':True}) -    _CACHED_PERMIT_TYPES[k] = permit_type - -def parse_permittype(value): -    value = parse_string(value).lower() -    if value not in _CACHED_PERMIT_TYPES: -        if not "" in _CACHED_PERMIT_TYPES: -            return -        value = "" -    return _CACHED_PERMIT_TYPES[value] - -_CACHED_ADMIN_ACT_TYPES = {} -def parse_admin_act_typ(value, code, owner): -    value = parse_string(value).lower() -    code = parse_string(code).lower() -    if not value or not code: -        return -    if code not in _CACHED_ADMIN_ACT_TYPES: -        act_type, created = ActType.objects.get_or_create(txt_idx=code, -                                                     defaults={'label':value}) -        _CACHED_ADMIN_ACT_TYPES[code]  = act_type -    return _CACHED_ADMIN_ACT_TYPES[code] - -def parse_fileref(value): -    value = parse_string(value).split('/')[0] -    value = value.split('.')[0] -    match = re.search('[0-9].[0-9]*', value) -    if not match: -        return None -    return int(match.group()) - -def parse_orga(value, alternate_value, owner): -    value = parse_string(value) -    if not value: -        value = parse_string(alternate_value) -        if not value: -            return -    q = Organization.objects.filter(name__iexact=value) -    if q.count(): -        return q.all()[0] -    try: -        organization_type = OrganizationType.objects.get(label__iexact=value) -    except ObjectDoesNotExist: -        organization_type = OrganizationType.objects.get(txt_idx='undefined') -    orga = Organization.objects.create(name=value, -                                       organization_type=organization_type, -                                       history_modifier=owner) -    return orga - -def parse_bool(value): -    value = parse_string(value) -    if value.lower() in ('yes', 'oui'): -        value = True -    elif value.lower() in ('no', 'non'): -        value = False -    else: -        value = None -    return value - -def parse_date(value): -    value = parse_string(value).split(' ')[0] -    try: -        return datetime.datetime.strptime(value, '%d/%m/%Y') -    except: -        return None - -def parse_yearref(value): -    value = parse_string(value).split('.')[0] -    match = re.search('[0-9].[0-9]*', value) -    if not match: -        return None -    return int(match.group()) - -def parse_surface(value): -    value = parse_string(value) -    value = value.replace(',', '.') -    try: -        # hectare en metre carrés -        value = float(value) * 10000 -        if value: -            return value -        return None -    except: -        return None - -def parse_year(value): -    value = parse_string(value) -    try: -        yr = int(value) -    except: -        return None -    if yr < 1900 or yr > 2100: -        return None -    return yr - -def parse_insee(value): -    value = parse_string(value) -    values = [] -    while len(value) > 4: -        values.append(value[:5]) -        value = value[5:] -    towns = [] -    for value in values: -        try: -            town = Town.objects.get(numero_insee=value) -            towns.append(town) -        except: -            #sys.stderr.write('Numero INSEE : %s non existant en base' % value) -            continue -    return towns - -def parse_trunc_patriarche(value): -    value = parse_string(value) -    if not value: -        return -    value = value.replace(' ', '') -    try: -        int(value) -    except: -        return -    return '18' + unicode(value) - -def parse_operation_code(value): -    value = parse_string(value) -    code = value.split('.')[-1] -    try: -        return int(code) -    except: -        return - -def parse_title(value): -    value = parse_string(value) -    if not value: -        return -    return value.title() - -def parse_name_surname(value): -    value = parse_string(value) -    items = value.split(' ') -    name = items[0] -    surname = "" -    if len(items) > 1: -        name = " ".join(items[:-1]) -        surname = items[-1] -    values = {"surname":parse_title(surname)[:30], -              "name":parse_title(name)[:30]} -    if not values['surname'] and not values['name']: -        return -    q = Person.objects.filter(**values) -    if q.count(): -        return q.all()[0] -    else: -        defaults = {'history_modifier':DEFAULT_PERSON, -                    'title':''} -        defaults.update(values) -        p = Person.objects.create(**defaults) -        p.person_types.add(PersonType.objects.get( -                                txt_idx='head_scientist')) -        return p - -def parse_person(surname, name, old_ref, owner): -    value = parse_string(value) -    values = {"surname":parse_title(surname), -              "name":parse_title(name)} -    if not values['surname'] and not values['name']: -        return -    q = Person.objects.filter(**values) -    if q.count(): -        return q.all()[0] -    else: -        defaults = {'history_modifier':owner, -                    'title':''} -        defaults.update(values) -        p = Person.objects.create(**defaults) -        p.person_types.add(PersonType.objects.get( -                                txt_idx='head_scientist')) -        return p - -def parse_comment_addr_nature(nature, addr, owner): -    addr = parse_string(addr) -    nature = parse_string(nature) -    comments = [] -    if nature: -        comments += [u"Aménagement :", nature] -    if addr: -        comments += [u"Adresse :", addr] -    if not comments: -        return "" -    return u"\n".join(comments) - -# si pas de start date : premier janvier de year - -ope_types = { -    'AET':('other_study', -           'Autre étude', True), -    'APP':('assistance_preparation_help', -           'Aide à la préparation de publication', True), -    'DOC':('documents_study', -           'Étude documentaire', True), -    'EV':('evaluation', -          "Fouille d'évaluation", True), -    'FOU':('ancient_excavation', -           "Fouille ancienne", True), -    'FP':('prog_excavation', -          "Fouille programmée", False), -    'MH':('building_study', "Fouille avant MH", True), -    'OPD':('arch_diagnostic', -           "Diagnostic archéologique", True), -    'PAN':('analysis_program', -           "Programme d'analyses", False), -    'PCR':('collective_research_project', -           "Projet collectif de recherche", False), -    'PMS':('specialized_eqp_prospection', -           "Prospection avec matériel spécialisé", False), -    'PRD':('diachronic_prospection', -           "Prospection diachronique", False), -    'PI':('diachronic_prospection', -           "Prospection diachronique", False), -    'PRM':('metal_detector_prospection', -           "Prospection détecteur de métaux", False), -    'PRT':('thematic_prospection', -           "Prospection thématique", False), -    'PT':('thematic_prospection', -           "Prospection thématique", False), -    'RAR':('cave_art_record', -           "Relevé d'art rupestre", False), -    'SD':('sampling_research', -          "Sondage", False), -    'SP':('prev_excavation', -          "Fouille préventive", True), -    'SU':('emergency_excavation', -          "Fouille préventive d'urgence", True), -} - -_CACHED_OPE_TYPES = {} - -def _prepare_ope_types(): -    for k in ope_types.keys(): -        txt_idx, label, preventive = ope_types[k] -        ot, created = OperationType.objects.get_or_create(txt_idx=txt_idx, -                              defaults={'label':label, 'preventive':preventive}) -        if k not in _CACHED_OPE_TYPES.keys(): -            _CACHED_OPE_TYPES[k] = ot - -def parse_patriarche_operationtype(value): -    if value not in _CACHED_OPE_TYPES.keys(): -        return None -    return _CACHED_OPE_TYPES[value] - -_dpt_re_filter = re.compile('^\([0-9]*\) ') - -def parse_ope_name(value): -    if not value: -        return '' -    value = value.strip() -    if value.lower() == 'null': -        return '' -    value = _dpt_re_filter.sub('', value) -    return value - -def parse_ha(value): -    value = parse_string(value) -    try: -        value = float(value) * 10000 -    except: -        value = None -    return value - -def parse_rapp_index(value): -    value = parse_string(value) -    items = re.findall(r'[0-9]+$', value) -    if items: -        return int(items[-1]) - -_CACHED_DOC_TYPES = {} - -def parse_doc_types(value): -    value = parse_string(value) -    if value not in _CACHED_DOC_TYPES: -        if value not in settings.ISHTAR_DOC_TYPES: -            return -        _CACHED_DOC_TYPES[value], created = SourceType.objects.get_or_create( -                            txt_idx=value, -                            defaults={"label":settings.ISHTAR_DOC_TYPES[value]}) -    return _CACHED_DOC_TYPES[value] -  # attrs, convert  DEFAULT_OPE_COLS = [   [], # numéro de dossier ? | 
