summaryrefslogtreecommitdiff
path: root/archaeological_operations/import_from_csv.py
diff options
context:
space:
mode:
authorÉtienne Loks <etienne.loks@proxience.com>2015-01-24 22:46:16 +0100
committerÉtienne Loks <etienne.loks@proxience.com>2015-01-24 22:46:16 +0100
commit56aceffe8361bf01ef472b9c13567c28d7dcb943 (patch)
tree060ce3c6e8d91df838b70d9acd909f93552cc124 /archaeological_operations/import_from_csv.py
parentaa70ac1ae2ea28faf7e23ad2150e24d91c2c4e5e (diff)
downloadIshtar-56aceffe8361bf01ef472b9c13567c28d7dcb943.tar.bz2
Ishtar-56aceffe8361bf01ef472b9c13567c28d7dcb943.zip
Fix operations and context_records tests
Diffstat (limited to 'archaeological_operations/import_from_csv.py')
-rw-r--r--archaeological_operations/import_from_csv.py381
1 files changed, 1 insertions, 380 deletions
diff --git a/archaeological_operations/import_from_csv.py b/archaeological_operations/import_from_csv.py
index bd4c1c841..878d95ce4 100644
--- a/archaeological_operations/import_from_csv.py
+++ b/archaeological_operations/import_from_csv.py
@@ -33,7 +33,7 @@ from django.contrib.auth.models import User
from django.db import transaction
from django.template.defaultfilters import slugify
-from archaeological_operations.utils import parse_parcels
+from archaeological_operations.utils import *
from ishtar_common.models import Town, Person, PersonType, OrganizationType, \
Organization, SourceType
@@ -61,385 +61,6 @@ def utf_8_encoder(unicode_csv_data):
for line in unicode_csv_data:
yield line.encode('utf-8')
-ope_types = {}
-for k in settings.ISHTAR_OPE_TYPES.keys():
- ot, created = OperationType.objects.get_or_create(
- txt_idx=settings.ISHTAR_OPE_TYPES[k][0],
- defaults={'label':settings.ISHTAR_OPE_TYPES[k][1],
- 'preventive':k[0]==u'préventive'})
- ope_types[k] = ot
-
-def _get_parse_string(trunc_number=None):
- def parse_string(value):
- value = value.strip()
- if value == '#EMPTY':
- value = ''
- value = value.replace(' ', ' ')
- if trunc_number:
- value = value[:trunc_number]
- return value
- return parse_string
-
-parse_string = _get_parse_string()
-
-def parse_multivalue(value):
- s1 = re.sub('(.)([A-Z][a-z]+)', r'\1 \2', name)
- s1 = re.sub('([a-z0-9])([A-Z])', r'\1 \2', s1)
- return re.sub('([0-9])([a-z])', r'\1 \2', s1)
-
-def parse_operationtype(value, preventive, owner):
- value = (preventive.strip(), value.strip())
- if value not in ope_types:
- return None
- return ope_types[value]
-
-periods = {}
-for k in settings.ISHTAR_PERIODS.keys():
- periods[k] = Period.objects.get(txt_idx=settings.ISHTAR_PERIODS[k])
-periods_keys = periods.keys()
-periods_keys.sort(key=len)
-periods_keys.reverse()
-
-def parse_period(value):
- value = parse_string(value)
- value = value[3:] if value.startswith('EUR') else value
- while value.endswith('-'):
- value = value[:-1]
- value = value[3:] if value.startswith('EUR') else value
- if not value:
- return [periods[u'']]
- period, old_val = [], u''
- while value and old_val != value:
- old_val = value
- for k in periods_keys:
- if value.startswith(k):
- period.append(periods[k])
- value = value[len(k):]
- break
- return period
-
-_REPLACED_PERIOD = [('deuxieme', 'second')]
-_REPLACED_PERIOD += [(y, x) for x, y in _REPLACED_PERIOD]
-REPLACED_PERIOD_DCT = dict(_REPLACED_PERIOD)
-
-period_names = {}
-for k in settings.ISHTAR_PERIODS.keys():
- period = Period.objects.get(txt_idx=settings.ISHTAR_PERIODS[k])
- slug = slugify(period.label)
- period_names[slug] = period
- for k in REPLACED_PERIOD_DCT.keys():
- if k in slug:
- period_names[slug.replace(k, REPLACED_PERIOD_DCT[k])] = period
-period_names_keys = period_names.keys()
-period_names_keys.sort(key=len)
-period_names_keys.reverse()
-
-def parse_period_name(value):
- value = parse_string(value)
- if not value:
- return [period_names[u'']]
- period, old_val = [], u''
- value = slugify(value)
- while value and old_val != value:
- old_val = value
- for k in period_names_keys:
- if value.startswith(k):
- period.append(period_names[k])
- value = value[len(k):]
- break
- return period
-
-_CACHED_PERMIT_TYPES = {}
-for k in settings.ISHTAR_PERMIT_TYPES:
- txt_idx, label = settings.ISHTAR_PERMIT_TYPES[k]
- permit_type, created = PermitType.objects.get_or_create(txt_idx=txt_idx,
- defaults={'label':label,
- 'available':True})
- _CACHED_PERMIT_TYPES[k] = permit_type
-
-def parse_permittype(value):
- value = parse_string(value).lower()
- if value not in _CACHED_PERMIT_TYPES:
- if not "" in _CACHED_PERMIT_TYPES:
- return
- value = ""
- return _CACHED_PERMIT_TYPES[value]
-
-_CACHED_ADMIN_ACT_TYPES = {}
-def parse_admin_act_typ(value, code, owner):
- value = parse_string(value).lower()
- code = parse_string(code).lower()
- if not value or not code:
- return
- if code not in _CACHED_ADMIN_ACT_TYPES:
- act_type, created = ActType.objects.get_or_create(txt_idx=code,
- defaults={'label':value})
- _CACHED_ADMIN_ACT_TYPES[code] = act_type
- return _CACHED_ADMIN_ACT_TYPES[code]
-
-def parse_fileref(value):
- value = parse_string(value).split('/')[0]
- value = value.split('.')[0]
- match = re.search('[0-9].[0-9]*', value)
- if not match:
- return None
- return int(match.group())
-
-def parse_orga(value, alternate_value, owner):
- value = parse_string(value)
- if not value:
- value = parse_string(alternate_value)
- if not value:
- return
- q = Organization.objects.filter(name__iexact=value)
- if q.count():
- return q.all()[0]
- try:
- organization_type = OrganizationType.objects.get(label__iexact=value)
- except ObjectDoesNotExist:
- organization_type = OrganizationType.objects.get(txt_idx='undefined')
- orga = Organization.objects.create(name=value,
- organization_type=organization_type,
- history_modifier=owner)
- return orga
-
-def parse_bool(value):
- value = parse_string(value)
- if value.lower() in ('yes', 'oui'):
- value = True
- elif value.lower() in ('no', 'non'):
- value = False
- else:
- value = None
- return value
-
-def parse_date(value):
- value = parse_string(value).split(' ')[0]
- try:
- return datetime.datetime.strptime(value, '%d/%m/%Y')
- except:
- return None
-
-def parse_yearref(value):
- value = parse_string(value).split('.')[0]
- match = re.search('[0-9].[0-9]*', value)
- if not match:
- return None
- return int(match.group())
-
-def parse_surface(value):
- value = parse_string(value)
- value = value.replace(',', '.')
- try:
- # hectare en metre carrés
- value = float(value) * 10000
- if value:
- return value
- return None
- except:
- return None
-
-def parse_year(value):
- value = parse_string(value)
- try:
- yr = int(value)
- except:
- return None
- if yr < 1900 or yr > 2100:
- return None
- return yr
-
-def parse_insee(value):
- value = parse_string(value)
- values = []
- while len(value) > 4:
- values.append(value[:5])
- value = value[5:]
- towns = []
- for value in values:
- try:
- town = Town.objects.get(numero_insee=value)
- towns.append(town)
- except:
- #sys.stderr.write('Numero INSEE : %s non existant en base' % value)
- continue
- return towns
-
-def parse_trunc_patriarche(value):
- value = parse_string(value)
- if not value:
- return
- value = value.replace(' ', '')
- try:
- int(value)
- except:
- return
- return '18' + unicode(value)
-
-def parse_operation_code(value):
- value = parse_string(value)
- code = value.split('.')[-1]
- try:
- return int(code)
- except:
- return
-
-def parse_title(value):
- value = parse_string(value)
- if not value:
- return
- return value.title()
-
-def parse_name_surname(value):
- value = parse_string(value)
- items = value.split(' ')
- name = items[0]
- surname = ""
- if len(items) > 1:
- name = " ".join(items[:-1])
- surname = items[-1]
- values = {"surname":parse_title(surname)[:30],
- "name":parse_title(name)[:30]}
- if not values['surname'] and not values['name']:
- return
- q = Person.objects.filter(**values)
- if q.count():
- return q.all()[0]
- else:
- defaults = {'history_modifier':DEFAULT_PERSON,
- 'title':''}
- defaults.update(values)
- p = Person.objects.create(**defaults)
- p.person_types.add(PersonType.objects.get(
- txt_idx='head_scientist'))
- return p
-
-def parse_person(surname, name, old_ref, owner):
- value = parse_string(value)
- values = {"surname":parse_title(surname),
- "name":parse_title(name)}
- if not values['surname'] and not values['name']:
- return
- q = Person.objects.filter(**values)
- if q.count():
- return q.all()[0]
- else:
- defaults = {'history_modifier':owner,
- 'title':''}
- defaults.update(values)
- p = Person.objects.create(**defaults)
- p.person_types.add(PersonType.objects.get(
- txt_idx='head_scientist'))
- return p
-
-def parse_comment_addr_nature(nature, addr, owner):
- addr = parse_string(addr)
- nature = parse_string(nature)
- comments = []
- if nature:
- comments += [u"Aménagement :", nature]
- if addr:
- comments += [u"Adresse :", addr]
- if not comments:
- return ""
- return u"\n".join(comments)
-
-# si pas de start date : premier janvier de year
-
-ope_types = {
- 'AET':('other_study',
- 'Autre étude', True),
- 'APP':('assistance_preparation_help',
- 'Aide à la préparation de publication', True),
- 'DOC':('documents_study',
- 'Étude documentaire', True),
- 'EV':('evaluation',
- "Fouille d'évaluation", True),
- 'FOU':('ancient_excavation',
- "Fouille ancienne", True),
- 'FP':('prog_excavation',
- "Fouille programmée", False),
- 'MH':('building_study', "Fouille avant MH", True),
- 'OPD':('arch_diagnostic',
- "Diagnostic archéologique", True),
- 'PAN':('analysis_program',
- "Programme d'analyses", False),
- 'PCR':('collective_research_project',
- "Projet collectif de recherche", False),
- 'PMS':('specialized_eqp_prospection',
- "Prospection avec matériel spécialisé", False),
- 'PRD':('diachronic_prospection',
- "Prospection diachronique", False),
- 'PI':('diachronic_prospection',
- "Prospection diachronique", False),
- 'PRM':('metal_detector_prospection',
- "Prospection détecteur de métaux", False),
- 'PRT':('thematic_prospection',
- "Prospection thématique", False),
- 'PT':('thematic_prospection',
- "Prospection thématique", False),
- 'RAR':('cave_art_record',
- "Relevé d'art rupestre", False),
- 'SD':('sampling_research',
- "Sondage", False),
- 'SP':('prev_excavation',
- "Fouille préventive", True),
- 'SU':('emergency_excavation',
- "Fouille préventive d'urgence", True),
-}
-
-_CACHED_OPE_TYPES = {}
-
-def _prepare_ope_types():
- for k in ope_types.keys():
- txt_idx, label, preventive = ope_types[k]
- ot, created = OperationType.objects.get_or_create(txt_idx=txt_idx,
- defaults={'label':label, 'preventive':preventive})
- if k not in _CACHED_OPE_TYPES.keys():
- _CACHED_OPE_TYPES[k] = ot
-
-def parse_patriarche_operationtype(value):
- if value not in _CACHED_OPE_TYPES.keys():
- return None
- return _CACHED_OPE_TYPES[value]
-
-_dpt_re_filter = re.compile('^\([0-9]*\) ')
-
-def parse_ope_name(value):
- if not value:
- return ''
- value = value.strip()
- if value.lower() == 'null':
- return ''
- value = _dpt_re_filter.sub('', value)
- return value
-
-def parse_ha(value):
- value = parse_string(value)
- try:
- value = float(value) * 10000
- except:
- value = None
- return value
-
-def parse_rapp_index(value):
- value = parse_string(value)
- items = re.findall(r'[0-9]+$', value)
- if items:
- return int(items[-1])
-
-_CACHED_DOC_TYPES = {}
-
-def parse_doc_types(value):
- value = parse_string(value)
- if value not in _CACHED_DOC_TYPES:
- if value not in settings.ISHTAR_DOC_TYPES:
- return
- _CACHED_DOC_TYPES[value], created = SourceType.objects.get_or_create(
- txt_idx=value,
- defaults={"label":settings.ISHTAR_DOC_TYPES[value]})
- return _CACHED_DOC_TYPES[value]
-
# attrs, convert
DEFAULT_OPE_COLS = [
[], # numéro de dossier ?