diff options
author | Étienne Loks <etienne.loks@iggdrasil.net> | 2016-01-18 18:41:39 +0100 |
---|---|---|
committer | Étienne Loks <etienne.loks@iggdrasil.net> | 2016-01-19 00:18:02 +0100 |
commit | e9c377136b8d0329f464749ea9671874d9dde17e (patch) | |
tree | 17e9fbf51fc054d18ddcd7fabc87e8684daf4eb4 /scripts/pre_import_sra_files.py | |
parent | 50124308f8f514e0f0120257a7d6a97523edb160 (diff) | |
download | Ishtar-e9c377136b8d0329f464749ea9671874d9dde17e.tar.bz2 Ishtar-e9c377136b8d0329f464749ea9671874d9dde17e.zip |
Configure for publications on pypi, descriptions, etc.
Diffstat (limited to 'scripts/pre_import_sra_files.py')
-rw-r--r-- | scripts/pre_import_sra_files.py | 79 |
1 files changed, 79 insertions, 0 deletions
diff --git a/scripts/pre_import_sra_files.py b/scripts/pre_import_sra_files.py new file mode 100644 index 000000000..df00d3ef5 --- /dev/null +++ b/scripts/pre_import_sra_files.py @@ -0,0 +1,79 @@ +import unicodecsv +import datetime + +from django.conf import settings + +from ishtar_common.data_importer import Importer + + +def get_year(value): + try: + for fmt in ['%d/%m/%Y', '%d/%m/%Y']: + return datetime.datetime.strptime(value, fmt).year + except: + pass + +index_list = [] + + +def treatment(data): + internal_ref = data[37].strip() + creation = data[34].strip() + reception = data[19].strip() + yr = get_year(creation) + if not yr: + yr = get_year(reception) + + idx, year = None, None + if '-' in internal_ref: + year, y_idx = internal_ref.split('-') + if len(year) == 4: # 2007-XXXX + try: + year = int(year) + idx = int(y_idx) + except ValueError: + pass + elif '.' in internal_ref: + year, y_idx = internal_ref.split('.') + if len(year) == 4: # 2011.XXXX + try: + year = int(year) + idx = int(y_idx) + except ValueError: + pass + if not idx: + idx = int(internal_ref) + if year and year != yr: + yr = year + assert yr # we should absolutly have a year! + + external_id = "{}{}-{}".format(settings.ISHTAR_LOCAL_PREFIX, yr, idx) + assert (yr, external_id) not in index_list + index_list.append((yr, external_id)) + return yr, idx, external_id + + +new_datas = [] +with open('plouf.csv') as csv_file: + datas = [line for line in unicodecsv.reader(csv_file, + encoding='utf-8')] + for idx, data in enumerate(datas): + if idx < 3: + # headers + data.append('annee') + data.append('identifiant numerique') + data.append('external_id') + new_datas.append(data) + continue + try: + year, idx, external_id = treatment(data) + data.append(year) + data.append(idx) + data.append(external_id) + new_datas.append(data) + except Exception as e: + print("Line {}: {}".format(idx + 1, e)) + +csv = Importer()._get_csv(new_datas, empty=u'') +with open('plouf2.csv', 'w') as fle: + fle.write(csv.encode('utf-8')) |