diff options
| author | Étienne Loks <etienne.loks@proxience.com> | 2015-10-24 03:50:36 +0200 | 
|---|---|---|
| committer | Étienne Loks <etienne.loks@proxience.com> | 2015-10-24 03:51:14 +0200 | 
| commit | ab66e177c787f4982790b20079e0d00d78829058 (patch) | |
| tree | 9e64c8d168e0ed4e73e3725b9c06d072d62233ff /misc/pre_import_sra_files.py | |
| parent | 51c50d6f27b70fde3b83b2cdb41ad41f0ac5e972 (diff) | |
| download | Ishtar-ab66e177c787f4982790b20079e0d00d78829058.tar.bz2 Ishtar-ab66e177c787f4982790b20079e0d00d78829058.zip | |
Imports: manage soft import (update) with unicity keys - script to initialize SRA file import
Diffstat (limited to 'misc/pre_import_sra_files.py')
| -rw-r--r-- | misc/pre_import_sra_files.py | 79 | 
1 files changed, 79 insertions, 0 deletions
"""Pre-process an SRA CSV export before importing it (pre_import_sra_files.py).

Reads a CSV file, computes for every data row a year, a numeric index and
an external identifier, appends them as three extra columns and writes the
result to a second CSV file. Rows that cannot be processed are reported on
standard output and left out of the result.
"""
import unicodecsv
import datetime

from django.conf import settings

from ishtar_common.data_importer import Importer

# Date formats tried, in order, by get_year().
DATE_FORMATS = ('%d/%m/%Y',)

# Zero-based columns read from each data row.
INTERNAL_REF_COL = 37
CREATION_COL = 34
RECEPTION_COL = 19

# Separators accepted between year and index: "2007-XXXX" or "2011.XXXX".
REFERENCE_SEPARATORS = ('-', '.')

# Number of leading rows treated as headers.
HEADER_ROWS = 3
# Titles appended to each header row for the computed columns.
EXTRA_COLUMNS = ('annee', 'identifiant numerique', 'external_id')


def get_year(value, formats=DATE_FORMATS):
    """Return the year of the date string *value*, or None if unparsable.

    Every format in *formats* is tried in turn; the first one that matches
    wins.
    """
    for fmt in formats:
        try:
            return datetime.datetime.strptime(value, fmt).year
        except (ValueError, TypeError):
            # Not this format (or not a string at all): try the next one.
            continue
    return None


# (year, external_id) pairs already produced, used to detect duplicates.
index_list = []


def _split_reference(internal_ref):
    """Split a "YYYY<sep>NNNN" reference into ``(year, index)`` integers.

    Returns ``(None, None)`` when the reference holds no separator or does
    not follow the pattern. Raises ValueError when the separator occurs more
    than once.
    """
    for separator in REFERENCE_SEPARATORS:
        if separator not in internal_ref:
            continue
        parts = internal_ref.split(separator)
        if len(parts) != 2:
            raise ValueError(
                "unexpected internal reference: {!r}".format(internal_ref))
        year, number = parts
        if len(year) != 4:
            return None, None
        try:
            return int(year), int(number)
        except ValueError:
            return None, None
    return None, None


def treatment(data):
    """Compute ``(year, index, external_id)`` for one CSV row.

    The year embedded in the internal reference takes precedence over the
    one read from the creation date, itself preferred to the reception
    date. Raises ValueError when no index or year can be determined or when
    the resulting identifier was already produced, and IndexError when the
    row is too short.
    """
    internal_ref = data[INTERNAL_REF_COL].strip()
    creation = data[CREATION_COL].strip()
    reception = data[RECEPTION_COL].strip()
    yr = get_year(creation)
    if not yr:
        yr = get_year(reception)

    year, idx = _split_reference(internal_ref)
    if idx is None:
        # No usable "year<sep>index" pattern: the whole reference must be
        # the index. "is None" so that a parsed index of 0 is kept.
        idx = int(internal_ref)
    if year:
        yr = year
    if not yr:
        # we should absolutely have a year!
        raise ValueError(
            "no year found for reference {!r}".format(internal_ref))

    external_id = "{}{}-{}".format(settings.ISHTAR_LOCAL_PREFIX, yr, idx)
    key = (yr, external_id)
    if key in index_list:
        raise ValueError("duplicate external id: {}".format(external_id))
    index_list.append(key)
    return yr, idx, external_id


def main(source='plouf.csv', target='plouf2.csv'):
    """Read *source*, append the computed columns and write *target*."""
    new_datas = []
    # unicodecsv decodes the bytes itself, hence the binary mode.
    with open(source, 'rb') as csv_file:
        reader = unicodecsv.reader(csv_file, encoding='utf-8')
        for line_idx, data in enumerate(reader):
            if line_idx < HEADER_ROWS:
                # headers
                data.extend(EXTRA_COLUMNS)
                new_datas.append(data)
                continue
            try:
                year, idx, external_id = treatment(data)
            except (ValueError, IndexError) as e:
                # Bad row: report it and leave it out of the result.
                print("Line {}: {}".format(line_idx + 1, e))
                continue
            data.extend([year, idx, external_id])
            new_datas.append(data)

    csv_content = Importer()._get_csv(new_datas, empty=u'')
    # The content is encoded to bytes before writing, hence the binary mode.
    with open(target, 'wb') as fle:
        fle.write(csv_content.encode('utf-8'))


# Called unconditionally, as the original script did, so that the behaviour
# is the same however the file is executed.
main()
