diff options
author | Étienne Loks <etienne.loks@iggdrasil.net> | 2016-01-18 18:41:39 +0100 |
---|---|---|
committer | Étienne Loks <etienne.loks@iggdrasil.net> | 2016-01-19 00:18:02 +0100 |
commit | 494aada27e305658ce60b0815fd992857222682b (patch) | |
tree | 17e9fbf51fc054d18ddcd7fabc87e8684daf4eb4 /misc/pre_import_sra_files.py | |
parent | f384337ef0d9d2e40d09204f18c4a486e925132c (diff) | |
download | Ishtar-494aada27e305658ce60b0815fd992857222682b.tar.bz2 Ishtar-494aada27e305658ce60b0815fd992857222682b.zip |
Configure for publications on pypi, descriptions, etc.
Diffstat (limited to 'misc/pre_import_sra_files.py')
-rw-r--r-- | misc/pre_import_sra_files.py | 79 |
1 files changed, 0 insertions, 79 deletions
# Reconstructed from the commit diff of misc/pre_import_sra_files.py
# (deleted by this commit; file mode 100644, index df00d3ef5..000000000).
"""Pre-process a CSV export before it is imported into Ishtar.

For every data row the script derives three extra columns -- a year, a
numeric index and an ``external_id`` -- from the internal reference column
and the creation/reception date columns, then writes the enriched rows to a
second CSV file.
"""
import datetime

# Date layouts tried, in order, by ``get_year``.  The original listed
# '%d/%m/%Y' twice; the duplicate added nothing and has been dropped.
DATE_FORMATS = ('%d/%m/%Y',)

# Number of leading rows of the input file that are headers.
HEADER_ROWS = 3

# Column positions read from each data row.
RECEPTION_COL = 19
CREATION_COL = 34
INTERNAL_REF_COL = 37

# (year, external_id) pairs already produced, used to reject duplicates.
index_list = []


def get_year(value, formats=DATE_FORMATS):
    """Return the year of the date string *value*, or None if unparsable.

    Each entry of *formats* is tried in turn.  The original wrapped the
    whole loop in a single ``try`` so a failure on the first format
    prevented any later format from being tried.
    """
    for fmt in formats:
        try:
            return datetime.datetime.strptime(value, fmt).year
        except (ValueError, TypeError):
            # ValueError: text does not match; TypeError: not a string.
            continue
    return None


def _parse_internal_ref(internal_ref):
    """Split a 'YYYY-NNNN' or 'YYYY.NNNN' reference into (year, idx).

    Only the first separator present is considered ('-' before '.').
    Either element is None when it cannot be parsed.
    """
    for sep in ('-', '.'):
        if sep not in internal_ref:
            continue
        # partition() tolerates repeated separators; a malformed remainder
        # is then rejected by int() instead of failing on tuple unpacking.
        year_part, _, idx_part = internal_ref.partition(sep)
        year, idx = None, None
        if len(year_part) == 4:  # e.g. 2007-XXXX / 2011.XXXX
            try:
                year = int(year_part)
                idx = int(idx_part)
            except ValueError:
                pass
        return year, idx
    return None, None


def treatment(data, prefix=None):
    """Compute ``(year, idx, external_id)`` for one CSV row.

    The year embedded in the internal reference wins over the year of the
    creation date, which itself wins over the year of the reception date.

    :param data: sequence of cell strings for the row.
    :param prefix: prefix of the external id; defaults to
        ``settings.ISHTAR_LOCAL_PREFIX``.
    :raises ValueError: if no index or year can be determined, or if the
        resulting external id was already produced for another row.
    """
    internal_ref = data[INTERNAL_REF_COL].strip()
    creation = data[CREATION_COL].strip()
    reception = data[RECEPTION_COL].strip()

    yr = get_year(creation)
    if not yr:
        yr = get_year(reception)

    year, idx = _parse_internal_ref(internal_ref)
    # ``is None`` rather than truthiness: an index of 0 is a parsed value.
    if idx is None:
        idx = int(internal_ref)
    if year and year != yr:
        yr = year
    # Explicit raise instead of ``assert``: asserts vanish under ``-O``.
    if not yr:
        raise ValueError(
            "no year found for reference {!r}".format(internal_ref))

    if prefix is None:
        # Imported lazily so the parsing helpers stay usable without a
        # configured Django project.
        from django.conf import settings
        prefix = settings.ISHTAR_LOCAL_PREFIX

    external_id = "{}{}-{}".format(prefix, yr, idx)
    key = (yr, external_id)
    if key in index_list:
        raise ValueError("duplicate external id {}".format(external_id))
    index_list.append(key)
    return yr, idx, external_id


def main(source='plouf.csv', target='plouf2.csv'):
    """Read *source*, append the computed columns and write *target*.

    Rows for which ``treatment`` fails are reported on stdout with their
    1-based line number and left out of the output.
    """
    import unicodecsv

    from ishtar_common.data_importer import Importer

    new_datas = []
    # Binary mode: unicodecsv does the decoding itself.
    with open(source, 'rb') as csv_file:
        datas = list(unicodecsv.reader(csv_file, encoding='utf-8'))

    for line_idx, data in enumerate(datas):
        if line_idx < HEADER_ROWS:
            # headers
            data.extend(['annee', 'identifiant numerique', 'external_id'])
            new_datas.append(data)
            continue
        try:
            year, ref_idx, external_id = treatment(data)
        except Exception as e:
            # Best effort: report the faulty line and carry on.
            print("Line {}: {}".format(line_idx + 1, e))
            continue
        data.extend([year, ref_idx, external_id])
        new_datas.append(data)

    content = Importer()._get_csv(new_datas, empty=u'')
    # Binary mode: the content is encoded to bytes before being written.
    with open(target, 'wb') as fle:
        fle.write(content.encode('utf-8'))


if __name__ == '__main__':
    main()