summaryrefslogtreecommitdiff
path: root/misc/pre_import_sra_files.py
diff options
context:
space:
mode:
authorÉtienne Loks <etienne.loks@iggdrasil.net>2016-01-18 18:41:39 +0100
committerÉtienne Loks <etienne.loks@iggdrasil.net>2016-01-19 00:18:02 +0100
commit494aada27e305658ce60b0815fd992857222682b (patch)
tree17e9fbf51fc054d18ddcd7fabc87e8684daf4eb4 /misc/pre_import_sra_files.py
parentf384337ef0d9d2e40d09204f18c4a486e925132c (diff)
downloadIshtar-494aada27e305658ce60b0815fd992857222682b.tar.bz2
Ishtar-494aada27e305658ce60b0815fd992857222682b.zip
Configure for publications on pypi, descriptions, etc.
Diffstat (limited to 'misc/pre_import_sra_files.py')
-rw-r--r--misc/pre_import_sra_files.py79
1 files changed, 0 insertions, 79 deletions
diff --git a/misc/pre_import_sra_files.py b/misc/pre_import_sra_files.py
deleted file mode 100644
index df00d3ef5..000000000
--- a/misc/pre_import_sra_files.py
+++ /dev/null
@@ -1,79 +0,0 @@
-import unicodecsv
-import datetime
-
-from django.conf import settings
-
-from ishtar_common.data_importer import Importer
-
-
def get_year(value):
    """Return the year of a date string, or None if it cannot be parsed.

    :param value: date as text, expected in ``dd/mm/YYYY`` form.
    :return: the year as an int, or None when ``value`` matches none of
        the accepted formats (or is not a string at all).
    """
    # The original listed '%d/%m/%Y' twice and returned from inside a single
    # try block, so only one format was ever effective. Each format is now
    # tried independently; add further formats to this tuple if needed.
    for fmt in ('%d/%m/%Y',):
        try:
            return datetime.datetime.strptime(value, fmt).year
        except (ValueError, TypeError):
            # ValueError: text does not match this format.
            # TypeError: value is not a string (e.g. None).
            continue
    return None
-
# (year, external_id) pairs already produced by treatment(); used to reject
# duplicates across the whole input file.
index_list = []


def _split_internal_ref(internal_ref):
    """Parse a ``YYYY-N`` or ``YYYY.N`` reference into ``(year, index)``.

    Return ``(None, None)`` when the reference does not follow that pattern.
    """
    # '-' takes precedence over '.', only one separator kind is considered.
    for sep in ('-', '.'):
        if sep in internal_ref:
            prefix, _, suffix = internal_ref.partition(sep)
            break
    else:
        return None, None
    if len(prefix) != 4:  # only a 4-character prefix is treated as a year
        return None, None
    try:
        return int(prefix), int(suffix)
    except ValueError:
        return None, None


def treatment(data):
    """Compute the year, numeric index and external id of one CSV row.

    The internal reference is read from column 37, the creation date from
    column 34 and the reception date from column 19 (positions are tied to
    the layout of the imported file).

    A reference shaped like ``2007-12`` or ``2011.12`` provides both the year
    and the index, and its year wins over the dates. Otherwise the whole
    reference must be an integer index and the year is taken from the
    creation date, falling back to the reception date.

    :param data: list of cell values for one row.
    :return: tuple ``(year, index, external_id)``.
    :raises ValueError: if no index or no year can be determined, or if the
        resulting external id has already been produced for another row.
    """
    internal_ref = data[37].strip()
    creation = data[34].strip()
    reception = data[19].strip()

    year, idx = _split_internal_ref(internal_ref)
    # Test against None explicitly: an index of 0 is a valid parsed value.
    if idx is None:
        try:
            idx = int(internal_ref)
        except ValueError:
            raise ValueError(
                "Cannot extract a numeric index from internal reference "
                "{!r}".format(internal_ref))

    # The year embedded in the reference has priority over the dates.
    yr = year or get_year(creation) or get_year(reception)
    if not yr:
        # Explicit raise instead of assert: validation must survive -O.
        raise ValueError(
            "No year found for internal reference {!r}".format(internal_ref))

    external_id = "{}{}-{}".format(settings.ISHTAR_LOCAL_PREFIX, yr, idx)
    key = (yr, external_id)
    if key in index_list:
        raise ValueError("Duplicate external id {}".format(external_id))
    index_list.append(key)
    return yr, idx, external_id
-
-
# Number of leading rows of the input file that are headers, not data.
HEADER_ROWS = 3

new_datas = []
# unicodecsv decodes the bytes itself, so the file is opened in binary mode.
with open('plouf.csv', 'rb') as csv_file:
    datas = list(unicodecsv.reader(csv_file, encoding='utf-8'))

for line_idx, data in enumerate(datas):
    if line_idx < HEADER_ROWS:
        # Every header row gets the labels of the three added columns.
        data.extend(['annee', 'identifiant numerique', 'external_id'])
        new_datas.append(data)
        continue
    try:
        year, num_id, external_id = treatment(data)
    except Exception as e:
        # Best effort: report the faulty line (1-based) and drop it from
        # the output instead of aborting the whole conversion.
        print("Line {}: {}".format(line_idx + 1, e))
        continue
    data.extend([year, num_id, external_id])
    new_datas.append(data)

csv_content = Importer()._get_csv(new_datas, empty=u'')
# The content is encoded to bytes before writing, hence binary mode.
with open('plouf2.csv', 'wb') as fle:
    fle.write(csv_content.encode('utf-8'))