summaryrefslogtreecommitdiff
path: root/scripts/pre_import_sra_files.py
diff options
context:
space:
mode:
author: Étienne Loks <etienne.loks@iggdrasil.net> 2016-01-18 18:41:39 +0100
committer: Étienne Loks <etienne.loks@iggdrasil.net> 2016-01-19 00:18:02 +0100
commit: e9c377136b8d0329f464749ea9671874d9dde17e (patch)
tree: 17e9fbf51fc054d18ddcd7fabc87e8684daf4eb4 /scripts/pre_import_sra_files.py
parent: 50124308f8f514e0f0120257a7d6a97523edb160 (diff)
downloadIshtar-e9c377136b8d0329f464749ea9671874d9dde17e.tar.bz2
Ishtar-e9c377136b8d0329f464749ea9671874d9dde17e.zip
Configure for publications on pypi, descriptions, etc.
Diffstat (limited to 'scripts/pre_import_sra_files.py')
-rw-r--r--scripts/pre_import_sra_files.py79
1 files changed, 79 insertions, 0 deletions
diff --git a/scripts/pre_import_sra_files.py b/scripts/pre_import_sra_files.py
new file mode 100644
index 000000000..df00d3ef5
--- /dev/null
+++ b/scripts/pre_import_sra_files.py
@@ -0,0 +1,79 @@
+import unicodecsv
+import datetime
+
+from django.conf import settings
+
+from ishtar_common.data_importer import Importer
+
+
def get_year(value, formats=('%d/%m/%Y',)):
    """Return the year of a date string, or None when it cannot be parsed.

    Each format in ``formats`` is tried in order and the year of the first
    successful parse is returned.

    :param value: date string to parse (``DD/MM/YYYY`` by default)
    :param formats: iterable of ``strptime`` format strings to try
    :return: the year as an int, or None when no format matches or when
        ``value`` is not a string
    """
    for fmt in formats:
        # the try is per format so that a failed parse falls through to the
        # next candidate instead of aborting the whole lookup
        try:
            return datetime.datetime.strptime(value, fmt).year
        except (ValueError, TypeError):
            continue
    return None
+
# (year, external_id) pairs already produced by treatment(); used to reject
# duplicated identifiers
index_list = []


def _parse_internal_ref(internal_ref):
    """Split a ``YYYY-NNNN`` or ``YYYY.NNNN`` reference into (year, idx).

    Returns ``(None, None)`` when the reference has no separator, when the
    part before the separator is not 4 characters long, or when one of the
    two parts is not an integer.

    :raises ValueError: if the separator appears more than once
    """
    for separator in ('-', '.'):
        if separator not in internal_ref:
            continue
        parts = internal_ref.split(separator)
        if len(parts) != 2:
            raise ValueError(
                "unexpected internal reference: {!r}".format(internal_ref))
        year_part, idx_part = parts
        if len(year_part) != 4:
            break
        try:
            return int(year_part), int(idx_part)
        except ValueError:
            break
        # only the first separator found is considered ('-' before '.')
    return None, None


def treatment(data):
    """Compute the year, numeric index and external id of one CSV row.

    The row is read by position: column 37 holds the internal reference,
    columns 34 and 19 hold the creation and reception dates. The year comes
    from the creation date, falling back to the reception date; a year
    embedded in the internal reference (``2007-XXXX`` or ``2011.XXXX``)
    takes precedence over both. The generated ``(year, external_id)`` pair
    is recorded in the module-level ``index_list``.

    :param data: sequence of strings for one CSV row (at least 38 columns)
    :return: tuple ``(year, idx, external_id)``
    :raises ValueError: if the internal reference cannot be converted to an
        integer index, if no year can be determined, or if the external id
        has already been generated
    :raises IndexError: if the row has fewer than 38 columns
    """
    internal_ref = data[37].strip()
    creation = data[34].strip()
    reception = data[19].strip()

    yr = get_year(creation)
    if not yr:
        yr = get_year(reception)

    year, idx = _parse_internal_ref(internal_ref)
    if idx is None:
        # no usable "year + index" reference: the whole value is the index
        idx = int(internal_ref)
    if year:
        # the year of the internal reference wins over the dates
        yr = year
    if not yr:
        # we should absolutely have a year!
        raise ValueError(
            "no year found for internal reference {!r}".format(internal_ref))

    external_id = "{}{}-{}".format(settings.ISHTAR_LOCAL_PREFIX, yr, idx)
    if (yr, external_id) in index_list:
        raise ValueError("duplicate external id: {}".format(external_id))
    index_list.append((yr, external_id))
    return yr, idx, external_id
+
+
# Script body: read 'plouf.csv', append the computed year / numeric index /
# external id to every row and write the result to 'plouf2.csv'.
new_datas = []
# unicodecsv decodes the bytes itself, so the file is opened in binary mode
with open('plouf.csv', 'rb') as csv_file:
    datas = list(unicodecsv.reader(csv_file, encoding='utf-8'))

for line_idx, data in enumerate(datas):
    if line_idx < 3:
        # the first three lines are headers: label the new columns
        data.extend(['annee', 'identifiant numerique', 'external_id'])
        new_datas.append(data)
        continue
    try:
        year, num_idx, external_id = treatment(data)
    except Exception as e:
        # best-effort import: report the faulty line and leave it out
        print("Line {}: {}".format(line_idx + 1, e))
        continue
    data.extend([year, num_idx, external_id])
    new_datas.append(data)

csv_content = Importer()._get_csv(new_datas, empty=u'')
# the content is encoded before writing, hence the binary mode
with open('plouf2.csv', 'wb') as fle:
    fle.write(csv_content.encode('utf-8'))