summary | refs | log | tree | commit | diff
path: root/misc
diff options
context:
space:
mode:
author: Étienne Loks <etienne.loks@proxience.com> 2015-10-24 03:50:36 +0200
committer: Étienne Loks <etienne.loks@proxience.com> 2015-10-24 03:51:14 +0200
commit: 42116f233489b878a512295adb86b5556e7257f2 (patch)
tree: 9e64c8d168e0ed4e73e3725b9c06d072d62233ff /misc
parent: 5b5c1bd7dd865fb530b156212474846db59b2191 (diff)
download: Ishtar-42116f233489b878a512295adb86b5556e7257f2.tar.bz2
Ishtar-42116f233489b878a512295adb86b5556e7257f2.zip
Imports: manage soft import (update) with unicity keys - script to initialize SRA file import
Diffstat (limited to 'misc')
-rw-r--r--misc/pre_import_sra_files.py79
1 files changed, 79 insertions, 0 deletions
diff --git a/misc/pre_import_sra_files.py b/misc/pre_import_sra_files.py
new file mode 100644
index 000000000..df00d3ef5
--- /dev/null
+++ b/misc/pre_import_sra_files.py
@@ -0,0 +1,79 @@
+import unicodecsv
+import datetime
+
+from django.conf import settings
+
+from ishtar_common.data_importer import Importer
+
+
def get_year(value, formats=('%d/%m/%Y',)):
    """Return the year of a date string, or None when it cannot be parsed.

    Each format in ``formats`` is tried in order and the first one that
    matches wins.

    :param value: date string to parse (e.g. ``'24/10/2015'``)
    :param formats: ``strptime`` formats to try, in order; defaults to the
        single day/month/four-digit-year format the script has always used
    :return: the year as an ``int``, or ``None`` if no format matches or
        ``value`` is not a string
    """
    for fmt in formats:
        try:
            return datetime.datetime.strptime(value, fmt).year
        except (ValueError, TypeError):
            # ValueError: the value does not match this format;
            # TypeError: the value is not a string at all (e.g. None).
            # Either way, fall through to the next candidate format.
            continue
    return None
+
# (year, external_id) pairs already produced by treatment(); used to reject
# duplicated identifiers.
index_list = []


def _split_reference(internal_ref):
    """Parse a ``YYYY-N`` or ``YYYY.N`` reference into ``(year, index)``.

    Only the first separator found is considered, ``-`` taking precedence
    over ``.``. ``(None, None)`` is returned when the reference has no
    separator, when the first part is not four characters long, or when
    either part is not an integer.

    :raises ValueError: if the separator occurs more than once
    """
    for separator in ('-', '.'):
        if separator not in internal_ref:
            continue
        parts = internal_ref.split(separator)
        if len(parts) != 2:
            raise ValueError(
                "Ambiguous internal reference: {!r}".format(internal_ref))
        year, y_idx = parts
        if len(year) != 4:  # expected forms: 2007-XXXX or 2011.XXXX
            return None, None
        try:
            return int(year), int(y_idx)
        except ValueError:
            return None, None
    return None, None


def treatment(data):
    """Compute ``(year, index, external_id)`` for one CSV row.

    The internal reference is read from column 37, the creation date from
    column 34 and the reception date from column 19. The year comes from the
    internal reference when it has the ``YYYY-N`` / ``YYYY.N`` form, otherwise
    from the creation date, then from the reception date. The external id is
    ``settings.ISHTAR_LOCAL_PREFIX`` followed by ``<year>-<index>``.

    Each accepted ``(year, external_id)`` pair is recorded in the
    module-level ``index_list``.

    :param data: one row of the source CSV, as a sequence of strings
    :return: a ``(year, index, external_id)`` tuple
    :raises ValueError: if no index or year can be determined, or if the
        resulting identifier has already been produced
    :raises IndexError: if the row has fewer columns than expected
    """
    internal_ref = data[37].strip()
    creation = data[34].strip()
    reception = data[19].strip()
    yr = get_year(creation) or get_year(reception)

    year, idx = _split_reference(internal_ref)
    if not idx:
        # Not a usable "year<sep>index" reference: the whole reference must
        # then be a plain integer (int() raises ValueError otherwise).
        idx = int(internal_ref)
    if year:
        # The year embedded in the reference takes precedence over the dates.
        yr = year
    if not yr:  # we should absolutely have a year!
        raise ValueError(
            "No year found for reference {!r}".format(internal_ref))

    external_id = "{}{}-{}".format(settings.ISHTAR_LOCAL_PREFIX, yr, idx)
    if (yr, external_id) in index_list:
        raise ValueError("Duplicate external id: {}".format(external_id))
    index_list.append((yr, external_id))
    return yr, idx, external_id
+
+
# Script body: read 'plouf.csv', append the computed year / numeric index /
# external id columns to every row, and write the result to 'plouf2.csv'.
# Rows that cannot be processed are reported on stdout and left out.
HEADER_ROW_COUNT = 3  # the first three rows of the source file are headers

new_datas = []
# unicodecsv decodes the raw bytes itself, so the file is opened in binary
# mode rather than text mode.
with open('plouf.csv', 'rb') as csv_file:
    datas = list(unicodecsv.reader(csv_file, encoding='utf-8'))

for line_idx, data in enumerate(datas):
    if line_idx < HEADER_ROW_COUNT:
        # headers
        data.extend(['annee', 'identifiant numerique', 'external_id'])
        new_datas.append(data)
        continue
    try:
        year, num_idx, external_id = treatment(data)
    except Exception as e:
        # Best effort: report the faulty line and keep going.
        print("Line {}: {}".format(line_idx + 1, e))
        continue
    data.extend([year, num_idx, external_id])
    new_datas.append(data)

csv_content = Importer()._get_csv(new_datas, empty=u'')
# The content is encoded to bytes before writing, hence binary mode.
with open('plouf2.csv', 'wb') as fle:
    fle.write(csv_content.encode('utf-8'))