summaryrefslogtreecommitdiff
path: root/scripts/pre_import_sra_files.py
diff options
context:
space:
mode:
authorÉtienne Loks <etienne.loks@iggdrasil.net>2019-02-27 20:44:52 +0100
committerÉtienne Loks <etienne.loks@iggdrasil.net>2019-06-17 13:21:27 +0200
commit0f3229a119e53054d98c5e878a9581403628cd08 (patch)
treee07ad2e91e2544f7b2d61e8f3f653fdda26bfd52 /scripts/pre_import_sra_files.py
parentcfabd636d0a6d84e8377be3e4d041b4e75ddda9a (diff)
downloadIshtar-0f3229a119e53054d98c5e878a9581403628cd08.tar.bz2
Ishtar-0f3229a119e53054d98c5e878a9581403628cd08.zip
Migrate to python 3 - Clean old migrations and some old scripts
Diffstat (limited to 'scripts/pre_import_sra_files.py')
-rw-r--r--scripts/pre_import_sra_files.py79
1 files changed, 0 insertions, 79 deletions
diff --git a/scripts/pre_import_sra_files.py b/scripts/pre_import_sra_files.py
deleted file mode 100644
index 07e4db1df..000000000
--- a/scripts/pre_import_sra_files.py
+++ /dev/null
@@ -1,79 +0,0 @@
-import unicodecsv
-import datetime
-
-from django.conf import settings
-
-from ishtar_common.data_importer import Importer
-
-
-def get_year(value):
- try:
- for fmt in ['%d/%m/%Y', '%d/%m/%Y']:
- return datetime.datetime.strptime(value, fmt).year
- except:
- pass
-
-index_list = []
-
-
-def treatment(data):
- internal_ref = data[37].strip()
- creation = data[34].strip()
- reception = data[19].strip()
- yr = get_year(creation)
- if not yr:
- yr = get_year(reception)
-
- idx, year = None, None
- if '-' in internal_ref:
- year, y_idx = internal_ref.split('-')
- if len(year) == 4: # 2007-XXXX
- try:
- year = int(year)
- idx = int(y_idx)
- except ValueError:
- pass
- elif '.' in internal_ref:
- year, y_idx = internal_ref.split('.')
- if len(year) == 4: # 2011.XXXX
- try:
- year = int(year)
- idx = int(y_idx)
- except ValueError:
- pass
- if not idx:
- idx = int(internal_ref)
- if year and year != yr:
- yr = year
- assert yr # we should absolutly have a year!
-
- external_id = "SRA{}-{}".format(yr, idx)
- assert (yr, external_id) not in index_list
- index_list.append((yr, external_id))
- return yr, idx, external_id
-
-
-new_datas = []
-with open('plouf.csv') as csv_file:
- datas = [line for line in unicodecsv.reader(csv_file,
- encoding='utf-8')]
- for idx, data in enumerate(datas):
- if idx < 3:
- # headers
- data.append('annee')
- data.append('identifiant numerique')
- data.append('external_id')
- new_datas.append(data)
- continue
- try:
- year, idx, external_id = treatment(data)
- data.append(year)
- data.append(idx)
- data.append(external_id)
- new_datas.append(data)
- except Exception as e:
- print("Line {}: {}".format(idx + 1, e))
-
-csv = Importer()._get_csv(new_datas, empty=u'')
-with open('plouf2.csv', 'w') as fle:
- fle.write(csv.encode('utf-8'))