summaryrefslogtreecommitdiff
path: root/scripts
diff options
context:
space:
mode:
authorÉtienne Loks <etienne.loks@iggdrasil.net>2018-04-03 11:33:17 +0200
committerÉtienne Loks <etienne.loks@iggdrasil.net>2018-04-03 11:33:17 +0200
commite8e510620e625994b32065db9ccc4616e4e698fc (patch)
treef3df0c1e6bec90a300eb2bacde4322aeabc29b32 /scripts
parentb7b609dcf3dad3bab338297d6a32a44f229f3e53 (diff)
downloadIshtar-e8e510620e625994b32065db9ccc4616e4e698fc.tar.bz2
Ishtar-e8e510620e625994b32065db9ccc4616e4e698fc.zip
Script: convert hierarchical CSV to typo CSV for Ishtar
Diffstat (limited to 'scripts')
-rw-r--r--scripts/typo_csv_to_ishtar_typo_csv.py134
1 files changed, 134 insertions, 0 deletions
diff --git a/scripts/typo_csv_to_ishtar_typo_csv.py b/scripts/typo_csv_to_ishtar_typo_csv.py
new file mode 100644
index 000000000..6adf4f2db
--- /dev/null
+++ b/scripts/typo_csv_to_ishtar_typo_csv.py
@@ -0,0 +1,134 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+import csv
+import sys
+
+from django.utils.text import slugify
+
# Usage message written to stdout when the script is started without any
# argument.
HELP_TEXT = """python3 typo_csv_to_ishtar_typo_csv.py CSV file|test [header number]

Convert a hierarchical CSV file with one column by level to a CSV file readable
by Ishtar.

"header number" is the number of line for headers. Default is 1.
"""

# Sample hierarchical input used by the built-in self-test: one column per
# hierarchy level, children prefixed with "|_____" decorations, and some
# cells holding several comma-separated labels introduced by underscores.
CSV_TEST = """Niveau 1,Niveau 2,Niveau 3
"Epave (navire, avion, sous-marin, tank)",,
,|_____ Dotation de bord ,
,|_____ Cargaison ,
Ameublement (intérieurs habitation et/ou épave),,
,,|_____ meuble et accessoires,"___meuble, pied de meuble, coffre"
Ancre et corps-mort,"____ Ancre à jas, jas d'ancre, ancre en pierre",
"""

# Output expected from converting CSV_TEST with one header line.
EXPECTED_DEST = """id,txt_idx,label,parent,available,order,comment
,epave,"Epave (navire, avion, sous-marin, tank)",,True,10,
,epave-dotation-de-bord,"Dotation de bord",epave,True,20,
,epave-cargaison,"Cargaison",epave,True,30,
,ameublement,"Ameublement (intérieurs habitation et/ou épave)",,True,40,
,ameublement-meuble-et-accessoires,"Meuble et accessoires",ameublement,True,50,
,ameublement-meuble-et-accessoires-meuble,"Meuble",ameublement-meuble-et-accessoires,True,60,
,ameublement-meuble-et-accessoires-pied-de-meuble,"Pied de meuble",ameublement-meuble-et-accessoires,True,70,
,ameublement-meuble-et-accessoires-coffre,"Coffre",ameublement-meuble-et-accessoires,True,80,
,ancre-et-corps-mort,"Ancre et corps-mort",,True,90,
,ancre-et-corps-mort-ancre-a-jas,"Ancre à jas",ancre-et-corps-mort,True,100,
,ancre-et-corps-mort-jas-dancre,"Jas d'ancre",ancre-et-corps-mort,True,110,
,ancre-et-corps-mort-ancre-en-pierre,"Ancre en pierre",ancre-et-corps-mort,True,120,
"""
+
+
def convert(lines, nb_header, with_header=True):
    """Convert hierarchical rows (one column per level) to Ishtar typo CSV.

    :param lines: iterable of rows, each row being a sequence of string
        cells (typically a ``csv.reader``)
    :param nb_header: number of leading rows to skip as headers
    :param with_header: when true, the output starts with the
        ``id,txt_idx,label,parent,available,order,comment`` header line
    :return: the generated CSV lines joined with ``"\\n"`` (no trailing
        newline)

    Each distinct slug is emitted once, with an ``order`` increasing by 10.
    Double quotes inside a label are doubled so that the quoted label field
    stays valid CSV.
    """
    # a set gives constant-time "already emitted" checks
    imported_slugs = set()
    # last slug registered for each column index
    slug_by_level = {}
    current_idx = 0
    result = []
    if with_header:
        result.append("id,txt_idx,label,parent,available,order,comment")
    for nb_line, line in enumerate(lines):
        if nb_line < nb_header:
            continue
        # NOTE: new_slug is reset once per row, not per column. An empty
        # column located after a filled one therefore registers the row's
        # last slug for its own level; skipped levels on later rows rely on
        # this to find their parent.
        current_slug, new_slug = "", ""
        has_empty_column = False
        for idx, cell in enumerate(line):
            if has_empty_column:
                # an empty column without any known parent ends the row
                break

            if cell.startswith("__"):  # marvelous source file...
                # several comma-separated labels packed in one cell
                labels = cell.split(",")
            else:
                labels = [cell]
            for label in labels:
                # cleaning: drop the tree decorations
                label = label.replace("|", "").replace("_", "").strip()

                if not label:
                    # no data - try to get the data on the same column from
                    # a previous line
                    if idx in slug_by_level:
                        current_slug = slug_by_level[idx]
                    else:
                        has_empty_column = True
                    continue

                # do not use what is inside parenthesis for slug
                new_slug = slugify(label.partition("(")[0].strip())
                if current_slug:
                    new_slug = current_slug + "-" + new_slug
                if new_slug in imported_slugs:
                    # already emitted: only use it as the current parent
                    current_slug = new_slug
                    continue
                imported_slugs.add(new_slug)
                current_idx += 1
                result.append(
                    ',{slug},"{label}",{parent},True,{order},'.format(
                        slug=new_slug,
                        # escape embedded quotes (CSV quote doubling)
                        label=label.capitalize().replace('"', '""'),
                        parent=current_slug,
                        order=current_idx * 10)
                )
            # done after all the labels of the cell so that every label of
            # a multi-label cell shares the same parent
            if new_slug:
                current_slug = new_slug
                slug_by_level[idx] = new_slug
    return "\n".join(result)
+
+
def test():
    """Run the built-in self-test and print the outcome.

    Converts ``CSV_TEST`` with one header line and compares the result line
    by line with ``EXPECTED_DEST``. Every mismatching, unexpected or missing
    line is reported on stdout with its 1-based line number; "Test OK!" is
    printed when nothing differs.

    :return: True when the conversion matches the expected output
    """
    reader = csv.reader(CSV_TEST.splitlines())
    expected = EXPECTED_DEST.splitlines()
    got = convert(reader, 1).splitlines()
    got_error = False
    # walk over the longest of both so that extra AND missing lines are seen
    for idx in range(max(len(expected), len(got))):
        if idx >= len(expected):
            got_error = True
            print("{} - ERROR: nothing expected got: {}".format(
                idx + 1, got[idx]))
        elif idx >= len(got):
            got_error = True
            print("{} - ERROR: '{}' expected got nothing".format(
                idx + 1, expected[idx]))
        elif expected[idx] != got[idx]:
            got_error = True
            print("{} - ERROR: '{}' expected got: '{}'".format(
                idx + 1, expected[idx], got[idx]))
    if not got_error:
        print("Test OK!")
    return not got_error
+
+
# Command line entry point:
#   - no argument: print the usage message
#   - "test" as first argument: run the built-in self-test
#   - otherwise: convert the given CSV file and print the result on stdout
# An optional second argument gives the number of header lines (default 1).
if __name__ == '__main__':
    if len(sys.argv) < 2:
        sys.stdout.write(HELP_TEXT)
    else:
        CSV_FILE = sys.argv[1]
        NB_HEADER = 1
        if len(sys.argv) > 2:
            try:
                NB_HEADER = int(sys.argv[2])
            except ValueError:
                # keep the default but tell the user the value was ignored
                sys.stderr.write(
                    "Invalid header number {!r}: using default ({}).\n".format(
                        sys.argv[2], NB_HEADER))
        if CSV_FILE == "test":
            test()
        else:
            # newline="" lets the csv module handle line endings itself,
            # including newlines embedded in quoted fields
            with open(CSV_FILE, newline="") as csv_file:
                reader = csv.reader(csv_file)
                print(convert(reader, NB_HEADER))