diff options
author | Étienne Loks <etienne.loks@iggdrasil.net> | 2018-04-03 11:33:17 +0200 |
---|---|---|
committer | Étienne Loks <etienne.loks@iggdrasil.net> | 2018-04-03 11:33:17 +0200 |
commit | e8e510620e625994b32065db9ccc4616e4e698fc (patch) | |
tree | f3df0c1e6bec90a300eb2bacde4322aeabc29b32 /scripts | |
parent | b7b609dcf3dad3bab338297d6a32a44f229f3e53 (diff) | |
download | Ishtar-e8e510620e625994b32065db9ccc4616e4e698fc.tar.bz2 Ishtar-e8e510620e625994b32065db9ccc4616e4e698fc.zip |
Script: convert hierarchical CSV to typo CSV for Ishtar
Diffstat (limited to 'scripts')
-rw-r--r-- | scripts/typo_csv_to_ishtar_typo_csv.py | 134 |
1 files changed, 134 insertions, 0 deletions
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Convert a hierarchical CSV (one column per level) to an Ishtar typology CSV.

Script path in the repository: scripts/typo_csv_to_ishtar_typo_csv.py
"""

import csv
import itertools
import sys

try:
    from django.utils.text import slugify
except ImportError:
    # Only ``slugify`` is needed from Django; fall back to a standard-library
    # version so the script can run without Django installed.
    import re
    import unicodedata

    def slugify(value):
        """Return an ASCII slug: lower-case words joined by single hyphens.

        Intended to mirror the default (ASCII) behaviour of
        ``django.utils.text.slugify``; only used when Django is missing.
        """
        value = (
            unicodedata.normalize("NFKD", str(value))
            .encode("ascii", "ignore")
            .decode("ascii")
        )
        value = re.sub(r"[^\w\s-]", "", value.lower())
        return re.sub(r"[-\s]+", "-", value).strip("-_")


HELP_TEXT = """python3 typo_csv_to_ishtar_typo_csv.py CSV file|test [header number]

Convert a hierarchical CSV file with one column by level to a CSV file readable
by Ishtar.

"header number" is the number of line for headers. Default is 1.
"""

# Sample input used by the built-in self-test (``test`` command line argument).
CSV_TEST = """Niveau 1,Niveau 2,Niveau 3
"Epave (navire, avion, sous-marin, tank)",,
,|_____ Dotation de bord ,
,|_____ Cargaison ,
Ameublement (intérieurs habitation et/ou épave),,
,,|_____ meuble et accessoires,"___meuble, pied de meuble, coffre"
Ancre et corps-mort,"____ Ancre à jas, jas d'ancre, ancre en pierre",
"""

# Output expected from converting CSV_TEST with one header line.
EXPECTED_DEST = """id,txt_idx,label,parent,available,order,comment
,epave,"Epave (navire, avion, sous-marin, tank)",,True,10,
,epave-dotation-de-bord,"Dotation de bord",epave,True,20,
,epave-cargaison,"Cargaison",epave,True,30,
,ameublement,"Ameublement (intérieurs habitation et/ou épave)",,True,40,
,ameublement-meuble-et-accessoires,"Meuble et accessoires",ameublement,True,50,
,ameublement-meuble-et-accessoires-meuble,"Meuble",ameublement-meuble-et-accessoires,True,60,
,ameublement-meuble-et-accessoires-pied-de-meuble,"Pied de meuble",ameublement-meuble-et-accessoires,True,70,
,ameublement-meuble-et-accessoires-coffre,"Coffre",ameublement-meuble-et-accessoires,True,80,
,ancre-et-corps-mort,"Ancre et corps-mort",,True,90,
,ancre-et-corps-mort-ancre-a-jas,"Ancre à jas",ancre-et-corps-mort,True,100,
,ancre-et-corps-mort-jas-dancre,"Jas d'ancre",ancre-et-corps-mort,True,110,
,ancre-et-corps-mort-ancre-en-pierre,"Ancre en pierre",ancre-et-corps-mort,True,120,
"""


def convert(lines, nb_header, with_header=True):
    """Convert hierarchical rows into Ishtar typology CSV text.

    Each row is a sequence of cells, one column per hierarchy level. A cell
    starting with ``__`` may hold several comma-separated sibling labels.
    ``|`` and ``_`` decoration characters are removed from every label. An
    empty cell inherits the slug last recorded for its column; the first empty
    cell whose column has no recorded slug stops the processing of the row.

    :param lines: iterable of rows (e.g. a ``csv.reader``), each row being a
        sequence of strings
    :param nb_header: number of leading rows to skip
    :param with_header: when true, start the output with the column names
    :return: the converted lines joined with ``\\n`` (no trailing newline)
    """
    seen_slugs = set()  # set: only membership tests are needed
    slug_by_level = {}
    current_idx = 0
    result = []
    if with_header:
        result.append("id,txt_idx,label,parent,available,order,comment")
    for nb_line, line in enumerate(lines):
        if nb_line < nb_header:
            continue
        current_slug, new_slug = "", ""
        has_empty_column = False
        for idx, cell in enumerate(line):
            if has_empty_column:
                break

            if cell.startswith("__"):  # several siblings packed in one cell
                labels = cell.split(",")
            else:
                labels = [cell]
            for label in labels:
                # cleaning
                label = label.replace("|", "").replace("_", "").strip()

                if not label:
                    # no data - try to get the data on the same column from a
                    # previous line
                    if idx in slug_by_level:
                        current_slug = slug_by_level[idx]
                        has_empty_column = False
                    else:
                        has_empty_column = True
                    continue

                # do not use what is inside parenthesis for slug
                slug_source = label
                if "(" in slug_source:
                    slug_source = slug_source[:slug_source.index("(")].strip()
                prefix = current_slug + "-" if current_slug else ""
                new_slug = prefix + slugify(slug_source)
                if new_slug in seen_slugs:
                    # already exported: only use it as the current parent
                    current_slug = new_slug
                    continue
                seen_slugs.add(new_slug)
                current_idx += 1
                # the label is written between double quotes: embedded
                # quotes must be doubled to keep the CSV well-formed
                quoted_label = label.capitalize().replace('"', '""')
                result.append(
                    ',{slug},"{label}",{parent},True,{order},'.format(
                        slug=new_slug, label=quoted_label,
                        parent=current_slug, order=current_idx * 10)
                )
            # Done once per cell (not per label) so that sibling labels share
            # the same parent. ``new_slug`` may come from an earlier cell of
            # the row: empty trailing cells then record the row's latest slug
            # for their column, which later rows inherit.
            if new_slug:
                current_slug = new_slug
                slug_by_level[idx] = new_slug
    return "\n".join(result)


def test():
    """Convert CSV_TEST and print every difference with EXPECTED_DEST.

    Prints ``Test OK!`` when the conversion matches line for line. Surplus
    lines and missing lines are both reported.
    """
    reader = csv.reader(CSV_TEST.split("\n"))
    expected_lines = EXPECTED_DEST.splitlines()
    got_lines = convert(reader, 1).split("\n")
    got_error = False
    pairs = itertools.zip_longest(expected_lines, got_lines)
    for number, (expected, got) in enumerate(pairs, start=1):
        if expected == got:
            continue
        got_error = True
        if expected is None:
            print("{} - ERROR: nothing expected got: {}".format(number, got))
        elif got is None:
            print("{} - ERROR: '{}' expected got nothing".format(
                number, expected))
        else:
            print("{} - ERROR: '{}' expected got: '{}'".format(
                number, expected, got))
    if not got_error:
        print("Test OK!")


def main(argv=None):
    """Command line entry point.

    :param argv: arguments without the program name (default:
        ``sys.argv[1:]``)
    :return: process exit status
    """
    args = sys.argv[1:] if argv is None else list(argv)
    if not args:
        sys.stdout.write(HELP_TEXT)
        return 0
    csv_file = args[0]
    nb_header = 1
    if len(args) > 1:
        try:
            nb_header = int(args[1])
        except ValueError:
            # keep the default but tell the user the argument was ignored
            sys.stderr.write(
                "Invalid header number {!r}: using {}.\n".format(
                    args[1], nb_header))
    if csv_file == "test":
        test()
        return 0
    # newline="" lets the csv module handle line endings inside quoted
    # fields itself
    with open(csv_file, newline="") as source:
        print(convert(csv.reader(source), nb_header))
    return 0


if __name__ == '__main__':
    sys.exit(main())
\ No newline at end of file |