diff options
| -rw-r--r-- | scripts/typo_csv_to_ishtar_typo_csv.py | 134 | 
1 files changed, 134 insertions, 0 deletions
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Convert a hierarchical typology CSV (one column per level) to Ishtar's CSV.

Run with a CSV path to print the converted typology on stdout, or with the
literal argument ``test`` to run the embedded self-test.
"""

import csv
import sys

from django.utils.text import slugify

HELP_TEXT = """python3 typo_csv_to_ishtar_typo_csv.py CSV file|test [header number]

Convert a hierarchical CSV file with one column by level to a CSV file readable
by Ishtar.

"header number" is the number of line for headers. Default is 1.
"""

# Sample input used by test(): one column per hierarchy level, children are
# decorated with "|_____" and some cells pack several labels after "___".
CSV_TEST = """Niveau 1,Niveau 2,Niveau 3
"Epave (navire, avion, sous-marin, tank)",,
,|_____  Dotation de bord   ,
,|_____  Cargaison   ,
Ameublement (intérieurs habitation et/ou épave),,
,,|_____  meuble et accessoires,"___meuble, pied de meuble, coffre"
Ancre et corps-mort,"____ Ancre à jas, jas d'ancre, ancre en pierre",
"""

# Output expected from convert() for CSV_TEST with one header line.
EXPECTED_DEST = """id,txt_idx,label,parent,available,order,comment
,epave,"Epave (navire, avion, sous-marin, tank)",,True,10,
,epave-dotation-de-bord,"Dotation de bord",epave,True,20,
,epave-cargaison,"Cargaison",epave,True,30,
,ameublement,"Ameublement (intérieurs habitation et/ou épave)",,True,40,
,ameublement-meuble-et-accessoires,"Meuble et accessoires",ameublement,True,50,
,ameublement-meuble-et-accessoires-meuble,"Meuble",ameublement-meuble-et-accessoires,True,60,
,ameublement-meuble-et-accessoires-pied-de-meuble,"Pied de meuble",ameublement-meuble-et-accessoires,True,70,
,ameublement-meuble-et-accessoires-coffre,"Coffre",ameublement-meuble-et-accessoires,True,80,
,ancre-et-corps-mort,"Ancre et corps-mort",,True,90,
,ancre-et-corps-mort-ancre-a-jas,"Ancre à jas",ancre-et-corps-mort,True,100,
,ancre-et-corps-mort-jas-dancre,"Jas d'ancre",ancre-et-corps-mort,True,110,
,ancre-et-corps-mort-ancre-en-pierre,"Ancre en pierre",ancre-et-corps-mort,True,120,
"""


def convert(lines, nb_header, with_header=True):
    """Convert hierarchical rows into Ishtar typology CSV lines.

    :param lines: iterable of rows, each row being a sequence of string cells
        (for instance a ``csv.reader``); the column index is the level
    :param nb_header: number of leading rows to skip
    :param with_header: when true, the output starts with the column header
    :return: the converted lines joined with "\\n" (no trailing newline)
    """
    # a set gives O(1) duplicate detection; only membership is ever needed
    imported_slugs = set()
    # last slug recorded for each column index, reused by later rows that
    # leave that column empty
    current_result_by_level = {}
    current_idx = 0
    result = []
    if with_header:
        result.append("id,txt_idx,label,parent,available,order,comment")
    for nb_line, line in enumerate(lines):
        if nb_line < nb_header:
            continue
        current_slug, new_slug = "", ""
        has_empty_column = False
        for idx, labels in enumerate(line):
            if has_empty_column:
                # an empty cell with no known parent ends the row
                break

            # a cell starting with "__" packs several sibling labels
            # separated by commas
            if labels.startswith("__"):
                labels = labels.split(",")
            else:
                labels = [labels]
            for label in labels:
                # cleaning: drop the "|____" tree decoration
                label = label.replace("|", "").replace("_", "").strip()

                if not label:
                    # no data - try to get the data on the same column from a
                    # previous line
                    if idx in current_result_by_level:
                        current_slug = current_result_by_level[idx]
                    else:
                        has_empty_column = True
                    continue

                new_slug = ""
                if current_slug:
                    new_slug += current_slug + "-"
                slug = label
                # do not use what is inside parenthesis for slug
                if "(" in slug:
                    slug = slug[:slug.index("(")].strip()
                new_slug += slugify(slug)
                if new_slug in imported_slugs:
                    # already emitted: only use it as parent for what follows
                    current_slug = new_slug
                    continue
                imported_slugs.add(new_slug)
                current_idx += 1
                result.append(
                    ',{slug},"{label}",{parent},True,{order},'.format(
                        slug=new_slug,
                        # the label is written between double quotes, so
                        # embedded quotes must be doubled to keep valid CSV
                        label=label.capitalize().replace('"', '""'),
                        parent=current_slug, order=current_idx * 10)
                )
            # NOTE: new_slug is reset once per row, not per column, so on an
            # empty cell it may still hold the slug of an earlier column
            if new_slug:
                current_slug = new_slug
                current_result_by_level[idx] = new_slug
    return "\n".join(result)


def test():
    """Run convert() on CSV_TEST and print every difference with EXPECTED_DEST.

    Prints "Test OK!" when the output matches line for line.
    """
    reader = csv.reader(CSV_TEST.split("\n"))
    # convert() returns no trailing newline: drop the one of the fixture so
    # both sides have the same number of lines
    expected = EXPECTED_DEST.rstrip("\n").split("\n")
    got = convert(reader, 1).split("\n")
    got_error = False
    for idx, line in enumerate(got):
        if idx >= len(expected):
            got_error = True
            print("{} - ERROR: nothing expected got: {}".format(idx + 1, line))
        elif expected[idx] != line:
            got_error = True
            print("{} - ERROR: '{}' expected got: '{}'".format(
                idx + 1, expected[idx], line))
    # expected lines that the conversion did not produce at all
    for idx in range(len(got), len(expected)):
        got_error = True
        print("{} - ERROR: '{}' expected got nothing".format(
            idx + 1, expected[idx]))
    if not got_error:
        print("Test OK!")


if __name__ == '__main__':
    if len(sys.argv) < 2:
        sys.stdout.write(HELP_TEXT)
    else:
        CSV_FILE = sys.argv[1]
        NB_HEADER = 1
        if len(sys.argv) > 2:
            try:
                NB_HEADER = int(sys.argv[2])
            except ValueError:
                # keep the documented default but tell the user about it
                sys.stderr.write(
                    'Invalid header number "{}": using default (1).\n'.format(
                        sys.argv[2]))
        if CSV_FILE == "test":
            test()
        else:
            # newline="" is required by the csv module to parse quoted
            # fields containing line breaks correctly
            with open(CSV_FILE, newline="") as csv_file:
                reader = csv.reader(csv_file)
                print(convert(reader, NB_HEADER))
\ No newline at end of file  | 
