1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
|
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import csv
import sys
from django.utils.text import slugify
# Usage message written to stdout when the script is called without any
# argument.
HELP_TEXT = """python3 typo_csv_to_ish_typo_csv.py CSV file|test [header number]
Convert a hierarchical CSV file with one column by level to a CSV file readable
by Ishtar.
"header number" is the number of line for headers. Default is 1.
"""
# Sample hierarchical source used as input by test(): one header line, then
# one column per level. Cells may be left empty, be decorated with "|" and "_"
# characters, or (when starting with "__") hold several comma-separated
# labels.
CSV_TEST = """Niveau 1,Niveau 2,Niveau 3
"Epave (navire, avion, sous-marin, tank)",,
,|_____ Dotation de bord ,
,|_____ Cargaison ,
Ameublement (intérieurs habitation et/ou épave),,
,,|_____ meuble et accessoires,"___meuble, pied de meuble, coffre"
Ancre et corps-mort,"____ Ancre à jas, jas d'ancre, ancre en pierre",
"""
# Output that test() expects convert() to produce for CSV_TEST: the Ishtar
# header line followed by one line per imported label.
EXPECTED_DEST = """id,txt_idx,label,parent,available,order,comment
,epave,"Epave (navire, avion, sous-marin, tank)",,True,10,
,epave-dotation-de-bord,"Dotation de bord",epave,True,20,
,epave-cargaison,"Cargaison",epave,True,30,
,ameublement,"Ameublement (intérieurs habitation et/ou épave)",,True,40,
,ameublement-meuble-et-accessoires,"Meuble et accessoires",ameublement,True,50,
,ameublement-meuble-et-accessoires-meuble,"Meuble",ameublement-meuble-et-accessoires,True,60,
,ameublement-meuble-et-accessoires-pied-de-meuble,"Pied de meuble",ameublement-meuble-et-accessoires,True,70,
,ameublement-meuble-et-accessoires-coffre,"Coffre",ameublement-meuble-et-accessoires,True,80,
,ancre-et-corps-mort,"Ancre et corps-mort",,True,90,
,ancre-et-corps-mort-ancre-a-jas,"Ancre à jas",ancre-et-corps-mort,True,100,
,ancre-et-corps-mort-jas-dancre,"Jas d'ancre",ancre-et-corps-mort,True,110,
,ancre-et-corps-mort-ancre-en-pierre,"Ancre en pierre",ancre-et-corps-mort,True,120,
"""
def convert(lines, nb_header, with_header=True):
    """Convert hierarchical CSV rows to the text of an Ishtar typology CSV.

    Each column of the source is one level of the hierarchy. An empty cell
    reuses the slug last recorded for its column; a cell starting with "__"
    holds several comma-separated sibling labels. Every new label yields one
    output line whose slug is the parent slug, a dash and the slugified
    label (text from the first "(" on is left out of the slug).

    :param lines: iterable of rows, each row being a list of cell strings
        (for instance a ``csv.reader``)
    :param nb_header: number of leading rows skipped as headers
    :param with_header: when true, the result starts with the Ishtar column
        header line
    :return: the generated lines joined by newline characters, without a
        trailing newline
    """
    # a set is enough to detect already imported slugs (order is not used)
    imported_slugs = set()
    # column index -> slug most recently recorded for this column
    current_result_by_level = {}
    current_idx = 0
    result = []
    if with_header:
        result.append("id,txt_idx,label,parent,available,order,comment")
    for nb_line, line in enumerate(lines):
        if nb_line < nb_header:
            continue
        current_slug, new_slug = "", ""
        has_empty_column = False
        for idx, cell in enumerate(line):
            if has_empty_column:
                # an empty column with nothing to inherit ends the row
                break
            if cell.startswith("__"):  # marvelous source file...
                labels = cell.split(",")
            else:
                labels = [cell]
            for label in labels:
                # cleaning: drop the "|" and "_" tree decorations
                label = label.replace("|", "").replace("_", "").strip()
                if not label:
                    # no data - try to get the data on the same column from
                    # a previous line
                    if idx in current_result_by_level:
                        current_slug = current_result_by_level[idx]
                    else:
                        has_empty_column = True
                    continue
                # do not use what is inside parentheses for the slug
                new_slug = slugify(label.partition("(")[0].strip())
                if current_slug:
                    new_slug = current_slug + "-" + new_slug
                if new_slug in imported_slugs:
                    # already known: only use it as parent for what follows
                    current_slug = new_slug
                    continue
                imported_slugs.add(new_slug)
                current_idx += 1
                result.append(
                    ',{slug},"{label}",{parent},True,{order},'.format(
                        slug=new_slug,
                        # the label is written inside a quoted CSV field, so
                        # embedded double quotes have to be doubled
                        label=label.capitalize().replace('"', '""'),
                        parent=current_slug, order=current_idx * 10)
                )
            # new_slug may still be the one set by a previous column of this
            # row: an empty column is then recorded under that slug, so that
            # following rows can hang deeper levels on it
            if new_slug:
                current_slug = new_slug
                current_result_by_level[idx] = new_slug
    return "\n".join(result)
def test():
    """Run the built-in self-test and print its outcome.

    CSV_TEST is converted and the result is compared line by line with
    EXPECTED_DEST. Every discrepancy is printed (unexpected extra lines,
    differing lines and missing lines); "Test OK!" is printed when there is
    none.
    """
    reader = csv.reader(CSV_TEST.split("\n"))
    # splitlines() gives no empty trailing item, so the number of lines can
    # be compared as well as their content
    expected = EXPECTED_DEST.splitlines()
    got = convert(reader, 1).split("\n")
    got_error = False
    for idx, line in enumerate(got):
        if idx >= len(expected):
            got_error = True
            print("{} - ERROR: nothing expected got: {}".format(idx + 1, line))
        elif expected[idx] != line:
            got_error = True
            print("{} - ERROR: '{}' expected got: '{}'".format(
                idx + 1, expected[idx], line))
    # expected lines that the conversion did not produce at all
    for idx in range(len(got), len(expected)):
        got_error = True
        print("{} - ERROR: '{}' expected got nothing".format(
            idx + 1, expected[idx]))
    if not got_error:
        print("Test OK!")
if __name__ == '__main__':
    # Command line: first argument is a CSV file name or the word "test",
    # the optional second argument is the number of header lines.
    if len(sys.argv) < 2:
        sys.stdout.write(HELP_TEXT)
    else:
        CSV_FILE = sys.argv[1]
        NB_HEADER = 1
        if len(sys.argv) > 2:
            try:
                NB_HEADER = int(sys.argv[2])
            except ValueError:
                # keep the default but tell the user the value was ignored
                sys.stderr.write(
                    "Invalid header number '{}': using {}\n".format(
                        sys.argv[2], NB_HEADER))
        if CSV_FILE == "test":
            test()
        else:
            # newline="" lets the csv module handle line endings itself
            # (needed for quoted fields containing line breaks)
            with open(CSV_FILE, newline="") as csv_file:
                reader = csv.reader(csv_file)
                print(convert(reader, NB_HEADER))
|