diff options
author | Étienne Loks <etienne.loks@iggdrasil.net> | 2023-11-22 21:19:37 +0100 |
---|---|---|
committer | Étienne Loks <etienne.loks@iggdrasil.net> | 2024-02-05 10:51:52 +0100 |
commit | 276f05964f44af908947e57604e904fa2710be0a (patch) | |
tree | 0953bce45016e6363ff60674200d1cb55bbdc9a6 | |
parent | fbc5432ef632082645b3d3961b5332a21f0f1da1 (diff) | |
download | Ishtar-276f05964f44af908947e57604e904fa2710be0a.tar.bz2 Ishtar-276f05964f44af908947e57604e904fa2710be0a.zip |
🐛 import - extract data from xls, ods, etc. with pandas instead of LO
-rw-r--r-- | changelog/en/changelog_2022-06-15.md | 3 | ||||
-rw-r--r-- | changelog/fr/changelog_2023-01-25.md | 5 | ||||
-rw-r--r-- | ishtar_common/forms_common.py | 4 | ||||
-rw-r--r-- | ishtar_common/models_imports.py | 30 | ||||
-rw-r--r-- | ishtar_common/utils.py | 16 | ||||
-rw-r--r-- | requirements.txt | 1 |
6 files changed, 40 insertions, 19 deletions
diff --git a/changelog/en/changelog_2022-06-15.md b/changelog/en/changelog_2022-06-15.md index ecf9474a9..14c43fec3 100644 --- a/changelog/en/changelog_2022-06-15.md +++ b/changelog/en/changelog_2022-06-15.md @@ -3,7 +3,8 @@ v4.0.XX - 2099-12-31 ### Features/improvements ### - pre-import forms -- imports : ability to use natively LibreOffice and Excel files (need Libreoffice extension) +- imports : ability to use natively LibreOffice and Excel files +- imports - group: chain multiple imports - imports : refactoring of permission management - imports: allow to exclude some errors messages - imports: allow errors to be pointed out line by line diff --git a/changelog/fr/changelog_2023-01-25.md b/changelog/fr/changelog_2023-01-25.md index de72163f5..f5e958bed 100644 --- a/changelog/fr/changelog_2023-01-25.md +++ b/changelog/fr/changelog_2023-01-25.md @@ -3,7 +3,8 @@ v4.0.XX - 2099-12-31 ### Fonctionnalités/améliorations ### - ajout de formulaire pré-imports -- imports : possibilité d'utiliser nativement les fichiers tableurs LibreOffice et Excel (nécessite l'extension Libreoffice) +- imports : possibilité d'utiliser nativement les fichiers tableurs LibreOffice et Excel +- imports - groupes : chainage d'imports multiples - imports : refonte de la gestion des droits - imports : possibilité d'ignorer certains messages d'erreur - imports : possibilité de pointer les erreurs ligne par ligne @@ -14,7 +15,7 @@ v4.0.XX - 2099-12-31 - réorganisation des champs - amélioration de la présentation - imports - fiche (liens des autres fiches vers la fiche import) -- imports - correspondance : ne pas proposer « Mettre à NULL » +- imports - correspondance : ne pas proposer « Mettre à NULL » quand le champ est obligatoire ### Technique ### - relation de mise à jour entre imports et les éléments principaux diff --git a/ishtar_common/forms_common.py b/ishtar_common/forms_common.py index 993b03766..effe8b722 100644 --- a/ishtar_common/forms_common.py +++ b/ishtar_common/forms_common.py @@ -394,9 +394,7 @@ class NewImportForm(BaseImportForm): raise forms.ValidationError( _('"Associated images" field must be a valid zip file.') ) - types = [".csv"] - if settings.USE_LIBREOFFICE: - types += [".ods", ".xls", ".xlsx", ".xlsm"] + types = [".csv", ".ods", ".xls", ".xlsx", ".xlsm"] self._clean_imported_file(types=types) archive_required = self._need_archive(data) if archive_required and ( diff --git a/ishtar_common/models_imports.py b/ishtar_common/models_imports.py index 6cb95b0ee..b56f69bb4 100644 --- a/ishtar_common/models_imports.py +++ b/ishtar_common/models_imports.py @@ -19,6 +19,7 @@ import csv import datetime +import pandas import random from pathlib import Path @@ -67,6 +68,7 @@ from ishtar_common.model_managers import SlugModelManager from ishtar_common.utils import ( create_slug, + format_int_float, generate_dict_from_list, get_all_related_m2m_objects_with_model, get_session_var, @@ -2116,7 +2118,7 @@ class Import(BaseImport): return self.imported_file def set_imported_values(self): - if not settings.USE_LIBREOFFICE or not self.imported_file or not UnoCalc: + if not self.imported_file: return name = self.imported_file.name.lower() ext = name.split(".")[-1] @@ -2126,25 +2128,21 @@ class Import(BaseImport): media_root = os.path.abspath(settings.MEDIA_ROOT) if not imported_file_path.startswith(media_root): return - uno = UnoCalc() - - calc = uno.open_calc(imported_file_path) - if not calc: - return try: - sheet = uno.get_sheet(calc, (self.importer_type.tab_number or 1) - 1) + data = pandas.read_excel(imported_file_path, + sheet_name=(self.importer_type.tab_number or 1) - 1) except Exception: return + data = data.dropna(how="all") # drop empty rows + if data.empty: + return col_numbers = [c.col_number for c in self.importer_type.columns.all()] if not col_numbers: return last_column = max(col_numbers) filename = ".".join(imported_file_path.split('.')[:-1]) + f"-{random.randint(1, 10000):05d}.csv" - with open(filename, "w") as result_file: - w = csv.writer(result_file) - w.writerows(data for data in uno.sheet_get_data(sheet, last_column=last_column)) - + data.to_csv(filename, index=False, columns=data.columns[range(last_column)], float_format=format_int_float) name = filename[len(media_root):] if name.startswith(os.sep): name = name[1:] @@ -2734,14 +2732,20 @@ class Import(BaseImport): ImportLineError.objects.get_or_create(import_item=self, line=idx) def save(self, *args, **kwargs): - if self.imported_file: + maj_imported_file = False + if getattr(self, "_maj_imported_file", False): + pass + elif self.imported_file: if self._initial_imported_file != self.imported_file.path or not self.imported_values: - self.set_imported_values() + maj_imported_file = True elif self.imported_values: self.imported_values = None super().save(*args, **kwargs) if not getattr(self, "_no_parse_error_file", False): self.parse_error_file() + if maj_imported_file and self.set_imported_values(): + self._maj_imported_file = True + self.save() def pre_delete_import(sender, **kwargs): diff --git a/ishtar_common/utils.py b/ishtar_common/utils.py index 5cc5d0363..cf56ea705 100644 --- a/ishtar_common/utils.py +++ b/ishtar_common/utils.py @@ -30,6 +30,7 @@ import io from jinja2 import Template import locale import math +import numpy import os import random import re @@ -1223,6 +1224,21 @@ def _post_save_geo(sender, **kwargs): return +def format_int_float(values): + """ + Numpy array: format integer with not "." + """ + new_values = [] + for value in values: + if numpy.isnan(value): + new_values.append("") + elif int(value) == value: + new_values.append(str(int(value))) + else: + new_values.append(value) + return new_values + + def create_slug(model, name, slug_attr="slug", max_length=100): base_slug = slugify(name) slug = base_slug[:max_length] diff --git a/requirements.txt b/requirements.txt index aaba07a88..ca92ba526 100644 --- a/requirements.txt +++ b/requirements.txt @@ -64,4 +64,5 @@ django-extensions==3.0.3 # django-debug-toolbar==3.2.4 +pandas==1.1.5 django-axes==5.4.3 |