From cd774405e525edcf9e47f90eeaacb8cf6d1d53e2 Mon Sep 17 00:00:00 2001 From: Étienne Loks Date: Mon, 27 Feb 2023 13:00:44 +0100 Subject: Import: improve bad encoding detection --- CHANGES.md | 1 + ishtar_common/forms_common.py | 17 +++++++++++++---- 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index 0e383eceb..36813b525 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -16,6 +16,7 @@ Ishtar changelog - Admin - Global variable: edit in table, add import/export in CSV/JSON - Admin: overload index to add sub-section headers - Geo: create/edit form - new openlayers version - add default IGN tiles +- Import: improve bad encoding detection ### Bug fixes ### - find form: remove TAQ/TPQ check diff --git a/ishtar_common/forms_common.py b/ishtar_common/forms_common.py index 7ad89ed3e..f14799f53 100644 --- a/ishtar_common/forms_common.py +++ b/ishtar_common/forms_common.py @@ -213,6 +213,8 @@ class BaseImportForm(IshtarForm, forms.ModelForm): self.fields["imported_file"].validators = [file_size_validator] self._post_init() + BAD_CHARS = ["é", "³", "ô", "Ã\xa0", "é"] + def _clean_csv(self, is_csv=False): imported_file = self.cleaned_data.get("imported_file", None) encoding = self.cleaned_data.get("encoding", None) @@ -220,15 +222,22 @@ class BaseImportForm(IshtarForm, forms.ModelForm): try: if not imported_file.name.lower().endswith(".csv"): if is_csv: - raise AssertionError() + raise ValueError() else: return imported_file.seek(0) reader = csv.reader(StringIO(imported_file.read().decode(encoding))) - for __ in reader: - break + idx = 0 + for row in reader: + for col in row: + for char in self.BAD_CHARS: + if char in col: + raise ValueError() + idx += 1 + if idx >= 200: + break imported_file.seek(0) - except (AssertionError, UnicodeDecodeError) as e: + except (UnicodeDecodeError, ValueError) as e: raise forms.ValidationError( _("This is not a valid CSV file. Check file format and encoding.") ) -- cgit v1.2.3