From 78ceb1e03c29bffc5382da95c8aa7b73e19f55fd Mon Sep 17 00:00:00 2001 From: Raphael Odini Date: Thu, 23 Jan 2025 17:52:45 +0100 Subject: [PATCH 1/6] temp --- api/views/purchaseimport.py | 24 +++++++++++++++--------- common/api/validata.py | 14 ++++++++++++++ 2 files changed, 29 insertions(+), 9 deletions(-) create mode 100644 common/api/validata.py diff --git a/api/views/purchaseimport.py b/api/views/purchaseimport.py index 6534f2b10..48ebd10d1 100644 --- a/api/views/purchaseimport.py +++ b/api/views/purchaseimport.py @@ -16,6 +16,7 @@ from api.permissions import IsAuthenticated from api.serializers import PurchaseSerializer +from common.api.validata import validate_file_against_schema from common.utils import file_import from common.utils.siret import normalise_siret from data.models import Canteen, ImportFailure, ImportType, Purchase @@ -41,8 +42,11 @@ def __init__(self, **kwargs): self.is_duplicate_file = False self.duplicate_purchases = [] self.duplicate_purchase_count = 0 - self.data_schema = json.load(open("data/schemas/imports/achats.json")) - self.expected_header = [field["name"] for field in self.data_schema["fields"]] + self.schema_url = ( + "https://raw.githubusercontent.com/betagouv/ma-cantine/refs/heads/staging/data/schemas/imports/achats.json" + ) + self.schema_json = json.load(open("data/schemas/imports/achats.json")) + self.expected_header = [field["name"] for field in self.schema_json["fields"]] super().__init__(**kwargs) def post(self, request): @@ -60,15 +64,17 @@ def post(self, request): file_import.verify_first_line_is_header(self.file, self.dialect, self.expected_header) with transaction.atomic(): - self._process_file() + res = validate_file_against_schema() + print(res) + # self._process_file() - # If at least an error has been detected, we raise an error to interrupt the - # transaction and rollback the insertion of any data - if self.errors: - raise IntegrityError() + # # If at least an error has been detected, we raise an error to interrupt the + # # transaction and rollback the insertion of any data + # if self.errors: + # raise IntegrityError() - # Update all purchases's import source with file digest - Purchase.objects.filter(import_source=self.tmp_id).update(import_source=self.file_digest) + # # Update all purchases's import source with file digest + # Purchase.objects.filter(import_source=self.tmp_id).update(import_source=self.dialect) return self._get_success_response() diff --git a/common/api/validata.py b/common/api/validata.py new file mode 100644 index 000000000..f8ea315db --- /dev/null +++ b/common/api/validata.py @@ -0,0 +1,14 @@ +import requests + + +def validate_file_against_schema(self): + # Reset the file pointer to the beginning + self.file.seek(0) + response = requests.post( + "https://api.validata.etalab.studio/validate", + files={ + "file": ("file.csv", self.file.read(), self.file.content_type), + }, + data={"schema": self.schema_url, "header_case": True}, + ) + return response.json()["report"] From ceeba0c4353491671fcad60f60b6d586b0c5cb26 Mon Sep 17 00:00:00 2001 From: Raphael Odini Date: Fri, 24 Jan 2025 19:41:46 +0100 Subject: [PATCH 2/6] _process_validata_errors --- api/views/purchaseimport.py | 22 ++++++++++++---------- common/api/validata.py | 21 +++++++++++++++++---- 2 files changed, 29 insertions(+), 14 deletions(-) diff --git a/api/views/purchaseimport.py b/api/views/purchaseimport.py index 48ebd10d1..2321eed98 100644 --- a/api/views/purchaseimport.py +++ b/api/views/purchaseimport.py @@ -16,7 +16,7 @@ from api.permissions import IsAuthenticated from api.serializers import PurchaseSerializer -from common.api.validata import validate_file_against_schema +from common.api.validata import process_errors, validate_file_against_schema from common.utils import file_import from common.utils.siret import normalise_siret from data.models import Canteen, ImportFailure, ImportType, Purchase @@ -63,18 +63,20 @@ def post(self, request): self.dialect = file_import.get_csv_file_dialect(self.file) file_import.verify_first_line_is_header(self.file, self.dialect, self.expected_header) + report = validate_file_against_schema(self.file, self.schema_url) + self.errors = process_errors(report) + print(self.errors) + with transaction.atomic(): - res = validate_file_against_schema() - print(res) - # self._process_file() + self._process_file() - # # If at least an error has been detected, we raise an error to interrupt the - # # transaction and rollback the insertion of any data - # if self.errors: - # raise IntegrityError() + # If at least an error has been detected, we raise an error to interrupt the + # transaction and rollback the insertion of any data + if self.errors: + raise IntegrityError() - # # Update all purchases's import source with file digest - # Purchase.objects.filter(import_source=self.tmp_id).update(import_source=self.dialect) + # Update all purchases's import source with file digest + Purchase.objects.filter(import_source=self.tmp_id).update(import_source=self.digest) return self._get_success_response() diff --git a/common/api/validata.py b/common/api/validata.py index f8ea315db..601d6006a 100644 --- a/common/api/validata.py +++ b/common/api/validata.py @@ -1,14 +1,27 @@ import requests -def validate_file_against_schema(self): +def validate_file_against_schema(file, schema_url): # Reset the file pointer to the beginning - self.file.seek(0) + file.seek(0) response = requests.post( "https://api.validata.etalab.studio/validate", files={ - "file": ("file.csv", self.file.read(), self.file.content_type), + "file": ("file.csv", file.read(), file.content_type), }, - data={"schema": self.schema_url, "header_case": True}, + data={"schema": schema_url, "header_case": True}, ) return response.json()["report"] + + +def process_errors(report): + errors = [] + for error in report["tasks"][0]["errors"]: + errors.append( + { + "row": error["rowNumber"], + "status": 400, + "message": f"Champ '{error['fieldName']}' : Valeur « {error['cell']} » : {error['message']}", + } + ) + return errors From 4ac8f5f77f01838fda929d46c0cab288c0f44e9e Mon Sep 17 00:00:00 2001 From: Raphael Odini Date: Thu, 30 Jan 2025 12:09:00 +0100 Subject: [PATCH 3/6] =?UTF-8?q?Plus=20besoin=20de=20g=C3=A9rer=20le=20head?= =?UTF-8?q?er?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- api/views/purchaseimport.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/api/views/purchaseimport.py b/api/views/purchaseimport.py index 2321eed98..1023eeca7 100644 --- a/api/views/purchaseimport.py +++ b/api/views/purchaseimport.py @@ -54,6 +54,8 @@ def post(self, request): logger.info("Purchase bulk import started") try: self.file = request.data["file"] + + # Step 1: Format validation file_import.validate_file_size(self.file) file_import.validate_file_format(self.file) @@ -63,10 +65,13 @@ def post(self, request): self.dialect = file_import.get_csv_file_dialect(self.file) file_import.verify_first_line_is_header(self.file, self.dialect, self.expected_header) + # Step 2: Schema validation (Validata) report = validate_file_against_schema(self.file, self.schema_url) self.errors = process_errors(report) - print(self.errors) + if len(self.errors): + return self._get_success_response() + # Step 3: ma-cantine validation (permissions, last checks...) + import with transaction.atomic(): self._process_file() From 4a10acf582439729d348fec2e5556a3cd455efae Mon Sep 17 00:00:00 2001 From: Raphael Odini Date: Thu, 30 Jan 2025 13:07:10 +0100 Subject: [PATCH 4/6] =?UTF-8?q?Enrichir=20la=20r=C3=A9ponse=20validata?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- api/tests/test_import_purchases.py | 12 +++++------- api/views/purchaseimport.py | 9 --------- common/api/validata.py | 4 +++- 3 files changed, 8 insertions(+), 17 deletions(-) diff --git a/api/tests/test_import_purchases.py b/api/tests/test_import_purchases.py index 5243223bb..5226df5ab 100644 --- a/api/tests/test_import_purchases.py +++ b/api/tests/test_import_purchases.py @@ -232,16 +232,14 @@ def test_import_bad_purchases(self): self.assertEqual(response.status_code, status.HTTP_200_OK) self.assertEqual(Purchase.objects.count(), 0) errors = response.json()["errors"] - self.assertEqual(errors.pop(0)["message"], "Champ 'siret' : Le siret de la cantine ne peut pas être vide") + self.assertEqual(errors.pop(0)["message"], "Une valeur doit être renseignée") self.assertEqual( errors.pop(0)["message"], "Une cantine avec le siret « 86180597100897 » n'existe pas sur la plateforme." ) self.assertEqual(errors.pop(0)["message"], "Vous n'êtes pas un gestionnaire de cette cantine.") - self.assertEqual( - errors.pop(0)["message"], "Champ 'description du produit' : La description ne peut pas être vide" - ) - self.assertEqual(errors.pop(0)["message"], "Champ 'fournisseur' : Le fournisseur ne peut pas être vide") - self.assertEqual(errors.pop(0)["message"], "Champ 'date' : La date ne peut pas être vide") + self.assertEqual(errors.pop(0)["message"], "Une valeur doit être renseignée") + self.assertEqual(errors.pop(0)["message"], "Une valeur doit être renseignée") + self.assertEqual(errors.pop(0)["message"], "Une valeur doit être renseignée") self.assertEqual( errors.pop(0)["message"], "Champ 'date' : Le format de date de la valeur «\xa02022-02-31\xa0» est correct (AAAA-MM-JJ), mais la date n’est pas valide.", @@ -250,7 +248,7 @@ def test_import_bad_purchases(self): errors.pop(0)["message"], "Champ 'date' : Le format de date de la valeur «\xa02022/03/01\xa0» n’est pas valide. Le format correct est AAAA-MM-JJ.", ) - self.assertEqual(errors.pop(0)["message"], "Champ 'prix HT' : Le prix ne peut pas être vide") + self.assertEqual(errors.pop(0)["message"], "Une valeur doit être renseignée") self.assertEqual( errors.pop(0)["message"], "Champ 'prix HT' : La valeur «\xa0A price\xa0» doit être un nombre décimal." ) diff --git a/api/views/purchaseimport.py b/api/views/purchaseimport.py index 1023eeca7..1137a2076 100644 --- a/api/views/purchaseimport.py +++ b/api/views/purchaseimport.py @@ -188,18 +188,9 @@ def _create_purchase_for_canteen(self, siret, row): raise PermissionDenied(detail="Vous n'êtes pas un gestionnaire de cette cantine.") description = row.pop(0) - if description == "": - raise ValidationError({"description": "La description ne peut pas être vide"}) provider = row.pop(0) - if provider == "": - raise ValidationError({"provider": "Le fournisseur ne peut pas être vide"}) date = row.pop(0) - if date == "": - raise ValidationError({"date": "La date ne peut pas être vide"}) - price = row.pop(0).strip().replace(",", ".") - if price == "": - raise ValidationError({"price_ht": "Le prix ne peut pas être vide"}) # We try to round the price. If we can't, we will let Django's field validation # manage the error - hence the `pass` in the exception handler diff --git a/common/api/validata.py b/common/api/validata.py index 601d6006a..b1903d4b8 100644 --- a/common/api/validata.py +++ b/common/api/validata.py @@ -21,7 +21,9 @@ def process_errors(report): { "row": error["rowNumber"], "status": 400, - "message": f"Champ '{error['fieldName']}' : Valeur « {error['cell']} » : {error['message']}", + "column": error["fieldName"], + "cell": error["cell"], + "message": error["message"], } ) return errors From 22c23156f25285844abb78473385ebdb238bb33f Mon Sep 17 00:00:00 2001 From: Raphael Odini Date: Mon, 3 Feb 2025 17:07:34 +0100 Subject: [PATCH 5/6] fix --- api/views/purchaseimport.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/api/views/purchaseimport.py b/api/views/purchaseimport.py index 1137a2076..1477e6ea7 100644 --- a/api/views/purchaseimport.py +++ b/api/views/purchaseimport.py @@ -81,7 +81,7 @@ def post(self, request): raise IntegrityError() # Update all purchases's import source with file digest - Purchase.objects.filter(import_source=self.tmp_id).update(import_source=self.digest) + Purchase.objects.filter(import_source=self.tmp_id).update(import_source=self.file_digest) return self._get_success_response() From b9210ab44a237e8c83b322526e3b8aebebae04f1 Mon Sep 17 00:00:00 2001 From: Raphael Odini Date: Mon, 3 Feb 2025 17:08:03 +0100 Subject: [PATCH 6/6] fix tests --- .../purchases_good_separator_semicolon.csv | 2 +- api/tests/test_import_failures.py | 2 +- api/tests/test_import_purchases.py | 44 +++++++++---------- api/views/purchaseimport.py | 1 + 4 files changed, 24 insertions(+), 25 deletions(-) diff --git a/api/tests/files/achats/purchases_good_separator_semicolon.csv b/api/tests/files/achats/purchases_good_separator_semicolon.csv index 3e8c7fb0f..e56f30f77 100644 --- a/api/tests/files/achats/purchases_good_separator_semicolon.csv +++ b/api/tests/files/achats/purchases_good_separator_semicolon.csv @@ -1,2 +1,2 @@ siret;description;fournisseur;date;prix_ht;famille_produits;caracteristiques;definition_local -82399356058716;"Pommes vertes"; Le bon traiteur;2022-05-02; 90.11; PRODUITS_LAITIERS;"BIO, LOCAL"; DEPARTMENT +82399356058716;"Pommes vertes"; Le bon traiteur;2022-05-02;90.11; PRODUITS_LAITIERS;"BIO, LOCAL"; DEPARTMENT diff --git a/api/tests/test_import_failures.py b/api/tests/test_import_failures.py index 03b8c3d4b..d79bacca1 100644 --- a/api/tests/test_import_failures.py +++ b/api/tests/test_import_failures.py @@ -110,7 +110,7 @@ def test_purchase_import_file_too_big(self): self._assertImportFailureCreated(authenticate.user, ImportType.PURCHASE, file_path) @authenticate - def test_purchase_import_erros(self): + def test_purchase_import_errors(self): CanteenFactory.create(siret="82399356058716", managers=[authenticate.user]) CanteenFactory.create(siret="36462492895701") file_path = "./api/tests/files/achats/purchases_bad.csv" diff --git a/api/tests/test_import_purchases.py b/api/tests/test_import_purchases.py index 5226df5ab..cc13cefc6 100644 --- a/api/tests/test_import_purchases.py +++ b/api/tests/test_import_purchases.py @@ -146,7 +146,7 @@ def test_import_purchases_different_separators(self): with open("./api/tests/files/achats/purchases_good_separator_semicolon.csv") as purchase_file: response = self.client.post(reverse("import_purchases"), {"file": purchase_file}) self.assertEqual(response.status_code, status.HTTP_200_OK) - self.assertEqual(Purchase.objects.count(), 2) + self.assertEqual(Purchase.objects.count(), 1 + 1) @authenticate @override_settings(CSV_PURCHASE_CHUNK_LINES=1) @@ -232,41 +232,39 @@ def test_import_bad_purchases(self): self.assertEqual(response.status_code, status.HTTP_200_OK) self.assertEqual(Purchase.objects.count(), 0) errors = response.json()["errors"] - self.assertEqual(errors.pop(0)["message"], "Une valeur doit être renseignée") - self.assertEqual( - errors.pop(0)["message"], "Une cantine avec le siret « 86180597100897 » n'existe pas sur la plateforme." - ) - self.assertEqual(errors.pop(0)["message"], "Vous n'êtes pas un gestionnaire de cette cantine.") - self.assertEqual(errors.pop(0)["message"], "Une valeur doit être renseignée") - self.assertEqual(errors.pop(0)["message"], "Une valeur doit être renseignée") - self.assertEqual(errors.pop(0)["message"], "Une valeur doit être renseignée") + self.assertEqual(errors.pop(0)["message"], "Une valeur doit être renseignée.") + # self.assertEqual( + # errors.pop(0)["message"], "Une cantine avec le siret « 86180597100897 » n'existe pas sur la plateforme." + # ) + # self.assertEqual(errors.pop(0)["message"], "Vous n'êtes pas un gestionnaire de cette cantine.") + self.assertEqual(errors.pop(0)["message"], "Une valeur doit être renseignée.") + self.assertEqual(errors.pop(0)["message"], "Une valeur doit être renseignée.") + self.assertEqual(errors.pop(0)["message"], "Une valeur doit être renseignée.") self.assertEqual( errors.pop(0)["message"], - "Champ 'date' : Le format de date de la valeur «\xa02022-02-31\xa0» est correct (AAAA-MM-JJ), mais la date n’est pas valide.", + "La date doit être écrite sous la forme `aaaa-mm-jj`.", ) self.assertEqual( errors.pop(0)["message"], - "Champ 'date' : Le format de date de la valeur «\xa02022/03/01\xa0» n’est pas valide. Le format correct est AAAA-MM-JJ.", + "La date doit être écrite sous la forme `aaaa-mm-jj`.", ) - self.assertEqual(errors.pop(0)["message"], "Une valeur doit être renseignée") - self.assertEqual( - errors.pop(0)["message"], "Champ 'prix HT' : La valeur «\xa0A price\xa0» doit être un nombre décimal." + self.assertEqual(errors.pop(0)["message"], "Une valeur doit être renseignée.") + self.assertTrue( + errors.pop(0)["message"].startswith("A price ne respecte pas le motif imposé (expression régulière") ) - self.assertEqual( - errors.pop(0)["message"], - "Champ 'famille de produits' : La valeur «\xa0'NOPE'\xa0» n’est pas un choix valide.", + self.assertTrue( + errors.pop(0)["message"].startswith("NOPE ne respecte pas le motif imposé (expression régulière"), ) - self.assertEqual( - errors.pop(0)["message"], - "Champ 'caractéristiques' : L'élément n°2 du tableau n’est pas valide\xa0: La valeur «\xa0'NOPE'\xa0» n’est pas un choix valide.", + self.assertTrue( + errors.pop(0)["message"].startswith("BIO,NOPE ne respecte pas le motif imposé (expression régulière"), ) self.assertEqual( errors.pop(0)["message"], - "Champ 'définition de local' : La définition de local est obligatoire pour les produits locaux", + 'Row at position "15" has a missing cell in field "caracteristiques" at position "7"', ) self.assertEqual( errors.pop(0)["message"], - "Format fichier : 8 colonnes attendues, 6 trouvées.", + 'Row at position "15" has a missing cell in field "definition_local" at position "8"', ) @authenticate @@ -350,7 +348,7 @@ def test_import_file_many_errors(self): self.assertEqual(Purchase.objects.count(), 0) body = response.json() self.assertEqual(len(body["errors"]), 30) - self.assertEqual(body["errorCount"], 56) + self.assertEqual(body["errorCount"], 48) @authenticate def test_encoding_autodetect_windows1252(self): diff --git a/api/views/purchaseimport.py b/api/views/purchaseimport.py index 1477e6ea7..8233773bb 100644 --- a/api/views/purchaseimport.py +++ b/api/views/purchaseimport.py @@ -69,6 +69,7 @@ def post(self, request): report = validate_file_against_schema(self.file, self.schema_url) self.errors = process_errors(report) if len(self.errors): + self._log_error("Echec lors de la validation du fichier (schema achats.json - Validata)") return self._get_success_response() # Step 3: ma-cantine validation (permissions, last checks...) + import