From 97cb55902c930d01b811093d0ec85e4069c37930 Mon Sep 17 00:00:00 2001 From: ssandhu <93278293+sammy-sandhu@users.noreply.github.com> Date: Thu, 28 Jul 2022 13:06:23 -0700 Subject: [PATCH 1/6] updated mysql-connector-python (#11) --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 09344bd..1f7bd4c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -22,7 +22,7 @@ gunicorn>=20.0.4,<20.1.0 martor>=1.5.8,<1.6.0 -mysql-connector-python>= 8.0.28 +mysql-connector-python >= 8.0.28, <=8.0.29 pandas>=1.3.5,<1.4.0 From 08f03e847a32cffa97391ccc9d79d1587039085a Mon Sep 17 00:00:00 2001 From: JDTobin <90791836+JDTobin@users.noreply.github.com> Date: Thu, 7 Sep 2023 11:02:41 -0400 Subject: [PATCH 2/6] Feature/use new xss (#9) * update model to use xss --------- Co-authored-by: KarenAJ --- README.md | 32 ++--- app/README.md | 113 +++++++++++++++++- app/openlxp_xia/admin.py | 10 +- .../management/utils/xis_client.py | 17 ++- .../management/utils/xss_client.py | 58 +++++---- app/openlxp_xia/migrations/0002_use_xss.py | 48 ++++++++ .../0003_xisconfiguration_xis_api_key.py | 19 +++ .../0004_alter_xiaconfiguration_xss_api.py | 18 +++ ...0005_alter_xisconfiguration_xis_api_key.py | 18 +++ app/openlxp_xia/models.py | 43 ++++--- .../tests/test_xia_models_integration.py | 4 +- app/openlxp_xia/tests/test_xia_models_unit.py | 45 ++++++- app/openlxp_xia/tests/test_xia_utils_unit.py | 49 ++++++-- app/setup.cfg | 3 +- 14 files changed, 386 insertions(+), 91 deletions(-) create mode 100644 app/openlxp_xia/migrations/0002_use_xss.py create mode 100644 app/openlxp_xia/migrations/0003_xisconfiguration_xis_api_key.py create mode 100644 app/openlxp_xia/migrations/0004_alter_xiaconfiguration_xss_api.py create mode 100644 app/openlxp_xia/migrations/0005_alter_xisconfiguration_xis_api_key.py diff --git a/README.md b/README.md index 0ef906e..99f1b42 100644 --- a/README.md +++ b/README.md @@ -25,20 +25,6 @@ The OpenLXP-XIA 
implements five core workflows after extracting metadata from th `Python >=3.7` : Download and install python from here [Python](https://www.python.org/downloads/). -## Environment Variables - -To run this package along with your project, you will need to add the following environment variables to your .env file - - -`BUCKET_NAME` - S3 Bucket name where schema files are stored - -`AWS_ACCESS_KEY_ID` - AWS access keys - -`AWS_SECRET_ACCESS_KEY` - AWS access password - -`AWS_DEFAULT_REGION` - AWS region - - # Installation $ python -m pip install OpenLXP-XIA (use the latest package version) @@ -61,13 +47,12 @@ INSTALLED_APPS = [ `Xis metadata api endpoint`: API endpoint for XIS where metadata will get stored. - Example: + Example: `Xis metadata api endpoint`: http://localhost:8080/api/metadata/ `Xis supplemental api endpoint`: API endpoint for XIS where supplemental metadata will get stored. - Example: - + Example: `Xis supplemental api endpoint`: http://openlxp-xis:8020/api/supplemental-data/ (Note: Replace localhost with the XIS Host) @@ -76,14 +61,17 @@ INSTALLED_APPS = [ 3. `Add xia configuration` : Configure Experience Index Agents(XIA): `Publisher`: Agent Name + + `Xss api`: API endpoint for XSS where schemas will be retrieved from. + + Example: + `Xss api`: https://localhost:8000/api/ - `Source metadata schema`: Schema file name for source metadata validation - - `Source target mapping`: Schema file name for source to target mapping schema file + `Source metadata schema`: Schema iri or name for source metadata validation - `Target metadata schema`: Schema file name for target metadata validation + `Target metadata schema`: Schema iri or name for target metadata validation - (Note: Please make sure to upload schema files in the Experience Schema Server (XSS). In this case, upload schema files into the S3 bucket. ) + (Note: Please make sure to upload schema files in the Experience Schema Server (XSS). ) 4. 
`Add metadata field overwrite`: Here, we can add new fields and their values or overwrite values for existing fields. diff --git a/app/README.md b/app/README.md index 75498af..86b8507 100644 --- a/app/README.md +++ b/app/README.md @@ -1 +1,112 @@ -# openlxp-xia \ No newline at end of file + +# OPENLXP-XIA +## What is it? +OpenLXP XIA is a Python package that provides the validation functionality of extracted metadata from the source. In addition, it helps transform metadata into target metadata and further load it into indexing services. The OpenLXP XIA package does not include the metadata extraction process because extraction methods can be different for different sources. But once metadata gets extracted from the source, OpenLXP-XIA continues the V-T-V-L (Validate-Transform-Validate-Load) cycle. + +The schema files used for validation can be placed on the schema server. Currently, OpenLXP-XIA uses AWS S3 buckets as a schema server. + +Below are the workflows performed by OpenLXP-XIA after package installation. + + +## Workflows +The OpenLXP-XIA implements five core workflows after extracting metadata from the specific source, as follows: + +1. `Validate`: Compares extracted learning experience metadata against the configured source metadata reference schema stored in the Experience Schema Service (XSS). + +2. `Transform`: Transforms extracted+validated source learning experience metadata to the configured target schema using the "XSR-to-Target" transformation map stored in the Experience Schema Service (XSS). + +3. `Validate`: Compares transformed learning experience metadata against the configured target metadata reference schema stored in the Experience Schema Service (XSS). + +4. `Load`: Pushes transformed and validated learning experience metadata to the target Experience Index Service (XIS) for further processing. + +5. `Log`: Records error, warning, informational, and debug events which can be reviewed and monitored.
+ +## Prerequisites +`Python >=3.7` : Download and install python from here [Python](https://www.python.org/downloads/). + + +## Installation + + $ python -m pip install OpenLXP-XIA (use the latest package version) + +Add OpenLXP-XIA in the settings.py in your project. + +INSTALLED_APPS = [ + ... + + 'openlxp_xia', + + .... +] + +## Configuration + +1. On the Admin page, log in with the admin credentials + +2. `Add xis configuration`: Configure Experience Index Services (XIS): + + `Xis metadata api endpoint`: API endpoint for XIS where metadata will get stored. + + Example: + `Xis metadata api endpoint`: http://localhost:8080/api/metadata/ + + `Xis supplemental api endpoint`: API endpoint for XIS where supplemental metadata will get stored. + + Example: + `Xis supplemental api endpoint`: http://openlxp-xis:8020/api/supplemental-data/ + + (Note: Replace localhost with the XIS Host) + + +3. `Add xia configuration` : Configure Experience Index Agents(XIA): + + `Publisher`: Agent Name + + `Xss api`: API endpoint for XSS where schemas will be retrieved from. + + Example: + `Xss api`: https://localhost:8000/api/ + + `Source metadata schema`: Schema iri or name for source metadata validation + + `Target metadata schema`: Schema iri or name for target metadata validation + + (Note: Please make sure to upload schema files to the Experience Schema Service (XSS).) + + +4. `Add metadata field overwrite`: Here, we can add new fields and their values or overwrite values for existing fields. + + `Field name`: Add new or existing field Name + + `Field type`: Add data type of the field + + `Field value`: Add corresponding value + + `Overwrite`: Check the box if existing values need to be overwritten. + +## Running ETL Pipeline: + +The ETL (Extract-Transform-Load), or EVTVL (Extract-Validate-Transform-Validate-Load), pipeline can be run in two ways: + +1. Through API Endpoint: +To run ETL tasks, call the API below: + + http://localhost:8000/api/xia-workflow +(Note: Replace localhost with the XIA host) + +2.
Periodically through celery beat: + On the admin page, add a periodic task and its schedule. The celery task will run at the selected time interval. + + +## Logs +To check the running of celery tasks, check the logs of the application and celery containers. + +## Documentation + +## Troubleshooting + + +## License + + This project uses the [MIT](http://www.apache.org/licenses/LICENSE-2.0) license. + diff --git a/app/openlxp_xia/admin.py b/app/openlxp_xia/admin.py index 29951a1..ec7212a 100644 --- a/app/openlxp_xia/admin.py +++ b/app/openlxp_xia/admin.py @@ -22,13 +22,11 @@ def unmarked_default(MetadataFieldOverwriteAdmin, request, queryset): @admin.register(XIAConfiguration) class XIAConfigurationAdmin(admin.ModelAdmin): list_display = ( - 'publisher', + 'publisher', 'xss_api', 'source_metadata_schema', - 'source_target_mapping', 'target_metadata_schema',) - fields = ['publisher', - 'source_metadata_schema', - ('source_target_mapping', + fields = ['publisher', 'xss_api', + ('source_metadata_schema', 'target_metadata_schema')] def delete_queryset(self, request, queryset): @@ -42,7 +40,7 @@ class XISConfigurationAdmin(admin.ModelAdmin): list_display = ('xis_metadata_api_endpoint', 'xis_supplemental_api_endpoint',) fields = ['xis_metadata_api_endpoint', - 'xis_supplemental_api_endpoint'] + 'xis_supplemental_api_endpoint', 'xis_api_key'] @admin.register(MetadataFieldOverwrite) diff --git a/app/openlxp_xia/management/utils/xis_client.py b/app/openlxp_xia/management/utils/xis_client.py index 9a6298e..b0173b8 100644 --- a/app/openlxp_xia/management/utils/xis_client.py +++ b/app/openlxp_xia/management/utils/xis_client.py @@ -1,6 +1,7 @@ import logging import requests +from requests.auth import AuthBase from openlxp_xia.models import XISConfiguration @@ -32,7 +33,8 @@ def posting_metadata_ledger_to_xis(renamed_data): headers = {'Content-Type': 'application/json'} xis_response = requests.post(url=get_xis_metadata_api_endpoint(), - data=renamed_data,
headers=headers, + auth=TokenAuth()) return xis_response @@ -43,5 +45,16 @@ def posting_supplemental_metadata_to_xis(renamed_data): xis_response = requests.post( url=get_xis_supplemental_metadata_api_endpoint(), data=renamed_data, - headers=headers) + headers=headers, auth=TokenAuth()) return xis_response + + +class TokenAuth(AuthBase): + """Attaches HTTP Authentication Header to the given Request object.""" + + def __call__(self, r, token_name='token'): + # modify and return the request + + r.headers['Authorization'] = token_name + ' ' + \ + XISConfiguration.objects.first().xis_api_key + return r diff --git a/app/openlxp_xia/management/utils/xss_client.py b/app/openlxp_xia/management/utils/xss_client.py index 845fb32..1580183 100644 --- a/app/openlxp_xia/management/utils/xss_client.py +++ b/app/openlxp_xia/management/utils/xss_client.py @@ -1,8 +1,6 @@ -import json import logging -import os -import boto3 +import requests from openlxp_xia.management.utils.xia_internal import dict_flatten from openlxp_xia.models import XIAConfiguration @@ -10,21 +8,35 @@ logger = logging.getLogger('dict_config_logger') -def get_aws_bucket_name(): - """function returns the source bucket name""" - bucket = os.environ.get('BUCKET_NAME') - return bucket - - -def read_json_data(file_name): - """Setting file path for json files and ingesting as dictionary values """ - s3 = boto3.resource('s3') - bucket_name = get_aws_bucket_name() - # Read json file and store as a dictionary for processing - json_path = s3.Object(bucket_name, file_name) - json_content = json_path.get()['Body'].read().decode('utf-8') - data_dict = json.loads(json_content) - return data_dict +def xss_get(): + """Function to get xss configuration value""" + conf = XIAConfiguration.objects.first() + return conf.xss_api + + +def read_json_data(source_schema_ref, target_schema_ref=None): + """get schema from xss and ingest as dictionary values""" + xss_host = xss_get() + request_path = xss_host + if(target_schema_ref is not 
None): + if(target_schema_ref.startswith('xss:')): + request_path += 'mappings/?targetIRI=' + target_schema_ref + else: + request_path += 'mappings/?targetName=' + target_schema_ref + if(source_schema_ref.startswith('xss:')): + request_path += '&sourceIRI=' + source_schema_ref + else: + request_path += '&sourceName=' + source_schema_ref + schema = requests.get(request_path, verify=True) + json_content = schema.json()['schema_mapping'] + else: + if(source_schema_ref.startswith('xss:')): + request_path += 'schemas/?iri=' + source_schema_ref + else: + request_path += 'schemas/?name=' + source_schema_ref + schema = requests.get(request_path, verify=True) + json_content = schema.json()['schema'] + return json_content def get_source_validation_schema(): @@ -108,10 +120,12 @@ def get_target_metadata_for_transformation(): """Retrieve target metadata schema from XIA configuration """ logger.info("Configuration of schemas and files for transformation") xia_data = XIAConfiguration.objects.first() - target_metadata_schema = xia_data.source_target_mapping - if not target_metadata_schema: - logger.warning("Target metadata schema field name is empty!") + target_metadata_schema = xia_data.target_metadata_schema + source_metadata_schema = xia_data.source_metadata_schema + if not target_metadata_schema or not source_metadata_schema: + logger.warning("Metadata schema field name is empty!") logger.info("Reading schema for transformation") # Read source transformation schema as dictionary - target_mapping_dict = read_json_data(target_metadata_schema) + target_mapping_dict = read_json_data( + source_metadata_schema, target_metadata_schema) return target_mapping_dict diff --git a/app/openlxp_xia/migrations/0002_use_xss.py b/app/openlxp_xia/migrations/0002_use_xss.py new file mode 100644 index 0000000..62d1de2 --- /dev/null +++ b/app/openlxp_xia/migrations/0002_use_xss.py @@ -0,0 +1,48 @@ +# Generated by Django 3.2.13 on 2022-06-28 13:49 + +from django.db import migrations, models + + 
+class Migration(migrations.Migration): + + dependencies = [ + ('openlxp_xia', '0001_initial'), + ] + + operations = [ + migrations.RemoveField( + model_name='xiaconfiguration', + name='source_target_mapping', + ), + migrations.AddField( + model_name='xiaconfiguration', + name='xss_api', + field=models.URLField(default='https://localhost', help_text='Enter the XSS API'), + preserve_default=False, + ), + migrations.AlterField( + model_name='metadatafieldoverwrite', + name='id', + field=models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID'), + ), + migrations.AlterField( + model_name='xiaconfiguration', + name='id', + field=models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID'), + ), + migrations.AlterField( + model_name='xiaconfiguration', + name='source_metadata_schema', + field=models.CharField(help_text='Enter the schema name/IRI', max_length=200), + ), + migrations.AlterField( + model_name='xiaconfiguration', + name='target_metadata_schema', + field=models.CharField(help_text='Enter the target schema name/IRI to validate from.', max_length=200), + ), + migrations.AlterField( + model_name='xisconfiguration', + name='id', + field=models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID'), + ), + ] diff --git a/app/openlxp_xia/migrations/0003_xisconfiguration_xis_api_key.py b/app/openlxp_xia/migrations/0003_xisconfiguration_xis_api_key.py new file mode 100644 index 0000000..46673e7 --- /dev/null +++ b/app/openlxp_xia/migrations/0003_xisconfiguration_xis_api_key.py @@ -0,0 +1,19 @@ +# Generated by Django 3.2.13 on 2023-05-17 13:46 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('openlxp_xia', '0002_use_xss'), + ] + + operations = [ + migrations.AddField( + model_name='xisconfiguration', + name='xis_api_key', + field=models.CharField(default='INVALID KEY', help_text='Enter the XIS API Key', 
max_length=40), + preserve_default=False, + ), + ] diff --git a/app/openlxp_xia/migrations/0004_alter_xiaconfiguration_xss_api.py b/app/openlxp_xia/migrations/0004_alter_xiaconfiguration_xss_api.py new file mode 100644 index 0000000..3f42541 --- /dev/null +++ b/app/openlxp_xia/migrations/0004_alter_xiaconfiguration_xss_api.py @@ -0,0 +1,18 @@ +# Generated by Django 3.2.13 on 2023-05-17 20:55 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('openlxp_xia', '0003_xisconfiguration_xis_api_key'), + ] + + operations = [ + migrations.AlterField( + model_name='xiaconfiguration', + name='xss_api', + field=models.CharField(help_text='Enter the XSS API', max_length=200), + ), + ] diff --git a/app/openlxp_xia/migrations/0005_alter_xisconfiguration_xis_api_key.py b/app/openlxp_xia/migrations/0005_alter_xisconfiguration_xis_api_key.py new file mode 100644 index 0000000..ba50e60 --- /dev/null +++ b/app/openlxp_xia/migrations/0005_alter_xisconfiguration_xis_api_key.py @@ -0,0 +1,18 @@ +# Generated by Django 3.2.13 on 2023-07-14 16:07 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('openlxp_xia', '0004_alter_xiaconfiguration_xss_api'), + ] + + operations = [ + migrations.AlterField( + model_name='xisconfiguration', + name='xis_api_key', + field=models.CharField(help_text='Enter the XIS API Key', max_length=128), + ), + ] diff --git a/app/openlxp_xia/models.py b/app/openlxp_xia/models.py index e3bb0d5..96ee921 100644 --- a/app/openlxp_xia/models.py +++ b/app/openlxp_xia/models.py @@ -1,9 +1,7 @@ -import json import logging -import os import uuid -import boto3 +import requests from django.db import models from django.forms import ValidationError from django.urls import reverse @@ -16,16 +14,13 @@ class XIAConfiguration(TimeStampedModel): """Model for XIA Configuration """ publisher = models.CharField(max_length=200, help_text='Enter the publisher name') + 
xss_api = models.CharField(help_text='Enter the XSS API', max_length=200) source_metadata_schema = models.CharField(max_length=200, help_text='Enter the ' - 'schema file') - source_target_mapping = models.CharField(max_length=200, - help_text='Enter the schema ' - 'file to map ' - 'target.') + 'schema name/IRI') target_metadata_schema = models.CharField(max_length=200, help_text='Enter the target ' - 'schema file to ' + 'schema name/IRI to ' 'validate from.') source_file = models.FileField(help_text='Upload the source ' 'file') @@ -42,17 +37,26 @@ def field_overwrite(self): # Deleting the corresponding existing value to overwrite MetadataFieldOverwrite.objects.all().delete() # get required columns list from schema files - s3 = boto3.resource('s3') - bucket_name = os.environ.get('BUCKET_NAME') + conf = self.xss_api # Read json file and store as a dictionary for processing - mapping_path = s3.Object(bucket_name, self.source_target_mapping) - mapping_content = mapping_path.get()['Body'].read().decode('utf-8') - mapping = json.loads(mapping_content) + request_path = conf + if (self.target_metadata_schema.startswith('xss:')): + request_path += 'schemas/?iri=' + self.target_metadata_schema + conf += 'mappings/?targetIRI=' + self.target_metadata_schema + else: + request_path += 'schemas/?name=' + self.target_metadata_schema + conf += 'mappings/?targetName=' + self.target_metadata_schema + schema = requests.get(request_path, verify=True) + target = schema.json()['schema'] # Read json file and store as a dictionary for processing - target_path = s3.Object(bucket_name, self.target_metadata_schema) - target_content = target_path.get()['Body'].read().decode('utf-8') - target = json.loads(target_content) + request_path = conf + if (self.source_metadata_schema.startswith('xss:')): + request_path += '&sourceIRI=' + self.source_metadata_schema + else: + request_path += '&sourceName=' + self.source_metadata_schema + schema = requests.get(request_path, verify=True) + mapping = 
schema.json()['schema_mapping'] # saving required column values to be overwritten for section in target: @@ -111,6 +115,11 @@ class XISConfiguration(TimeStampedModel): max_length=200 ) + xis_api_key = models.CharField( + help_text="Enter the XIS API Key", + max_length=128 + ) + def save(self, *args, **kwargs): if not self.pk and XISConfiguration.objects.exists(): raise ValidationError('There can be only one XISConfiguration ' diff --git a/app/openlxp_xia/tests/test_xia_models_integration.py b/app/openlxp_xia/tests/test_xia_models_integration.py index e54363d..640dda2 100644 --- a/app/openlxp_xia/tests/test_xia_models_integration.py +++ b/app/openlxp_xia/tests/test_xia_models_integration.py @@ -17,12 +17,12 @@ def test_create_two_xia_configuration(self): xiaConfig = XIAConfiguration( publisher="XYZ", source_metadata_schema="XYZ_source_validate_schema.json", - source_target_mapping="XYZ_target_metadata_schema.json", + xss_api="https://localhost", target_metadata_schema="p2881_schema.json") xiaConfig2 = XIAConfiguration( publisher="ABC", source_metadata_schema="ABC_source_validate_schema.json", - source_target_mapping="ABC_target_metadata_schema.json", + xss_api="https://localhost", target_metadata_schema="p2881_schema.json") xiaConfig.save() xiaConfig2.save() diff --git a/app/openlxp_xia/tests/test_xia_models_unit.py b/app/openlxp_xia/tests/test_xia_models_unit.py index 272393a..54cef18 100644 --- a/app/openlxp_xia/tests/test_xia_models_unit.py +++ b/app/openlxp_xia/tests/test_xia_models_unit.py @@ -16,18 +16,17 @@ def test_create_xia_configuration(self): """Test that creating a new XIA Configuration entry is successful with defaults """ source_metadata_schema = 'test_file.json' - source_target_mapping = 'test_file.json' + xss_api = 'https://localhost' target_metadata_schema = 'test_file.json' xiaConfig = XIAConfiguration( source_metadata_schema=source_metadata_schema, - source_target_mapping=source_target_mapping, + xss_api=xss_api, 
target_metadata_schema=target_metadata_schema) self.assertEqual(xiaConfig.source_metadata_schema, source_metadata_schema) - self.assertEqual(xiaConfig.source_target_mapping, - source_target_mapping) + self.assertEqual(xiaConfig.xss_api, xss_api) self.assertEqual(xiaConfig.target_metadata_schema, target_metadata_schema) @@ -38,15 +37,32 @@ def test_create_two_xia_configuration(self): with self.assertRaises(ValidationError): xiaConfig = \ XIAConfiguration(source_metadata_schema="example1.json", - source_target_mapping="example1.json", + xss_api="https://localhost", target_metadata_schema="example1.json") xiaConfig2 = \ XIAConfiguration(source_metadata_schema="example2.json", - source_target_mapping="example2.json", + xss_api="https://localhost", target_metadata_schema="example2.json") xiaConfig.save() xiaConfig2.save() + def test_xia_field_overwrite(self): + """Test that field_overwrite in an XIA Configuration generates + MetadataFieldOverwrite objects """ + with patch("openlxp_xia.models.requests") as mock: + target_schema = {"schema": { + "start": {"test": {"use": "Required"}}}} + transform_schema = {"schema_mapping": { + "start": {"test": "start.test"}}} + mock.get.return_value = mock + mock.json.side_effect = [target_schema, transform_schema] + xiaConfig = \ + XIAConfiguration(source_metadata_schema="example1.json", + xss_api="https://localhost", + target_metadata_schema="example1.json") + xiaConfig.save() + self.assertEqual(MetadataFieldOverwrite.objects.count(), 1) + def test_create_xis_configuration(self): """Test that creating a new XIS Configuration entry is successful with defaults """ @@ -62,6 +78,23 @@ def test_create_xis_configuration(self): self.assertEqual(xisConfig.xis_supplemental_api_endpoint, xis_supplemental_api_endpoint) + def test_create_two_xis_configuration(self): + """Test that creating trying to create more than one XIS Configuration + throws a ValidationError """ + xis_metadata_api_endpoint = 'http://localhost:8000/api/metadata/' + 
xis_supplemental_api_endpoint = 'http://localhost:8000/api/supplement/' + + xisConfig = XISConfiguration( + xis_metadata_api_endpoint=xis_metadata_api_endpoint, + xis_supplemental_api_endpoint=xis_supplemental_api_endpoint) + xisConfig2 = XISConfiguration( + xis_metadata_api_endpoint=xis_metadata_api_endpoint, + xis_supplemental_api_endpoint=xis_supplemental_api_endpoint) + + with self.assertRaises(ValidationError): + xisConfig.save() + xisConfig2.save() + def test_metadata_ledger(self): """Test for a new Metadata_Ledger entry is successful with defaults""" metadata_record_inactivate_date = timezone.now() diff --git a/app/openlxp_xia/tests/test_xia_utils_unit.py b/app/openlxp_xia/tests/test_xia_utils_unit.py index bb1c883..fc49e53 100644 --- a/app/openlxp_xia/tests/test_xia_utils_unit.py +++ b/app/openlxp_xia/tests/test_xia_utils_unit.py @@ -13,9 +13,9 @@ from openlxp_xia.management.utils.xis_client import ( get_xis_metadata_api_endpoint, get_xis_supplemental_metadata_api_endpoint) from openlxp_xia.management.utils.xss_client import ( - get_aws_bucket_name, get_data_types_for_validation, - get_required_fields_for_validation, get_source_validation_schema, - get_target_metadata_for_transformation, get_target_validation_schema) + get_data_types_for_validation, get_required_fields_for_validation, + get_source_validation_schema, get_target_metadata_for_transformation, + get_target_validation_schema, read_json_data, xss_get) from openlxp_xia.models import XIAConfiguration, XISConfiguration from .test_setup import TestSetUp @@ -410,19 +410,12 @@ def test_get_xis_supplemental_metadata_api_endpoint(self): # Test cases for XSS_CLIENT - def test_get_aws_bucket_name(self): - """Test the function which returns the source bucket name""" - result_bucket = get_aws_bucket_name() - self.assertTrue(result_bucket) - def test_get_source_validation_schema(self): """Test to retrieve source_metadata_schema from XIA configuration""" with patch('openlxp_xia.management.utils.xss_client' 
'.XIAConfiguration.objects') as xdsCfg, \ patch('openlxp_xia.management.utils.xss_client' - '.read_json_data') as read_obj, \ - patch('openlxp_xia.management.utils.xss_client' - '.get_aws_bucket_name', return_value="eccschema"): + '.read_json_data') as read_obj: xiaConfig = XIAConfiguration( source_metadata_schema='AGENT_source_validate_schema.json') xdsCfg.return_value = xiaConfig @@ -470,7 +463,10 @@ def test_get_target_metadata_for_transformation(self): patch('openlxp_xia.management.utils.xss_client' '.read_json_data') as read_obj: xiaConfig = XIAConfiguration( - source_target_mapping='AGENT_p2881_target_metadata_schema.json' + target_metadata_schema='AGENT_p2881_target_metadata_schema' + + '.json', + source_metadata_schema='AGENT_p2881_target_metadata_schema' + + '.json' ) xia_config_obj.return_value = xiaConfig read_obj.return_value = read_obj @@ -478,3 +474,32 @@ def test_get_target_metadata_for_transformation(self): return_from_function = get_target_metadata_for_transformation() self.assertEqual(read_obj.return_value, return_from_function) + + def test_xss_get(self): + """Test for retrieving XSS api root """ + with patch('openlxp_xia.management.utils.xss_client' + '.XIAConfiguration.objects') as xia_config_obj: + xss_api = "http://test_xss_api" + xiaConfig = XIAConfiguration( + target_metadata_schema='AGENT_p2881_target_metadata_schema' + + '.json', + source_metadata_schema='AGENT_p2881_target_metadata_schema' + + '.json', + xss_api=xss_api + ) + xia_config_obj.first.return_value = xiaConfig + + self.assertEqual(xss_get(), xss_api) + + def test_read_json_data(self): + """Test for retrieving XSS json schemas """ + with patch('openlxp_xia.management.utils.xss_client.xss_get') as \ + xss_host, patch('openlxp_xia.management.utils.xss_client.' 
+ 'requests') as req: + xss_api = "http://test_xss_api" + schema = {"schema": {"test": "val"}} + xss_host.return_value = xss_api + req.get.return_value = req + req.json.return_value = schema + + self.assertEqual(read_json_data(""), schema['schema']) diff --git a/app/setup.cfg b/app/setup.cfg index 153348b..e22605f 100644 --- a/app/setup.cfg +++ b/app/setup.cfg @@ -1,9 +1,10 @@ # setup.cfg [metadata] name = openlxp-xia -version = 1.2.12 +version = 1.4.3 description = Sample installable XIA long_description = file:README.md +long_description_content_type = text/markdown url = https://github.com/OpenLXP/openlxp-xia/ author = OpenLXP author_email = openlxphost@gmail.com From 763c13b50fd0be1cbb087ee4b86da7d42392f7ac Mon Sep 17 00:00:00 2001 From: KarenAJ Date: Fri, 13 Oct 2023 11:41:04 -0500 Subject: [PATCH 3/6] Sd updates (#12) * Sd updaes * fixed variables & added checks to supplemental data --- .../management/utils/model_help.py | 45 +++++++++++++++++++ .../migrations/0006_auto_20230907_1642.py | 24 ++++++++++ app/openlxp_xia/models.py | 38 +++++++++++++++- app/setup.cfg | 2 +- requirements.txt | 6 ++- 5 files changed, 111 insertions(+), 4 deletions(-) create mode 100644 app/openlxp_xia/management/utils/model_help.py create mode 100644 app/openlxp_xia/migrations/0006_auto_20230907_1642.py diff --git a/app/openlxp_xia/management/utils/model_help.py b/app/openlxp_xia/management/utils/model_help.py new file mode 100644 index 0000000..e25b9b1 --- /dev/null +++ b/app/openlxp_xia/management/utils/model_help.py @@ -0,0 +1,45 @@ +import bleach +import logging +from confusable_homoglyphs import categories, confusables + +logger = logging.getLogger('dict_config_logger') + + +def bleach_data_to_json(rdata): + """Recursive function to bleach/clean HTML tags from string + data and return dictionary data. + + :param rdata: dictionary to clean. 
+ WARNING rdata will be edited + :return: dict""" + + # iterate over dict + for key in rdata: + # if string, clean + if isinstance(rdata[key], str): + rdata[key] = bleach.clean(rdata[key], tags={}, strip=True) + # if dict, enter dict + if isinstance(rdata[key], dict): + rdata[key] = bleach_data_to_json(rdata[key]) + + return rdata + + +def confusable_homoglyphs_check(data): + """Checks for dangerous homoglyphs.""" + + data_is_safe = True + for key in data: + + # if string, Check homoglyph + if isinstance(data[key], str) and bool(confusables. + is_dangerous(data[key])): + data_is_safe = False + logger.info("Homoglyphs does not have the expected prefered alias") + logger.error(categories.unique_aliases(data[key])) + # if dict, enter dict + if isinstance(data[key], dict): + ret_val = confusable_homoglyphs_check(data[key]) + if not ret_val: + data_is_safe = False + return data_is_safe diff --git a/app/openlxp_xia/migrations/0006_auto_20230907_1642.py b/app/openlxp_xia/migrations/0006_auto_20230907_1642.py new file mode 100644 index 0000000..0a29486 --- /dev/null +++ b/app/openlxp_xia/migrations/0006_auto_20230907_1642.py @@ -0,0 +1,24 @@ +# Generated by Django 3.2.18 on 2023-09-07 16:42 + +import django.core.validators +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('openlxp_xia', '0005_alter_xisconfiguration_xis_api_key'), + ] + + operations = [ + migrations.AlterField( + model_name='metadataledger', + name='source_metadata', + field=models.JSONField(blank=True, validators=[django.core.validators.RegexValidator(message='The Wrong Format Entered', regex='(?!(\\A( \\x09\\x0A\\x0D\\x20-\\x7E # ASCII | \\xC2-\\xDF # non-overlong 2-byte | \\xE0\\xA0-\\xBF # excluding overlongs | \\xE1-\\xEC\\xEE\\xEF{2} # straight 3-byte | \\xED\\x80-\\x9F # excluding surrogates | \\xF0\\x90-\\xBF{2} # planes 1-3 | \\xF1-\\xF3{3} # planes 4-15 | \\xF4\\x80-\\x8F{2} # plane 16 )*\\Z))')]), + ), + migrations.AlterField( + 
model_name='supplementalledger', + name='supplemental_metadata', + field=models.JSONField(blank=True, validators=[django.core.validators.RegexValidator(message='The Wrong Format Entered', regex='(?!(\\A( \\x09\\x0A\\x0D\\x20-\\x7E # ASCII | \\xC2-\\xDF # non-overlong 2-byte | \\xE0\\xA0-\\xBF # excluding overlongs | \\xE1-\\xEC\\xEE\\xEF{2} # straight 3-byte | \\xED\\x80-\\x9F # excluding surrogates | \\xF0\\x90-\\xBF{2} # planes 1-3 | \\xF1-\\xF3{3} # planes 4-15 | \\xF4\\x80-\\x8F{2} # plane 16 )*\\Z))')]), + ), + ] diff --git a/app/openlxp_xia/models.py b/app/openlxp_xia/models.py index 96ee921..9008577 100644 --- a/app/openlxp_xia/models.py +++ b/app/openlxp_xia/models.py @@ -4,12 +4,25 @@ import requests from django.db import models from django.forms import ValidationError +from django.core.validators import RegexValidator from django.urls import reverse +from openlxp_xia.management.utils.model_help import confusable_homoglyphs_check +from openlxp_xia.management.utils.model_help import bleach_data_to_json from model_utils.models import TimeStampedModel logger = logging.getLogger('dict_config_logger') +rcheck = (r'(?!(\A( \x09\x0A\x0D\x20-\x7E # ASCII ' + r'| \xC2-\xDF # non-overlong 2-byte ' + r'| \xE0\xA0-\xBF # excluding overlongs ' + r'| \xE1-\xEC\xEE\xEF{2} # straight 3-byte ' + r'| \xED\x80-\x9F # excluding surrogates ' + r'| \xF0\x90-\xBF{2} # planes 1-3 ' + r'| \xF1-\xF3{3} # planes 4-15 ' + r'| \xF4\x80-\x8F{2} # plane 16 )*\Z))') + + class XIAConfiguration(TimeStampedModel): """Model for XIA Configuration """ publisher = models.CharField(max_length=200, @@ -141,7 +154,12 @@ class MetadataLedger(TimeStampedModel): default=uuid.uuid4, editable=False) record_lifecycle_status = models.CharField( max_length=10, blank=True, choices=RECORD_ACTIVATION_STATUS_CHOICES) - source_metadata = models.JSONField(blank=True) + source_metadata = models.JSONField(blank=True, + validators=[RegexValidator(regex=rcheck, + message="The" + " Wrong " + "Format " + "Entered")]) 
source_metadata_extraction_date = models.DateTimeField(auto_now_add=True) source_metadata_hash = models.CharField(max_length=200) source_metadata_key = models.TextField() @@ -168,6 +186,11 @@ class MetadataLedger(TimeStampedModel): target_metadata_validation_status = models.CharField( max_length=10, blank=True, choices=METADATA_VALIDATION_CHOICES) + def clean(self): + source_data = self.source_metadata + data_checked = confusable_homoglyphs_check(source_data) + self.source_metadata = bleach_data_to_json(data_checked) + class SupplementalLedger(TimeStampedModel): """Model for Supplemental Metadata """ @@ -182,7 +205,13 @@ class SupplementalLedger(TimeStampedModel): default=uuid.uuid4, editable=False) record_lifecycle_status = models.CharField( max_length=10, blank=True, choices=RECORD_ACTIVATION_STATUS_CHOICES) - supplemental_metadata = models.JSONField(blank=True) + supplemental_metadata = models.JSONField(blank=True, + validators=[RegexValidator + (regex=rcheck, + message="The" + " Wrong " + "Format " + "Entered")]) supplemental_metadata_extraction_date = models.DateTimeField( auto_now_add=True) supplemental_metadata_hash = models.CharField(max_length=200) @@ -200,6 +229,11 @@ class SupplementalLedger(TimeStampedModel): supplemental_metadata_transmission_status_code = models.IntegerField( blank=True, null=True) + def clean(self): + supplemental_data = self.supplemental_metadata + data_checked = confusable_homoglyphs_check(supplemental_data) + self.supplemental_metadata = bleach_data_to_json(data_checked) + class MetadataFieldOverwrite(TimeStampedModel): """Model for taking list of fields name and it's values for overwriting diff --git a/app/setup.cfg b/app/setup.cfg index e22605f..3e229cd 100644 --- a/app/setup.cfg +++ b/app/setup.cfg @@ -1,7 +1,7 @@ # setup.cfg [metadata] name = openlxp-xia -version = 1.4.3 +version = 1.4.5 description = Sample installable XIA long_description = file:README.md long_description_content_type = text/markdown diff --git 
a/requirements.txt b/requirements.txt index 1f7bd4c..df94508 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,9 @@ +bleach==6.0.0 + boto3~=1.16.54 +confusable_homoglyphs==3.2.0 + coverage>=5.5,<6.0 ddt~=1.4.2 , <1.5.0 @@ -30,7 +34,7 @@ Pillow >=9.0.0 python-dateutil~=2.8.2 -python-dotenv>=0.19.0,<1.20.0 +python-dotenv==1.0.0 requests~=2.27.1 From 73dadfacf022d12aa79d913a0f3c2edb46047120 Mon Sep 17 00:00:00 2001 From: KarenAJ Date: Mon, 16 Oct 2023 10:20:29 -0500 Subject: [PATCH 4/6] updated mysql-connector-python (#13) Co-authored-by: ssandhu From a6f93e861d686b09f92e82e17c111df3a978923f Mon Sep 17 00:00:00 2001 From: Mike Son <75041697+micson-us@users.noreply.github.com> Date: Mon, 8 Apr 2024 10:29:31 -0400 Subject: [PATCH 5/6] Create start-app.sh --- start-app.sh | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) create mode 100644 start-app.sh diff --git a/start-app.sh b/start-app.sh new file mode 100644 index 0000000..48b7b00 --- /dev/null +++ b/start-app.sh @@ -0,0 +1,16 @@ +#!/usr/bin/env bash +# start-app.sh + +python manage.py waitdb +python manage.py migrate +python manage.py createcachetable +python manage.py loaddata admin_theme_data.json +cd /opt/app/ +if [ -n "$TMP_SOURCE_DIR" ] ; then + (cd openlxp-xia; install -d -o www-data -p "$TMP_SOURCE_DIR") +else + (cd openlxp-xia; install -d -o www-data -p tmp/source) +fi +pwd +service clamav-daemon restart +./start-server.sh From fc4784d00f667cbb75beb0215ed20b7c158f6c1c Mon Sep 17 00:00:00 2001 From: Mike Son <75041697+micson-us@users.noreply.github.com> Date: Mon, 8 Apr 2024 10:30:11 -0400 Subject: [PATCH 6/6] Create start-server.sh --- start-server.sh | 7 +++++++ 1 file changed, 7 insertions(+) create mode 100644 start-server.sh diff --git a/start-server.sh b/start-server.sh new file mode 100644 index 0000000..ee6627e --- /dev/null +++ b/start-server.sh @@ -0,0 +1,7 @@ +#!/usr/bin/env bash +# start-server.sh +if [ -n "$DJANGO_SUPERUSER_USERNAME" ] && [ -n "$DJANGO_SUPERUSER_PASSWORD" ] ;
then + (cd openlxp-xia; python manage.py createsuperuser --no-input) +fi +(cd openlxp-xia; gunicorn openlxp_xia_project.wsgi --reload --user www-data --bind 0.0.0.0:8010 --workers 3) & +nginx -g "daemon off;"