Skip to content

Commit

Permalink
Add delete-record feature with tests, README updates, and environment…
Browse files Browse the repository at this point in the history
… variables

- Implemented the delete-record feature and added the delete_by_field and delete_by_multiple_fields functions in registry.py.
- Updated controllers to include a new Click command for record deletion.
- Introduced environment variables for managing archived file destinations after deletion from the WAF.
- Updated the README with instructions related to the delete-record feature.
- Created comprehensive tests for the delete-record functionality to ensure reliability and accuracy.
  • Loading branch information
Simran Mattu committed Jan 23, 2025
1 parent 451c4bb commit 7a448ee
Show file tree
Hide file tree
Showing 9 changed files with 286 additions and 8 deletions.
9 changes: 9 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -226,6 +226,14 @@ To generate emails for contributors:
woudc-data-registry data generate-emails /path/to/dir
```

#### Delete Record

```bash
woudc-data-registry data delete-record /path/to/bad/file
```

If a bad file was previously ingested, it can be removed using this command. This removes the file from the registry and the WAF.

### Development

```bash
Expand All @@ -248,6 +256,7 @@ python3 -m http.server # view on http://localhost:8000/
# run tests like this:
cd woudc_data_registry/tests
python3 test_data_registry.py
python3 test_delete_record.py

# or this:
python3 setup.py test
Expand Down
2 changes: 1 addition & 1 deletion data/migrate/Updated_Errors.csv
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ Error Code,Error Type,Message Template,Notes
211,Error,Unrecognized data {row}
212,Warning,#{table} row has more values than #{table} has columns
213,Error,Fewer than minimum {bound} occurrences of table #{table} found
214,Error,More than maximum {bound} occurrences of table #{table} found
214,Warning,More than maximum {bound} occurrences of table #{table} found
215,Error,Fewer than minimum {bound} number of rows in table #{table}
216,Error,More than maximum {bound} number of rows in table #{table}
217,Warning,#CONTENT.Level should be {value} according to present tables
Expand Down
1 change: 1 addition & 0 deletions default.env
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ export WDR_SEARCH_URL=https://${WDR_SEARCH_USERNAME}:${WDR_SEARCH_PASSWORD}@loca
## waf configuration
export WDR_WAF_BASEURL=https://woudc.org/archive/
export WDR_WAF_BASEDIR=/tmp
export WDR_FILE_TRASH=/path/to/removed_waf_files

# table configurations; optional: WDR_TABLE_SCHEMA, WDR_TABLE_CONFIG
export WDR_TABLE_SCHEMA=/path/to/data/tables-schema.json
Expand Down
4 changes: 3 additions & 1 deletion woudc_data_registry/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@
WDR_EMAIL_CC = os.getenv('WDR_EMAIL_CC')
WDR_EMAIL_BCC = os.getenv('WDR_EMAIL_BCC')
WDR_TEMPLATE_PATH = os.getenv('WDR_TEMPLATE_PATH')
WDR_FILE_TRASH = os.getenv('WDR_FILE_TRASH')

if not WDR_SEARCH_INDEX_BASENAME:
msg = 'WDR_SEARCH_INDEX_BASENAME was not set. \
Expand All @@ -107,7 +108,8 @@
WDR_DATABASE_URL = f'{WDR_DB_TYPE}:///{WDR_DB_NAME}'
else:
if None in [WDR_DB_USERNAME, WDR_DB_PASSWORD, WDR_SEARCH_TYPE,
WDR_SEARCH_URL, WDR_WAF_BASEDIR, WDR_WAF_BASEURL]:
WDR_SEARCH_URL, WDR_WAF_BASEDIR, WDR_WAF_BASEURL,
WDR_FILE_TRASH]:
msg = 'System environment variables are not set!'
LOGGER.error(msg)
raise EnvironmentError(msg)
Expand Down
19 changes: 16 additions & 3 deletions woudc_data_registry/controller.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,14 +53,15 @@

from woudc_data_registry import config
from woudc_data_registry.util import (is_text_file, read_file,
send_email)
send_email, delete_file_from_record)


from woudc_data_registry.processing import Process

from woudc_data_registry.generate_metadata import update_extents
from woudc_data_registry.models import Contributor
from woudc_data_registry.registry import Registry

from woudc_data_registry.generate_metadata import update_extents
from woudc_data_registry.models import Contributor, DataRecord
from woudc_data_registry.report import OperatorReport, RunReport, EmailSummary
from woudc_data_registry.search import SearchIndex

Expand Down Expand Up @@ -329,7 +330,19 @@ def send_feedback(ctx, failed_files, test, ops):
LOGGER.info('Processing Reports have been sent')


@click.command()
@click.pass_context
@click.argument('file_path', type=click.Path(
    exists=True, dir_okay=False, readable=True))
def delete_record(ctx, file_path):
    """Remove a previously ingested file from the registry and the WAF"""

    # click has already verified that file_path names an existing,
    # readable, regular file (see the argument declaration above).
    LOGGER.info(f"Deleting record for file: {file_path}")

    # The removal itself is delegated to the util helper; DataRecord is
    # the model whose row is to be deleted.
    delete_file_from_record(file_path, DataRecord)

    # NOTE(review): presumably refreshes the metadata extents so they no
    # longer account for the deleted record - confirm in generate_metadata.
    update_extents()

    LOGGER.info("Done deleting record")


data.add_command(ingest)
data.add_command(verify)
data.add_command(generate_emails, name='generate-emails')
data.add_command(send_feedback, name='send-feedback')
data.add_command(delete_record, name='delete-record')
10 changes: 8 additions & 2 deletions woudc_data_registry/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,10 @@ def __geo_interface__(self):
}

def __repr__(self):
    """
    Return a short description of this country.

    :returns: `str` of the form 'Country (<country_id>, <name_en>)', or
              a diagnostic message if one of those attributes is missing.
    """

    # repr() is often called while debugging or logging, so a missing
    # attribute is reported in the returned text instead of raising.
    try:
        return f'Country ({self.country_id}, {self.name_en})'
    except AttributeError as e:
        return f'Error: Missing attributes - {e}'


class Contributor(base):
Expand Down Expand Up @@ -564,7 +567,10 @@ def __geo_interface__(self):
}

def __repr__(self):
    """
    Return a short description of this station.

    :returns: `str` of the form
              'Station (<station_id>, <station_name.name>)', or a
              diagnostic message if an attribute lookup fails.
    """

    # An AttributeError here also covers station_name being None, since
    # the name is read through self.station_name.name.
    try:
        return f'Station ({self.station_id}, {self.station_name.name})'
    except AttributeError as e:
        return f'Error: Missing attributes - {e}'


class StationName(base):
Expand Down
51 changes: 51 additions & 0 deletions woudc_data_registry/registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -317,6 +317,57 @@ def update_by_field(

return

def delete_by_field(self, obj, by, value, case_insensitive=False):
    """
    Delete every row of a table whose field equals a value, then commit.

    :param obj: Table (mapped class) to delete from
    :param by: Field name to be matched
    :param value: Value the field must equal for a row to be deleted
    :param case_insensitive: `bool` of whether to compare strings
                             case-insensitively
    :returns: value returned by the query's `delete()` call
              (the number of rows matched, per SQLAlchemy)
    """

    field = getattr(obj, by)

    if case_insensitive:
        LOGGER.debug(f'Deleting for LOWER({field}) = LOWER({value})')
        condition = func.lower(field) == value.lower()
    else:
        LOGGER.debug(f'Deleting for {field} = {value}')
        condition = field == value

    try:
        deleted = self.session.query(obj).filter(condition).delete()
        self.session.commit()
    except Exception:
        # Undo the failed transaction so the shared session stays usable,
        # then let the caller see the original error.
        self.session.rollback()
        raise

    return deleted

def delete_by_multiple_fields(
    self, table, values, fields=None, case_insensitive=()
):
    """
    Delete the rows of a table that match on several fields at once.

    No commit is issued here; only the delete itself is executed on
    the session.

    :param table: Table (mapped class) to delete from
    :param values: Dictionary mapping field names to the values rows
                   must have in order to be deleted
    :param fields: Field names to filter by; when empty or omitted,
                   every key of `values` is used
    :param case_insensitive: Collection of string fields that should be
                             compared case-insensitively
    :returns: value returned by the query's `delete()` call
              (the number of rows matched, per SQLAlchemy)
    """

    def matches(name):
        # Build the equality criterion for a single field.
        column = getattr(table, name)
        if name in case_insensitive:
            return func.lower(column) == values[name].lower()
        return column == values[name]

    criteria = [matches(name) for name in (fields or values.keys())]

    return self.session.query(table).filter(*criteria).delete()

def save(self, obj=None):
"""
Helper function to save object to registry.
Expand Down
156 changes: 156 additions & 0 deletions woudc_data_registry/tests/test_delete_record.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,156 @@
import unittest
import os
import subprocess
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker
from woudc_data_registry.models import DataRecord
from woudc_data_registry import config

"""
You need to set up a test environment for these tests: set up and populate a
database and a directory with files that have been ingested.
Change WDR_DB_NAME and WDR_SEARCH_INDEX for testing purposes.
"""


class TestBasicDeletion(unittest.TestCase):
    """
    Test case for basic functionality of deleting a record.

    These tests run the `woudc-data-registry` command line tool as a
    subprocess and then inspect the database at config.WDR_DATABASE_URL
    and the directory at config.WDR_FILE_TRASH.
    """

    # Sample file that gets ingested, and where the tests expect to find
    # it below config.WDR_WAF_BASEDIR afterwards.
    SOURCE_FILE = './data/totalozone/totalozone-correct.csv'
    FILENAME = 'totalozone-correct.csv'
    WAF_SUBDIR = '/Archive-NewFormat/TotalOzone_1.0_1/stn077/brewer/2010'

    def _waf_dir(self):
        """Return the WAF directory the sample file is expected in."""
        return config.WDR_WAF_BASEDIR + self.WAF_SUBDIR

    def _waf_path(self):
        """Return the expected WAF path of the sample file."""
        return self._waf_dir() + '/' + self.FILENAME

    def _open_session(self):
        """Open a database session that is closed when the test ends."""
        engine = create_engine(config.WDR_DATABASE_URL,
                               echo=config.WDR_DB_DEBUG)
        session = sessionmaker(bind=engine, expire_on_commit=False)()
        # Registered as a cleanup so the session is released even when
        # an assertion fails part-way through a test.
        self.addCleanup(session.close)
        return session

    @staticmethod
    def _trash_files():
        """Return the sorted names of regular files in the trash."""
        trash = config.WDR_FILE_TRASH
        # Sorted because os.listdir makes no promise about ordering.
        return sorted(
            name for name in os.listdir(trash)
            if os.path.isfile(os.path.join(trash, name))
        )

    @staticmethod
    def _output_filepaths(session):
        """Return the output_filepath of every DataRecord row."""
        rows = session.query(DataRecord.output_filepath).all()
        return [row[0] for row in rows]

    def test_01_file_deletion(self):
        """Ingest a file, delete its record and verify the outcome."""

        waf_path = self._waf_path()
        session = self._open_session()

        trash_before = self._trash_files()
        paths_before = self._output_filepaths(session)

        # Ingesting the file
        subprocess.run(
            ['woudc-data-registry', 'data', 'ingest', self.SOURCE_FILE],
            check=True)

        paths_ingested = self._output_filepaths(session)
        self.assertEqual(len(paths_ingested), len(paths_before) + 1)
        self.assertIn(waf_path, paths_ingested)

        # Deleting the file
        subprocess.run(
            ['woudc-data-registry', 'data', 'delete-record', waf_path],
            check=True)
        # Take the archived copy back out of the trash once the test is
        # over, whether or not the assertions below pass.
        self.addCleanup(
            os.remove, os.path.join(config.WDR_FILE_TRASH, self.FILENAME))

        trash_after = self._trash_files()
        paths_after = self._output_filepaths(session)

        self.assertEqual(len(trash_after), len(trash_before) + 1)
        self.assertEqual(len(paths_after), len(paths_before))
        # Row order of an unordered query is not guaranteed, so compare
        # contents rather than sequence.
        self.assertCountEqual(paths_after, paths_before)
        # Rows hold full output paths, so the full path is what must be
        # absent (a bare file name would never match anyway).
        self.assertNotIn(waf_path, paths_after)

    def test_02_absent_file_deletion(self):
        """
        Run delete-record on a path that does not exist and verify
        that the command fails.
        """

        command = ['woudc-data-registry', 'data', 'delete-record',
                   self._waf_path()]

        with self.assertRaises(subprocess.CalledProcessError) as context:
            subprocess.run(command, check=True)

        # Exit status 2 is what click reports for a usage error, such as
        # an argument that fails its exists=True check.
        self.assertEqual(context.exception.returncode, 2)
        self.assertIn("woudc-data-registry", context.exception.cmd)

    def test_03_absent_file_DB_deletion(self):
        """
        Verify the outcome where the file path exists in the WAF but
        no matching row exists in the database.
        """

        session = self._open_session()

        trash_before = self._trash_files()
        paths_before = self._output_filepaths(session)

        # Copy the file to the WAF so the path exists
        # but the file is not in the DB
        subprocess.run(['cp', self.SOURCE_FILE, self._waf_dir()],
                       check=True)
        self.addCleanup(os.remove, self._waf_path())

        # NOTE(review): delete-record is never invoked in this test, so
        # the checks below only confirm that copying a file into the WAF
        # leaves the trash and the database untouched. Running the
        # command here needs a decision on its expected exit status and
        # on whether the file should end up in the trash.

        trash_after = self._trash_files()
        paths_after = self._output_filepaths(session)

        self.assertEqual(trash_before, trash_after)
        self.assertEqual(len(trash_before), len(trash_after))

        self.assertCountEqual(paths_before, paths_after)
        self.assertEqual(len(paths_before), len(paths_after))


if __name__ == '__main__':
unittest.main()
Loading

0 comments on commit 7a448ee

Please sign in to comment.