From 77a8b0f64c1346037c31a4ea954c4c275dd3db61 Mon Sep 17 00:00:00 2001 From: James Brown Date: Fri, 22 Oct 2021 13:53:30 -0600 Subject: [PATCH] Move datasets to delete first in line We have reports of datasets that get re-harvested with an extra `1` in the URL. We have confirmed these reports. It seems the harvest is doing the best it can to determine whether this is a new dataset or not, but it still fails in some circumstances. This probably won't fix the bug; however, it will mitigate it. By running through the dataset removals first, if the spatial harvester is essentially doing a "delete and add" when it should be replacing, then the name of the new dataset hopefully won't collide with the one that is marked for deletion but still in the system. --- ckanext/spatial/harvesters/waf.py | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/ckanext/spatial/harvesters/waf.py b/ckanext/spatial/harvesters/waf.py index ed51ac48..376e2ed5 100644 --- a/ckanext/spatial/harvesters/waf.py +++ b/ckanext/spatial/harvesters/waf.py @@ -138,6 +138,19 @@ def create_extras(url, date, status): ids = [] + for location in delete: + obj = HarvestObject(job=harvest_job, + extras=create_extras('','', 'delete'), + guid=url_to_ids[location][0], + package_id=url_to_ids[location][1], + ) + model.Session.query(HarvestObject).\ + filter_by(guid=url_to_ids[location][0]).\ + update({'current': False}, False) + + obj.save() + ids.append(obj.id) + for location in new: guid=hashlib.md5(location.encode('utf8','ignore')).hexdigest() obj = HarvestObject(job=harvest_job, @@ -160,19 +173,6 @@ def create_extras(url, date, status): obj.save() ids.append(obj.id) - for location in delete: - obj = HarvestObject(job=harvest_job, - extras=create_extras('','', 'delete'), - guid=url_to_ids[location][0], - package_id=url_to_ids[location][1], - ) - model.Session.query(HarvestObject).\ - filter_by(guid=url_to_ids[location][0]).\ - update({'current': False}, False) - - obj.save() - 
ids.append(obj.id) - if len(ids) > 0: log.debug('{0} objects sent to the next stage: {1} new, {2} change, {3} delete'.format( len(ids), len(new), len(change), len(delete)))