From 514c87a8f1446704d587fc4b2020cf729b781341 Mon Sep 17 00:00:00 2001 From: Ben Sheldon Date: Tue, 21 May 2019 07:46:21 -0700 Subject: [PATCH] Delete a newly created page if the version import fails --- app/jobs/import_versions_job.rb | 52 ++++++++++++++++----------- test/jobs/import_versions_job_test.rb | 36 +++++++++++++++++++ test/test_helper.rb | 1 + 3 files changed, 68 insertions(+), 21 deletions(-) diff --git a/app/jobs/import_versions_job.rb b/app/jobs/import_versions_job.rb index 227b8db81..55f990a39 100644 --- a/app/jobs/import_versions_job.rb +++ b/app/jobs/import_versions_job.rb @@ -76,33 +76,43 @@ def import_record(record) return if existing && @import.skip_existing_records? - version = version_for_record(record, existing, @import.update_behavior) - version.page = page + begin + version = version_for_record(record, existing, @import.update_behavior) + version.page = page - if version.uri.nil? - if record['content'] - # TODO: upload content - raise Api::NotImplementedError, 'Raw content uploading not implemented yet.' + if version.uri.nil? + if record['content'] + # TODO: upload content + raise Api::NotImplementedError, 'Raw content uploading not implemented yet.' + end + elsif !Archiver.already_archived?(version.uri) || !version.version_hash + result = Archiver.archive(version.uri, expected_hash: version.version_hash) + version.version_hash = result[:hash] + if result[:url] != version.uri + version.source_metadata['original_url'] = version.uri + version.uri = result[:url] + end end - elsif !Archiver.already_archived?(version.uri) || !version.version_hash - result = Archiver.archive(version.uri, expected_hash: version.version_hash) - version.version_hash = result[:hash] - if result[:url] != version.uri - version.source_metadata['original_url'] = version.uri - version.uri = result[:url] + + if @import.skip_unchanged_versions? && version_changed?(version) + warn "Skipped version identical to previous. URL: #{page.url}, capture_time: #{version.capture_time}, source_type: #{version.source_type}" + return end - end - if @import.skip_unchanged_versions? && version_changed?(version) - warn "Skipped version identical to previous. URL: #{page.url}, capture_time: #{version.capture_time}, source_type: #{version.source_type}" - return - end + version.validate! + version.update_different_attribute(save: false) + version.save - version.validate! - version.update_different_attribute(save: false) - version.save + @added << version unless existing + rescue => _error + # Delete a newly created page if the version import fails + if page.uuid_previously_changed? && page.versions.empty? + page.maintainers = [] + page.destroy! + end - @added << version unless existing + raise + end end def version_for_record(record, existing_version = nil, update_behavior = 'replace') diff --git a/test/jobs/import_versions_job_test.rb b/test/jobs/import_versions_job_test.rb index 7772e87b1..92f2ab0bc 100644 --- a/test/jobs/import_versions_job_test.rb +++ b/test/jobs/import_versions_job_test.rb @@ -39,6 +39,42 @@ def teardown assert_equal(original_data['source_metadata'], versions(:page1_v1).source_metadata, 'source_metadata was changed') end + test 'does not add a page if there is an error processing versions' do + bad_page_url = 'http://badpage.com' + + import = Import.create_with_data( + { + user: users(:alice) + }, + [ + { + page_url: bad_page_url, + page_title: 'Bad Page', + site_agency: 'Bad Agency', + site_name: 'Bad Administration', + capture_time: 5.days.ago, + uri: 'https://test-bucket.s3.amazonaws.com/example-v1', + version_hash: 'INVALID_HASH', + source_type: 'versionista', + source_metadata: { test_meta: 'data' } + } + ].map(&:to_json).join("\n") + ) + + archive_stub = ->(url, expected_hash: nil) { + raise StandardError + } + + Archiver.stub :already_archived?, false do + Archiver.stub :archive, archive_stub do + ImportVersionsJob.perform_now(import) + + imported_page = Page.find_by(url: bad_page_url) + assert_nil imported_page, 'Should not import page with bad versions' + end + end + end + test 'replaces an existing version if requested' do page_versions_count = pages(:home_page).versions.count diff --git a/test/test_helper.rb b/test/test_helper.rb index 709e5cac1..f0a6b25b0 100644 --- a/test/test_helper.rb +++ b/test/test_helper.rb @@ -1,6 +1,7 @@ ENV['RAILS_ENV'] ||= 'test' require File.expand_path('../config/environment', __dir__) require 'rails/test_help' +require 'minitest/autorun' require 'webmock/minitest' class ActiveSupport::TestCase