diff --git a/app/jobs/bulk_zombie_url_uploader_job.rb b/app/jobs/bulk_zombie_url_uploader_job.rb index 4e146a77c..3008d87d7 100644 --- a/app/jobs/bulk_zombie_url_uploader_job.rb +++ b/app/jobs/bulk_zombie_url_uploader_job.rb @@ -20,9 +20,7 @@ def report_results def log_results results = @uploader.results - Rails.logger.info "BulkZombieUrlUploaderJob: #{results.file_name}" - Rails.logger.info " #{results.total_count} URLs" - Rails.logger.info " #{results.error_count} errors" + Rails.logger.info(BulkZombieUrlUploaderJob: results.file_name, total_urls: results.total_count, errors: results.error_count) end def send_results_email diff --git a/app/services/bulk_zombie_url_uploader.rb b/app/services/bulk_zombie_url_uploader.rb index 0c3c3def9..e965e26ff 100644 --- a/app/services/bulk_zombie_url_uploader.rb +++ b/app/services/bulk_zombie_url_uploader.rb @@ -28,17 +28,7 @@ def initialize_results end def process_upload - parse_csv.each { |row| process_row(row) } - rescue CSV::MalformedCSVError => e - handle_csv_error(e) - end - - def parse_csv - csv = CSV.parse(File.read(@file_path), headers: true) - raise CSV::MalformedCSVError, "Missing required headers" unless %w[URL DOC_ID].all? { |col| csv.headers.include?(col) } - csv - rescue CSV::MalformedCSVError, ArgumentError => e - raise CSV::MalformedCSVError.new('CSV', "Malformed or invalid CSV: #{e.message}") + CSV.parse(File.read(@file_path), headers: true).each { |row| process_row(row) } end def process_row(row) @@ -53,7 +43,7 @@ def process_row(row) end def handle_url_processing(url, document_id, row) - process_url_with_rescue(url, document_id) + process_url(url, document_id) update_results rescue StandardError => e handle_processing_error(e, url, document_id, row) @@ -69,26 +59,15 @@ def log_missing_document_id(row, url) Rails.logger.error("Skipping row: #{row.inspect}. Document ID is mandatory.") end - def handle_csv_error(error) - @results.add_error('Invalid CSV format', 'Entire file') - Rails.logger.error("Error parsing CSV: #{error.message}") - end - def log_upload_error(error) error_message = "Failed to process bulk zombie URL document (file: #{@file_name})." - backtrace = error.backtrace ? error.backtrace.join("\n") : 'No backtrace available' - Rails.logger.error("#{error_message} Error: #{error.message}\n#{backtrace}") + Rails.logger.error(error_message, error) end def handle_processing_error(error, url, document_id, row) key = url.presence || document_id @results&.add_error(error.message, key) - backtrace = error.backtrace ? error.backtrace.join("\n") : 'No backtrace available' - Rails.logger.error("Failure to process bulk upload zombie URL row: #{row.inspect}\n#{error.message}\n#{backtrace}") - end - - def process_url_with_rescue(url, document_id) - process_url(url, document_id) + Rails.logger.error('Failure to process bulk upload zombie URL row:', error) end def process_url(url, document_id) diff --git a/spec/services/bulk_zombie_url_uploader_spec.rb b/spec/services/bulk_zombie_url_uploader_spec.rb index 9b96263c0..3303cbdd7 100644 --- a/spec/services/bulk_zombie_url_uploader_spec.rb +++ b/spec/services/bulk_zombie_url_uploader_spec.rb @@ -1,36 +1,28 @@ # frozen_string_literal: true describe BulkZombieUrlUploader do - let(:filename) { 'test_file.csv' } - let(:filepath) { '/path/to/test_file.csv' } + let(:filename) { 'bulk_zombie_urls.csv' } + let(:filepath) { Rails.root.join('spec/fixtures/csv/bulk_zombie_urls.csv') } let(:uploader) { described_class.new(filename, filepath) } let(:results) { instance_double(BulkZombieUrls::Results, delete_ok: nil, increment_updated: nil, add_error: nil) } + let(:logger) { Rails.logger } before do allow(BulkZombieUrls::Results).to receive(:new).and_return(results) - uploader.instance_variable_set(:@results, results) - end - - describe '#initialize' do - subject(:uploader) { described_class.new(filename, filepath) } - - it 'initializes with the given filename and filepath' do - expect(uploader.instance_variable_get(:@file_name)).to eq(filename) - expect(uploader.instance_variable_get(:@file_path)).to eq(filepath) - end + allow(logger).to receive(:error) + uploader.send(:initialize_results) end describe '#upload' do subject(:upload) { uploader.upload } - let(:results) { instance_double(BulkZombieUrls::Results, delete_ok: nil, increment_updated: nil) } before do allow(uploader).to receive(:initialize_results).and_call_original allow(uploader).to receive(:process_upload) - allow(uploader).to receive(:results).and_return(results) + allow(uploader).to receive(:log_upload_error) end - context 'when no error occurs' do + context 'when no errors occur' do it 'initializes results and processes the upload' do upload expect(uploader).to have_received(:initialize_results) @@ -38,258 +30,210 @@ end end - it 'initializes results and processes the upload' do - allow(uploader).to receive(:initialize_results).and_call_original - allow(uploader).to receive(:process_upload) - - uploader.upload - - expect(uploader).to have_received(:initialize_results) - expect(uploader).to have_received(:process_upload) - end - - context 'when an error occurs' do + context 'when an error occurs during upload' do let(:error) { StandardError.new('test error') } before do allow(uploader).to receive(:process_upload).and_raise(error) - allow(uploader).to receive(:log_upload_error) end - it 'logs the upload error' do + it 'logs the upload error and ensures results are initialized' do upload expect(uploader).to have_received(:log_upload_error).with(error) + expect(uploader.results).not_to be_nil end end end - describe '#log_upload_error' do - subject(:log_upload_error) { uploader.send(:log_upload_error, error) } - - let(:error) { StandardError.new('Upload failed') } - - context 'when backtrace is present' do - before do - allow(error).to receive(:backtrace).and_return(['line 1', 'line 2']) - allow(Rails.logger).to receive(:error) - end - - it 'logs the error with backtrace' do - log_upload_error - expect(Rails.logger).to have_received(:error).with(/Failed to process bulk zombie URL document \(file: test_file.csv\)\..*Error: Upload failed.*line 1.*line 2/m) - end + describe '#initialize_results' do + it 'initializes the results object' do + uploader.send(:initialize_results) + expect(uploader.results).to eq(results) end - context 'when backtrace is nil' do - before do - allow(error).to receive(:backtrace).and_return(nil) - allow(Rails.logger).to receive(:error) - end - - it 'logs the error without backtrace' do - log_upload_error - expect(Rails.logger).to have_received(:error).with(/No backtrace available/) - end + it 'raises an error if results initialization fails' do + allow(BulkZombieUrls::Results).to receive(:new).and_return(nil) + expect { uploader.send(:initialize_results) }.to raise_error(BulkZombieUrlUploader::Error, 'Results object not initialized') end end - describe '#initialize_results' do - subject(:initialize_results) { uploader.send(:initialize_results) } - - context 'when results are successfully initialized' do - before do - allow(BulkZombieUrls::Results).to receive(:new).with(filename).and_return(instance_double(BulkZombieUrls::Results)) - end + describe '#process_upload' do + let(:csv_content) { "URL,DOC_ID\nhttps://example.com,123\n" } - it 'sets the results instance variable' do - initialize_results - expect(uploader.results).not_to be_nil - end + before do + allow(File).to receive(:read).with(filepath).and_return(csv_content) + allow(uploader).to receive(:process_row) end - context 'when results initialization fails' do - before do - allow(BulkZombieUrls::Results).to receive(:new).and_return(nil) - end - - it 'raises an error' do - expect { initialize_results }.to raise_error(BulkZombieUrlUploader::Error, 'Results object not initialized') - end + it 'processes each row of the CSV file' do + uploader.send(:process_upload) + expect(uploader).to have_received(:process_row).once end end - describe '#process_upload' do - subject(:process_upload) { uploader.send(:process_upload) } - - context 'when CSV::MalformedCSVError is not raised' do - before do - allow(uploader).to receive(:parse_csv).and_return([{'URL' => 'https://example.com', 'DOC_ID' => '123'}]) - allow(uploader).to receive(:process_row) - end + describe '#process_row' do + let(:row) { { 'URL' => 'https://example.com', 'DOC_ID' => '123' } } - it 'parses the CSV and processes each row' do - process_upload - expect(uploader).to have_received(:parse_csv) - expect(uploader).to have_received(:process_row).with('URL' => 'https://example.com', 'DOC_ID' => '123') - end + before do + uploader.send(:initialize_results) end - context 'when CSV::MalformedCSVError is raised' do - let(:error) { CSV::MalformedCSVError.new('CSV', 'Malformed CSV') } + context 'when document ID is missing' do + let(:row) { { 'URL' => 'https://example.com', 'DOC_ID' => nil } } - before do - allow(uploader).to receive(:parse_csv).and_raise(error) - allow(uploader).to receive(:handle_csv_error) + it 'logs a missing document ID error' do + uploader.send(:process_row, row) + expect(results).to have_received(:add_error).with('Document ID is missing', 'https://example.com') end + end - it 'calls handle_csv_error' do - process_upload rescue nil - expect(uploader).to have_received(:handle_csv_error).with(error) + context 'when document ID is present' do + it 'handles URL processing' do + allow(uploader).to receive(:handle_url_processing) + uploader.send(:process_row, row) + expect(uploader).to have_received(:handle_url_processing).with('https://example.com', '123', row) end end end - describe '#parse_csv' do - subject(:parse_csv) { uploader.send(:parse_csv) } + describe '#handle_url_processing' do + subject(:handle_url_processing) { uploader.send(:handle_url_processing, url, document_id, row) } - let(:csv_content) { "URL,DOC_ID\nhttps://example.com,123\n" } + let(:url) { 'https://example.com' } + let(:document_id) { '123' } + let(:row) { { 'URL' => url, 'DOC_ID' => document_id } } + let(:error) { StandardError.new('Something went wrong') } before do - allow(File).to receive(:read).with(filepath).and_return(csv_content) + allow(uploader).to receive(:process_url).and_raise(error) + allow(uploader).to receive(:handle_processing_error) + allow(uploader).to receive(:update_results) end - context 'when CSV is valid' do - it 'returns the parsed CSV object' do - expect(parse_csv).to be_a(CSV::Table) + context 'when process_url raises an error' do + it 'handles the error using handle_processing_error' do + handle_url_processing + expect(uploader).to have_received(:handle_processing_error).with(error, url, document_id, row) end - end - - context 'when CSV is malformed' do - let(:csv_content) { "Invalid content" } - it 'raises a MalformedCSVError' do - expect { parse_csv }.to raise_error(CSV::MalformedCSVError) + it 'does not call update_results' do + handle_url_processing + expect(uploader).not_to have_received(:update_results) end end - end - describe '#process_row' do - let(:row) { { 'URL' => 'https://example.com', 'DOC_ID' => '123' } } - - context 'when document ID is present' do - it 'processes the URL' do - allow(uploader).to receive(:handle_url_processing) - uploader.send(:process_row, row) - expect(uploader).to have_received(:handle_url_processing).with('https://example.com', '123', row) + context 'when process_url succeeds' do + before do + allow(uploader).to receive(:process_url) end - end - - context 'when document ID is missing' do - let(:row) { { 'URL' => 'https://example.com', 'DOC_ID' => nil } } - it 'logs a missing document ID error' do - uploader.send(:process_row, row) - expect(results).to have_received(:add_error).with('Document ID is missing', 'https://example.com') + it 'calls update_results' do + allow(uploader).to receive(:update_results) + handle_url_processing + expect(uploader).to have_received(:update_results).once end end end - describe '#handle_url_processing' do - subject(:handle_url_processing) { uploader.send(:handle_url_processing, url, document_id, row) } + describe '#handle_processing_error' do + subject(:handle_processing_error) do + uploader.send(:handle_processing_error, error, url, document_id, row) + end - let(:url) { 'https://example.com' } + let(:error) { StandardError.new('Something went wrong') } let(:document_id) { '123' } let(:row) { { 'URL' => url, 'DOC_ID' => document_id } } - context 'when StandardError is not raised during URL processing' do - before do - allow(uploader).to receive(:process_url_with_rescue) - allow(uploader).to receive(:update_results) + context 'when URL is present' do + let(:url) { 'https://example.com' } + + it 'adds an error to results with the error message and URL' do + handle_processing_error + expect(results).to have_received(:add_error).with('Something went wrong', url).once end - it 'processes the URL and updates results' do - handle_url_processing - expect(uploader).to have_received(:process_url_with_rescue).with(url, document_id) - expect(uploader).to have_received(:update_results) + it 'logs the processing error' do + handle_processing_error + expect(logger).to have_received(:error).with('Failure to process bulk upload zombie URL row:', error).once end end - context 'when StandardError is raised during URL processing' do - let(:error) { StandardError.new('Processing error') } + context 'when URL is not present' do + let(:url) { nil } - before do - allow(uploader).to receive(:process_url_with_rescue).and_raise(error) - allow(uploader).to receive(:handle_processing_error) + it 'adds an error to results with the error message and document ID' do + handle_processing_error + expect(results).to have_received(:add_error).with('Something went wrong', document_id).once end - it 'calls handle_processing_error' do - handle_url_processing rescue nil - expect(uploader).to have_received(:handle_processing_error).with(error, url, document_id, row) + it 'logs the processing error' do + handle_processing_error + expect(logger).to have_received(:error).with('Failure to process bulk upload zombie URL row:', error).once end end end describe '#update_results' do - subject(:update_results) { uploader.send(:update_results) } - - let(:results) { instance_double(BulkZombieUrls::Results, delete_ok: nil, increment_updated: nil) } + it 'calls delete_ok and increment_updated on results' do + uploader.send(:initialize_results) + uploader.send(:update_results) - before do - allow(uploader).to receive(:results).and_return(results) + expect(results).to have_received(:delete_ok).once + expect(results).to have_received(:increment_updated).once end + end - it 'updates the results object' do - uploader.send(:update_results) - expect(results).to have_received(:delete_ok) - expect(results).to have_received(:increment_updated) + describe '#delete_document' do + let(:document_id) { '123' } + + it 'deletes the document using I14yDocument' do + allow(I14yDocument).to receive(:delete) + uploader.send(:delete_document, document_id) + expect(I14yDocument).to have_received(:delete).with(handle: 'searchgov', document_id: document_id) end end describe '#process_url_with_searchgov' do - subject(:process_url_with_searchgov) { uploader.send(:process_url_with_searchgov, url, document_id) } - let(:url) { 'https://example.com' } let(:document_id) { '123' } + let(:searchgov_url) { instance_double(SearchgovUrl, destroy: true) } - context 'when SearchgovUrl is found' do - let(:searchgov_url) { instance_double(SearchgovUrl, destroy: true) } - + context 'when the URL exists in SearchgovUrl' do before do - allow(SearchgovUrl).to receive(:find_by).with(url:).and_return(searchgov_url) + allow(SearchgovUrl).to receive(:find_by).with(url: url).and_return(searchgov_url) end it 'destroys the SearchgovUrl record' do - process_url_with_searchgov + uploader.send(:process_url_with_searchgov, url, document_id) expect(searchgov_url).to have_received(:destroy) end end - context 'when SearchgovUrl is not found' do + context 'when the URL does not exist in SearchgovUrl' do before do - allow(SearchgovUrl).to receive(:find_by).with(url:).and_return(nil) + allow(SearchgovUrl).to receive(:find_by).with(url: url).and_return(nil) allow(uploader).to receive(:delete_document) end - it 'deletes the document' do - process_url_with_searchgov + it 'calls delete_document with the given document_id' do + uploader.send(:process_url_with_searchgov, url, document_id) expect(uploader).to have_received(:delete_document).with(document_id) end end end - + describe '#process_url' do subject(:process_url) { uploader.send(:process_url, url, document_id) } + let(:url) { 'https://example.com' } let(:document_id) { '123' } context 'when URL is present' do - let(:url) { 'https://example.com' } - before do allow(uploader).to receive(:process_url_with_searchgov) end - it 'processes the URL with Searchgov' do + it 'calls process_url_with_searchgov with the given URL and document_id' do process_url expect(uploader).to have_received(:process_url_with_searchgov).with(url, document_id) end @@ -302,93 +246,23 @@ allow(uploader).to receive(:delete_document) end - it 'deletes the document' do + it 'calls delete_document with the given document_id' do process_url expect(uploader).to have_received(:delete_document).with(document_id) end end end - describe '#handle_csv_error' do - subject(:handle_csv_error) { uploader.send(:handle_csv_error, error) } - - let(:error) { CSV::MalformedCSVError.new('CSV', 'Malformed CSV') } - - it 'logs an error for invalid CSV format' do - allow(Rails.logger).to receive(:error) - handle_csv_error - expect(results).to have_received(:add_error).with('Invalid CSV format', 'Entire file') - expect(Rails.logger).to have_received(:error).with(/Error parsing CSV: .*Malformed CSV/) - end - end - - describe '#handle_processing_error' do - subject(:handle_processing_error) { uploader.send(:handle_processing_error, error, url, document_id, row) } - - let(:error) { StandardError.new('Processing error') } - let(:url) { 'https://example.com' } - let(:document_id) { '123' } - let(:row) { { 'URL' => url, 'DOC_ID' => document_id } } + describe '#log_upload_error' do + let(:error) { StandardError.new('Something went wrong') } - before do + it 'logs the upload error with the file name and error details' do allow(Rails.logger).to receive(:error) - end - - it 'logs and records an error when a processing error occurs' do - handle_processing_error - expect(results).to have_received(:add_error).with('Processing error', url) - expect(Rails.logger).to have_received(:error).with(/Failure to process bulk upload zombie URL row:/) - end - - context 'when URL is blank' do - let(:url) { nil } - - it 'uses the document ID as the key for the error' do - handle_processing_error - expect(results).to have_received(:add_error).with('Processing error', document_id) - end - end - - context 'when backtrace is nil' do - before do - allow(error).to receive(:backtrace).and_return(nil) - end - - it 'logs the error without backtrace' do - handle_processing_error - expect(Rails.logger).to have_received(:error).with(/No backtrace available/) - end - end - end - - describe '#delete_document' do - subject(:delete_document) { uploader.send(:delete_document, document_id) } - - let(:document_id) { '123' } - - before do - allow(I14yDocument).to receive(:delete) - end - - it 'deletes the document from I14yDocument' do - delete_document - expect(I14yDocument).to have_received(:delete).with(handle: 'searchgov', document_id:) - end - end - - describe '#process_url_with_rescue' do - subject(:process_url_with_rescue) { uploader.send(:process_url_with_rescue, url, document_id) } - - let(:url) { 'https://example.com' } - let(:document_id) { '123' } - - before do - allow(uploader).to receive(:process_url) - end + uploader.send(:log_upload_error, error) - it 'calls process_url' do - process_url_with_rescue - expect(uploader).to have_received(:process_url).with(url, document_id) + expect(Rails.logger).to have_received(:error).with( + "Failed to process bulk zombie URL document (file: #{filename}).", error + ) end end end