Skip to content

Commit

Permalink
SRCH-5154 Bulk delete zombie records from Elastic Search
Browse files Browse the repository at this point in the history
  • Loading branch information
krbhavith committed Nov 26, 2024
1 parent b63ef39 commit e1c91c7
Show file tree
Hide file tree
Showing 12 changed files with 418 additions and 0 deletions.
41 changes: 41 additions & 0 deletions app/controllers/admin/bulk_zombie_url_upload_controller.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
# frozen_string_literal: true

module Admin
  # Admin screen for uploading a plain-text file of URLs whose records should
  # be deleted in bulk. The upload is validated here and handed to
  # BulkZombieUrlUploaderJob for processing.
  class BulkZombieUrlUploadController < AdminController
    # Renders the upload form.
    def index
      @page_title = 'Bulk Zombie URL Upload'
    end

    # Validates the uploaded file and enqueues the job. Validation failures
    # (BulkZombieUrlUploader::Error) are shown as a flash error. Always
    # redirects back to the index page.
    def upload
      begin
        # Must match the name of the form's file field ('bulk_zombie_upload_urls');
        # reading any other key yields nil and every upload is rejected as missing.
        @file = params[:bulk_zombie_upload_urls]
        BulkZombieUrlUploader::UrlFileValidator.new(@file).validate!
        enqueue_job
        flash[:success] = success_message(@file.original_filename)
      rescue BulkZombieUrlUploader::Error => e
        # NOTE(review): the two-argument form assumes the app's logger accepts
        # an exception as a second argument (stdlib Logger#error does not) — confirm.
        Rails.logger.error 'Url upload failed', e
        flash[:error] = e.message
      end

      redirect_to admin_bulk_zombie_url_upload_index_path
    end

    private

    # Flash text shown after a file has been accepted for processing.
    def success_message(filename)
      <<~SUCCESS_MESSAGE
        Successfully uploaded #{filename} for processing.
        The results will be emailed to you.
      SUCCESS_MESSAGE
    end

    # Reads the whole upload into an array of lines (tagged as UTF-8) and
    # passes it, with the current user and file name, to the background job.
    # NOTE(review): `reindex` is forwarded, but the job's #perform does not
    # use it in the visible code.
    def enqueue_job
      BulkZombieUrlUploaderJob.perform_later(
        current_user,
        @file.original_filename,
        @file.tempfile.set_encoding('UTF-8').readlines,
        reindex: ActiveModel::Type::Boolean.new.cast(params[:reindex])
      )
    end
  end
end
33 changes: 33 additions & 0 deletions app/jobs/bulk_zombie_url_uploader_job.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# frozen_string_literal: true

# Runs a BulkZombieUrlUploader over a list of URLs, then logs a summary and
# emails the results to the given user.
class BulkZombieUrlUploaderJob < ApplicationJob
  queue_as :searchgov

  delegate :upload, to: :@uploader

  # @param user     recipient of the results email (the mailer reads its #email)
  # @param filename name used to label the results
  # @param urls     raw URL lines handed to the uploader
  # @param reindex  accepted for interface compatibility; not used here
  def perform(user, filename, urls, reindex: false)
    @uploader = BulkZombieUrlUploader.new(filename, urls)
    @user = user

    upload
    report_results
  end

  # Logs the outcome, then emails it.
  def report_results
    log_results
    send_results_email
  end

  # Writes the results name, total URL count and error count to the Rails log.
  def log_results
    summary = @uploader.results
    [
      "BulkZombieUrlUploaderJob: #{summary.name}",
      " #{summary.total_count} URLs",
      " #{summary.error_count} errors"
    ].each { |line| Rails.logger.info line }
  end

  # Delivers the results email synchronously.
  def send_results_email
    BulkZombieUrlUploadResultsMailer
      .with(user: @user, results: @uploader.results)
      .results_email
      .deliver_now!
  end
end
8 changes: 8 additions & 0 deletions app/mailers/bulk_zombie_url_upload_results_mailer.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# frozen_string_literal: true

# Mails the outcome of a bulk zombie URL upload to the given user.
class BulkZombieUrlUploadResultsMailer < ApplicationMailer
  # Expects params[:user] (responds to #email) and params[:results]
  # (responds to #name); exposes the results to the templates as @results.
  def results_email
    @results = params[:results]
    recipient = params[:user].email
    subject_line = "Bulk Zombie URL upload results for #{@results.name}"
    mail(to: recipient, subject: subject_line)
  end
end
100 changes: 100 additions & 0 deletions app/services/bulk_zombie_url_uploader.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
# frozen_string_literal: true

class BulkZombieUrlUploader
MAXIMUM_FILE_SIZE = 4.megabytes
VALID_CONTENT_TYPES = %w[text/plain].freeze

attr_reader :results

# Raised when an uploaded URL file is rejected (missing, wrong type, too big).
class Error < StandardError; end

# Tallies the outcome of one bulk run: how many URLs were handled without
# error, how many failed, and which URLs failed grouped by error message.
class Results
  attr_accessor :searchgov_domains, :ok_count, :error_count, :name

  # @param name [String] label for this run (the job passes the uploaded file name)
  def initialize(name)
    @name = name
    @ok_count = 0
    @error_count = 0
    @searchgov_domains = Set.new
    # A block default gives each message its own array rather than one shared array.
    @errors = Hash.new { |hash, key| hash[key] = [] }
  end

  # Records one URL processed without error.
  def delete_ok
    self.ok_count += 1
  end

  # Records one failed URL under the given error message.
  def add_error(error_message, url)
    self.error_count += 1
    @errors[error_message] << url
  end

  # @return [Integer] number of URLs processed, successful or not
  def total_count
    ok_count + error_count
  end

  # The results email templates iterate over this to print one section per
  # distinct failure.
  #
  # @return [Array] distinct error messages, in the order first recorded
  def error_messages
    @errors.keys
  end

  # Uses fetch so that asking about an unknown message does not create an
  # empty entry that would then show up in #error_messages.
  #
  # @return [Array] URLs recorded under error_message, empty if none
  def urls_with(error_message)
    @errors.fetch(error_message, [])
  end
end

# Checks an uploaded file before it is queued: it must be present, have an
# accepted content type, and not exceed the size limit. Each failed check
# raises BulkZombieUrlUploader::Error with a user-facing message.
class UrlFileValidator
  # @param uploaded_file the upload to check; may be nil when nothing was attached
  def initialize(uploaded_file)
    @uploaded_file = uploaded_file
  end

  # Runs every check; presence goes first so the later checks never see nil.
  def validate!
    ensure_present
    ensure_valid_content_type
    ensure_not_too_big
  end

  # Rejects content types outside BulkZombieUrlUploader::VALID_CONTENT_TYPES.
  def ensure_valid_content_type
    content_type = @uploaded_file.content_type
    return if BulkZombieUrlUploader::VALID_CONTENT_TYPES.include?(content_type)

    raise BulkZombieUrlUploader::Error, "Files of type #{content_type} are not supported."
  end

  # Rejects a missing upload.
  def ensure_present
    raise BulkZombieUrlUploader::Error, 'Please choose a file to upload.' unless @uploaded_file.present?
  end

  # Rejects files larger than BulkZombieUrlUploader::MAXIMUM_FILE_SIZE.
  def ensure_not_too_big
    too_big = @uploaded_file.size > BulkZombieUrlUploader::MAXIMUM_FILE_SIZE
    raise BulkZombieUrlUploader::Error, "#{@uploaded_file.original_filename} is too big; please split it." if too_big
  end
end

# @param name label for this run, passed on to Results
# @param urls collection of raw URL strings to process
def initialize(name, urls)
  @name = name
  @urls = urls
end

# Processes every URL given to the constructor, tallying outcomes in a fresh
# Results object that is afterwards available through #results.
# @results must be assigned first because process_url records onto it.
def upload
@results = Results.new(@name)
upload_urls
end

private

# Hands each raw URL to process_url, in order.
def upload_urls
  @urls.each { |raw_url| process_url(raw_url) }
end

# Looks up the SearchgovUrl for the stripped URL and destroys it if it is
# persisted; the URL counts as OK whether or not a record existed. Any
# StandardError is recorded against the raw URL and logged, so one bad line
# does not stop the run.
def process_url(raw_url)
  SearchgovUrl.find_or_initialize_by(url: raw_url.strip).tap do |record|
    record.destroy if record.persisted?
  end
  @results.delete_ok
rescue StandardError => e
  @results.add_error(e.message, raw_url)
  Rails.logger.error "Failed to process url: #{raw_url}", e
end
end
37 changes: 37 additions & 0 deletions app/views/admin/bulk_zombie_url_upload/index.html.haml
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
-# Upload form for the bulk zombie URL delete. The file field name must match
-# the param read by the controller's upload action, and the size limit shown
-# comes from the uploader that validates this upload.
%h2
  Bulk Zombie URL Upload
%p
  To upload a list of URLs to be deleted, follow these instructions:
%ul.bulk-upload-instructions
  %li
    Create a new text file containing one URL per line. An example of this format
    is shown below.
  %li
    Save the file on your computer; you can name the file whatever you like, as
    long as it's plain text and has a .txt extension.
  %li
    %b
      Do not use Microsoft Word files, or any other file type except plain text.
  %li
    The maximum file size is #{number_to_human_size(BulkZombieUrlUploader::MAXIMUM_FILE_SIZE)}.
  %li
    Browse for the file on your computer.
  %li
    Upload the file to Search.gov using the upload button below.
  %li
    You will receive an email when processing of your URLs is complete.
%p
  %strong
    Sample file format:
  %code
    %pre
      http://www.sample.gov/1.html
      http://www.sample.gov/2.html
      http://www.sample.gov/3.html
= form_tag upload_admin_bulk_zombie_url_upload_index_path, :multipart => true do
  = file_field_tag 'bulk_zombie_upload_urls', :accept => 'text/plain', :class => 'file'
  %p
    = check_box_tag :reindex
    = label_tag :reindex, 'Reindex existing URLs?'
  %p
    = submit_tag "Upload", :class => 'submit btn-upload'
1 change: 1 addition & 0 deletions app/views/admin/home/index.html.haml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
%li=link_to("Superfresh Urls", admin_superfresh_urls_path)
%li=link_to("Superfresh Bulk Upload", admin_superfresh_urls_bulk_upload_index_path)
%li=link_to("Bulk Search.gov URL Upload", admin_bulk_url_upload_index_path)
%li=link_to("Bulk Zombie URL Upload", admin_bulk_zombie_url_upload_index_path)
%li=link_to("ODIE URL Source Update", admin_odie_url_source_update_index_path)
%li=link_to("Bulk Affiliate Styles Upload", admin_bulk_affiliate_styles_upload_index_path)

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
<%# HTML summary of a bulk zombie URL run; presumably the HTML body of the results email. %>
<%# Reads @results for the run name, URL totals and, when errors exist, the failing URLs. %>
<p>
The bulk zombie url upload job for <%= @results.name %> has
finished.
</p>

<p>
There were <%= @results.total_count %> URLs.
</p>

<% if @results.error_count == 0 %>
<p>
There were no errors.
</p>
<% else %>
<p>
<%= @results.ok_count %> URLs were deleted successfully.
</p>

<p>
<%= @results.error_count %> URLs failed validation.
</p>

<%# One heading per distinct error message, followed by the URLs recorded under it. %>
<%# Requires @results to respond to error_messages and urls_with. %>
<% @results.error_messages.each do |error_message| %>
<h3><%= error_message %></h3>
<% @results.urls_with(error_message).each do |url| %>
<%= url %>
<br />
<% end %>
<% end %>
<% end %>
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
<%# Plain-text summary of a bulk zombie URL run; presumably the text body of the results email. %>
<%# Reads @results for the run name, URL totals and, when errors exist, the failing URLs. %>
The bulk zombie url upload job for <%= @results.name %> has
finished.

There were <%= @results.total_count %> URLs.

<% if @results.error_count == 0 %>
There were no errors.
<% else %>
<%= @results.ok_count %> URLs were deleted successfully.
<%= @results.error_count %> URLs failed validation.

<%# One section per distinct error message; requires error_messages and urls_with on @results. %>
<% @results.error_messages.each do |error_message| %>
<%= error_message %>
<% @results.urls_with(error_message).each do |url| %>
<%= url %>
<% end %>
<% end %>
<% end %>
5 changes: 5 additions & 0 deletions config/routes.rb
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,11 @@
post :upload
end
end
resources :bulk_zombie_url_upload, only: :index do
collection do
post :upload
end
end
resources :bulk_affiliate_styles_upload, only: :index do
collection do
post :upload
Expand Down
21 changes: 21 additions & 0 deletions features/bulk_zombie_url_upload.feature
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
Feature: Bulk Zombie URL Upload
  In order to remove zombie URLs in bulk
  As an admin
  I want to upload a file containing the URLs to be deleted

  Scenario: Bulk-uploading zombie URLs for deletion as an admin
    Given I am logged in with email "[email protected]"
    # Visit the zombie upload page, the same page the later steps land on.
    When I go to the bulk zombie url upload admin page
    Then I should see "Bulk Zombie URL Upload"
    And I should see "The maximum file size is 4 MB"

    When I attach the file "features/support/bulk_upload_urls.txt" to "bulk_zombie_upload_urls"
    And I press "Upload"
    Then I should be on the bulk zombie url upload admin page
    # The success message echoes the name of the file that was attached.
    And I should see "Successfully uploaded bulk_upload_urls.txt"
    And I should see "The results will be emailed to you."

    When I do not attach a file to "bulk_zombie_upload_urls"
    And I press "Upload"
    Then I should be on the bulk zombie url upload admin page
    And I should see "Please choose a file to upload"
80 changes: 80 additions & 0 deletions spec/mailers/bulk_zombie_url_upload_results_mailer_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
# frozen_string_literal: true

# Expectations shared by every results email. The including context must
# define `mail`, `mail_body`, `user`, `filename` and `results`.
RSpec.shared_examples 'a bulk zombie upload notification email' do
  it 'has the correct subject' do
    # Capitalisation must match the subject built by the mailer.
    expect(mail.subject).to eq("Bulk Zombie URL upload results for #{filename}")
  end

  it 'has the correct recipient' do
    expect(mail.to).to eq([user.email])
  end

  it 'has the correct from header' do
    expect(mail.from).to eq([DELIVER_FROM_EMAIL_ADDRESS])
  end

  it 'has the correct reply-to' do
    expect(mail.reply_to).to eq([SUPPORT_EMAIL_ADDRESS])
  end

  it 'has the correct total number of URLs' do
    expect(mail_body).to match(/There were #{results.total_count} URLs/)
  end
end

# Covers the results email for a run with no failures and for a run with two
# distinct failures.
RSpec.describe BulkZombieUrlUploadResultsMailer, type: :mailer do
  describe '#results_email' do
    let(:user) { users(:affiliate_admin) }
    let(:filename) { 'test-file.txt' }
    let(:results) do
      results = BulkZombieUrlUploader::Results.new(filename)
      # Results records successes through #delete_ok; it has no #add_ok.
      results.delete_ok
      results.delete_ok
      results
    end
    let(:mail) do
      described_class.with(user: user, results: results).results_email
    end
    let(:mail_body) { mail.body.encoded }

    describe 'with no errors' do
      # Use the zombie-specific shared examples defined in this file.
      it_behaves_like 'a bulk zombie upload notification email'

      it 'reports that there were no URLs with problems' do
        expect(mail_body).to match(/There were no errors/)
      end
    end

    describe 'with errors' do
      let(:first_error_message) { 'First validation failure' }
      let(:first_bad_url) { 'https://agency.gov/first-bad-url' }
      let(:second_error_message) { 'Second validation failure' }
      let(:second_bad_url) { 'https://agency.gov/second-bad-url' }

      before do
        results.add_error(first_error_message, first_bad_url)
        results.add_error(second_error_message, second_bad_url)
      end

      it_behaves_like 'a bulk zombie upload notification email'

      it 'reports the correct number of OK URLs' do
        expect(mail_body).to match(
          /#{results.ok_count} URLs were deleted successfully/
        )
      end

      it 'reports the correct number of URLs with problems' do
        expect(mail_body).to match(/#{results.error_count} URLs failed validation/)
      end

      it 'shows the first URL validation failure' do
        expect(mail_body).to match(/#{first_error_message}\s+#{first_bad_url}/)
      end

      it 'shows the second URL validation failure' do
        expect(mail_body).to match(/#{second_error_message}\s+#{second_bad_url}/)
      end
    end
  end
end
Loading

0 comments on commit e1c91c7

Please sign in to comment.