Skip to content

Commit

Permalink
Lower batch size on ETL tasks
Browse files Browse the repository at this point in the history
When running the ETL rake tasks, we sometimes run into errors
because the pods run out of memory. Lowering the batch size should
help avoid those errors.
  • Loading branch information
MuriloDalRi committed Jun 5, 2024
1 parent 796d9bc commit 5def533
Show file tree
Hide file tree
Showing 6 changed files with 6 additions and 6 deletions.
2 changes: 1 addition & 1 deletion app/domain/etl/feedex/processor.rb
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ def process

private

- BATCH_SIZE = 10_000
+ BATCH_SIZE = 5_000

def extract_events
batch = 1
Expand Down
2 changes: 1 addition & 1 deletion app/domain/etl/ga/internal_search_processor.rb
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ def extract_events
batch = 1
Etl::GA::InternalSearchService.find_in_batches(date:) do |events|
log process: :ga, message: "Processing #{events.length} events in batch #{batch}"
- Events::GA.import(events, batch_size: 10_000)
+ Events::GA.import(events, batch_size: 5_000)
batch += 1
end
end
Expand Down
2 changes: 1 addition & 1 deletion app/domain/etl/ga/user_feedback_processor.rb
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ def extract_events
batch = 1
Etl::GA::UserFeedbackService.find_in_batches(date:) do |events|
log process: :ga, message: "Processing #{events.length} events in batch #{batch}"
- Events::GA.import(events, batch_size: 10_000)
+ Events::GA.import(events, batch_size: 5_000)
batch += 1
end
end
Expand Down
2 changes: 1 addition & 1 deletion app/domain/etl/ga/views_and_navigation_processor.rb
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ def extract_events
batch = 1
Etl::GA::ViewsAndNavigationService.find_in_batches(date:) do |events|
log process: :ga, message: "Processing #{events.length} events in batch #{batch}"
- Events::GA.import(events, batch_size: 10_000)
+ Events::GA.import(events, batch_size: 5_000)
batch += 1
end
end
Expand Down
2 changes: 1 addition & 1 deletion app/domain/etl/main/metrics_processor.rb
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ def create_metrics
log process: :metrics, message: "about to get the Dimensions::Date"
dimensions_date = Dimensions::Date.find_existing_or_create(date)
log process: :metrics, message: "got the Dimensions::Date"
- Dimensions::Edition.live.find_in_batches(batch_size: 10_000)
+ Dimensions::Edition.live.find_in_batches(batch_size: 5_000)
.with_index do |batch, index|
log process: :metrics, message: "processing #{batch.length} items in batch #{index}"
values = batch.pluck(:id).map do |value|
Expand Down
2 changes: 1 addition & 1 deletion spec/integration/master/daily_metrics_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -170,7 +170,7 @@ def stub_feedex_response
'page_size': 3,
}.to_json

- stub_request(:get, "http://support-api.dev.gov.uk/feedback-by-day/#{yesterday}?page=1&per_page=10000")
+ stub_request(:get, "http://support-api.dev.gov.uk/feedback-by-day/#{yesterday}?page=1&per_page=5000")
.to_return(status: 200, body: response, headers: {})
end
end

0 comments on commit 5def533

Please sign in to comment.