Skip to content

Commit

Permalink
Reduce YAML load
Browse files Browse the repository at this point in the history
Before:
* Load on each resource
* Load/Dump on each resource if necessary

After:
* Load on each resource_uri_prefix
* Load/Dump on each resource if necessary
  • Loading branch information
sonots committed Sep 2, 2017
1 parent fff7919 commit ac15ccf
Show file tree
Hide file tree
Showing 4 changed files with 91 additions and 17 deletions.
2 changes: 1 addition & 1 deletion exe/triglav-agent-hdfs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ Triglav::Agent::Configuration.configure do |config|
# config.cli_class = Triglav::Agent::Hdfs::CLI
# config.setting_class = Triglav::Agent::Hdfs::Setting
# config.worker_module = Triglav::Agent::Hdfs::Worker
# config.processor_class = Triglav::Agent::Hdfs::Processor
config.processor_class = Triglav::Agent::Hdfs::Processor
config.monitor_class = Triglav::Agent::Hdfs::Monitor
config.connection_class = Triglav::Agent::Hdfs::Connection
end
Expand Down
1 change: 1 addition & 0 deletions lib/triglav/agent/hdfs.rb
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,4 @@ module Hdfs
require 'triglav/agent/hdfs/connection'
require 'triglav/agent/hdfs/version'
require 'triglav/agent/hdfs/monitor'
require 'triglav/agent/hdfs/processor'
30 changes: 14 additions & 16 deletions lib/triglav/agent/hdfs/monitor.rb
Original file line number Diff line number Diff line change
Expand Up @@ -16,50 +16,48 @@ class Monitor < Base::Monitor
# unit: 'daily', 'hourly', or 'singular'
# timezone: '+09:00'
# span_in_days: 32
def initialize(connection, resource_uri_prefix, resource)
# @param [Hash] last_modification_times for a resource
def initialize(connection, resource_uri_prefix, resource, last_modification_times)
@connection = connection
@resource_uri_prefix = resource_uri_prefix
@resource = resource
@status = Triglav::Agent::Status.new(resource_uri_prefix, resource.uri)
@last_modification_times = get_last_modification_times
@last_modification_times = get_last_modification_times(last_modification_times)
end

def process
unless resource_valid?
$logger.warn { "Broken resource: #{resource.to_s}" }
return nil
return [nil, nil]
end
$logger.debug { "Start process #{resource.uri}" }
started = Time.now
$logger.debug { "Start Monitor#process #{resource.uri}" }

events, new_last_modification_times = get_events

$logger.debug { "Finish process #{resource.uri}" }
elapsed = Time.now - started
$logger.debug { "Finish Monitor#process #{resource.uri} elapsed:#{elapsed.to_f}" }

return nil if events.nil? || events.empty?
yield(events) if block_given? # send_message
update_status_file(new_last_modification_times)
true
return [nil, nil] if events.nil? or events.empty?
[events, new_last_modification_times]
end

private

def get_events
new_last_modification_times = get_new_last_modification_times
latest_files = select_latest_files(new_last_modification_times)
new_last_modification_times[:max] = new_last_modification_times.values.max
events = build_events(latest_files)
[events, new_last_modification_times]
rescue => e
$logger.warn { "#{e.class} #{e.message} #{e.backtrace.join("\n ")}" }
nil
end

def update_status_file(last_modification_times)
last_modification_times[:max] = last_modification_times.values.max
@status.set(last_modification_times)
end

def get_last_modification_times
last_modification_times = @status.get || {}
def get_last_modification_times(last_modification_times)
last_modification_times ||= {}
# ToDo: want to remove accessing Status in Monitor class
max_last_modification_time = last_modification_times[:max] || @status.getsetnx([:max], $setting.debug? ? 0 : get_current_time)
removes = last_modification_times.keys - paths.keys
appends = paths.keys - last_modification_times.keys
Expand Down
75 changes: 75 additions & 0 deletions lib/triglav/agent/hdfs/processor.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
require 'triglav/agent/base/processor'

module Triglav::Agent
module Hdfs
class Processor < Base::Processor
def process
before_process
success_count = 0
consecutive_error_count = 0
Parallel.each(resources, parallel_opts) do |resource|
raise Parallel::Break if stopped?
events = nil
new_resource_statuses = nil
begin
@connection_pool.with do |connection|
resource_statuses = get_resource_statuses(resource)
monitor = monitor_class.new(
connection, resource_uri_prefix, resource, resource_statuses
)
events, new_resource_statuses = monitor.process
end
if events
$logger.info { "send_messages:#{events.map(&:to_hash).to_json}" }
@api_client_pool.with {|api_client| api_client.send_messages(events) }
end
@mutex.synchronize do
set_resource_statuses(new_resource_statuses, resource) if new_resource_statuses
success_count += 1
consecutive_error_count = 0
end
rescue => e
log_error(e)
$logger.info { "failed_events:#{events.map(&:to_hash).to_json}" } if events
@mutex.synchronize do
raise TooManyError if (consecutive_error_count += 1) > self.class.max_consecuitive_error_count
end
end
end
success_count
ensure
after_process
end

private

def before_process
super
started = Time.now
@resource_uri_prefix_statuses = Triglav::Agent::Status.new(resource_uri_prefix).get
elapsed = Time.now - started
$logger.info { "Read status #{resource_uri_prefix} #{elapsed.to_f}sec" }
@started = Time.now
$logger.info { "Start Processor#process #{resource_uri_prefix}" }
end

def after_process
super
elapsed = Time.now - @started
$logger.info { "Finish Processor#process #{resource_uri_prefix} elapsed:#{elapsed.to_f}" }
end

def get_resource_statuses(resource)
resource_statuses = @resource_uri_prefix_statuses[resource.uri.to_sym]
end

def set_resource_statuses(resource_statuses, resource)
started = Time.now
resource_status = Triglav::Agent::Status.new(resource_uri_prefix, resource.uri)
resource_status.set(resource_statuses)
elapsed = Time.now - started
$logger.info { "Store status resource:#{resource.uri} #{elapsed.to_f}sec" }
end
end
end
end

0 comments on commit ac15ccf

Please sign in to comment.