diff --git a/.env.development b/.env.development index e6658e35..aa040ced 100644 --- a/.env.development +++ b/.env.development @@ -21,3 +21,7 @@ API_AUTH_SECRET_REVIEW=foobar PORT=3003 # Or set a development host for the app # DEVELOPMENT_HOST='laa-criminal-applications-datastore.test' + +# If set, enables prometheus middleware and server +# ENABLE_PROMETHEUS_EXPORTER=true +# PROMETHEUS_EXPORTER_VERBOSE=false diff --git a/Gemfile b/Gemfile index 42be23a5..591957ba 100644 --- a/Gemfile +++ b/Gemfile @@ -12,6 +12,9 @@ gem 'grape-entity', '~> 0.10.2' gem 'grape_logging' gem 'kaminari-activerecord' +# Monitoring +gem 'prometheus_exporter' + # Exceptions notifications gem 'sentry-rails' gem 'sentry-ruby' diff --git a/Gemfile.lock b/Gemfile.lock index 0ac387db..5a430ebf 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -10,67 +10,67 @@ GIT GEM remote: https://rubygems.org/ specs: - actioncable (7.0.5) - actionpack (= 7.0.5) - activesupport (= 7.0.5) + actioncable (7.0.5.1) + actionpack (= 7.0.5.1) + activesupport (= 7.0.5.1) nio4r (~> 2.0) websocket-driver (>= 0.6.1) - actionmailbox (7.0.5) - actionpack (= 7.0.5) - activejob (= 7.0.5) - activerecord (= 7.0.5) - activestorage (= 7.0.5) - activesupport (= 7.0.5) + actionmailbox (7.0.5.1) + actionpack (= 7.0.5.1) + activejob (= 7.0.5.1) + activerecord (= 7.0.5.1) + activestorage (= 7.0.5.1) + activesupport (= 7.0.5.1) mail (>= 2.7.1) net-imap net-pop net-smtp - actionmailer (7.0.5) - actionpack (= 7.0.5) - actionview (= 7.0.5) - activejob (= 7.0.5) - activesupport (= 7.0.5) + actionmailer (7.0.5.1) + actionpack (= 7.0.5.1) + actionview (= 7.0.5.1) + activejob (= 7.0.5.1) + activesupport (= 7.0.5.1) mail (~> 2.5, >= 2.5.4) net-imap net-pop net-smtp rails-dom-testing (~> 2.0) - actionpack (7.0.5) - actionview (= 7.0.5) - activesupport (= 7.0.5) + actionpack (7.0.5.1) + actionview (= 7.0.5.1) + activesupport (= 7.0.5.1) rack (~> 2.0, >= 2.2.4) rack-test (>= 0.6.3) rails-dom-testing (~> 2.0) rails-html-sanitizer (~> 1.0, >= 
1.2.0) - actiontext (7.0.5) - actionpack (= 7.0.5) - activerecord (= 7.0.5) - activestorage (= 7.0.5) - activesupport (= 7.0.5) + actiontext (7.0.5.1) + actionpack (= 7.0.5.1) + activerecord (= 7.0.5.1) + activestorage (= 7.0.5.1) + activesupport (= 7.0.5.1) globalid (>= 0.6.0) nokogiri (>= 1.8.5) - actionview (7.0.5) - activesupport (= 7.0.5) + actionview (7.0.5.1) + activesupport (= 7.0.5.1) builder (~> 3.1) erubi (~> 1.4) rails-dom-testing (~> 2.0) rails-html-sanitizer (~> 1.1, >= 1.2.0) - activejob (7.0.5) - activesupport (= 7.0.5) + activejob (7.0.5.1) + activesupport (= 7.0.5.1) globalid (>= 0.3.6) - activemodel (7.0.5) - activesupport (= 7.0.5) - activerecord (7.0.5) - activemodel (= 7.0.5) - activesupport (= 7.0.5) - activestorage (7.0.5) - actionpack (= 7.0.5) - activejob (= 7.0.5) - activerecord (= 7.0.5) - activesupport (= 7.0.5) + activemodel (7.0.5.1) + activesupport (= 7.0.5.1) + activerecord (7.0.5.1) + activemodel (= 7.0.5.1) + activesupport (= 7.0.5.1) + activestorage (7.0.5.1) + actionpack (= 7.0.5.1) + activejob (= 7.0.5.1) + activerecord (= 7.0.5.1) + activesupport (= 7.0.5.1) marcel (~> 1.0) mini_mime (>= 1.1.0) - activesupport (7.0.5) + activesupport (7.0.5.1) concurrent-ruby (~> 1.0, >= 1.0.2) i18n (>= 1.6, < 2) minitest (>= 5.1) @@ -79,16 +79,16 @@ GEM public_suffix (>= 2.0.2, < 6.0) ast (2.4.2) aws-eventstream (1.2.0) - aws-partitions (1.771.0) - aws-sdk-core (3.173.1) + aws-partitions (1.782.0) + aws-sdk-core (3.176.0) aws-eventstream (~> 1, >= 1.0.2) aws-partitions (~> 1, >= 1.651.0) aws-sigv4 (~> 1.5) jmespath (~> 1, >= 1.6.1) - aws-sdk-sns (1.60.0) - aws-sdk-core (~> 3, >= 3.165.0) + aws-sdk-sns (1.63.0) + aws-sdk-core (~> 3, >= 3.176.0) aws-sigv4 (~> 1.1) - aws-sigv4 (1.5.2) + aws-sigv4 (1.6.0) aws-eventstream (~> 1, >= 1.0.2) brakeman (6.0.0) builder (3.2.4) @@ -143,21 +143,22 @@ GEM grape rack hashdiff (1.0.1) - i18n (1.13.0) + i18n (1.14.1) concurrent-ruby (~> 1.0) ice_nine (0.11.2) io-console (0.6.0) - irb (1.6.4) + irb (1.7.0) 
reline (>= 0.3.0) jmespath (1.6.2) json (2.6.3) json-schema (4.0.0) addressable (>= 2.8) - jwt (2.7.0) + jwt (2.7.1) kaminari-activerecord (1.2.2) activerecord kaminari-core (= 1.2.2) kaminari-core (1.2.2) + language_server-protocol (3.17.0.3) loofah (2.21.3) crass (~> 1.0.2) nokogiri (>= 1.12.0) @@ -170,7 +171,7 @@ GEM method_source (1.0.0) mini_mime (1.1.2) mini_portile2 (2.8.2) - minitest (5.18.0) + minitest (5.18.1) moj-simple-jwt-auth (0.1.0) json jwt @@ -179,7 +180,7 @@ GEM ruby2_keywords (~> 0.0.1) mustermann-grape (1.0.2) mustermann (>= 1.0.0) - net-imap (0.3.4) + net-imap (0.3.6) date net-protocol net-pop (0.1.2) @@ -193,52 +194,55 @@ GEM mini_portile2 (~> 2.8.2) racc (~> 1.4) parallel (1.23.0) - parser (3.2.2.1) + parser (3.2.2.3) ast (~> 2.4.1) + racc pg (1.5.3) + prometheus_exporter (2.0.8) + webrick pry (0.14.2) coderay (~> 1.1) method_source (~> 1.0) public_suffix (5.0.1) - puma (6.2.2) + puma (6.3.0) nio4r (~> 2.0) - racc (1.6.2) + racc (1.7.1) rack (2.2.7) rack-accept (0.4.5) rack (>= 0.4) rack-test (2.1.0) rack (>= 1.3) - rails (7.0.5) - actioncable (= 7.0.5) - actionmailbox (= 7.0.5) - actionmailer (= 7.0.5) - actionpack (= 7.0.5) - actiontext (= 7.0.5) - actionview (= 7.0.5) - activejob (= 7.0.5) - activemodel (= 7.0.5) - activerecord (= 7.0.5) - activestorage (= 7.0.5) - activesupport (= 7.0.5) + rails (7.0.5.1) + actioncable (= 7.0.5.1) + actionmailbox (= 7.0.5.1) + actionmailer (= 7.0.5.1) + actionpack (= 7.0.5.1) + actiontext (= 7.0.5.1) + actionview (= 7.0.5.1) + activejob (= 7.0.5.1) + activemodel (= 7.0.5.1) + activerecord (= 7.0.5.1) + activestorage (= 7.0.5.1) + activesupport (= 7.0.5.1) bundler (>= 1.15.0) - railties (= 7.0.5) + railties (= 7.0.5.1) rails-dom-testing (2.0.3) activesupport (>= 4.2.0) nokogiri (>= 1.6) rails-html-sanitizer (1.6.0) loofah (~> 2.21) nokogiri (~> 1.14) - railties (7.0.5) - actionpack (= 7.0.5) - activesupport (= 7.0.5) + railties (7.0.5.1) + actionpack (= 7.0.5.1) + activesupport (= 7.0.5.1) method_source 
rake (>= 12.2) thor (~> 1.0) zeitwerk (~> 2.5) rainbow (3.1.1) rake (13.0.6) - regexp_parser (2.8.0) - reline (0.3.4) + regexp_parser (2.8.1) + reline (0.3.5) io-console (~> 0.5) rexml (3.2.5) rspec-core (3.12.2) @@ -249,7 +253,7 @@ GEM rspec-mocks (3.12.5) diff-lcs (>= 1.2.0, < 2.0) rspec-support (~> 3.12.0) - rspec-rails (6.0.2) + rspec-rails (6.0.3) actionpack (>= 6.1) activesupport (>= 6.1) railties (>= 6.1) @@ -257,18 +261,19 @@ GEM rspec-expectations (~> 3.12) rspec-mocks (~> 3.12) rspec-support (~> 3.12) - rspec-support (3.12.0) - rubocop (1.51.0) + rspec-support (3.12.1) + rubocop (1.53.1) json (~> 2.3) + language_server-protocol (>= 3.17.0) parallel (~> 1.10) - parser (>= 3.2.0.0) + parser (>= 3.2.2.3) rainbow (>= 2.2.2, < 4.0) regexp_parser (>= 1.8, < 3.0) rexml (>= 3.2.5, < 4.0) rubocop-ast (>= 1.28.0, < 2.0) ruby-progressbar (~> 1.7) unicode-display_width (>= 2.4.0, < 3.0) - rubocop-ast (1.28.1) + rubocop-ast (1.29.0) parser (>= 3.2.1.0) rubocop-capybara (2.18.0) rubocop (~> 1.41) @@ -277,7 +282,7 @@ GEM rubocop-performance (1.18.0) rubocop (>= 1.7.0, < 2.0) rubocop-ast (>= 0.4.0) - rubocop-rails (2.19.1) + rubocop-rails (2.20.2) activesupport (>= 4.2.0) rack (>= 1.1) rubocop (>= 1.33.0, < 2.0) @@ -299,7 +304,7 @@ GEM simplecov-html (0.12.3) simplecov_json_formatter (0.1.4) thor (1.2.2) - timeout (0.3.2) + timeout (0.4.0) tzinfo (2.0.6) concurrent-ruby (~> 1.0) unicode-display_width (2.4.2) @@ -307,6 +312,7 @@ GEM addressable (>= 2.8.0) crack (>= 0.3.2) hashdiff (>= 0.4.0, < 2.0.0) + webrick (1.8.1) websocket-driver (0.7.5) websocket-extensions (>= 0.1.0) websocket-extensions (0.1.5) @@ -327,6 +333,7 @@ DEPENDENCIES laa-criminal-legal-aid-schemas! 
moj-simple-jwt-auth (= 0.1.0) pg (~> 1.4) + prometheus_exporter pry puma rails (~> 7.0.4) diff --git a/app/api/datastore/v1/searching.rb b/app/api/datastore/v1/searching.rb index 825c59b5..4a95bcf9 100644 --- a/app/api/datastore/v1/searching.rb +++ b/app/api/datastore/v1/searching.rb @@ -16,13 +16,13 @@ class Searching < Base optional( :status, - type: Array[String], + type: [String], values: Types::APPLICATION_STATUSES ) optional( :review_status, - type: Array[String], + type: [String], values: Types::REVIEW_APPLICATION_STATUSES ) diff --git a/app/lib/prometheus_metrics/collectors.rb b/app/lib/prometheus_metrics/collectors.rb new file mode 100644 index 00000000..002fac91 --- /dev/null +++ b/app/lib/prometheus_metrics/collectors.rb @@ -0,0 +1,4 @@ +module PrometheusMetrics + module Collectors + end +end diff --git a/app/lib/prometheus_metrics/configuration.rb b/app/lib/prometheus_metrics/configuration.rb new file mode 100644 index 00000000..6e4bec25 --- /dev/null +++ b/app/lib/prometheus_metrics/configuration.rb @@ -0,0 +1,77 @@ +module PrometheusMetrics + module Configuration + require 'prometheus_exporter/server' + require_relative 'collectors' + + DEFAULT_PREFIX = 'ruby_'.freeze + SERVER_BINDING_HOST = '0.0.0.0'.freeze + SERVER_BINDING_PORT = 9394 + + CUSTOM_COLLECTORS = [ + # Add custom collector classes here + ].freeze + + # :nocov: + def self.should_configure? + return false if ENV.key?('SKIP_PROMETHEUS_EXPORTER') + + # For now we only initialise prometheus exporter on servers + # In the future this may change to also support workers + return false unless defined?(Rails) && + (Rails.const_defined?('Rails::Server') || File.basename($PROGRAM_NAME) == 'puma') + + ENV.fetch('ENABLE_PROMETHEUS_EXPORTER', 'false').inquiry.true? 
+ end + + # We are running puma in single process mode, so this is safe + # If we move to multi process mode, we will have to run the + # exporter process separately (`bundle exec prometheus_exporter`) + def self.start_server + server = PrometheusExporter::Server::WebServer.new( + bind: SERVER_BINDING_HOST, port: SERVER_BINDING_PORT, + verbose: ENV.fetch('PROMETHEUS_EXPORTER_VERBOSE', 'false').inquiry.true? + ) + + # Register any custom collectors + CUSTOM_COLLECTORS.each { |klass| server.collector.register_collector(klass.new) } + + server.start + + true + rescue Errno::EADDRINUSE + warn "[PrometheusExporter] Server port `#{SERVER_BINDING_PORT}` already in use." + false + end + + def self.configure + return unless should_configure? + return unless start_server + + require 'prometheus_exporter/instrumentation' + require_relative 'grape_middleware' + + Rails.logger.info '[PrometheusExporter] Initialising instrumentation middleware...' + + # Metrics will be prefixed, for example `ruby_http_requests_total` + PrometheusExporter::Metric::Base.default_prefix = DEFAULT_PREFIX + + # This reports stats per request like HTTP status and timings + # NOTE: as this is a Grape application, some custom labels are required + # so we implemented a custom middleware just for that + Rails.application.middleware.unshift PrometheusMetrics::GrapeMiddleware + + # This reports basic process stats like RSS and GC info, type master + # means it is instrumenting the master process + PrometheusExporter::Instrumentation::Process.start(type: 'master') + + # NOTE: if running Puma in cluster mode, the following + # instrumentation will need to be changed + PrometheusExporter::Instrumentation::Puma.start unless PrometheusExporter::Instrumentation::Puma.started? 
+ + # NOTE: if running Puma in cluster mode, the following + # instrumentation will need to be changed + PrometheusExporter::Instrumentation::ActiveRecord.start + end + # :nocov: + end +end diff --git a/app/lib/prometheus_metrics/grape_middleware.rb b/app/lib/prometheus_metrics/grape_middleware.rb new file mode 100644 index 00000000..e7bdb708 --- /dev/null +++ b/app/lib/prometheus_metrics/grape_middleware.rb @@ -0,0 +1,21 @@ +module PrometheusMetrics + require 'prometheus_exporter/middleware' + + class GrapeMiddleware < PrometheusExporter::Middleware + def custom_labels(env) + return unless env['api.endpoint'] + + api_version = env['api.version'] || 'n/a' + api_namespace = env['api.endpoint'].namespace + api_method = env['api.endpoint'].options[:method].first + + { + api_version:, + api_namespace:, + api_method:, + } + rescue StandardError + nil + end + end +end diff --git a/config/initializers/prometheus_exporter.rb b/config/initializers/prometheus_exporter.rb new file mode 100644 index 00000000..571a69a3 --- /dev/null +++ b/config/initializers/prometheus_exporter.rb @@ -0,0 +1,3 @@ +require 'prometheus_metrics/configuration' + +PrometheusMetrics::Configuration.configure diff --git a/config/kubernetes/production/config_map.yml b/config/kubernetes/production/config_map.yml index c41e9f79..5dc7e6b0 100644 --- a/config/kubernetes/production/config_map.yml +++ b/config/kubernetes/production/config_map.yml @@ -8,5 +8,6 @@ data: RACK_ENV: production RAILS_ENV: production RAILS_SERVE_STATIC_FILES: enabled + ENABLE_PROMETHEUS_EXPORTER: "true" # Datastore is accessed via local cluster networking (no SSL) DISABLE_HTTPS: enabled diff --git a/config/kubernetes/production/deployment.tpl b/config/kubernetes/production/deployment.tpl index 4c0d16fd..32308d13 100644 --- a/config/kubernetes/production/deployment.tpl +++ b/config/kubernetes/production/deployment.tpl @@ -26,6 +26,7 @@ spec: imagePullPolicy: Always ports: - containerPort: 3000 + - containerPort: 9394 resources: 
requests: cpu: 25m diff --git a/config/kubernetes/production/service.yml b/config/kubernetes/production/service.yml index 227ce9c7..51eaa683 100644 --- a/config/kubernetes/production/service.yml +++ b/config/kubernetes/production/service.yml @@ -12,3 +12,18 @@ spec: targetPort: 3000 selector: app: laa-criminal-applications-datastore-web-production +--- +apiVersion: v1 +kind: Service +metadata: + name: prometheus-service-production + namespace: laa-criminal-applications-datastore-production + labels: + app: laa-criminal-applications-datastore-web-production +spec: + ports: + - port: 9394 + name: metrics + targetPort: 9394 + selector: + app: laa-criminal-applications-datastore-web-production diff --git a/config/kubernetes/staging/config_map.yml b/config/kubernetes/staging/config_map.yml index 033ad367..19b1e060 100644 --- a/config/kubernetes/staging/config_map.yml +++ b/config/kubernetes/staging/config_map.yml @@ -8,5 +8,6 @@ data: RACK_ENV: production RAILS_ENV: production RAILS_SERVE_STATIC_FILES: enabled + ENABLE_PROMETHEUS_EXPORTER: "true" # Datastore is accessed via local cluster networking (no SSL) DISABLE_HTTPS: enabled diff --git a/config/kubernetes/staging/deployment.tpl b/config/kubernetes/staging/deployment.tpl index d0c82897..def6deec 100644 --- a/config/kubernetes/staging/deployment.tpl +++ b/config/kubernetes/staging/deployment.tpl @@ -26,6 +26,7 @@ spec: imagePullPolicy: Always ports: - containerPort: 3000 + - containerPort: 9394 resources: requests: cpu: 25m diff --git a/config/kubernetes/staging/prometheus.yml b/config/kubernetes/staging/prometheus.yml index 94a9e1e3..d03afbd1 100644 --- a/config/kubernetes/staging/prometheus.yml +++ b/config/kubernetes/staging/prometheus.yml @@ -81,3 +81,12 @@ spec: annotations: message: Dead letter queue `{{ $labels.queue_name }}` has {{ $value }} message(s). 
dashboard_url: https://grafana.live.cloud-platform.service.justice.gov.uk/d/AWSSQS000/aws-sqs?orgId=1&from=now-24h&to=now&var-datasource=Cloudwatch&var-region=default&var-queue={{ $labels.queue_name }} + + - alert: CrimeApplyDatastore-PrometheusExporterFailure + expr: >- + ruby_collector_working{namespace=~"^laa-criminal-applications-datastore.*"} != 1 + for: 30m + labels: + severity: laa-crime-apply-alerts + annotations: + message: Prometheus exporter not working in pod `{{ $labels.pod }}` for more than 30m. diff --git a/config/kubernetes/staging/service.yml b/config/kubernetes/staging/service.yml index 2569f254..cc92f77b 100644 --- a/config/kubernetes/staging/service.yml +++ b/config/kubernetes/staging/service.yml @@ -12,3 +12,18 @@ spec: targetPort: 3000 selector: app: laa-criminal-applications-datastore-web-staging +--- +apiVersion: v1 +kind: Service +metadata: + name: prometheus-service-staging + namespace: laa-criminal-applications-datastore-staging + labels: + app: laa-criminal-applications-datastore-web-staging +spec: + ports: + - port: 9394 + name: metrics + targetPort: 9394 + selector: + app: laa-criminal-applications-datastore-web-staging diff --git a/docker-compose.yml b/docker-compose.yml index 6dab9e6b..442a9ed8 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -6,7 +6,7 @@ version: '3.4' x-common-variables: &common-variables - ENV_NAME: staging + ENV_NAME: production RACK_ENV: production RAILS_ENV: production SECRET_KEY_BASE: 90957b5f6dab71710443434b3541698a2017f7dd01d9a19acae5b0fc8a64b3a6 @@ -35,9 +35,11 @@ services: PORT: 3003 DISABLE_HTTPS: "1" RAILS_SERVE_STATIC_FILES: "1" + ENABLE_PROMETHEUS_EXPORTER: "true" API_AUTH_SECRET_APPLY: foobar API_AUTH_SECRET_REVIEW: foobar ports: - - "3003:3003" + - "3003:3003" # puma server (rails app) + - "9394:9394" # prometheus exporter `/metrics` endpoint depends_on: - db diff --git a/run.sh b/run.sh index 1fecb61e..0ca66824 100755 --- a/run.sh +++ b/run.sh @@ -1,5 +1,5 @@ #!/bin/sh -cd /usr/src/app 
+cd /usr/src/app 2> /dev/null bundle exec bin/rails db:prepare bundle exec pumactl -F config/puma.rb start diff --git a/spec/lib/prometheus_metrics/grape_middleware_spec.rb b/spec/lib/prometheus_metrics/grape_middleware_spec.rb new file mode 100644 index 00000000..3484b7b0 --- /dev/null +++ b/spec/lib/prometheus_metrics/grape_middleware_spec.rb @@ -0,0 +1,46 @@ +require 'rails_helper' + +describe PrometheusMetrics::GrapeMiddleware do + describe '#custom_labels' do + subject { described_class.new(nil, { instrument: nil }).custom_labels(env) } + + context 'when it is an API request' do + let(:env) do + { + 'api.endpoint' => api_endpoint, + 'api.version' => api_version, + } + end + + # rubocop:disable RSpec/VerifiedDoubles + let(:api_endpoint) do + double('api.endpoint', namespace: '/applications', options: { method: ['GET'] }) + end + # rubocop:enable RSpec/VerifiedDoubles + + context 'when there is no version' do + let(:api_version) { nil } + + it { expect(subject).to eq({ api_method: 'GET', api_namespace: '/applications', api_version: 'n/a' }) } + end + + context 'when there is version' do + let(:api_version) { 'v1' } + + it { expect(subject).to eq({ api_method: 'GET', api_namespace: '/applications', api_version: 'v1' }) } + end + end + + context 'when it is not an API request' do + let(:env) { { 'api.endpoint' => nil } } + + it { expect(subject).to be_nil } + end + + context 'when something blows up' do + let(:env) { { 'api.endpoint' => { foo: :bar } } } + + it { expect(subject).to be_nil } + end + end +end