diff --git a/app/models/detector/suggested_resource.rb b/app/models/detector/suggested_resource.rb index 3583297..11bd296 100644 --- a/app/models/detector/suggested_resource.rb +++ b/app/models/detector/suggested_resource.rb @@ -22,13 +22,20 @@ module Detector class SuggestedResource < ApplicationRecord before_save :update_fingerprint + # This exists for the before_save lifecycle hook to call the calculate_fingerprint method, to ensure that these + # records always have a correctly-calculated fingerprint. It takes no arguments and its return value is not used. def update_fingerprint - self.fingerprint = calculate_fingerprint(phrase) + self.fingerprint = Detector::SuggestedResource.calculate_fingerprint(phrase) end # This implements the OpenRefine fingerprinting algorithm. See # https://openrefine.org/docs/technical-reference/clustering-in-depth#fingerprint - def calculate_fingerprint(old_phrase) + # + # @param old_phrase [String] A text string which needs to have its fingerprint calculated. This could either be the + # "phrase" field on the SuggestedResource record, or an incoming search term received from a contributing system. + # + # @return [String] A string of all words in the input, downcased, normalized, and alphabetized. + def self.calculate_fingerprint(old_phrase) modified_phrase = old_phrase modified_phrase = modified_phrase.strip modified_phrase = modified_phrase.downcase @@ -76,5 +83,18 @@ def self.bulk_replace(input) record.save end end + + # Identify any SuggestedResource record whose pre-calculated fingerprint matches the fingerprint of the incoming + # phrase. + # + # @note There is a uniqueness constraint on the SuggestedResource fingerprint field, so there should only ever be + # one match (if any). + # + # @param phrase [String] A string representation of a search term (not an actual Term object) + # + # @return [ActiveRecord::Relation<Detector::SuggestedResource>] The record(s) whose fingerprint matches that of the search term; empty if none match. 
+ def self.full_term_match(phrase) + SuggestedResource.where(fingerprint: calculate_fingerprint(phrase)) + end end end diff --git a/app/models/metrics/algorithms.rb b/app/models/metrics/algorithms.rb index 2ba9b24..d3e543d 100644 --- a/app/models/metrics/algorithms.rb +++ b/app/models/metrics/algorithms.rb @@ -45,6 +45,7 @@ def generate(month = nil) end Metrics::Algorithms.create(month:, doi: matches[:doi], issn: matches[:issn], isbn: matches[:isbn], pmid: matches[:pmid], journal_exact: matches[:journal_exact], + suggested_resource_exact: matches[:suggested_resource_exact], unmatched: matches[:unmatched]) end @@ -73,8 +74,9 @@ def count_matches(events) def event_matches(event, matches) ids = match_standard_identifiers(event, matches) journal_exact = process_journals(event, matches) + suggested_resource_exact = process_suggested_resources(event, matches) - matches[:unmatched] += 1 if ids.identifiers.blank? && journal_exact.count.zero? + matches[:unmatched] += 1 if ids.identifiers.blank? && journal_exact.count.zero? && suggested_resource_exact.count.zero? end # Checks for StandardIdentifer matches @@ -107,5 +109,21 @@ def process_journals(event, matches) matches[:journal_exact] += 1 if journal_exact.count.positive? journal_exact end + + # Checks for SuggestedResource matches + # + # @note This only checks for exact matches of the search term, so any extra or missing words will result in no + # match. + # + # @param event [SearchEvent] an individual search event to check for matches + # @param matches [Hash] a Hash that keeps track of how many of each algorithm we match + # @return [ActiveRecord::Relation] a relation holding the one Detector::SuggestedResource record whose fingerprint matches that of the + # search phrase (if one exists). The uniqueness constraint on the fingerprint should mean there is only ever one + # matched record. 
+ def process_suggested_resources(event, matches) + suggested_resource_exact = Detector::SuggestedResource.full_term_match(event.term.phrase) + matches[:suggested_resource_exact] += 1 if suggested_resource_exact.count.positive? + suggested_resource_exact + end end end diff --git a/db/migrate/20240813181057_add_suggested_resource_exact_to_metrics_algorithm.rb b/db/migrate/20240813181057_add_suggested_resource_exact_to_metrics_algorithm.rb new file mode 100644 index 0000000..7d6257c --- /dev/null +++ b/db/migrate/20240813181057_add_suggested_resource_exact_to_metrics_algorithm.rb @@ -0,0 +1,5 @@ +class AddSuggestedResourceExactToMetricsAlgorithm < ActiveRecord::Migration[7.1] + def change + add_column :metrics_algorithms, :suggested_resource_exact, :integer + end +end diff --git a/db/schema.rb b/db/schema.rb index a7d1c32..5be5861 100644 --- a/db/schema.rb +++ b/db/schema.rb @@ -10,7 +10,7 @@ # # It's strongly recommended that you check this file into your version control system. -ActiveRecord::Schema[7.1].define(version: 2024_08_05_203949) do +ActiveRecord::Schema[7.1].define(version: 2024_08_13_181057) do create_table "detector_journals", force: :cascade do |t| t.string "name" t.json "additional_info" @@ -40,6 +40,7 @@ t.datetime "created_at", null: false t.datetime "updated_at", null: false t.integer "journal_exact" + t.integer "suggested_resource_exact" end create_table "search_events", force: :cascade do |t| diff --git a/test/fixtures/detector/suggested_resources.yml b/test/fixtures/detector/suggested_resources.yml index 82c43a7..bf56056 100644 --- a/test/fixtures/detector/suggested_resources.yml +++ b/test/fixtures/detector/suggested_resources.yml @@ -29,3 +29,9 @@ web_of_knowledge: url: https://libguides.mit.edu/webofsci phrase: web of knowledge fingerprint: knowledge of web + +nobel_laureate: + title: Professor Moungi Bawendi + url: https://news.mit.edu/2023/mit-chemist-moungi-bawendi-shares-nobel-prize-chemistry-1004 + phrase: moungi bawendi + fingerprint: 
bawendi moungi diff --git a/test/fixtures/search_events.yml b/test/fixtures/search_events.yml index b99b4cb..168250f 100644 --- a/test/fixtures/search_events.yml +++ b/test/fixtures/search_events.yml @@ -38,3 +38,10 @@ old_month_nature_medicine: term: journal_nature_medicine source: test created_at: <%= 1.year.ago %> +suggested_resource_jstor: + term: suggested_resource_jstor + source: test +old_suggested_resource_jstor: + term: suggested_resource_jstor + source: test + created_at: <%= 1.year.ago %> diff --git a/test/fixtures/terms.yml b/test/fixtures/terms.yml index 121cf0f..ef2114e 100644 --- a/test/fixtures/terms.yml +++ b/test/fixtures/terms.yml @@ -28,3 +28,6 @@ isbn_9781319145446: journal_nature_medicine: phrase: 'nature medicine' + +suggested_resource_jstor: + phrase: 'jstor' diff --git a/test/models/detector/suggested_resource_test.rb b/test/models/detector/suggested_resource_test.rb index 57031b9..815f497 100644 --- a/test/models/detector/suggested_resource_test.rb +++ b/test/models/detector/suggested_resource_test.rb @@ -125,5 +125,31 @@ class SuggestedResourceTest < ActiveSupport::TestCase assert_equal 'delta gamma', resource.fingerprint end + + test 'fingerprint matches on search term' do + expected = detector_suggested_resources('jstor') + actual = Detector::SuggestedResource.full_term_match('jstor') + + assert_equal 1, actual.count + assert_equal expected, actual.first + end + + test 'fingerprint matches on any word order or punctuation' do + expected = detector_suggested_resources('nobel_laureate') + actual_one = Detector::SuggestedResource.full_term_match('Moungi Bawendi') + actual_two = Detector::SuggestedResource.full_term_match('Bawendi, Moungi') + + assert_equal 1, actual_one.count + assert_equal expected, actual_one.first + assert_equal actual_one.first, actual_two.first + end + + test 'partial fingerprint matches do not count' do + actual_partial = Detector::SuggestedResource.full_term_match('science web') + actual_extra = 
Detector::SuggestedResource.full_term_match('the web of science') + + assert_predicate actual_partial.count, :zero? + assert_predicate actual_extra.count, :zero? + end end end diff --git a/test/models/metrics/algorithms_test.rb b/test/models/metrics/algorithms_test.rb index 279a081..7c22377 100644 --- a/test/models/metrics/algorithms_test.rb +++ b/test/models/metrics/algorithms_test.rb @@ -49,6 +49,12 @@ class Algorithms < ActiveSupport::TestCase assert_equal 1, aggregate.journal_exact end + test 'suggested_resource exact counts are included in monthly aggregation' do + aggregate = Metrics::Algorithms.new.generate(DateTime.now) + + assert_equal 1, aggregate.suggested_resource_exact + end + test 'unmatched counts are included are included in monthly aggregation' do aggregate = Metrics::Algorithms.new.generate(DateTime.now) @@ -124,6 +130,12 @@ class Algorithms < ActiveSupport::TestCase assert_equal 2, aggregate.journal_exact end + test 'suggested_resource exact counts are included in total aggregation' do + aggregate = Metrics::Algorithms.new.generate + + assert_equal 2, aggregate.suggested_resource_exact + end + test 'unmatched counts are included are included in total aggregation' do aggregate = Metrics::Algorithms.new.generate @@ -159,6 +171,11 @@ class Algorithms < ActiveSupport::TestCase SearchEvent.create(term: terms(:journal_nature_medicine), source: 'test') end + suggested_resource_exact_count = rand(1...100) + suggested_resource_exact_count.times do + SearchEvent.create(term: terms(:suggested_resource_jstor), source: 'test') + end + unmatched_expected_count = rand(1...100) unmatched_expected_count.times do SearchEvent.create(term: terms(:hi), source: 'test') @@ -171,6 +188,7 @@ class Algorithms < ActiveSupport::TestCase assert_equal isbn_expected_count, aggregate.isbn assert_equal pmid_expected_count, aggregate.pmid assert_equal journal_exact_count, aggregate.journal_exact + assert_equal suggested_resource_exact_count, aggregate.suggested_resource_exact 
assert_equal unmatched_expected_count, aggregate.unmatched end end