From 44b1c4529f6eb8ded046f77c169756e1b29c283d Mon Sep 17 00:00:00 2001 From: Peter Mangiafico Date: Wed, 12 May 2021 11:25:37 -0700 Subject: [PATCH] add additional tests --- lib/agent/author_name.rb | 7 +- spec/lib/agent/author_name_spec.rb | 15 +++- spec/lib/web_of_science/query_author_spec.rb | 79 +++++++++++++++++--- 3 files changed, 82 insertions(+), 19 deletions(-) diff --git a/lib/agent/author_name.rb b/lib/agent/author_name.rb index 25bad4fc1..5610c64ae 100644 --- a/lib/agent/author_name.rb +++ b/lib/agent/author_name.rb @@ -50,12 +50,7 @@ def full_name # an 'or' conjuction is likely to generate results that mostly match this variant, # but additional variants might add something when using an 'ExactMatch' search. # @return [String] name(s) to be queried in an OR (disjunction) query - def text_search_query - text_search_terms.map { |x| "\"#{x}\"" }.join(' or ') - end - - def text_search_terms(options = {}) - use_first_initial = options[:use_first_initial] || true + def text_search_terms(use_first_initial: true) @text_search_terms ||= [first_name_query(use_first_initial), middle_name_query(use_first_initial)].flatten.reject(&:empty?).uniq end diff --git a/spec/lib/agent/author_name_spec.rb b/spec/lib/agent/author_name_spec.rb index 88aefd528..a13278837 100644 --- a/spec/lib/agent/author_name_spec.rb +++ b/spec/lib/agent/author_name_spec.rb @@ -120,10 +120,21 @@ end describe '#text_search_terms' do - it 'includes first_name_query and middle_name_query elements' do + it 'includes first_name_query and middle_name_query elements when first initial is unique' do fnames = all_names.send(:first_name_query, true) mnames = all_names.send(:middle_name_query, true) - expect(all_names.text_search_terms).to include(*fnames, *mnames) + expect(fnames.size).to eq 2 # two name variants, full first name plus first initial + expect(mnames.size).to eq 4 # four name variants, which include middle name and middle initial variants + expect(all_names.text_search_terms).to 
include(*fnames, *mnames) # default is to use first initial, this verifies + expect(all_names.text_search_terms(use_first_initial: true)).to include(*fnames, *mnames) + end + + it 'includes first_name_query and middle_name_query elements when first initial is not unique' do + fnames = all_names.send(:first_name_query, false) + mnames = all_names.send(:middle_name_query, false) + expect(fnames.size).to eq 1 # only one name variant with only full first name (i.e. no first initial) + expect(mnames.size).to eq 2 # two name variants, includes full middle name and middle initial + expect(all_names.text_search_terms(use_first_initial: false)).to include(*fnames, *mnames) end end diff --git a/spec/lib/web_of_science/query_author_spec.rb b/spec/lib/web_of_science/query_author_spec.rb index 3a8eea183..94a608e2a 100644 --- a/spec/lib/web_of_science/query_author_spec.rb +++ b/spec/lib/web_of_science/query_author_spec.rb @@ -13,6 +13,9 @@ let(:blank_author) { create :author, :blank_first_name } let(:names) { query_author.send(:names) } + let(:alternate_identity) { create :author_identity } # this creates the associated author as well + let(:alternate_author_identity) { alternate_identity.author } + # avoid caching Savon client across examples (affects VCR) before { allow(WebOfScience).to receive(:client).and_return(WebOfScience::Client.new(Settings.WOS.AUTH_CODE)) } @@ -121,21 +124,75 @@ end end - context 'for a single alternate identity with invalid data' do + context 'for a single alternate identity' do + let(:alt_last_name) { alternate_identity.last_name } + let(:alt_first_name) { alternate_identity.first_name } + let(:alt_middle_name) { alternate_identity.middle_name } + describe '#names' do - let(:author_one_identity) { create :author } - let(:bad_alternate_identity) { create :author_identity } + context 'with invalid data and ambiguous first name' do + it 'ignores the bad alternate identity data and first initial variants' do + alternate_identity.update(first_name: '.', 
institution: 'Example') + expect(alternate_author_identity.unique_first_initial?).to be false # because of a non-Stanford alternate identity + expect(alternate_author_identity.author_identities.first.first_name).to eq '.' # bad first name + # we do not get the name variant with the period for a first name (i.e. the alternate identity) + # nor do we get first initial variants because of the ambiguous first initial + # (we would have more if we allowed the bad name variant and the ambiguous first initial) + expect(described_class.new(alternate_author_identity).send(:names)).to match_array %w[Edler,Alice + Edler,Alice,Jim + Edler,Alice,J] + end + end + + context 'with invalid data and non-ambiguous first name' do + it 'ignores the bad alternate identity data but includes first initial variants' do + alternate_identity.update(first_name: '.', institution: 'Stanford') + expect(alternate_author_identity.unique_first_initial?).to be true # because alternate identity is Stanford and unique + expect(alternate_author_identity.author_identities.first.first_name).to eq '.' # bad first name + # we do not get the name variant with the period for a first name (i.e. 
no alternate identity) + expect(described_class.new(alternate_author_identity).send(:names)).to match_array %w[Edler,Alice + Edler,A + Edler,Alice,Jim + Edler,Alice,J + Edler,AJ + Edler,A,J] + end + end - before do - bad_alternate_identity.update(first_name: '.') - author_one_identity.author_identities << bad_alternate_identity + context 'with valid data and ambiguous first name' do + it 'ignores the first initial variants' do + alternate_identity.update(first_name: 'Sam', institution: 'Example') + expect(alternate_author_identity.unique_first_initial?).to be false # because of a non-Stanford alternate identity + # we do not get first initial variants because of the ambiguous first initial + # but we do get the other variants with the alternate identity + # (we would have more if we allowed the ambiguous first initial) + expect(described_class.new(alternate_author_identity).send(:names)).to match_array ['Edler,Alice', + 'Edler,Alice,Jim', + 'Edler,Alice,J', + "#{alt_last_name},#{alt_first_name}", + "#{alt_last_name},#{alt_first_name},#{alt_middle_name}", + "#{alt_last_name},#{alt_first_name},#{alt_middle_name[0]}"] + end end - it 'ignores the bad alternate identity data' do - expect(author_one_identity.author_identities.first.first_name).to eq '.' 
# bad first name - # we do not get the name variant with the period for a first name (we would have more if we allowed the bad name variant) - expect(described_class.new(author_one_identity).send(:names)).to eq %w[Edler,Alice Edler,A Edler,Alice,Jim - Edler,Alice,J Edler,AJ Edler,A,J] + context 'with valid data and non-ambiguous first name' do + it 'includes all name variants' do + alternate_identity.update(first_name: 'Alice2', institution: 'Stanford') + expect(alternate_author_identity.unique_first_initial?).to be true # because alternate identity is Stanford and unique + # we get all variants with first initials and also the alternate identity + expect(described_class.new(alternate_author_identity).send(:names)).to match_array ['Edler,Alice', + 'Edler,A', + 'Edler,Alice,Jim', + 'Edler,Alice,J', + 'Edler,AJ', + 'Edler,A,J', + "#{alt_last_name},#{alt_first_name}", + "#{alt_last_name},#{alt_first_name[0]}", + "#{alt_last_name},#{alt_first_name},#{alt_middle_name}", + "#{alt_last_name},#{alt_first_name},#{alt_middle_name[0]}", + "#{alt_last_name},#{alt_first_name[0]}#{alt_middle_name[0]}", + "#{alt_last_name},#{alt_first_name[0]},#{alt_middle_name[0]}"] + end end end end