Skip to content

Commit

Permalink
add additional tests
Browse files Browse the repository at this point in the history
  • Loading branch information
peetucket committed Jun 8, 2021
1 parent 9d03b1b commit 44b1c45
Show file tree
Hide file tree
Showing 3 changed files with 82 additions and 19 deletions.
7 changes: 1 addition & 6 deletions lib/agent/author_name.rb
Original file line number Diff line number Diff line change
Expand Up @@ -50,12 +50,7 @@ def full_name
# an 'or' conjunction is likely to generate results that mostly match this variant,
# but additional variants might add something when using an 'ExactMatch' search.
# @return [String] name(s) to be queried in an OR (disjunction) query
def text_search_query
text_search_terms.map { |x| "\"#{x}\"" }.join(' or ')
end

def text_search_terms(options = {})
use_first_initial = options[:use_first_initial] || true
# Builds the de-duplicated list of non-empty name variants from
# first_name_query and middle_name_query.
#
# The result is memoized separately for each value of +use_first_initial+, so
# a call with one setting never returns the list that was cached for the
# other setting (a single `||=` cache would ignore the argument after the
# first call).
#
# @param use_first_initial [Boolean] passed through to first_name_query and
#   middle_name_query; defaults to true
# @return [Array<String>] unique, non-empty name variants
def text_search_terms(use_first_initial: true)
  @text_search_terms_by_option ||= {}
  @text_search_terms_by_option[use_first_initial] ||=
    [first_name_query(use_first_initial), middle_name_query(use_first_initial)].flatten.reject(&:empty?).uniq
end
Expand Down
15 changes: 13 additions & 2 deletions spec/lib/agent/author_name_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -120,10 +120,21 @@
end

describe '#text_search_terms' do
it 'includes first_name_query and middle_name_query elements' do
it 'includes first_name_query and middle_name_query elements when first initial is unique' do
fnames = all_names.send(:first_name_query, true)
mnames = all_names.send(:middle_name_query, true)
expect(all_names.text_search_terms).to include(*fnames, *mnames)
expect(fnames.size).to eq 2 # two name variants, full first name plus first initial
expect(mnames.size).to eq 4 # four name variants, which include middle name and middle initial variants
expect(all_names.text_search_terms).to include(*fnames, *mnames) # default is to use first initial, this verifies
expect(all_names.text_search_terms(use_first_initial: true)).to include(*fnames, *mnames)
end

it 'includes first_name_query and middle_name_query elements when first initial is not unique' do
fnames = all_names.send(:first_name_query, false)
mnames = all_names.send(:middle_name_query, false)
expect(fnames.size).to eq 1 # only one name variant with only full first name (i.e. no first initial)
expect(mnames.size).to eq 2 # two name variants, includes full middle name and middle initial
expect(all_names.text_search_terms(use_first_initial: false)).to include(*fnames, *mnames)
end
end

Expand Down
79 changes: 68 additions & 11 deletions spec/lib/web_of_science/query_author_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,9 @@
let(:blank_author) { create :author, :blank_first_name }
let(:names) { query_author.send(:names) }

let(:alternate_identity) { create :author_identity } # this creates the associated author as well
let(:alternate_author_identity) { alternate_identity.author }

# avoid caching Savon client across examples (affects VCR)
before { allow(WebOfScience).to receive(:client).and_return(WebOfScience::Client.new(Settings.WOS.AUTH_CODE)) }

Expand Down Expand Up @@ -121,21 +124,75 @@
end
end

context 'for a single alternate identity with invalid data' do
context 'for a single alternate identity' do
let(:alt_last_name) { alternate_identity.last_name }
let(:alt_first_name) { alternate_identity.first_name }
let(:alt_middle_name) { alternate_identity.middle_name }

describe '#names' do
let(:author_one_identity) { create :author }
let(:bad_alternate_identity) { create :author_identity }
context 'with invalid data and ambiguous first name' do
it 'ignores the bad alternate identity data and first initial variants' do
alternate_identity.update(first_name: '.', institution: 'Example')
expect(alternate_author_identity.unique_first_initial?).to be false # because of a non-Stanford alternate identity
expect(alternate_author_identity.author_identities.first.first_name).to eq '.' # bad first name
# we do not get the name variant with the period for a first name (i.e. the alternate identity)
# nor do we get first initial variants because of the ambiguous first initial
# (we would have more if we allowed the bad name variant and the ambiguous first initial)
expect(described_class.new(alternate_author_identity).send(:names)).to match_array %w[Edler,Alice
Edler,Alice,Jim
Edler,Alice,J]
end
end

context 'with invalid data and non-ambiguous first name' do
it 'ignores the bad alternate identity data but includes first initial variants' do
alternate_identity.update(first_name: '.', institution: 'Stanford')
expect(alternate_author_identity.unique_first_initial?).to be true # because alternate identity is Stanford and unique
expect(alternate_author_identity.author_identities.first.first_name).to eq '.' # bad first name
# we do not get the name variant with the period for a first name (i.e. no alternate identity)
expect(described_class.new(alternate_author_identity).send(:names)).to match_array %w[Edler,Alice
Edler,A
Edler,Alice,Jim
Edler,Alice,J
Edler,AJ
Edler,A,J]
end
end

before do
bad_alternate_identity.update(first_name: '.')
author_one_identity.author_identities << bad_alternate_identity
context 'with valid data and ambiguous first name' do
it 'ignores the first initial variants' do
alternate_identity.update(first_name: 'Sam', institution: 'Example')
expect(alternate_author_identity.unique_first_initial?).to be false # because of a non-Stanford alternate identity
# we do not get first initial variants because of the ambiguous first initial
# but we do get the other variants with the alternate identity
# (we would have more if we allowed first initial variants despite the ambiguous first initial)
expect(described_class.new(alternate_author_identity).send(:names)).to match_array ['Edler,Alice',
'Edler,Alice,Jim',
'Edler,Alice,J',
"#{alt_last_name},#{alt_first_name}",
"#{alt_last_name},#{alt_first_name},#{alt_middle_name}",
"#{alt_last_name},#{alt_first_name},#{alt_middle_name[0]}"]
end
end

it 'ignores the bad alternate identity data' do
expect(author_one_identity.author_identities.first.first_name).to eq '.' # bad first name
# we do not get the name variant with the period for a first name (we would have more if we allowed the bad name variant)
expect(described_class.new(author_one_identity).send(:names)).to eq %w[Edler,Alice Edler,A Edler,Alice,Jim
Edler,Alice,J Edler,AJ Edler,A,J]
context 'with valid data and non-ambiguous first name' do
it 'includes all name variants' do
alternate_identity.update(first_name: 'Alice2', institution: 'Stanford')
expect(alternate_author_identity.unique_first_initial?).to be true # because alternate identity is Stanford and unique
# we get all variants with first initials and also the alternate identity
expect(described_class.new(alternate_author_identity).send(:names)).to match_array ['Edler,Alice',
'Edler,A',
'Edler,Alice,Jim',
'Edler,Alice,J',
'Edler,AJ',
'Edler,A,J',
"#{alt_last_name},#{alt_first_name}",
"#{alt_last_name},#{alt_first_name[0]}",
"#{alt_last_name},#{alt_first_name},#{alt_middle_name}",
"#{alt_last_name},#{alt_first_name},#{alt_middle_name[0]}",
"#{alt_last_name},#{alt_first_name[0]}#{alt_middle_name[0]}",
"#{alt_last_name},#{alt_first_name[0]},#{alt_middle_name[0]}"]
end
end
end
end
Expand Down

0 comments on commit 44b1c45

Please sign in to comment.