Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

do not remove middle name variants from the query when the user has a middle name entered #1082

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 11 additions & 19 deletions lib/agent/author_name.rb
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ def text_search_query

# Search terms built from the author's name variants.
# The name-query helpers return either an Array of variants or a bare ''
# (when there is nothing to search on), so the result is flattened, empty
# strings are rejected, and duplicates are removed.
# @return [Array<String>] unique, non-empty name variants
def text_search_terms
@text_search_terms ||=
[first_name_query, middle_name_query].flatten.reject(&:empty?).uniq
[name_query].flatten.reject(&:empty?).uniq
end

def ==(other)
Expand All @@ -64,29 +64,21 @@ def ==(other)

private

# Name variants for:
# 'Lastname,Firstname' or
# 'Lastname,FirstInitial'
# @return [Array<String>|String] names
def first_name_query
# Name variants used to search for this author.
#
# When a middle name is present (it starts with an alphabetic character):
#   'Lastname,Firstname,Middlename'
#   'Lastname,Firstname,MiddleInitial'
#   and, only if Settings.HARVESTER.USE_FIRST_INITIAL is set:
#   'Lastname,FirstInitialMiddleInitial'
#   'Lastname,FirstInitial,MiddleInitial'
# When no middle name is present:
#   'Lastname,Firstname'
# In both cases 'Lastname,FirstInitial' is appended if
# Settings.HARVESTER.USE_FIRST_INITIAL is set.
#
# The bare 'Lastname,Firstname' variant is deliberately left out when a middle
# name exists, so the more specific middle-name variants drive the search.
# @return [Array<String>|String] names, or '' when both last and first are empty
def name_query
  return '' if last.empty? && first.empty?

  if middle =~ /^[[:alpha:]]/
    query = ["#{last_name},#{first_name},#{middle_name}", "#{last_name},#{first_name},#{middle_initial}"]
    # Middle-initial variants only make sense when a middle name exists; building
    # them in the other branch would emit a dangling 'Lastname,F,' term.
    if Settings.HARVESTER.USE_FIRST_INITIAL
      query += ["#{last_name},#{first_initial}#{middle_initial}", "#{last_name},#{first_initial},#{middle_initial}"]
    end
  else
    query = ["#{last_name},#{first_name}"]
  end
  query += ["#{last_name},#{first_initial}"] if Settings.HARVESTER.USE_FIRST_INITIAL
  query
end

# Middle-name based variants of the author's name:
#   'Lastname,Firstname,Middlename'
#   'Lastname,Firstname,MiddleInitial'
# plus, only when Settings.HARVESTER.USE_FIRST_INITIAL is set:
#   'Lastname,FirstInitialMiddleInitial'
#   'Lastname,FirstInitial,MiddleInitial'
# @return [Array<String>|String] names, or '' when middle does not start with a letter
def middle_name_query
  if middle =~ /^[[:alpha:]]/
    full_variants = [
      "#{last_name},#{first_name},#{middle_name}",
      "#{last_name},#{first_name},#{middle_initial}"
    ]
    return full_variants unless Settings.HARVESTER.USE_FIRST_INITIAL

    full_variants + [
      "#{last_name},#{first_initial}#{middle_initial}",
      "#{last_name},#{first_initial},#{middle_initial}"
    ]
  else
    ''
  end
end

# Some names may contain particles, e.g. the
# PubmedSourceRecord#author_to_hash checks for particles like:
# /el-|el |da |de |del |do |dos |du |le /
Expand Down
82 changes: 34 additions & 48 deletions spec/lib/agent/author_name_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
let(:ln) { 'Stanford' }
let(:all_names) { described_class.new(ln, fn, mn) }
let(:no_names) { described_class.new(nil, nil, nil) }
let(:no_middle_name) { described_class.new(ln, fn, '') }

describe '#initialize' do
it 'casts empty names to strings' do
Expand Down Expand Up @@ -107,28 +108,25 @@
describe '#text_search_query' do
context 'when all names are present' do
# additional SW specs are in publication_query_by_author_name_spec.rb
it 'includes first_name_query and middle_name_query elements' do
allow(all_names).to receive(:first_name_query).and_return(['abc', 'def'])
allow(all_names).to receive(:middle_name_query).and_return(['qrs', 'xyz'])
expect(all_names.text_search_query).to eq "\"abc\" or \"def\" or \"qrs\" or \"xyz\""
it 'includes appropriate name elements' do
expect(all_names.text_search_query).to eq "\"Stanford,Amasa,Leland\" or \"Stanford,Amasa,L\""
end
end
end

describe '#text_search_terms' do
it 'includes first_name_query and middle_name_query elements' do
fnames = all_names.send(:first_name_query)
mnames = all_names.send(:middle_name_query)
expect(all_names.text_search_terms).to include(*fnames, *mnames)
it 'includes name_query elements' do
fnames = all_names.send(:name_query)
expect(all_names.text_search_terms).to include(*fnames)
end
end

describe '#first_name_query' do
describe '#name_query' do
it 'when no names are present returns an empty String' do
expect(no_names.send(:first_name_query)).to eq ''
expect(no_names.send(:name_query)).to eq ''
end
context 'when all names are present' do
let(:fn_query) { all_names.send(:first_name_query) }
let(:fn_query) { all_names.send(:name_query) }
before do
allow(Settings.HARVESTER).to receive(:USE_FIRST_INITIAL).and_return(false)
end
Expand All @@ -138,70 +136,58 @@
expect(fn_query).not_to include(be_empty)
expect(fn_query.size).to eq(fn_query.uniq.size)
end
it 'includes name with first_name' do
expect(fn_query).to include "#{all_names.last_name},#{all_names.first_name}"
it 'does not include only first_name variant (since middle name exists)' do
expect(fn_query).not_to include "#{all_names.last_name},#{all_names.first_name}"
end
it 'excludes name with first_initial when settings do not allow for it' do
expect(fn_query).not_to include "#{all_names.last_name},#{all_names.first_initial}"
end
it 'does not include name with middle_name' do
expect(fn_query).not_to include "#{all_names.last_name},#{all_names.first_name},#{all_names.middle_name}"
expect(fn_query).to all(exclude(",#{all_names.middle_name}"))
it 'includes name with middle_name' do
expect(fn_query).to include "#{all_names.last_name},#{all_names.first_name},#{all_names.middle_name}"
end
it 'does not include name with middle_initial' do
expect(fn_query).not_to include "#{all_names.last_name},#{all_names.first_name},#{all_names.middle_initial}"
expect(fn_query).to all(exclude(",#{all_names.middle_initial}"))
it 'includes name with middle_initial variant' do
expect(fn_query).to include "#{all_names.last_name},#{all_names.first_name},#{all_names.middle_initial}"
end
end
context 'when all names are present and settings allow for first initial' do
before do
allow(Settings.HARVESTER).to receive(:USE_FIRST_INITIAL).and_return(true)
end
let(:fn_query) { all_names.send(:first_name_query) }
let(:fn_query) { all_names.send(:name_query) }
it 'includes name with first_initial when settings allow for it' do
expect(fn_query).to include "#{all_names.last_name},#{all_names.first_initial}"
end
end
end

describe '#middle_name_query' do
it 'when no names are present returns an empty String' do
expect(no_names.send(:middle_name_query)).to eq ''
end
context 'when all names are present' do
let(:mn_query) { all_names.send(:middle_name_query) }
context 'when middle name not present' do
let(:fn_query) { no_middle_name.send(:name_query) }
before do
allow(Settings.HARVESTER).to receive(:USE_FIRST_INITIAL).and_return(false)
end
it 'is Array<String> with non-empty unique values' do
expect(mn_query).to be_an Array
expect(mn_query).to all(be_a(String))
expect(mn_query).not_to include(be_empty)
expect(mn_query.size).to eq(mn_query.uniq.size)
end
it 'includes name with middle_name' do
expect(mn_query).to include "#{all_names.last_name},#{all_names.first_name},#{all_names.middle_name}"
end
it 'includes name with middle_initial' do
expect(mn_query).to include "#{all_names.last_name},#{all_names.first_name},#{all_names.middle_initial}"
it 'includes name with first_name' do
expect(fn_query).to include "#{all_names.last_name},#{all_names.first_name}"
end
it 'does not include last_name,first_name' do
expect(mn_query).not_to include "#{all_names.last_name},#{all_names.first_name}"
it 'excludes name with first_initial when settings do not allow for it' do
expect(fn_query).not_to include "#{all_names.last_name},#{all_names.first_initial}"
end
it 'does not include last_name,first_initial' do
expect(mn_query).not_to include "#{all_names.last_name},#{all_names.first_initial}"
it 'does not include middle_name variants' do
expect(fn_query).not_to include "#{all_names.last_name},#{all_names.first_name},#{all_names.middle_name}"
expect(fn_query).not_to include "#{all_names.last_name},#{all_names.first_name},"
expect(fn_query).to all(exclude(",#{all_names.middle_name}"))
end
it 'excludes name with middle_initial appended to first initial when settings do not allow for it' do
expect(mn_query).not_to include "#{all_names.last_name},#{all_names.first_initial}#{all_names.middle_initial}"
it 'does not include middle_initial variants' do
expect(fn_query).not_to include "#{all_names.last_name},#{all_names.first_name},#{all_names.middle_initial}"
expect(fn_query).not_to include "#{all_names.last_name},#{all_names.first_name},"
expect(fn_query).to all(exclude(",#{all_names.middle_initial}"))
end
end
context 'when all names are present and settings allow for first initial' do
let(:mn_query) { all_names.send(:middle_name_query) }
context 'when middle names not present and settings allow for first initial' do
before do
allow(Settings.HARVESTER).to receive(:USE_FIRST_INITIAL).and_return(true)
end
it 'includes name with middle_initial appended to first initial when settings allow for it' do
expect(mn_query).to include "#{all_names.last_name},#{all_names.first_initial}#{all_names.middle_initial}"
let(:fn_query) { no_middle_name.send(:name_query) }
it 'includes name with first_initial when settings allow for it' do
expect(fn_query).to include "#{all_names.last_name},#{all_names.first_initial}"
end
end
end
Expand Down
4 changes: 2 additions & 2 deletions spec/lib/web_of_science/query_author_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -124,8 +124,8 @@

it 'ignores the bad alternate identity data' do
expect(author_one_identity.author_identities.first.first_name).to eq '.' # bad first name
# we get three name variants out (we would have more if we allowed the bad name variant)
expect(described_class.new(author_one_identity).send(:names)).to eq %w[Edler,Alice Edler,Alice,Jim Edler,Alice,J]
# we get two name variants out (we would have more if we allowed the bad name variant)
expect(described_class.new(author_one_identity).send(:names)).to eq %w[Edler,Alice,Jim Edler,Alice,J]
end
end
end
Expand Down