Skip to content

Commit

Permalink
automatically only use first initial when not ambiguous
Browse files Browse the repository at this point in the history
  • Loading branch information
peetucket committed Mar 20, 2019
1 parent ccc6148 commit c4c7494
Show file tree
Hide file tree
Showing 7 changed files with 72 additions and 34 deletions.
7 changes: 7 additions & 0 deletions app/models/author.rb
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,13 @@ def institution
Settings.HARVESTER.INSTITUTION.name
end

# Indicates whether searching by the "LastName,FirstInitial" form is unsafe for this author.
#
# The form is considered ambiguous when any of the following holds:
# - the first or last name is missing or blank (no usable initial can be formed),
# - the author has alternate identities (author_identities is not empty),
# - another active, import-enabled author shares the same preferred last name
#   and the same first letter of the preferred first name.
#
# @return [Boolean] true when the first-initial form must NOT be used for searching
def ambiguous_first_initial?
  # A blank name would otherwise turn into a bare '%' LIKE pattern matching every row.
  return true if first_name.to_s.strip.empty? || last_name.to_s.strip.empty?
  return true unless author_identities.empty?

  # Exclude this record explicitly rather than assuming it is one of the matches;
  # it may be unsaved or may not itself satisfy the active/import filters.
  self.class
      .where('preferred_first_name like ? and preferred_last_name = ?', "#{first_name[0]}%", last_name)
      .where(active_in_cap: true, cap_import_enabled: true)
      .where.not(id: id)
      .exists?
end

# @return [Array<Integer>] ScienceWireIds for approved publications
def approved_sciencewire_ids
publications.where("contributions.status = 'approved'")
Expand Down
1 change: 0 additions & 1 deletion config/settings.yml
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,6 @@ DOI:

HARVESTER:
USE_MIDDLE_NAME: true
USE_FIRST_INITIAL: false
USE_AUTHOR_IDENTITIES: false
INSTITUTION:
name: Stanford University
Expand Down
15 changes: 8 additions & 7 deletions lib/agent/author_name.rb
Original file line number Diff line number Diff line change
Expand Up @@ -51,9 +51,10 @@ def text_search_query
text_search_terms.map { |x| "\"#{x}\"" }.join(' or ')
end

# Builds the list of unique, non-empty name variants used for text searches.
#
# @param options [Hash] optional settings
# @option options [Boolean] :use_first_initial (true) whether to include the
#   first-initial variants; an explicit false is honoured, a missing or nil
#   value falls back to true
# @return [Array<String>] unique, non-empty name variants
def text_search_terms(options = {})
  requested = options[:use_first_initial]
  # `requested || true` would always be true, so test for nil explicitly.
  use_first_initial = (requested.nil? || requested) ? true : false

  # Memoise per flag value so a call with one setting never returns the
  # terms that were cached for the other setting.
  @text_search_terms ||= {}
  @text_search_terms[use_first_initial] ||=
    [first_name_query(use_first_initial), middle_name_query(use_first_initial)]
    .flatten.reject(&:empty?).uniq
end

def ==(other)
Expand All @@ -68,10 +69,10 @@ def ==(other)
# 'Lastname,Firstname' or
# 'Lastname,FirstInitial'
# @return [Array<String>|String] names
# @param use_first_initial [Boolean] when true, also include the
#   'Lastname,FirstInitial' variant (defaults to true)
def first_name_query(use_first_initial = true)
  # Returns an empty String (not an Array) when there is no name at all.
  return '' if last.empty? && first.empty?

  query = ["#{last_name},#{first_name}"]
  query << "#{last_name},#{first_initial}" if use_first_initial
  query
end

Expand All @@ -80,10 +81,10 @@ def first_name_query
# 'Lastname,Firstname,MiddleInitial' or
# 'Lastname,FirstInitial,MiddleInitial'
# @return [Array<String>|String] names
# @param use_first_initial [Boolean] when true, also include the
#   'Lastname,FirstInitialMiddleInitial' and 'Lastname,FirstInitial,MiddleInitial'
#   variants (defaults to true)
def middle_name_query(use_first_initial = true)
  # Anchor with \A (start of string) rather than ^ (start of any line);
  # Regexp#match? returns false for nil, so a missing middle name is safe.
  return '' unless /\A[[:alpha:]]/.match?(middle)

  query = ["#{last_name},#{first_name},#{middle_name}", "#{last_name},#{first_name},#{middle_initial}"]
  if use_first_initial
    query << "#{last_name},#{first_initial}#{middle_initial}"
    query << "#{last_name},#{first_initial},#{middle_initial}"
  end
  query
end

Expand Down
2 changes: 1 addition & 1 deletion lib/web_of_science/query_author.rb
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ def names
ident.last_name,
ident.first_name,
Settings.HARVESTER.USE_MIDDLE_NAME ? ident.middle_name : ''
).text_search_terms
).text_search_terms(use_first_initial: !author.ambiguous_first_initial?)
end.flatten.uniq
end

Expand Down
13 changes: 13 additions & 0 deletions spec/factories/author.rb
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,19 @@
end
end

# Author factory with a deliberately unusual official/preferred name and empty
# middle names, flagged as active in CAP with CAP import enabled.
factory :odd_name, parent: :author do
active_in_cap { true }
cap_import_enabled { true }
official_first_name { 'Somebody' }
official_last_name { 'WithReallyUnusualName' }
official_middle_name { '' }
preferred_first_name { 'Somebody' }
preferred_last_name { 'WithReallyUnusualName' }
preferred_middle_name { '' }
email { '[email protected]' }
emails_for_harvest { '[email protected]' }
end

# Public data from
# - https://stanfordwho.stanford.edu
# - https://med.stanford.edu/profiles/russ-altman
Expand Down
55 changes: 30 additions & 25 deletions spec/lib/agent/author_name_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -117,21 +117,18 @@

# Checks that text_search_terms, called with no options, contains every term
# produced by the first-name and middle-name query helpers.
describe '#text_search_terms' do
it 'includes first_name_query and middle_name_query elements' do
fnames = all_names.send(:first_name_query)
mnames = all_names.send(:middle_name_query)
# the helpers are invoked via send with use_first_initial = true
fnames = all_names.send(:first_name_query, true)
mnames = all_names.send(:middle_name_query, true)
expect(all_names.text_search_terms).to include(*fnames, *mnames)
end
end

describe '#first_name_query' do
it 'when no names are present returns an empty String' do
expect(no_names.send(:first_name_query)).to eq ''
expect(no_names.send(:first_name_query, true)).to eq ''
end
context 'when all names are present' do
let(:fn_query) { all_names.send(:first_name_query) }
before do
allow(Settings.HARVESTER).to receive(:USE_FIRST_INITIAL).and_return(false)
end
context 'when all names are present with middle initial' do
let(:fn_query) { all_names.send(:first_name_query, true) }
it 'is Array<String> with non-empty unique values' do
expect(fn_query).to be_an Array
expect(fn_query).to all(be_a(String))
Expand All @@ -141,8 +138,8 @@
it 'includes name with first_name' do
expect(fn_query).to include "#{all_names.last_name},#{all_names.first_name}"
end
it 'excludes name with first_initial when settings do not allow for it' do
expect(fn_query).not_to include "#{all_names.last_name},#{all_names.first_initial}"
it 'includes name with first_initial when settings allow for it' do
expect(fn_query).to include "#{all_names.last_name},#{all_names.first_initial}"
end
it 'does not include name with middle_name' do
expect(fn_query).not_to include "#{all_names.last_name},#{all_names.first_name},#{all_names.middle_name}"
Expand All @@ -153,26 +150,37 @@
expect(fn_query).to all(exclude(",#{all_names.middle_initial}"))
end
end
context 'when all names are present and settings allow for first initial' do
before do
allow(Settings.HARVESTER).to receive(:USE_FIRST_INITIAL).and_return(true)
context 'when all names are present without middle initial' do
let(:fn_query) { all_names.send(:first_name_query, false) }
it 'is Array<String> with non-empty unique values' do
expect(fn_query).to be_an Array
expect(fn_query).to all(be_a(String))
expect(fn_query).not_to include(be_empty)
expect(fn_query.size).to eq(fn_query.uniq.size)
end
let(:fn_query) { all_names.send(:first_name_query) }
it 'includes name with first_initial when settings allow for it' do
expect(fn_query).to include "#{all_names.last_name},#{all_names.first_initial}"
it 'includes name with first_name' do
expect(fn_query).to include "#{all_names.last_name},#{all_names.first_name}"
end
it 'does not include name with first_initial' do
expect(fn_query).not_to include "#{all_names.last_name},#{all_names.first_initial}"
end
it 'does not include name with middle_name' do
expect(fn_query).not_to include "#{all_names.last_name},#{all_names.first_name},#{all_names.middle_name}"
expect(fn_query).to all(exclude(",#{all_names.middle_name}"))
end
it 'does not include name with middle_initial' do
expect(fn_query).not_to include "#{all_names.last_name},#{all_names.first_name},#{all_names.middle_initial}"
expect(fn_query).to all(exclude(",#{all_names.middle_initial}"))
end
end
end

describe '#middle_name_query' do
it 'when no names are present returns an empty String' do
expect(no_names.send(:middle_name_query)).to eq ''
expect(no_names.send(:middle_name_query, false)).to eq ''
end
context 'when all names are present' do
let(:mn_query) { all_names.send(:middle_name_query) }
before do
allow(Settings.HARVESTER).to receive(:USE_FIRST_INITIAL).and_return(false)
end
let(:mn_query) { all_names.send(:middle_name_query, false) }
it 'is Array<String> with non-empty unique values' do
expect(mn_query).to be_an Array
expect(mn_query).to all(be_a(String))
Expand All @@ -196,10 +204,7 @@
end
end
context 'when all names are present and settings allow for first initial' do
let(:mn_query) { all_names.send(:middle_name_query) }
before do
allow(Settings.HARVESTER).to receive(:USE_FIRST_INITIAL).and_return(true)
end
let(:mn_query) { all_names.send(:middle_name_query, true) }
it 'includes name with middle_initial appended to first initial when settings allow for it' do
expect(mn_query).to include "#{all_names.last_name},#{all_names.first_initial}#{all_names.middle_initial}"
end
Expand Down
13 changes: 13 additions & 0 deletions spec/models/author_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,19 @@
end
end

# Covers both outcomes of Author#ambiguous_first_initial?: an author with
# alternate identities, and an :odd_name author with none.
describe '#ambiguous_first_initial?' do
it 'confirms ambiguous first initial' do
subject.update_from_cap_authorship_profile_hash(auth_hash)
expect(subject.author_identities.size).to eq(2) # has alternate identities
expect(subject.ambiguous_first_initial?).to eq(true) # thus cannot search with first initial
end
it 'confirms non-ambiguous first initial' do
odd_name = create :odd_name
expect(odd_name.author_identities.size).to eq(0) # has no alternate identities
expect(odd_name.ambiguous_first_initial?).to eq(false) # and no other author has a name like this, so it is ok to search with first initial
end
end

describe '#first_name' do
it 'is the preferred_first_name' do
subject.update_from_cap_authorship_profile_hash(auth_hash)
Expand Down

0 comments on commit c4c7494

Please sign in to comment.