From 0fb69c080f07a3527181363970f01bcac0a8530e Mon Sep 17 00:00:00 2001 From: Peter Mangiafico Date: Fri, 5 Apr 2019 10:00:19 -0700 Subject: [PATCH] allow more first name searches when alternate identities exist but are all stanford or blank institutio --- app/models/author.rb | 8 ++++++-- spec/factories/author.rb | 18 ++++++++++++++++++ spec/models/author_spec.rb | 24 ++++++++++++++++++------ 3 files changed, 42 insertions(+), 8 deletions(-) diff --git a/app/models/author.rb b/app/models/author.rb index 1d0bd52af..ab2220379 100644 --- a/app/models/author.rb +++ b/app/models/author.rb @@ -38,11 +38,15 @@ def institution Settings.HARVESTER.INSTITUTION.name end - # indicates if the LastName, FirstInitial form for this user is ambiguous within our author database and there are no alternate identities + # indicates if the LastName, FirstInitial form for this user is ambiguous within our author database (including any alternate identities that include Stanford as an insitution) + # also checks to see if there are alternate identities with institutions other than Stanford, which is problematic, and should be considered ambiguous def ambiguous_first_initial? return true unless first_name && last_name first_initial_not_unique = self.class.where('preferred_first_name like ? and preferred_last_name = ?', "#{first_name[0]}%", last_name).where(active_in_cap: true, cap_import_enabled: true).size > 1 - (first_initial_not_unique || !author_identities.empty?) + author_identities_not_unique = author_identities.map do |author_identity| + (!author_identity.institution.blank? && !author_identity.institution.include?('Stanford')) || self.class.where('preferred_first_name like ? and preferred_last_name = ? and id != ?', "#{author_identity.first_name[0]}%", author_identity.last_name, author_identity.author_id).where(active_in_cap: true, cap_import_enabled: true).size > 1 + end + (first_initial_not_unique || author_identities_not_unique.include?(true)) end # @return [Array] ScienceWireIds for approved publications diff --git a/spec/factories/author.rb b/spec/factories/author.rb index 0cb204578..69643f997 100644 --- a/spec/factories/author.rb +++ b/spec/factories/author.rb @@ -29,6 +29,7 @@ university_id { FactoryBot.generate(:random_id) } california_physician_license { FactoryBot.generate(:random_string) } active_in_cap { true } + cap_import_enabled { true } email { 'alice.edler@stanford.edu' } official_first_name { 'Alice' } official_last_name { 'Edler' } @@ -39,6 +40,23 @@ emails_for_harvest { 'alice.edler@stanford.edu' } end + factory :author_duped_last_name, parent: :author do + sunetid { FactoryBot.generate(:random_id) } + cap_profile_id { FactoryBot.generate(:random_id) } + university_id { FactoryBot.generate(:random_id) } + california_physician_license { FactoryBot.generate(:random_string) } + active_in_cap { true } + cap_import_enabled { true } + email { 'alice.edler@stanford.edu' } + official_first_name { 'Albert' } + official_last_name { 'Edler' } + official_middle_name { '' } + preferred_first_name { 'Albert' } + preferred_last_name { 'Edler' } + preferred_middle_name { '' } + emails_for_harvest { 'albert.edler@stanford.edu' } + end + factory :inactive_author, parent: :author do active_in_cap { false } end diff --git a/spec/models/author_spec.rb b/spec/models/author_spec.rb index 1bb8277da..cfb8a818b 100644 --- a/spec/models/author_spec.rb +++ b/spec/models/author_spec.rb @@ -30,16 +30,28 @@ end describe '#ambiguous_first_initial?' do - it 'confirms ambiguous first initial' do - subject.update_from_cap_authorship_profile_hash(auth_hash) - expect(subject.author_identities.size).to eq(2) # has alternate identities - expect(subject.ambiguous_first_initial?).to eq(true) # thus cannot search with first initial - end - it 'confirms non-ambiguous first initial' do + it 'confirms non-ambiguous first initial within stanford with no alternate identities' do odd_name = create :odd_name expect(odd_name.author_identities.size).to eq(0) # has no alternate identities expect(odd_name.ambiguous_first_initial?).to eq(false) # and no other odd names likes this at stanford, so ok to search with first initial end + it 'confirms non-ambiguous first initial within stanford with stanford only alternate identities' do + subject.update_from_cap_authorship_profile_hash(auth_hash) + expect(subject.author_identities.size).to eq(2) # has alternate identities + expect(subject.ambiguous_first_initial?).to eq(false) # ok, because all of the alternate identities are stanford or no institution, and no other first name ambiguity + end + it 'confirms ambiguous first initial within stanford with no alternate identities' do + create :author_duped_last_name + expect(subject.author_identities.size).to eq(0) # no alternate identities + expect(subject.ambiguous_first_initial?).to eq(true) # yes, because we now have another stanford author with the same last name and same first initial + end + it 'confirms ambiguous first initial even when non ambiguous within Stanford due to a non-Stanford alternate identity existing' do + author_with_alternate_identities = create :author_with_alternate_identities + expect(author_with_alternate_identities.author_identities.size).to eq(1) # alternate identities for primary author + expect(author_with_alternate_identities.author_identities.first.institution).not_to be blank? # alternate institution is not empty + expect(author_with_alternate_identities.author_identities.first.institution.include?('Stanford')).to be false # alternate institution is not Stanford + expect(author_with_alternate_identities.ambiguous_first_initial?).to eq(true) # yes, because even though there are no other stanford authors with similar names, they have a non-Stanford alternate identity + end end describe '#first_name' do