Skip to content

Commit

Permalink
Merge pull request #749 from sul-dlss/647-refactor-name-extraction
Browse files Browse the repository at this point in the history
Refactor name extraction to introduce role mapping
  • Loading branch information
aaron-collier authored Jul 19, 2021
2 parents e54edaf + 698a940 commit 54071fe
Show file tree
Hide file tree
Showing 7 changed files with 126 additions and 1 deletion.
1 change: 1 addition & 0 deletions .rubocop_todo.yml
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,7 @@ RSpec/DescribedClass:
- 'spec/lib/traject/macros/harvard_scw_spec.rb'
- 'spec/lib/traject/macros/laussane_spec.rb'
- 'spec/lib/traject/macros/manchester_spec.rb'
- 'spec/lib/traject/macros/mods_spec.rb'
- 'spec/lib/traject/macros/string_helper_spec.rb'
- 'spec/lib/traject/macros/title_extraction_spec.rb'

Expand Down
13 changes: 13 additions & 0 deletions lib/macros/language_extraction.rb
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ def arabic_script_lang_or_default(arabic_script_lang, default)
ar_values = []
default_values = []
accumulator.each do |val|
val = translate_role_to_ar(val)
lang_code = val.match?(/[ضصثقفغعهخحمنتالبيسشظطذدزرو]/) ? arabic_script_lang : default
ar_values << val if lang_code == arabic_script_lang
default_values << val if lang_code == default
Expand Down Expand Up @@ -63,5 +64,17 @@ def tei_lower_resource_language
accumulator.replace([{ language: TO_BCP47[:"#{language}"], values: [extracted_string] }]) if extracted_string
end
end

# Returns the provided value with every parenthesised English role translated
# to Arabic, e.g. "Name (Copyist)" => "Name (الناسخ)".
#
# Only text enclosed in parentheses is considered a role, so a name that
# happens to contain a role word (e.g. "Author, Jane (Author)") is left intact
# outside the parentheses. Multi-word roles such as "Former owner" are
# supported. Parenthesised text with no entry in the 'role_ar_from_en'
# translation map is left unchanged.
#
# @param val [String] an extracted value, optionally containing "(Role)"
# @return [String] the value with known roles translated; +val+ itself when it
#   contains no parenthesised text
def translate_role_to_ar(val)
  role_pattern = /\(([^()]+)\)/
  # Skip loading the translation map when there is nothing that could be a role.
  return val unless val.match?(role_pattern)

  role_ar_map = Traject::TranslationMap.new('role_ar_from_en')
  val.gsub(role_pattern) do |parenthesised|
    translated_role = role_ar_map[Regexp.last_match(1)]
    # Keep the original "(...)" text when the role is unknown to the map.
    translated_role ? "(#{translated_role})" : parenthesised
  end
end
end
end
5 changes: 4 additions & 1 deletion lib/macros/mods.rb
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,10 @@ def extract_name(xpath, role: nil, exclude: nil)
name_nodes.each do |val|
name_parts << val&.content&.strip
end
accumulator.replace(["#{name_parts.join(', ')} (#{role})"]) if name_parts.present?

role_map = Traject::TranslationMap.new('role_from_contributor')
role = " (#{role_map[role]})" if role
accumulator.replace(["#{name_parts.join(', ')}#{role}"]) if name_parts.present?
end
end

Expand Down
12 changes: 12 additions & 0 deletions lib/translation_maps/role_ar_from_en.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# English role label => Arabic role label.
# Loaded by translate_role_to_ar via Traject::TranslationMap.new('role_ar_from_en');
# keys must match the English labels produced by role_from_contributor.yaml.
Artist: فنان
Author: مؤلف
Calligrapher: خطاط
Copyist: الناسخ
Creator: المنشئ
Former owner: مالك سابق
Former repository: المستودع السابق
# NOTE(review): المنور reads as "the enlightened one"; a manuscript illuminator
# may be better rendered as مذهّب or مزخرف — confirm with an Arabic-language cataloguer.
Illuminator: المنور
Illustrator: المصور
# The source role is "painter (artist)", so use the word for an artist who
# paints (رسام) rather than a house painter (دهان).
Painter: رسام
# A patron (sponsor) is راعي; the previous value نمط means "pattern".
Patron: راعي
Scribe: كاتب
14 changes: 14 additions & 0 deletions lib/translation_maps/role_from_contributor.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Raw contributor role term => normalized English display label.
# Loaded by extract_name via Traject::TranslationMap.new('role_from_contributor');
# the label is appended to the contributor name as " (Label)".
# Keys are matched literally, so variants with trailing punctuation
# (e.g. "copyist.") need their own entries.
artist: Artist
author: Author
calligrapher: Calligrapher
copyist: Copyist
copyist.: Copyist
cre: Creator
creator: Creator
former owner.: Former owner
former repository: Former repository
illuminator: Illuminator
illustrator: Illustrator
painter (artist): Painter
patron: Patron
scribe.: Scribe
9 changes: 9 additions & 0 deletions spec/lib/traject/macros/language_extraction_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -19,5 +19,14 @@
expect(callable.call(nil, [extracted_string])).to eq([{ language: 'ar-Arab', values: [extracted_string] }])
end
end

context 'when extracted string contains Arabic characters and an english role' do
it 'returns the correct script value arabic_script_lang' do
extracted_string = 'الولايات المتحدة الامريكيه (Copyist)'
translated_string = 'الولايات المتحدة الامريكيه (الناسخ)'
callable = instance.arabic_script_lang_or_default('ar-Arab', 'en')
expect(callable.call(nil, [extracted_string])).to eq([{ language: 'ar-Arab', values: [translated_string] }])
end
end
end
end
73 changes: 73 additions & 0 deletions spec/lib/traject/macros/mods_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
# frozen_string_literal: true

require 'macros/mods'
require 'macros/language_extraction'

# Specs for Macros::Mods#extract_name: checks that a contributor name is
# extracted from a MODS record and that the role is appended using the mapped
# display label (e.g. "copyist." => "Copyist").
RSpec.describe Macros::Mods do
  # A bare Traject indexer extended with the macros under test.
  subject(:indexer) do
    Traject::Indexer.new.tap do |indexer|
      indexer.instance_eval do
        extend Macros::Mods
        extend Macros::LanguageExtraction
      end
    end
  end

  describe '#extract_name' do
    # Minimal MODS record with a single name whose role term is "copyist.".
    # The root element is closed with its full qualified name so the fixture is
    # well-formed XML rather than relying on parser recovery.
    let(:record) do
      <<~XML
        <mods:mods xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
        xmlns:mods="http://www.loc.gov/mods/v3"
        xmlns:sets="http://hul.harvard.edu/ois/xml/ns/sets"
        xmlns:xlink="http://www.w3.org/1999/xlink"
        xmlns:marc="http://www.loc.gov/MARC21/slim"
        xmlns:HarvardDRS="http://hul.harvard.edu/ois/xml/ns/HarvardDRS"
        xmlns:librarycloud="http://hul.harvard.edu/ois/xml/ns/librarycloud" xsi:schemaLocation="http://www.loc.gov/mods/v3 http://www.loc.gov/standards/mods/v3/mods-3-6.xsd" version="3.6">
        <mods:name>
        <mods:namePart>Doe, John</mods:namePart>
        <mods:role>
        <mods:roleTerm type="text">copyist.</mods:roleTerm>
        </mods:role>
        </mods:name>
        </mods:mods>
      XML
    end
    let(:ng_rec) { Nokogiri::XML.parse(record) }

    # NOTE(review): the XPath passed to extract_name below is an open-ended
    # prefix (unbalanced "["); presumably extract_name appends the role
    # predicate itself — confirm against lib/macros/mods.rb.

    context 'when contributor role matches the search' do
      before do
        indexer.instance_eval do
          to_field 'cho_contributor', extract_name('//*/mods:name[1][mods:role/mods:roleTerm/', role: 'copyist.')
        end
      end

      it 'returns values with a mapped role' do
        expect(indexer.map_record(ng_rec)).to eq({ 'cho_contributor' => ['Doe, John (Copyist)'] })
      end
    end

    context 'when contributor role does not match the search' do
      before do
        indexer.instance_eval do
          to_field 'cho_contributor', extract_name('//*/mods:name[1][mods:role/mods:roleTerm/', role: 'scribe.')
        end
      end

      it 'returns an empty hash' do
        expect(indexer.map_record(ng_rec)).to eq({})
      end
    end

    context 'when role is excluded from the search' do
      before do
        indexer.instance_eval do
          to_field 'cho_contributor', extract_name('//*/mods:name[1][mods:role/mods:roleTerm/', exclude: true)
        end
      end

      it 'returns values without role included' do
        expect(indexer.map_record(ng_rec)).to eq({ 'cho_contributor' => ['Doe, John'] })
      end
    end
  end
end

0 comments on commit 54071fe

Please sign in to comment.