diff --git a/lib/geomash/tgn.rb b/lib/geomash/tgn.rb index c9d3bff..2a23051 100644 --- a/lib/geomash/tgn.rb +++ b/lib/geomash/tgn.rb @@ -44,7 +44,7 @@ def self.get_tgn_data(tgn_id) unless self.blazegraph_enabled primary_tgn_response = Typhoeus::Request.get("https://vocab.getty.edu/tgn/#{tgn_id}", followlocation: true, headers: GETTY_TGN_HEADERS, timeout: 500) - return if primary_tgn_response.failure? # Couldn't find TGN... FIXME: additional check needed if TGN is down? + return unless primary_tgn_response.success? # Couldn't find TGN... FIXME: additional check needed if TGN is down? as_json_tgn_response = JSON.parse(primary_tgn_response.body) end @@ -124,23 +124,26 @@ def self.get_tgn_data(tgn_id) end # coordinates - coords = nil - if tgn_main_term_info[:latitude].present? - coords = {} - coords[:latitude] = tgn_main_term_info[:latitude] - coords[:longitude] = tgn_main_term_info[:longitude] - coords[:combined] = "#{tgn_main_term_info[:latitude]},#{tgn_main_term_info[:longitude]}" + coords = {} + coord_dims = %i(latitude longitude) + if tgn_main_term_info[coord_dims.first].present? + coord_dims.each do |dim| + coord_val = tgn_main_term_info[dim] + if coord_val.match?(/\A-*\.[0-9]/) + coord_val = coord_val.match?(/\A-/) ? "-0#{coord_val.gsub(/\A-/, '')}" : "0#{coord_val}" + end + coords[dim] = coord_val + end + coords[:combined] = "#{coords[coord_dims.first]},#{coords[coord_dims.last]}" end hier_geo = {} non_hier_geo = {} #Default term to best label language... - tgn_term = tgn_main_term_info[:label_en] - tgn_term ||= tgn_main_term_info[:label_default] - tgn_term ||= tgn_main_term_info[:label_other] - tgn_term ||= tgn_main_term_info[:label_alt] - tgn_term ||= tgn_main_term_info[:label_remaining] + tgn_term = tgn_main_term_info[:label_en] || tgn_main_term_info[:label_default] || + tgn_main_term_info[:label_other] || tgn_main_term_info[:label_alt] || + tgn_main_term_info[:label_remaining] tgn_term_type = if tgn_main_term_info[:aat_place].present? tgn_main_term_info[:aat_place].split('/').last @@ -153,122 +156,129 @@ def self.get_tgn_data(tgn_id) return if tgn_term.blank? && tgn_term_type.blank? case tgn_term_type - when '300128176' #continent - hier_geo[:continent] = tgn_term - when '300128207', '300387130', '300387506' #nation, autonomous areas, countries - hier_geo[:country] = tgn_term - when '300000774' #province - hier_geo[:province] = tgn_term - when '300236112', '300182722', '300387194', '300387052', '300387113', '300387107' #region, union, semi-independent political entity, autonomous communities, autonomous regions - hier_geo[:region] = tgn_term - when '300000776', '300000772', '300235093' #state, department, governorate + when '300008347', '300008389' #inhabited place, cities + hier_geo[:city] = tgn_term + when '300000745', '300000778', '300387331' #neighborhood, parishes, parts of inhabited places + hier_geo[:city_section] = tgn_term + when '300128176' #continent + hier_geo[:continent] = tgn_term + when '300128207', '300387130', '300387506' #nation, autonomous areas, countries + hier_geo[:country] = tgn_term + when '300000774' #province + hier_geo[:province] = tgn_term + when '300236112', '300182722', '300387194', '300387052', '300387113', '300387107' #region, union, semi-independent political entity, autonomous communities, autonomous regions + hier_geo[:region] = tgn_term + when '300000776', '300000772', '300235093' #state, department, governorate + hier_geo[:state] = tgn_term + when '300387081' #national district + if tgn_term == 'District of Columbia' hier_geo[:state] = tgn_term - when '300387081' #national district - if tgn_term == 'District of Columbia' - hier_geo[:state] = tgn_term - else - hier_geo[:territory] = tgn_term - end - when '300135982', '300387176', '300387122' #territory, dependent state, union territory - hier_geo[:territory] = tgn_term - when '300000771', '300387092', '300387071' #county, parishes, unitary authorities - hier_geo[:county] = tgn_term - when '300008347', '300008389' #inhabited place, cities - hier_geo[:city] = tgn_term - when '300000745', '300000778', '300387331' #neighborhood, parishes, parts of inhabited places - hier_geo[:city_section] = tgn_term - when '300008791', '300387062' #island - hier_geo[:island] = tgn_term - when '300387575', '300387346', '300167671', '300387178', '300387082', '300387173', '300055621', '300386853', '300386831', '300386832', '300008178', '300008804', '300387131', '300132348', '300387085', '300387198', '300008761','300387064' #'81101/area', '22101/general region', '83210/deserted settlement', '81501/historical region', '81126/national division', administrative divisions, area (measurement), island groups, mountain ranges, mountain systems, nature reserves, peninsulas, regional divisions, sand bars, senatorial districts (administrative districts), first/third level subdivisions (political entities), valleys (landforms) - hier_geo[:area] = tgn_term - when '300386699' #Top level element of World - non_hier_geo[:value] = 'World' - non_hier_geo[:qualifier] = nil else - aat_main_term_info = {} - label_remaining_check = false - - if self.blazegraph_enabled - query = %{ - SELECT ?Object ?Predicate #{aat_from_context} - WHERE - { - ?Predicate ?Object - } - }.squish - - aat_type_response = Typhoeus::Request.post(self.blazegraph_config[0], body: { query: query }, timeout: 500, headers: { Accept: 'application/sparql-results+json' }) - else - aat_type_response = Typhoeus::Request.get("https://vocab.getty.edu/aat/#{tgn_term_type}", followlocation: true, headers: GETTY_TGN_HEADERS, timeout: 500) - end + hier_geo[:territory] = tgn_term + end + when '300135982', '300387176', '300387122' #territory, dependent state, union territory + hier_geo[:territory] = tgn_term + when '300000771', '300387092', '300387071' #county, parishes, unitary authorities + hier_geo[:county] = tgn_term + when '300008791', '300387062' #island + hier_geo[:island] = tgn_term + when '300387575', '300387346', '300167671', '300387178', '300387082', '300387173', '300055621', '300386853', '300386831', '300386832', '300008178', '300008804', '300387131', '300132348', '300387085', '300387198', '300008761','300387064' #'81101/area', '22101/general region', '83210/deserted settlement', '81501/historical region', '81126/national division', administrative divisions, area (measurement), island groups, mountain ranges, mountain systems, nature reserves, peninsulas, regional divisions, sand bars, senatorial districts (administrative districts), first/third level subdivisions (political entities), valleys (landforms) + hier_geo[:area] = tgn_term + when '300386699' #Top level element of World + non_hier_geo[:value] = 'World' + non_hier_geo[:qualifier] = nil + else + aat_main_term_info = {} + label_remaining_check = false - if aat_type_response.success? - as_json_aat_term_type = JSON.parse(aat_type_response.body) + if self.blazegraph_enabled + query = %{ + SELECT ?Object ?Predicate #{aat_from_context} + WHERE + { + ?Predicate ?Object + } + }.squish + + aat_type_response = Typhoeus::Request.post(self.blazegraph_config[0], body: { query: query }, timeout: 500, headers: { Accept: 'application/sparql-results+json' }) + else + aat_type_response = Typhoeus::Request.get("https://vocab.getty.edu/aat/#{tgn_term_type}", followlocation: true, headers: GETTY_TGN_HEADERS, timeout: 500) + end - if as_json_aat_term_type.dig('results', 'bindings').present? - as_json_aat_term_type.dig('results', 'bindings').each do |ntriple| - case ntriple['Predicate']['value'] - when 'http://www.w3.org/2004/02/skos/core#prefLabel' - if ntriple['Object']['xml:lang'].present? && ntriple['Object']['xml:lang'] == 'en' - aat_main_term_info[:label_en] ||= ntriple['Object']['value'] - elsif ntriple['Object']['xml:lang'].present? && ntriple['Object']['xml:lang'] == 'en-us' - aat_main_term_info[:label_en] ||= ntriple['Object']['value'] - elsif ntriple['Object']['xml:lang'].present? && ntriple['Object']['xml:lang'] == 'zh-latn-pinyin' - aat_main_term_info[:label_other] ||= ntriple['Object']['value'] - elsif ntriple['Object']['xml:lang'].blank? - aat_main_term_info[:label_default] ||= ntriple['Object']['value'] - else - label_remaining_check = true if aat_main_term_info[:label_remaining].present? - aat_main_term_info[:label_remaining] ||= ntriple['Object']['value'] - end - when 'http://www.w3.org/2004/02/skos/core#altLabel' - if ntriple['Object']['xml:lang'].present? && ntriple['Object']['xml:lang'] == 'en' - aat_main_term_info[:label_alt] ||= ntriple['Object']['value'] - end - end - end - else - as_json_aat_term_type.values.each do |ntriple| - ntriple.each do |uri_key, uri_val| - case uri_key - when 'http://www.w3.org/2004/02/skos/core#prefLabel' - aat_main_term_info[:label_default] ||= uri_val&.first&.[]('value') - when 'http://www.w3.org/2004/02/skos/core#altLabel' - tgn_main_term_info[:label_alt] ||= uri_val&.first&.[]('value') + if aat_type_response.success? + as_json_aat_term_type = JSON.parse(aat_type_response.body) + + if as_json_aat_term_type.dig('results', 'bindings').present? + as_json_aat_term_type.dig('results', 'bindings').each do |ntriple| + case ntriple['Predicate']['value'] + when 'http://www.w3.org/2004/02/skos/core#prefLabel' + if ntriple['Object']['xml:lang'].present? && ntriple['Object']['xml:lang'] == 'en' + aat_main_term_info[:label_en] ||= ntriple['Object']['value'] + elsif ntriple['Object']['xml:lang'].present? && ntriple['Object']['xml:lang'] == 'en-us' + aat_main_term_info[:label_en] ||= ntriple['Object']['value'] + elsif ntriple['Object']['xml:lang'].present? && ntriple['Object']['xml:lang'] == 'zh-latn-pinyin' + aat_main_term_info[:label_other] ||= ntriple['Object']['value'] + elsif ntriple['Object']['xml:lang'].blank? + aat_main_term_info[:label_default] ||= ntriple['Object']['value'] + else + label_remaining_check = true if aat_main_term_info[:label_remaining].present? + aat_main_term_info[:label_remaining] ||= ntriple['Object']['value'] + end + when 'http://www.w3.org/2004/02/skos/core#altLabel' + if ntriple['Object']['xml:lang'].present? && ntriple['Object']['xml:lang'] == 'en' + aat_main_term_info[:label_alt] ||= ntriple['Object']['value'] end - end end end - end - #Default term to best label language... - aat_term = aat_main_term_info[:label_en] - aat_term ||= aat_main_term_info[:label_default] - aat_term ||= aat_main_term_info[:label_other] - aat_term ||= aat_main_term_info[:label_alt] - - if aat_term.blank? - if label_remaining_check - raise "Could not determine a single aat non_hier_geo label for TGN: #{tgn_id}" - else - aat_term = aat_main_term_info[:label_remaining] + else + as_json_aat_term_type.values.each do |ntriple| + ntriple.each do |uri_key, uri_val| + case uri_key + when 'http://www.w3.org/2004/02/skos/core#prefLabel' + aat_main_term_info[:label_default] ||= uri_val&.first&.[]('value') + when 'http://www.w3.org/2004/02/skos/core#altLabel' + tgn_main_term_info[:label_alt] ||= uri_val&.first&.[]('value') + end + end end end + end + #Default term to best label language... + aat_term = aat_main_term_info[:label_en] || aat_main_term_info[:label_default] || + aat_main_term_info[:label_other] || aat_main_term_info[:label_alt] - #Fix cases like http://vocab.getty.edu/aat/300132316 which are bays (bodies of water) - aat_term = aat_term.gsub(/ \(.+\)$/, '') - - if (aat_term =~ /ies$/).present? || (aat_term =~ /es$/).present? || (aat_term =~ /s$/).present? - aat_term = aat_term.singularize + if aat_term.blank? + if label_remaining_check + raise "Could not determine a single aat non_hier_geo label for TGN: #{tgn_id}" + else + aat_term = aat_main_term_info[:label_remaining] end + end + + # Fix cases like http://vocab.getty.edu/aat/300132316 which are bays (bodies of water) + aat_term = aat_term.gsub(/ \(.+\)$/, '') + + if (aat_term =~ /ies$/).present? || (aat_term =~ /es$/).present? || (aat_term =~ /s$/).present? + aat_term = case aat_term # deal with edge cases where singularize doesn't work + when 'caves', 'coves', 'reserves' + aat_term.gsub(/s\z/, '') + when 'isthmuses' + 'isthmus' + else + aat_term.singularize + end + elsif %w[rioni quartieri sestieri].include?(aat_term) # more edge cases, for ex: 7006128, 7593081, 7003041 + aat_term = aat_term.gsub(/i\z/, 'e') + end - #Fix cases like "Boston Harbor" as "Boston Harbor (harbor)" isn't that helpful - non_hier_geo[:value] = tgn_term - non_hier_geo[:qualifier] = tgn_term.downcase.include?(aat_term.downcase) ? nil : aat_term + # Fix cases like "Boston Harbor" as "Boston Harbor (harbor)" isn't that helpful + non_hier_geo[:value] = tgn_term + non_hier_geo[:qualifier] = tgn_term.downcase.include?(aat_term.downcase) ? nil : aat_term end - #Broader places - #FIXME: could parse xml:lang instead of the three optional clauses now... didn't expect places to lack a default preferred label. - if broader_place_type_list.present? #Case of World... top of hierachy check + # Broader places + # FIXME: could parse xml:lang instead of the three optional clauses now... didn't expect places to lack a default preferred label. + if broader_place_type_list.present? #Case of World... top of hierarchy check query = "SELECT ?identifier_place ?place_label_default ?place_label_en ?aat_pref ?place_label_latn_pinyin #{aat_from_context} #{tgn_from_context} WHERE {" broader_place_type_list.each do |place_uri| @@ -287,7 +297,7 @@ def self.get_tgn_data(tgn_id) }.squish end - query = query[0..-12] + query = query[0..-8] query += ". }} GROUP BY ?identifier_place ?place_label_default ?place_label_en ?place_label_latn_pinyin ?aat_pref" query = query.squish @@ -394,7 +404,7 @@ def self.get_tgn_data(tgn_id) end end tgn_data = {} - tgn_data[:coords] = coords + tgn_data[:coords] = coords.presence tgn_data[:hier_geo] = hier_geo.length > 0 ? hier_geo : nil tgn_data[:non_hier_geo] = non_hier_geo.present? ? non_hier_geo : nil tgn_data diff --git a/test/tgn_test.rb b/test/tgn_test.rb index 9429cd0..737e575 100644 --- a/test/tgn_test.rb +++ b/test/tgn_test.rb @@ -2,86 +2,103 @@ class TGNTest < ActiveSupport::TestCase def test_tgn_lookup_from_id - if Geomash::TGN.tgn_enabled == 'true' - result = Geomash::TGN.get_tgn_data('2051159') + if Geomash::TGN.tgn_enabled == true + result = Geomash::TGN.get_tgn_data('2051159') - assert_equal '45', result[:coords][:latitude] - assert_equal '-84.1333', result[:coords][:longitude] - assert_equal '45,-84.1333', result[:coords][:combined] - assert_equal 'Atlanta', result[:hier_geo][:city] - assert_equal 'Montmorency', result[:hier_geo][:county] - assert_equal 'Michigan', result[:hier_geo][:state] - assert_equal 'United States', result[:hier_geo][:country] - assert_equal 'North and Central America', result[:hier_geo][:continent] + assert_equal '45', result[:coords][:latitude] + assert_equal '-84.1333', result[:coords][:longitude] + assert_equal '45,-84.1333', result[:coords][:combined] + assert_equal 'Atlanta', result[:hier_geo][:city] + assert_equal 'Montmorency', result[:hier_geo][:county] + assert_equal 'Michigan', result[:hier_geo][:state] + assert_equal 'United States', result[:hier_geo][:country] + assert_equal 'North and Central America', result[:hier_geo][:continent] + #Check for a weird prefLabel case of only zh-latn-pinyin-x-notone + result = Geomash::TGN.get_tgn_data('7002066') + assert_equal '45.75', result[:coords][:latitude] + assert_equal '126.65', result[:coords][:longitude] + assert_equal '45.75,126.65', result[:coords][:combined] + assert_equal 'Harbin', result[:hier_geo][:city] + assert_equal 'Heilongjiang', result[:hier_geo][:province] + assert_equal 'China', result[:hier_geo][:country] + assert_equal 'Asia', result[:hier_geo][:continent] + assert_nil result[:non_hier_geo] - #Check for a weird prefLabel case of only zh-latn-pinyin-x-notone - result = Geomash::TGN.get_tgn_data('7002066') - assert_equal '45.75', result[:coords][:latitude] - assert_equal '126.65', result[:coords][:longitude] - assert_equal '45.75,126.65', result[:coords][:combined] - assert_equal 'Harbin', result[:hier_geo][:city] - assert_equal 'Heilongjiang', result[:hier_geo][:province] - assert_equal 'China', result[:hier_geo][:country] - assert_equal 'Asia', result[:hier_geo][:continent] - assert_equal 'Harbin', result[:non_hier_geo][:value] - assert_nil result[:non_hier_geo][:qualifier] + result = Geomash::TGN.get_tgn_data('7008038') + assert_equal '48.866667', result[:coords][:latitude] + assert_equal '2.333333', result[:coords][:longitude] + assert_equal '48.866667,2.333333', result[:coords][:combined] + assert_equal 'Paris', result[:hier_geo][:city] + assert_equal 'Île-de-France', result[:hier_geo][:region] + assert_equal 'France', result[:hier_geo][:country] + assert_equal 'Europe', result[:hier_geo][:continent] + assert_nil result[:non_hier_geo] - result = Geomash::TGN.get_tgn_data('7008038') - assert_equal '48.866667', result[:coords][:latitude] - assert_equal '2.333333', result[:coords][:longitude] - assert_equal '48.866667,2.333333', result[:coords][:combined] - assert_equal 'Paris', result[:hier_geo][:city] - assert_equal 'Île-de-France', result[:hier_geo][:province] - assert_equal 'France', result[:hier_geo][:country] - assert_equal 'Europe', result[:hier_geo][:continent] - assert_equal nil, result[:non_hier_geo][:value] - assert_nil result[:non_hier_geo][:qualifier] + result = Geomash::TGN.get_tgn_data('7013534') + assert_equal '28.45', result[:coords][:latitude] + assert_equal '-80.5333', result[:coords][:longitude] + assert_equal '28.45,-80.5333', result[:coords][:combined] + assert_equal 'Brevard', result[:hier_geo][:county] + assert_equal 'Florida', result[:hier_geo][:state] + assert_equal 'United States', result[:hier_geo][:country] + assert_equal 'North and Central America', result[:hier_geo][:continent] + assert_equal 'Canaveral, Cape', result[:non_hier_geo][:value] + assert_nil result[:non_hier_geo][:qualifier] - result = Geomash::TGN.get_tgn_data('7013534') - assert_equal '28.45', result[:coords][:latitude] - assert_equal '-80.5333', result[:coords][:longitude] - assert_equal '28.45,-80.5333', result[:coords][:combined] - assert_equal 'Brevard', result[:hier_geo][:county] - assert_equal 'Florida', result[:hier_geo][:state] - assert_equal 'United States', result[:hier_geo][:country] - assert_equal 'North and Central America', result[:hier_geo][:continent] - assert_equal 'Canaveral, Cape', result[:non_hier_geo][:value] - assert_nil result[:non_hier_geo][:qualifier] + result = Geomash::TGN.get_tgn_data('1089719') + assert_equal '15.3333', result[:coords][:latitude] + assert_equal '38.9667', result[:coords][:longitude] + assert_equal '15.3333,38.9667', result[:coords][:combined] + assert_equal 'Eritrea', result[:hier_geo][:country] + assert_equal 'Asmara', result[:hier_geo][:city] + assert_equal 'Africa', result[:hier_geo][:continent] + assert_equal 'Central', result[:hier_geo][:region] + assert_nil result[:non_hier_geo] - result = Geomash::TGN.get_tgn_data('1089719') - assert_equal '15.3333', result[:coords][:latitude] - assert_equal '38.9667', result[:coords][:longitude] - assert_equal '15.3333,38.9667', result[:coords][:combined] - assert_equal 'Eritrea', result[:hier_geo][:country] - assert_equal 'Asmara', result[:hier_geo][:city] - assert_equal 'Africa', result[:hier_geo][:continent] - assert_equal 'Central', result[:hier_geo][:region] - assert_nil result[:non_hier_geo] + result = Geomash::TGN.get_tgn_data('8728596') + assert_equal '14.26667', result[:coords][:latitude] + assert_equal '170.65', result[:coords][:longitude] + assert_equal '14.26667,170.65', result[:coords][:combined] + assert_equal 'Eastern District', result[:hier_geo][:area] + assert_equal 'American Samoa', result[:hier_geo][:territory] + assert_equal 'Oceania', result[:hier_geo][:continent] + assert_nil result[:non_hier_geo] - result = Geomash::TGN.get_tgn_data('8728596') - assert_equal '14.26667', result[:coords][:latitude] - assert_equal '170.65', result[:coords][:longitude] - assert_equal '14.26667,170.65', result[:coords][:combined] - assert_equal 'Eastern District', result[:hier_geo][:area] - assert_equal 'American Samoa', result[:hier_geo][:territory] - assert_equal 'Oceania', result[:hier_geo][:continent] - assert_nil result[:non_hier_geo] + result = Geomash::TGN.get_tgn_data('7016024') + assert_equal '53.8667', result[:coords][:latitude] + assert_equal '-166.5333', result[:coords][:longitude] + assert_equal '53.8667,-166.5333', result[:coords][:combined] + assert_equal 'Unalaska', result[:hier_geo][:city] + assert_equal 'Fox Islands', result[:hier_geo][:area] + assert_equal 'Unalaska Island', result[:hier_geo][:island] + assert_equal 'Alaska', result[:hier_geo][:state] + assert_equal 'North and Central America', result[:hier_geo][:continent] + assert_equal 'United States', result[:hier_geo][:country] + assert_nil result[:non_hier_geo] - result = Geomash::TGN.get_tgn_data('7016024') - assert_equal '53.8667', result[:coords][:latitude] - assert_equal '-166.5333', result[:coords][:longitude] - assert_equal '53.8667,-166.5333', result[:coords][:combined] - assert_equal 'Unalaska', result[:hier_geo][:city] - assert_equal 'Fox Islands', result[:hier_geo][:area] - assert_equal 'Unalaska Island', result[:hier_geo][:island] - assert_equal 'Alaska', result[:hier_geo][:state] - assert_equal 'North and Central America', result[:hier_geo][:continent] - assert_equal 'United States', result[:hier_geo][:country] - assert_nil result[:non_hier_geo] + result = Geomash::TGN.get_tgn_data('invalid_identifier') + assert_nil result - result = Geomash::TGN.get_tgn_data('invalid_identifier') - assert_nil result + result = Geomash::TGN.get_tgn_data('7017069') + assert_equal '47.333', result[:coords][:latitude] + assert_equal '-0.5', result[:coords][:longitude] + + result = Geomash::TGN.get_tgn_data('1007758') + assert_equal '0.25', result[:coords][:latitude] + assert_equal '173.4167', result[:coords][:longitude] + + result = Geomash::TGN.get_tgn_data('2181605') + assert_equal 'Blue Hills Reservation', result[:non_hier_geo][:value] + assert_equal 'reserve', result[:non_hier_geo][:qualifier] + + result = Geomash::TGN.get_tgn_data('7593081') + assert_equal 'Tiburtino', result[:non_hier_geo][:value] + assert_equal 'quartiere', result[:non_hier_geo][:qualifier] + + result = Geomash::TGN.get_tgn_data('7415273') + assert_equal 'Morro Ayutla, Ensenada', result[:non_hier_geo][:value] + assert_equal 'cove', result[:non_hier_geo][:qualifier] end end end