Skip to content

Commit

Permalink
Merge pull request #169 from MITLibraries/tco-72
Browse files Browse the repository at this point in the history
Adds barcode detector and lookup
  • Loading branch information
matt-bernhardt authored Jan 13, 2025
2 parents 0c576f3 + ea880f1 commit 7f8bd83
Show file tree
Hide file tree
Showing 22 changed files with 399 additions and 22 deletions.
6 changes: 4 additions & 2 deletions app/graphql/types/standard_identifiers_type.rb
Original file line number Diff line number Diff line change
Expand Up @@ -2,16 +2,18 @@

module Types
class StandardIdentifiersType < Types::BaseObject
description 'A detector for standard identifiers in search terms. Currently supported: ISBN, ISSN, PMID, DOI'
description 'A detector for standard identifiers in search terms. Currently supported: Barcode, ISBN, ISSN, PMID, DOI'

field :details, DetailsType, description: 'Additional information about the detected identifier(s)'
field :kind, String, null: false, description: 'The type of identifier detected (one of ISBN, ISSN, PMID, DOI)'
field :kind, String, null: false, description: 'The type of identifier detected (one of Barcode, ISBN, ISSN, PMID, DOI)'
field :value, String, null: false, description: 'The identifier detected in the search term'

# details does external lookups and should only be run if the fields
# have been explicitly requested
def details
case @object[:kind]
when :barcode
LookupBarcode.new.info(@object[:value])
when :doi
LookupDoi.new.info(@object[:value])
when :isbn
Expand Down
3 changes: 2 additions & 1 deletion app/models/detector/standard_identifiers.rb
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# frozen_string_literal: true

class Detector
# Detector::StandardIdentifiers detects the identifiers DOI, ISBN, ISSN, PMID.
# Detector::StandardIdentifiers detects the identifiers Barcode, DOI, ISBN, ISSN, PMID.
# See /docs/reference/pattern_detection_and_enhancement.md for details.
class StandardIdentifiers
attr_reader :detections
Expand Down Expand Up @@ -52,6 +52,7 @@ def self.record(term)
# patterns are regex patterns to be applied to the basic search box input
def patterns
{
barcode: /^39080[0-9]{9}$/,
isbn: /\b(ISBN-*(1[03])* *(: ){0,1})*(([0-9Xx][- ]*){13}|([0-9Xx][- ]*){10})\b/,
issn: /\b[0-9]{4}-[0-9]{3}[0-9xX]\b/,
pmid: /\b((pmid|PMID):\s?(\d{7,8}))\b/,
Expand Down
66 changes: 66 additions & 0 deletions app/models/lookup_barcode.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
# frozen_string_literal: true

require 'uri'

# LookupBarcode takes a 14-digit integer (flagged by a regex within the Detector::StandardIdentifier class) and consults
# the Primo API for the associated record. The structure of this class is pretty close to the other lookup models, with
# an info method being the only public method. If Primo finds a record for the submitted barcode, the class returns some
# metadata about the record, along with a link to the complete record using the discovery/fulldisplay path.
class LookupBarcode
# info takes a barcode as an argument and returns associated metadata about that item, provided Primo is able to
# locate it. If no record is found for any reason, the method returns nil.
#
# @note While the barcode argument is technically a string, in reality it should be a 14-digit integer in order to
# return anything meaningful.
# @param barcode String
# @return Hash or Nil
def info(barcode)
xml = fetch(barcode)

return if xml == 'Error'

metadata = extract_metadata(xml)

if metadata.reject { |_k, v| v.empty? }.present?
metadata[:barcode] = barcode
metadata[:link_resolver_url] = link_resolver_url(metadata)
metadata
else
Rails.logger.debug { "Barcode lookup error. Barcode #{barcode} detected by Primo returned no data" }
nil
end
end

private

def extract_metadata(xml)
{
recordId: xml.xpath('//default:recordIdentifier', 'default' => 'http://www.loc.gov/zing/srw/').text,
title: xml.xpath('//dc:title', 'dc' => 'http://purl.org/dc/elements/1.1/').text,
date: xml.xpath('//dc:date', 'dc' => 'http://purl.org/dc/elements/1.1/').text,
publisher: xml.xpath('//dc:publisher', 'dc' => 'http://purl.org/dc/elements/1.1/').text,
authors: xml.xpath('//dc:contributor', 'dc' => 'http://purl.org/dc/elements/1.1/').text
}
end

def url(barcode)
"https://mit.alma.exlibrisgroup.com/view/sru/01MIT_INST?version=1.2&operation=searchRetrieve&recordSchema=dc&query=alma.all_for_ui=#{barcode}"
end

def fetch(barcode)
resp = HTTP.headers(accept: 'application/xml').get(url(barcode))

if resp.status == 200
Nokogiri::XML(resp.to_s)
else
Rails.logger.debug do
"Barcode lookup error. Barcode #{barcode} detected but Primo returned an error status"
end
Rails.logger.debug { "URL: #{url(barcode)}" }
Sentry.capture_message('Primo API error after barcode detection')
'Error'
end
end

def link_resolver_url(metadata)
"https://mit.primo.exlibrisgroup.com/discovery/fulldisplay?vid=01MIT_INST:MIT&docid=alma#{metadata[:recordId]}"
end
end
7 changes: 4 additions & 3 deletions app/models/metrics/algorithms.rb
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
# suggested_resource_exact :integer
# lcsh :integer
# citation :integer
# barcode :integer
#
module Metrics
# Algorithms aggregates statistics for matches for all SearchEvents
Expand Down Expand Up @@ -49,8 +50,8 @@ def generate(month = nil)
else
count_matches(SearchEvent.includes(:term))
end
Metrics::Algorithms.create(month:, citation: matches[:citation], doi: matches[:doi], issn: matches[:issn],
isbn: matches[:isbn], lcsh: matches[:lcsh], pmid: matches[:pmid],
Metrics::Algorithms.create(month:, barcode: matches[:barcode], citation: matches[:citation], doi: matches[:doi],
issn: matches[:issn], isbn: matches[:isbn], lcsh: matches[:lcsh], pmid: matches[:pmid],
journal_exact: matches[:journal_exact],
suggested_resource_exact: matches[:suggested_resource_exact],
unmatched: matches[:unmatched])
Expand Down Expand Up @@ -122,7 +123,7 @@ def match_lcsh(event, matches)
# @param matches [Hash] a Hash that keeps track of how many of each algorithm we match
# @return [Array] an array of matched StandardIdentifiers
def match_standard_identifiers(event, matches)
known_ids = %i[unmatched pmid isbn issn doi]
known_ids = %i[unmatched pmid isbn issn doi barcode]
ids = Detector::StandardIdentifiers.new(event.term.phrase)

known_ids.each do |id|
Expand Down
2 changes: 2 additions & 0 deletions app/views/report/algorithm_metrics.html.erb
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
<% else %>
<th>Month</th>
<% end %>
<th>Barcode</th>
<th>DOI</th>
<th>ISSN</th>
<th>ISBN</th>
Expand All @@ -31,6 +32,7 @@
<% else %>
<td><%= metric.month.strftime("%B %Y") %></td>
<% end %>
<td><%= metric.barcode %></td>
<td><%= metric.doi %></td>
<td><%= metric.issn %></td>
<td><%= metric.isbn %></td>
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Adds an integer barcode column to the metrics_algorithms table.
# NOTE(review): presumably this backs the barcode count written by Metrics::Algorithms#generate — confirm.
class AddBarcodeToMetricsAlgorithms < ActiveRecord::Migration[7.1]
  def change
    change_table :metrics_algorithms do |table|
      table.integer :barcode
    end
  end
end
3 changes: 2 additions & 1 deletion db/schema.rb

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 6 additions & 0 deletions db/seeds.rb
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,14 @@
Detector.find_or_create_by(name: 'Journal')
Detector.find_or_create_by(name: 'SuggestedResource')
Detector.find_or_create_by(name: 'Citation')
Detector.find_or_create_by(name: 'Barcode')

# DetectorCategories
DetectorCategory.find_or_create_by(
detector: Detector.find_by(name: 'Barcode'),
category: Category.find_by(name: 'Transactional'),
confidence: 0.95
)
DetectorCategory.find_or_create_by(
detector: Detector.find_by(name: 'Citation'),
category: Category.find_by(name: 'Transactional'),
Expand Down
24 changes: 24 additions & 0 deletions test/controllers/graphql_controller_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,30 @@ class GraphqlControllerTest < ActionDispatch::IntegrationTest
assert_equal('10.1038/nphys1170', json['data']['logSearchEvent']['detectors']['standardIdentifiers'].first['value'])
end

# Posts a logSearchEvent query whose search term is a barcode and checks the first standard identifier detection:
# its kind, its value, and the title returned by the details lookup (replayed from a VCR cassette).
test 'search event query can return detected barcodes' do
  # The query text is kept flush-left so the string sent to the endpoint is unchanged.
  barcode_query = '{
logSearchEvent(sourceSystem: "timdex", searchTerm: "39080027236626") {
detectors {
standardIdentifiers {
kind
value
details {
title
}
}
}
}
}'

  VCR.use_cassette('barcode 39080027236626') do
    post '/graphql', params: { query: barcode_query }

    json = response.parsed_body
    detection = json['data']['logSearchEvent']['detectors']['standardIdentifiers'].first

    assert_equal('barcode', detection['kind'])
    assert_equal('39080027236626', detection['value'])
    assert_equal('Transactions of the Institution of Naval Architects.', detection['details']['title'])
  end
end

test 'search event query can return detected journals' do
post '/graphql', params: { query: '{
logSearchEvent(sourceSystem: "timdex", searchTerm: "nature") {
Expand Down
5 changes: 5 additions & 0 deletions test/fixtures/detector_categories.yml
Original file line number Diff line number Diff line change
Expand Up @@ -43,3 +43,8 @@ seven:
detector: citation
category: transactional
confidence: 0.3

eight:
detector: barcode
category: transactional
confidence: 0.95
3 changes: 3 additions & 0 deletions test/fixtures/detectors.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,9 @@
# created_at :datetime not null
# updated_at :datetime not null
#
barcode:
name: 'Barcode'

citation:
name: 'Citation'

Expand Down
6 changes: 6 additions & 0 deletions test/fixtures/fingerprints.yml
Original file line number Diff line number Diff line change
Expand Up @@ -39,3 +39,9 @@ multiple_detections:

citation:
value: '12 2 2005 2007 6 a accessed altun available context current dec education experience httpcieedasueduvolume6number12 hypertext in issues july language learners no of on online reading serial the understanding vol web'

barcode:
value: 39080678901234

not_a_barcode:
value: '39080678901234 extra some text with'
7 changes: 7 additions & 0 deletions test/fixtures/search_events.yml
Original file line number Diff line number Diff line change
Expand Up @@ -55,3 +55,10 @@ old_suggested_resource_jstor:
term: suggested_resource_jstor
source: test
created_at: <%= 1.year.ago %>
current_month_barcode:
term: barcode
source: test
old_barcode:
term: barcode
source: test
created_at: <%= 1.year.ago %>
8 changes: 8 additions & 0 deletions test/fixtures/terms.yml
Original file line number Diff line number Diff line change
Expand Up @@ -57,3 +57,11 @@ multiple_detections:
citation:
phrase: "A. Altun, &quot;Understanding hypertext in the context of reading on the web: Language learners' experience,&quot; Current Issues in Education, vol. 6, no. 12, July, 2005. [Online serial]. Available: http://cie.ed.asu.edu/volume6/number12/. [Accessed Dec. 2, 2007]."
fingerprint: citation

barcode:
phrase: '39080678901234'
fingerprint: barcode

not_a_barcode:
phrase: '39080678901234 with some extra text'
fingerprint: not_a_barcode
2 changes: 1 addition & 1 deletion test/models/detector/bulk_checker_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ class CitationTest < ActiveSupport::TestCase
test 'standard_identifier_bulk_checker' do
bulk = Detector::StandardIdentifiers.check_all_matches(output: true)

assert_equal(5, bulk.count)
assert_equal(6, bulk.count)
end

test 'suggested_resources_bulk_checker' do
Expand Down
35 changes: 35 additions & 0 deletions test/models/lookup_barcode_test.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
# frozen_string_literal: true

require 'test_helper'

# Exercises LookupBarcode#info against recorded VCR cassettes.
class LookupBarcodeTest < ActiveSupport::TestCase
  # A successful lookup returns a hash that contains each of the listed keys.
  test 'metadata object is returned with expected fields' do
    VCR.use_cassette('barcode 39080027236626') do
      returned_keys = LookupBarcode.new.info('39080027236626').keys

      %i[title date publisher authors link_resolver_url].each do |field|
        assert_includes(returned_keys, field)
      end
    end
  end

  # The link is the fulldisplay URL ending in "alma" followed by the record identifier.
  test 'link resolver URL returns a simple item URL' do
    VCR.use_cassette('barcode 39080027236626') do
      link = LookupBarcode.new.info('39080027236626')[:link_resolver_url]

      assert_equal(
        'https://mit.primo.exlibrisgroup.com/discovery/fulldisplay?vid=01MIT_INST:MIT&docid=alma990002933430106761',
        link
      )
    end
  end

  # A term for which the lookup yields nothing results in nil rather than a hash.
  test 'barcode not found' do
    VCR.use_cassette('barcode not found') do
      assert_nil(LookupBarcode.new.info('this-is-not-a-barcode'))
    end
  end
end
25 changes: 25 additions & 0 deletions test/models/metrics/algorithms_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,18 @@
# suggested_resource_exact :integer
# lcsh :integer
# citation :integer
# barcode :integer
#
require 'test_helper'

class Algorithms < ActiveSupport::TestCase
# Monthlies
# NOTE(review): the expected count of 1 presumably corresponds to the one barcode search event fixture that has no
# explicit created_at (the other is dated a year ago) — confirm against test/fixtures/search_events.yml.
test 'barcode counts are included in monthly aggregation' do
  monthly = Metrics::Algorithms.new.generate(DateTime.now)

  assert_equal(1, monthly.barcode)
end

test 'citation counts are included in monthly aggregation' do
aggregate = Metrics::Algorithms.new.generate(DateTime.now)

Expand Down Expand Up @@ -92,6 +99,11 @@ class Algorithms < ActiveSupport::TestCase
# drop all searchevents to make math easier and minimize fragility over time as more fixtures are created
SearchEvent.delete_all

barcode_expected_count = rand(1...100)
barcode_expected_count.times do
SearchEvent.create(term: terms(:barcode), source: 'test')
end

citation_expected_count = rand(1...100)
citation_expected_count.times do
SearchEvent.create(term: terms(:citation), source: 'test')
Expand Down Expand Up @@ -129,6 +141,7 @@ class Algorithms < ActiveSupport::TestCase

aggregate = Metrics::Algorithms.new.generate(DateTime.now)

assert_equal barcode_expected_count, aggregate.barcode
assert_equal citation_expected_count, aggregate.citation
assert_equal doi_expected_count, aggregate.doi
assert_equal issn_expected_count, aggregate.issn
Expand All @@ -139,6 +152,12 @@ class Algorithms < ActiveSupport::TestCase
end

# Total
# NOTE(review): the expected count of 2 presumably corresponds to the two barcode search event fixtures (current and
# a year old) — confirm against test/fixtures/search_events.yml.
test 'barcode counts are included in total aggregation' do
  total = Metrics::Algorithms.new.generate

  assert_equal(2, total.barcode)
end

test 'citation counts are included in total aggregation' do
aggregate = Metrics::Algorithms.new.generate

Expand Down Expand Up @@ -197,6 +216,11 @@ class Algorithms < ActiveSupport::TestCase
# drop all searchevents to make math easier and minimize fragility over time as more fixtures are created
SearchEvent.delete_all

barcode_expected_count = rand(1...100)
barcode_expected_count.times do
SearchEvent.create(term: terms(:barcode), source: 'test')
end

citation_expected_count = rand(1...100)
citation_expected_count.times do
SearchEvent.create(term: terms(:citation), source: 'test')
Expand Down Expand Up @@ -239,6 +263,7 @@ class Algorithms < ActiveSupport::TestCase

aggregate = Metrics::Algorithms.new.generate

assert_equal barcode_expected_count, aggregate.barcode
assert_equal citation_expected_count, aggregate.citation
assert_equal doi_expected_count, aggregate.doi
assert_equal issn_expected_count, aggregate.issn
Expand Down
5 changes: 5 additions & 0 deletions test/test_helper.rb
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,11 @@
header&.each do |redacted_text|
interaction.filter!(redacted_text, '<REDACTED_NEL>')
end

header = interaction.response&.headers&.[]('Set-Cookie')
header&.each do |redacted_text|
interaction.filter!(redacted_text, '<FAKE_COOKIE_DATA>')
end
end
end

Expand Down
Loading

0 comments on commit 7f8bd83

Please sign in to comment.