From e4eef925d07e9d2c5b9065788e0c49614a3cb450 Mon Sep 17 00:00:00 2001 From: Syphax bouazzouni Date: Fri, 15 Mar 2024 14:02:13 +0100 Subject: [PATCH 1/2] Feature: resolving resources within specific ontologies, supporting various output formats (#69) * remove useless line preventing sending the reset password email (#65) * [ontoportal-bot] Gemfile.lock update * Feature: api endpoint returns json-ld for the element with that URI * implement GET, POST requests, and GET /parse to submit INRAETHES ontology * Enhance tests using real data submission * Enhance bin/ontoportal to make it able to run locally with UI * Small fixes - change controller name and test controller name - remove /parse endpoint - rackup to shotgun in bin/ontoportal * Fix test dereference resource controller - in json test, before we test the result we sort the hashes with the function (sort_nested_hash) - in xml, ntriples and turtle, we split the result and the expected result, sort them and compare them * update gemfile: add json-ld (3.0.2) * change dereferencing namespacing and clean code * Fix dereference resource tests expected results to handle parse triples * fix xml serialization test for AG and Gb by cleaning the xml string --------- Co-authored-by: OntoPortal Bot Co-authored-by: imadbourouche --- Dockerfile | 1 + Gemfile | 2 + Gemfile.lock | 16 +- bin/ontoportal | 4 +- .../dereference_resource_controller.rb | 59 ++++++ helpers/users_helper.rb | 4 +- models/simple_wrappers.rb | 2 + .../test_dereference_resource_controller.rb | 191 ++++++++++++++++++ 8 files changed, 268 insertions(+), 11 deletions(-) create mode 100644 controllers/dereference_resource_controller.rb create mode 100644 test/controllers/test_dereference_resource_controller.rb diff --git a/Dockerfile b/Dockerfile index 6feebd11..94e0e656 100644 --- a/Dockerfile +++ b/Dockerfile @@ -7,6 +7,7 @@ RUN apt-get update -yqq && apt-get install -yqq --no-install-recommends \ openjdk-11-jre-headless \ raptor2-utils \ wait-for-it \ +
libraptor2-dev \ && rm -rf /var/lib/apt/lists/* RUN mkdir -p /srv/ontoportal/ontologies_api diff --git a/Gemfile b/Gemfile index 351dda00..0ccc627d 100644 --- a/Gemfile +++ b/Gemfile @@ -15,6 +15,8 @@ gem 'sinatra-advanced-routes' gem 'sinatra-contrib', '~> 1.0' gem 'request_store' gem 'parallel' +gem 'json-ld' + # Rack middleware gem 'ffi' diff --git a/Gemfile.lock b/Gemfile.lock index 53f48a58..d210ac10 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -11,7 +11,7 @@ GIT GIT remote: https://github.com/ontoportal-lirmm/goo.git - revision: 6c51346b6f150a69391794b7909b30592acbbe0e + revision: 3f8b1f0b62c4334306f9ed5cb7b17a1b645e7db3 branch: development specs: goo (0.0.2) @@ -57,7 +57,7 @@ GIT GIT remote: https://github.com/ontoportal-lirmm/ontologies_linked_data.git - revision: d37aeafbd7bef120917fb4d601f8287a9a859f69 + revision: 337dce98ec27627d14a440ff2a6ed09483cdac12 branch: development specs: ontologies_linked_data (0.0.1) @@ -117,7 +117,7 @@ GEM bcrypt_pbkdf (1.1.0) bigdecimal (1.4.2) builder (3.2.4) - capistrano (3.18.0) + capistrano (3.18.1) airbrussh (>= 1.0.0) i18n rake (>= 10.0.0) @@ -191,12 +191,12 @@ GEM representable (~> 3.0) retriable (>= 2.0, < 4.a) rexml - google-cloud-core (1.6.1) + google-cloud-core (1.7.0) google-cloud-env (>= 1.0, < 3.a) google-cloud-errors (~> 1.0) google-cloud-env (2.1.1) faraday (>= 1.0, < 3.a) - google-cloud-errors (1.3.1) + google-cloud-errors (1.4.0) google-protobuf (3.25.3-x86_64-darwin) google-protobuf (3.25.3-x86_64-linux) googleapis-common-protos (1.5.0) @@ -230,13 +230,16 @@ GEM i18n (0.9.5) concurrent-ruby (~> 1.0) json (2.7.1) + json-ld (3.0.2) + multi_json (~> 1.12) + rdf (>= 2.2.8, < 4.0) json-schema (2.8.1) addressable (>= 2.4) json_pure (2.7.1) jwt (2.8.1) base64 kgio (2.11.4) - libxml-ruby (5.0.2) + libxml-ruby (5.0.3) link_header (0.0.8) logger (1.6.0) macaddr (1.7.2) @@ -422,6 +425,7 @@ DEPENDENCIES ffi goo! 
haml (~> 5.2.2) + json-ld json-schema (~> 2.0) minitest (~> 4.0) minitest-stub_any_instance diff --git a/bin/ontoportal b/bin/ontoportal index 4840dad3..ea93cebd 100755 --- a/bin/ontoportal +++ b/bin/ontoportal @@ -76,7 +76,7 @@ build_docker_run_cmd() { local goo_path="$3" local sparql_client_path="$4" - local docker_run_cmd="docker compose run --rm -it" + local docker_run_cmd="docker compose -p ontoportal_docker run --rm -it --name api-service" local bash_cmd="" # Conditionally add bind mounts only if the paths are not empty @@ -177,7 +177,7 @@ run_command() { dev() { echo "Starting OntoPortal API development server..." - local custom_command="bundle exec shotgun --host 0.0.0.0 --env=development" + local custom_command="bundle exec shotgun --host 0.0.0.0 --env=development --port 9393" run_command "$custom_command" "$@" } diff --git a/controllers/dereference_resource_controller.rb b/controllers/dereference_resource_controller.rb new file mode 100644 index 00000000..e566c38c --- /dev/null +++ b/controllers/dereference_resource_controller.rb @@ -0,0 +1,59 @@ +require_relative '../test/test_case' + + +class DereferenceResourceController < ApplicationController + namespace "/ontologies" do + get "/:acronym/resolve/:uri" do + acronym = params[:acronym] + uri = params[:uri] + + if acronym.blank? || uri.blank? 
+ error 500, "Usage: ontologies/:acronym/resolve/:uri?output_format= OR POST: acronym, uri, output_format parameters" + end + + output_format = params[:output_format].presence || 'jsonld' + process_request(acronym, uri, output_format) + end + + private + + def process_request(acronym_param, uri_param, output_format) + acronym = acronym_param + uri = URI.decode_www_form_component(uri_param) + + error 500, "INVALID URI" unless valid_url?(uri) + sub = LinkedData::Models::Ontology.find(acronym).first&.latest_submission + + error 500, "Ontology not found" unless sub + + r = Resource.new(sub.id, uri) + case output_format + when 'jsonld' + content_type 'application/json' + reply JSON.parse(r.to_json) + when 'json' + content_type 'application/json' + reply JSON.parse(r.to_json) + when 'xml' + content_type 'application/xml' + reply r.to_xml + when 'turtle' + content_type 'text/turtle' + reply r.to_turtle + when 'ntriples' + content_type 'application/n-triples' + reply r.to_ntriples + else + error 500, "Invalid output format" + end + + end + + def valid_url?(url) + uri = URI.parse(url) + uri.is_a?(URI::HTTP) || uri.is_a?(URI::HTTPS) + rescue URI::InvalidURIError + false + end + end +end \ No newline at end of file diff --git a/helpers/users_helper.rb b/helpers/users_helper.rb index e2c69e60..a9a14d30 100644 --- a/helpers/users_helper.rb +++ b/helpers/users_helper.rb @@ -23,9 +23,7 @@ def send_reset_token(email, username) error 404, "User not found" unless user reset_token = token(36) user.resetToken = reset_token - - return user if user.valid? 
- + user.save(override_security: true) LinkedData::Utils::Notifications.reset_password(user, reset_token) user diff --git a/models/simple_wrappers.rb b/models/simple_wrappers.rb index e4097aff..f6aeb027 100644 --- a/models/simple_wrappers.rb +++ b/models/simple_wrappers.rb @@ -29,3 +29,5 @@ ProvisionalRelation = LinkedData::Models::ProvisionalRelation SearchHelper = Sinatra::Helpers::SearchHelper + +Resource = LinkedData::Models::Resource \ No newline at end of file diff --git a/test/controllers/test_dereference_resource_controller.rb b/test/controllers/test_dereference_resource_controller.rb new file mode 100644 index 00000000..4fb615ff --- /dev/null +++ b/test/controllers/test_dereference_resource_controller.rb @@ -0,0 +1,191 @@ +require_relative '../test_case' +require 'rexml/document' + +class TestDereferenceResourceController < TestCase + + def self.before_suite + LinkedData::SampleData::Ontology.create_ontologies_and_submissions({ + process_submission: true, + process_options: { process_rdf: true, extract_metadata: false, generate_missing_labels: false}, + acronym: 'INRAETHESDEREF', + name: 'INRAETHES', + file_path: './test/data/ontology_files/thesaurusINRAE_nouv_structure.rdf', + ont_count: 1, + ontology_format: 'SKOS', + submission_count: 1 + }) + + @@graph = "INRAETHESDEREF-0" + @@uri = CGI.escape("http://opendata.inrae.fr/thesaurusINRAE/c_6496") + end + + def test_dereference_resource_controller_json + get "/ontologies/#{@@graph}/resolve/#{@@uri}?output_format=json" + assert last_response.ok? 
+ + result = last_response.body + expected_result = <<-JSON + { + "@context": { + "ns0": "http://opendata.inrae.fr/thesaurusINRAE/", + "rdf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#", + "owl": "http://www.w3.org/2002/07/owl#", + "skos": "http://www.w3.org/2004/02/skos/core#" + }, + "@graph": [ + { + "@id": "ns0:c_6496", + "@type": [ + "owl:NamedIndividual", + "skos:Concept" + ], + "skos:broader": { + "@id": "ns0:c_a9d99f3a" + }, + "skos:topConceptOf": { + "@id": "ns0:mt_65" + }, + "skos:inScheme": [ + { + "@id": "ns0:thesaurusINRAE" + }, + { + "@id": "ns0:mt_65" + } + ], + "skos:prefLabel": { + "@value": "altération de l'ADN", + "@language": "fr" + } + }, + { + "@id": "ns0:mt_65", + "skos:hasTopConcept": { + "@id": "ns0:c_6496" + } + } + ] + } + JSON + a = sort_nested_hash(JSON.parse(result)) + b = sort_nested_hash(JSON.parse(expected_result)) + assert_equal b, a + end + + def test_dereference_resource_controller_xml + get "/ontologies/#{@@graph}/resolve/#{@@uri}?output_format=xml" + assert last_response.ok? + + result = last_response.body + + expected_result_1 = <<-XML + + + + + + + + + altération de l'ADN + + + + + + XML + + expected_result_2 = <<-XML + + + + + + + altération de l'ADN + + + + + + + + XML + + + clean_xml = -> (x) { x.strip.gsub('/>', '').gsub('', '').split(' ').reject(&:empty?)} + + + a = result.gsub('\\"', '"')[1..-2].split("\\n").map{|x| clean_xml.call(x)}.flatten + b_1 = expected_result_1.split("\n").map{|x| clean_xml.call(x)}.flatten + b_2 = expected_result_2.split("\n").map{|x| clean_xml.call(x)}.flatten + + assert_includes [b_1.sort, b_2.sort], a.sort + end + + def test_dereference_resource_controller_ntriples + get "/ontologies/#{@@graph}/resolve/#{@@uri}?output_format=ntriples" + assert last_response.ok? + + result = last_response.body + expected_result = <<-NTRIPLES + . + . + . + . + . + . + "alt\\\\u00E9rationdel'ADN"@fr . + . + NTRIPLES + a = result.gsub('\\"', '"').gsub(' ', '')[1..-2].split("\\n").reject(&:empty?) 
+ b = expected_result.gsub(' ', '').split("\n").reject(&:empty?) + assert_equal b.sort, a.sort + end + + def test_dereference_resource_controller_turtle + get "/ontologies/#{@@graph}/resolve/#{@@uri}?output_format=turtle" + assert last_response.ok? + + result = last_response.body + expected_result = <<-TURTLE + @prefix rdf: . + @prefix ns0: . + @prefix owl: . + @prefix skos: . + + ns0:c_6496 + a owl:NamedIndividual, skos:Concept ; + skos:broader ns0:c_a9d99f3a ; + skos:inScheme ns0:mt_65, ns0:thesaurusINRAE ; + skos:prefLabel "altération de l'ADN"@fr ; + skos:topConceptOf ns0:mt_65 . + + ns0:mt_65 + skos:hasTopConcept ns0:c_6496 . + TURTLE + a = result.gsub('\\"', '"').gsub(' ', '')[1..-2].split("\\n").reject(&:empty?) + b = expected_result.gsub(' ', '').split("\n").reject(&:empty?) + + assert_equal b.sort, a.sort + end + + private + + def sort_nested_hash(hash) + sorted_hash = {} + + hash.each do |key, value| + if value.is_a?(Hash) + sorted_hash[key] = sort_nested_hash(value) + elsif value.is_a?(Array) + sorted_hash[key] = value.map { |item| item.is_a?(Hash) ? 
sort_nested_hash(item) : item }.sort_by { |item| item.to_s } + else + sorted_hash[key] = value + end + end + + sorted_hash.sort.to_h + end + +end \ No newline at end of file From ccf4f770be2ddb64463c88c373a6dfff3ee9bfec Mon Sep 17 00:00:00 2001 From: Imad Bourouche Date: Mon, 18 Mar 2024 09:27:48 +0100 Subject: [PATCH 2/2] Feature: URI dereferencing content negotiation (#72) * remove useless line preventing sending the reset password email (#65) * [ontoportal-bot] Gemfile.lock update * Feature: api endpoint returns json-ld for the element with that URI * implement GET, POST requests, and GET /parse to submit INRAETHES ontology * Enhance tests using real data submission * Enhance bin/ontoportal to make it able to run locally with UI * Small fixes - change controller name and test controller name - remove /parse endpoint - rackup to shotgun in bin/ontoportal * Fix test dereference resource controller - in json test, before we test the result we sort the hashes with the function (sort_nested_hash) - in xml, ntriples and turtle, we split the result and the expected result, sort them and compare them * update gemfile: add json-ld (3.0.2) * change dereferencing namespacing and clean code * Fix dereference resource tests expected results to handle parse triples * Feature: add content negotiation middleware * Add headers to tests instead of output_format * Apply middleware to only /ontologies/:acronym/resolve/:uri * Add test cases for AllegroGraph and fix xml test * move the content_negotiation middleware into rack folder and module * re-implement the usage of the output_format param if no format is given in the request header * remove checks that are no longer necessary from the tests * clean and simplify the content negotiation middleware * add the accepted format in the error response of resolvability endpoint * refactor the content negotiation middleware code to be clearer --------- Co-authored-by: Syphax bouazzouni Co-authored-by: OntoPortal Bot ---
.../dereference_resource_controller.rb | 30 ++-- lib/rack/content_negotiation.rb | 131 ++++++++++++++++++ .../test_dereference_resource_controller.rb | 18 ++- 3 files changed, 155 insertions(+), 24 deletions(-) create mode 100644 lib/rack/content_negotiation.rb diff --git a/controllers/dereference_resource_controller.rb b/controllers/dereference_resource_controller.rb index e566c38c..77de82f9 100644 --- a/controllers/dereference_resource_controller.rb +++ b/controllers/dereference_resource_controller.rb @@ -1,5 +1,6 @@ require_relative '../test/test_case' +use Rack::ContentNegotiation class DereferenceResourceController < ApplicationController namespace "/ontologies" do @@ -11,7 +12,8 @@ class DereferenceResourceController < ApplicationController error 500, "Usage: ontologies/:acronym/resolve/:uri?output_format= OR POST: acronym, uri, output_format parameters" end - output_format = params[:output_format].presence || 'jsonld' + output_format = env["format"].presence || params[:output_format].presence || 'application/n-triples' + process_request(acronym, uri, output_format) end @@ -28,25 +30,19 @@ def process_request(acronym_param, uri_param, output_format) r = Resource.new(sub.id, uri) case output_format - when 'jsonld' - content_type 'application/json' - reply JSON.parse(r.to_json) - when 'json' - content_type 'application/json' - reply JSON.parse(r.to_json) - when 'xml' - content_type 'application/xml' - reply r.to_xml - when 'turtle' - content_type 'text/turtle' - reply r.to_turtle - when 'ntriples' - content_type 'application/n-triples' - reply r.to_ntriples + when 'application/ld+json', 'application/json' + r.to_json + when 'application/rdf+xml', 'application/xml' + r.to_xml + when 'text/turtle' + r.to_turtle + when 'application/n-triples' + r.to_ntriples else - error 500, "Invalid output format" + error 500, "Invalid output format, valid format are: application/json, application/ld+json, application/xml, application/rdf+xml, text/turtle and 
application/n-triples" end + end def valid_url?(url) diff --git a/lib/rack/content_negotiation.rb b/lib/rack/content_negotiation.rb new file mode 100644 index 00000000..4c91da6a --- /dev/null +++ b/lib/rack/content_negotiation.rb @@ -0,0 +1,131 @@ +module Rack + class ContentNegotiation + DEFAULT_CONTENT_TYPE = "application/n-triples" # N-Triples + VARY = { 'Vary' => 'Accept' }.freeze + ENDPOINTS_FILTER = %r{^/ontologies/[^/]+/resolve/[^/]+$} # Accepted API endpoints to apply content negotiation + + # @return [#call] + attr_reader :app + + # @return [Hash{Symbol => String}] + attr_reader :options + + ## + # @param [#call] app + # @param [Hash{Symbol => Object}] options + # Other options passed to writer. + # @option options [String] :default (DEFAULT_CONTENT_TYPE) Specific content type + # @option options [RDF::Format, #to_sym] :format Specific RDF writer format to use + def initialize(app, options = {}) + @app, @options = app, options + @options[:default] = (@options[:default] || DEFAULT_CONTENT_TYPE).to_s + end + + ## + # Handles a Rack protocol request. + # Parses Accept header to find appropriate mime-type and sets content_type accordingly. + # + # Inserts ordered content types into the environment as `ORDERED_CONTENT_TYPES` if an Accept header is present + # + # @param [Hash{String => String}] env + # @return [Array(Integer, Hash, #each)] Status, Headers and Body + # @see https://rubydoc.info/github/rack/rack/file/SPEC + def call(env) + if env['PATH_INFO'].match?(ENDPOINTS_FILTER) + if env.has_key?('HTTP_ACCEPT') + accepted_types = parse_accept_header(env['HTTP_ACCEPT']) + if !accepted_types.empty? 
+ env["format"] = accepted_types.first + add_content_type_header(app.call(env), env["format"]) + else + not_acceptable + end + else + env["format"] = options[:default] + add_content_type_header(app.call(env), env["format"]) + end + else + app.call(env) + end + end + + protected + + # Parses an HTTP `Accept` header, returning an array of MIME content types ordered by precedence rules. + # + # @param [String, #to_s] header + # @return [Array] Array of content types sorted by precedence + # @see https://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.1 + def parse_accept_header(header) + entries = header.to_s.split(',') + parsed_entries = entries.map { |entry| parse_accept_entry(entry) } + sorted_entries = parsed_entries.sort_by { |entry| entry.quality }.reverse + content_types = sorted_entries.map { |entry| entry.content_type } + content_types.flatten.compact + end + + + + # Parses an individual entry from the Accept header. + # + # @param [String] entry An entry from the Accept header + # @return [Entry] An object representing the parsed entry + def parse_accept_entry(entry) + # Represents an entry parsed from the Accept header + entry_struct = Struct.new(:content_type, :quality, :wildcard_count, :param_count) + content_type, *params = entry.split(';').map(&:strip) + quality = 1.0 # Default quality + params.reject! do |param| + if param.start_with?('q=') + quality = param[2..-1].to_f + true + end + end + wildcard_count = content_type.count('*') + entry_struct.new(content_type, quality, wildcard_count, params.size) + end + + + ## + # Returns a content type appropriate for the given `media_range`, + # returns `nil` if `media_range` contains a wildcard subtype + # that is not mapped. 
+ # + # @param [String, #to_s] media_range + # @return [String, nil] + def find_content_type_for_media_range(media_range) + case media_range.to_s + when '*/*', 'text/*' + options[:default] + when 'application/n-triples' + 'application/n-triples' + when 'text/turtle' + 'text/turtle' + when 'application/json', 'application/ld+json', 'application/*' + 'application/ld+json' + when 'text/xml', 'text/rdf+xml', 'application/rdf+xml', 'application/xml' + 'application/rdf+xml' + else + nil + end + end + + ## + # Outputs an HTTP `406 Not Acceptable` response. + # + # @param [String, #to_s] message + # @return [Array(Integer, Hash, #each)] + def not_acceptable(message = nil) + code = 406 + http_status = [code, Rack::Utils::HTTP_STATUS_CODES[code]].join(' ') + message = http_status + (message.nil? ? "\n" : " (#{message})\n") + [code, { 'Content-Type' => "text/plain" }.merge(VARY), [message]] + end + + def add_content_type_header(response, type) + response[1] = response[1].merge(VARY).merge('Content-Type' => type) + response + end + + end +end diff --git a/test/controllers/test_dereference_resource_controller.rb b/test/controllers/test_dereference_resource_controller.rb index 4fb615ff..e828a5a1 100644 --- a/test/controllers/test_dereference_resource_controller.rb +++ b/test/controllers/test_dereference_resource_controller.rb @@ -20,7 +20,8 @@ def self.before_suite end def test_dereference_resource_controller_json - get "/ontologies/#{@@graph}/resolve/#{@@uri}?output_format=json" + header 'Accept', 'application/json' + get "/ontologies/#{@@graph}/resolve/#{@@uri}" assert last_response.ok? result = last_response.body @@ -73,7 +74,8 @@ def test_dereference_resource_controller_json end def test_dereference_resource_controller_xml - get "/ontologies/#{@@graph}/resolve/#{@@uri}?output_format=xml" + header 'Accept', 'application/xml' + get "/ontologies/#{@@graph}/resolve/#{@@uri}" assert last_response.ok? 
result = last_response.body @@ -124,7 +126,8 @@ def test_dereference_resource_controller_xml end def test_dereference_resource_controller_ntriples - get "/ontologies/#{@@graph}/resolve/#{@@uri}?output_format=ntriples" + header 'Accept', 'application/n-triples' + get "/ontologies/#{@@graph}/resolve/#{@@uri}" assert last_response.ok? result = last_response.body @@ -135,16 +138,17 @@ def test_dereference_resource_controller_ntriples . . . - "alt\\\\u00E9rationdel'ADN"@fr . + "alt\\u00E9rationdel'ADN"@fr . . NTRIPLES - a = result.gsub('\\"', '"').gsub(' ', '')[1..-2].split("\\n").reject(&:empty?) + a = result.gsub(' ', '').split("\n").reject(&:empty?) b = expected_result.gsub(' ', '').split("\n").reject(&:empty?) assert_equal b.sort, a.sort end def test_dereference_resource_controller_turtle - get "/ontologies/#{@@graph}/resolve/#{@@uri}?output_format=turtle" + header 'Accept', 'text/turtle' + get "/ontologies/#{@@graph}/resolve/#{@@uri}" assert last_response.ok? result = last_response.body @@ -164,7 +168,7 @@ def test_dereference_resource_controller_turtle ns0:mt_65 skos:hasTopConcept ns0:c_6496 . TURTLE - a = result.gsub('\\"', '"').gsub(' ', '')[1..-2].split("\\n").reject(&:empty?) + a = result.gsub(' ', '').split("\n").reject(&:empty?) b = expected_result.gsub(' ', '').split("\n").reject(&:empty?) assert_equal b.sort, a.sort