diff --git a/.travis.yml b/.travis.yml index cbe1dbf..6b6be7a 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,3 +1,4 @@ +sudo: false language: ruby rvm: - 2.2 diff --git a/README.md b/README.md index 9cd3424..7d6ce70 100644 --- a/README.md +++ b/README.md @@ -113,21 +113,24 @@ I, [2014-12-04T15:12:48.853964 #86734] INFO -- : EZID DeleteIdentifier -- succe ## Batch Download -Instantiate an `Ezid::Client` and call `batch_download` with hash options -- see http://ezid.cdlib.org/doc/apidoc.html#parameters. Repeated values should be given as an array value for the parameter key. - -Note that, due to the asynchronous nature of this request, the response only returns the URL at which the batch will be available to download (as described in the EZID documentation). Use the `notify` option to specify one or more email addresses to receive notification when the download file is actually available. - -**Example** - -``` ->> c = Ezid::Client.new -=> # user="eziduser" session=CLOSED> ->> response = c.batch_download(format: "csv", notify: "eziduser@example.com", column: ["_id", "_target", "_status", "_profile", "_export", "_created", "_updated"], convertTimestamps: "yes", permanence: "real", owner: "eziduser") -I, [2015-02-20T15:16:53.462660 #55850] INFO -- : EZID BatchDownload -- success: http://ezid.cdlib.org/download/473deecb96.csv.gz -=> # ->> response.download_url -=> "http://ezid.cdlib.org/download/da543b91a0.csv.gz" -``` +See http://ezid.cdlib.org/doc/apidoc.html#parameters. Repeated values should be given as an array value for the parameter key. + +``` +>> batch = Ezid::BatchDownload.new(:csv) + => # +>> batch.column = ["_id", "_target"] + => ["_id", "_target"] +>> batch.createdAfter = Date.today.to_time + => 2016-02-24 00:00:00 -0500 +>> batch + => # +>> batch.download_url +I, [2016-02-24T18:03:40.828005 #1084] INFO -- : EZID BatchDownload -- success: http://ezid.cdlib.org/download/4a63401e17.csv.gz + => "http://ezid.cdlib.org/download/4a63401e17.csv.gz" +>> batch.download_file +File successfully download to /current/working/directory/4a63401e17.csv.gz. + => nil + ``` ## Metadata handling diff --git a/VERSION b/VERSION index 26aaba0..f0bb29e 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -1.2.0 +1.3.0 diff --git a/lib/ezid/batch_download.rb b/lib/ezid/batch_download.rb new file mode 100644 index 0000000..c640f16 --- /dev/null +++ b/lib/ezid/batch_download.rb @@ -0,0 +1,131 @@ +require "hashie" +require "net/http" +require "uri" +require_relative "reserved_metadata" + +module Ezid + class BatchDownloadError < Error; end + + class BatchDownload < Hashie::Dash + include Hashie::Extensions::Coercion + + ANVL = "anvl".freeze + CSV = "csv".freeze + XML = "xml".freeze + FORMATS = [ ANVL, CSV, XML ].freeze + + YES = "yes".freeze + NO = "no".freeze + BOOLEANS = [ YES, NO ].freeze + + TEST = "test".freeze + REAL = "real".freeze + PERMANENCE = [ TEST, REAL ].freeze + + ARK = "ark".freeze + DOI = "doi".freeze + URN = "urn".freeze + TYPES = [ ARK, DOI, URN, ].freeze + + # CSV Columns + ID = "_id".freeze + MAPPED_CREATOR = "_mappedCreator".freeze + MAPPED_TITLE = "_mappedTitle".freeze + MAPPED_PUBLISHER = "_mappedPublisher".freeze + MAPPED_DATE = "_mappedDate".freeze + MAPPED_TYPE = "_mappedType".freeze + + MAX_DOWNLOAD_TRIES = 300 + DOWNLOAD_RETRY_INTERVAL = 1 + + # Parameters + property :format, required: true # {anvl|csv|xml} + property :column # repeatable + property :notify # repeatable + property :convertTimestamps # {yes|no} + + # Search constraints + property :createdAfter + property :createdBefore + property :crossref # {yes|no} + property :exported # {yes|no} + property :owner # repeatable + property :ownergroup # repeatable + property :permanence # {test|real} + property :profile # (repeatable) + property :status # {reserved|public|unavailable} (repeatable) + property :type # {ark|doi|urn} (repeatable) + property :updatedAfter + property :updatedBefore + + coerce_value FalseClass, ->(v) { NO } + coerce_value TrueClass, ->(v) { YES } + coerce_value DateTime, ->(v) { v.to_time.utc.iso8601 } + coerce_value Time, Integer + + def initialize(format, args={}) + super(args.merge(format: format)) + end + + def params + to_h + end + + def get_response + @response ||= client.batch_download(params) + end + + def reload + @response = nil + end + + def download_url + get_response.download_url + end + + def download_file(path: nil) + path ||= Dir.getwd + fullpath = File.directory?(path) ? File.join(path, download_filename) : path + tries = 0 + begin + tries += 1 + download = Net::HTTP.get_response(download_uri) + download.value + rescue Net::HTTPServerException => e + if download.is_a?(Net::HTTPNotFound) + if tries < MAX_DOWNLOAD_TRIES + print "Download file not yet available (attempt #{tries} of #{MAX_DOWNLOAD_TRIES})." + puts " Trying again in #{DOWNLOAD_RETRY_INTERVAL} second(s) ..." + sleep DOWNLOAD_RETRY_INTERVAL + retry + else + raise BatchDownloadError, + "Maximum download attempts (#{MAX_DOWNLOAD_TRIES}) reached unsuccessfully." + end + else + raise + end + else + File.open(fullpath, "wb") do |f| + f.write(download.body) + end + puts "File successfully download to #{fullpath}." + end + end + + private + + def download_uri + URI(download_url) + end + + def download_filename + File.basename(download_uri.path) + end + + def client + Client.new + end + + end +end diff --git a/lib/ezid/client.rb b/lib/ezid/client.rb index 917c897..8b48061 100644 --- a/lib/ezid/client.rb +++ b/lib/ezid/client.rb @@ -1,11 +1,13 @@ require "net/http" +require_relative "error" +require_relative "status" require_relative "configuration" require_relative "session" require_relative "metadata" require_relative "identifier" require_relative "proxy_identifier" -require_relative "error" +require_relative "batch_download" Dir[File.expand_path("../responses/*.rb", __FILE__)].each { |m| require m } Dir[File.expand_path("../requests/*.rb", __FILE__)].each { |m| require m } diff --git a/lib/ezid/identifier.rb b/lib/ezid/identifier.rb index 54a14b3..75c1db1 100644 --- a/lib/ezid/identifier.rb +++ b/lib/ezid/identifier.rb @@ -14,11 +14,6 @@ class Identifier # Attributes to display on inspect INSPECT_ATTRS = %w( id status target created ).freeze - # EZID status terms - PUBLIC = "public".freeze - RESERVED = "reserved".freeze - UNAVAILABLE = "unavailable".freeze - class << self attr_accessor :defaults @@ -151,19 +146,19 @@ def delete # Is the identifier reserved? # @return [Boolean] def reserved? - status == RESERVED + status == Status::RESERVED end # Is the identifier public? # @return [Boolean] def public? - status == PUBLIC + status == Status::PUBLIC end # Is the identifier unavailable? # @return [Boolean] def unavailable? - status =~ /^#{UNAVAILABLE}/ + status.to_s.start_with? Status::UNAVAILABLE end # Is the identifier deletable? @@ -182,7 +177,7 @@ def unavailable!(reason = nil) if unavailable? and reason.nil? return end - value = UNAVAILABLE + value = Status::UNAVAILABLE if reason value += " | #{reason}" end @@ -192,7 +187,7 @@ def unavailable!(reason = nil) # Mark the identifier as public # @return [String] the new status def public! - self.status = PUBLIC + self.status = Status::PUBLIC end protected diff --git a/lib/ezid/metadata.rb b/lib/ezid/metadata.rb index ac7aecb..0750423 100644 --- a/lib/ezid/metadata.rb +++ b/lib/ezid/metadata.rb @@ -1,4 +1,5 @@ require "hashie" +require_relative "reserved_metadata" module Ezid # @@ -7,6 +8,7 @@ module Ezid # @api private # class Metadata < Hashie::Mash + include ReservedMetadata # EZID metadata field/value separator ANVL_SEPARATOR = ": " @@ -27,16 +29,11 @@ class Metadata < Hashie::Mash LINE_CONTINUATION_RE = /\r?\n\s+/ # A line ending LINE_ENDING_RE = /\r?\n/ - # EZID reserved metadata elements that are read-only - # @see http://ezid.cdlib.org/doc/apidoc.html#internal-metadata - READONLY = %w( _owner _ownergroup _shadows _shadowedby _datacenter _created _updated ).freeze - # EZID metadata profiles - # @see http://ezid.cdlib.org/doc/apidoc.html#metadata-profiles - # @note crossref is not included because it is a simple element - PROFILES = %w( dc datacite erc ).freeze - RESERVED_ALIASES = [ :coowners=, :export=, :profile=, :status=, :target=, - :coowners, :export, :profile, :status, :target, - :datacenter, :owner, :ownergroup, :shadowedby, :shadows ] + # @api private + RESERVED_ALIASES = %w( + coowners datacenter export owner ownergroup + profile shadowedby shadows status target + ).freeze def initialize(data={}) super coerce(data) @@ -75,39 +72,20 @@ def to_s protected - def method_missing(name, *args, &block) - if reserved_alias?(name) - reserved_alias(name, *args) - elsif profile_accessor?(name) - profile_accessor(name, *args) + # Overrides Hashie::Mash + def convert_key(key) + k = super + if RESERVED_ALIASES.include?(k) + "_#{k}" + elsif k =~ /\A(dc|datacite|erc)_/ + k.sub(/_/, ".") else - super + k end end private - def reserved_alias?(name) - RESERVED_ALIASES.include?(name) - end - - def reserved_alias(name, *args) - send("_#{name}", *args) - end - - def profile_accessor?(name) - PROFILES.include? name.to_s.split("_").first - end - - def profile_accessor(name, *args) - key = name.to_s.sub("_", ".") - if key.end_with?("=") - self[key[0..-2]] = args.first - else - self[key] - end - end - def to_time(value) time = value.to_i (time == 0) ? nil : Time.at(time).utc diff --git a/lib/ezid/reserved_metadata.rb b/lib/ezid/reserved_metadata.rb new file mode 100644 index 0000000..0451f8f --- /dev/null +++ b/lib/ezid/reserved_metadata.rb @@ -0,0 +1,26 @@ +module Ezid + # + # EZID reserved metadata elements + # + # @see http://ezid.cdlib.org/doc/apidoc.html#internal-metadata + # + module ReservedMetadata + COOWNERS = "_coowners".freeze + CREATED = "_created".freeze + DATACENTER = "_datacenter".freeze + EXPORT = "_export".freeze + OWNER = "_owner".freeze + OWNERGROUP = "_ownergroup".freeze + PROFILE = "_profile".freeze + SHADOWEDBY = "_shadowedby".freeze + SHADOWS = "_shadows".freeze + STATUS = "_status".freeze + TARGET = "_target".freeze + UPDATED = "_updated".freeze + + # Read-only elements + READONLY = [ + CREATED, DATACENTER, OWNER, OWNERGROUP, SHADOWEDBY, SHADOWS, UPDATED + ].freeze + end +end diff --git a/lib/ezid/status.rb b/lib/ezid/status.rb new file mode 100644 index 0000000..ee8d362 --- /dev/null +++ b/lib/ezid/status.rb @@ -0,0 +1,10 @@ +module Ezid + # + # EZID status terms + # + module Status + PUBLIC = "public".freeze + RESERVED = "reserved".freeze + UNAVAILABLE = "unavailable".freeze + end +end diff --git a/spec/unit/identifier_spec.rb b/spec/unit/identifier_spec.rb index a069dae..d4673f9 100644 --- a/spec/unit/identifier_spec.rb +++ b/spec/unit/identifier_spec.rb @@ -119,7 +119,7 @@ module Ezid describe "#delete" do context "when the identifier is reserved" do - subject { described_class.new(id: "id", status: Identifier::RESERVED) } + subject { described_class.new(id: "id", status: Status::RESERVED) } context "and is persisted" do before { allow(subject).to receive(:persisted?) { true } } it "deletes the identifier" do @@ -136,7 +136,7 @@ module Ezid end end context "when identifier is not reserved" do - subject { described_class.new(id: "id", status: Identifier::PUBLIC) } + subject { described_class.new(id: "id", status: Status::PUBLIC) } it "raises an exception" do expect { subject.delete }.to raise_error(Error) end @@ -193,7 +193,7 @@ module Ezid it { is_expected.not_to be_unavailable } end context "when the identifier is reserved" do - before { subject.status = Identifier::RESERVED } + before { subject.status = Status::RESERVED } it { is_expected.not_to be_public } it { is_expected.to be_reserved } it { is_expected.not_to be_unavailable } @@ -218,7 +218,7 @@ module Ezid subject { described_class.new(id: "id", status: status) } describe "#unavailable!" do context "when the status is \"unavailable\"" do - let(:status) { "#{Identifier::UNAVAILABLE} | whatever" } + let(:status) { "#{Status::UNAVAILABLE} | whatever" } context "and no reason is given" do it "logs a warning" do pending "https://github.com/duke-libraries/ezid-client/issues/46" @@ -238,12 +238,12 @@ module Ezid subject.unavailable!("because") end it "should change the status" do - expect { subject.unavailable!("because") }.to change(subject, :status).from(status).to("#{Identifier::UNAVAILABLE} | because") + expect { subject.unavailable!("because") }.to change(subject, :status).from(status).to("#{Status::UNAVAILABLE} | because") end end end context "when the status is \"reserved\"" do - let(:status) { Identifier::RESERVED } + let(:status) { Status::RESERVED } context "and persisted" do before { allow(subject).to receive(:persisted?) { true } } it "raises an exception" do @@ -253,28 +253,28 @@ module Ezid context "and not persisted" do before { allow(subject).to receive(:persisted?) { false } } it "changes the status" do - expect { subject.unavailable! }.to change(subject, :status).from(Identifier::RESERVED).to(Identifier::UNAVAILABLE) + expect { subject.unavailable! }.to change(subject, :status).from(Status::RESERVED).to(Status::UNAVAILABLE) end end end context "when the status is \"public\"" do - let(:status) { Identifier::PUBLIC } + let(:status) { Status::PUBLIC } context "and no reason is given" do it "changes the status" do - expect { subject.unavailable! }.to change(subject, :status).from(Identifier::PUBLIC).to(Identifier::UNAVAILABLE) + expect { subject.unavailable! }.to change(subject, :status).from(Status::PUBLIC).to(Status::UNAVAILABLE) end end context "and a reason is given" do it "changes the status and appends the reason" do - expect { subject.unavailable!("withdrawn") }.to change(subject, :status).from(Identifier::PUBLIC).to("#{Identifier::UNAVAILABLE} | withdrawn") + expect { subject.unavailable!("withdrawn") }.to change(subject, :status).from(Status::PUBLIC).to("#{Status::UNAVAILABLE} | withdrawn") end end end end describe "#public!" do - subject { described_class.new(id: "id", status: Identifier::UNAVAILABLE) } + subject { described_class.new(id: "id", status: Status::UNAVAILABLE) } it "changes the status" do - expect { subject.public! }.to change(subject, :status).from(Identifier::UNAVAILABLE).to(Identifier::PUBLIC) + expect { subject.public! }.to change(subject, :status).from(Status::UNAVAILABLE).to(Status::PUBLIC) end end end