Skip to content

Commit

Permalink
Merge pull request #62 from duke-libraries/batch-download
Browse files Browse the repository at this point in the history
Adds Ezid::BatchDownload to facilitate using Batch download API.
  • Loading branch information
dchandekstark committed Feb 25, 2016
2 parents 07b7ebb + 0582b20 commit 164d16d
Show file tree
Hide file tree
Showing 8 changed files with 207 additions and 43 deletions.
33 changes: 18 additions & 15 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -113,21 +113,24 @@ I, [2014-12-04T15:12:48.853964 #86734] INFO -- : EZID DeleteIdentifier -- succe

## Batch Download

Instantiate an `Ezid::Client` and call `batch_download` with hash options -- see http://ezid.cdlib.org/doc/apidoc.html#parameters. Repeated values should be given as an array value for the parameter key.

Note that, due to the asynchronous nature of this request, the response only returns the URL at which the batch will be available to download (as described in the EZID documentation). Use the `notify` option to specify one or more email addresses to receive notification when the download file is actually available.

**Example**

```
>> c = Ezid::Client.new
=> #<Ezid::Client connection=#<Net::HTTP ezid.cdlib.org:443 open=false> user="eziduser" session=CLOSED>
>> response = c.batch_download(format: "csv", notify: "[email protected]", column: ["_id", "_target", "_status", "_profile", "_export", "_created", "_updated"], convertTimestamps: "yes", permanence: "real", owner: "eziduser")
I, [2015-02-20T15:16:53.462660 #55850] INFO -- : EZID BatchDownload -- success: http://ezid.cdlib.org/download/473deecb96.csv.gz
=> #<Net::HTTPOK 200 OK readbody=true>
>> response.download_url
=> "http://ezid.cdlib.org/download/da543b91a0.csv.gz"
```
See http://ezid.cdlib.org/doc/apidoc.html#parameters for the available parameters. To repeat a parameter, give its values as an array under that parameter's key.

```
>> batch = Ezid::BatchDownload.new(:csv)
=> #<Ezid::BatchDownload format=:csv>
>> batch.column = ["_id", "_target"]
=> ["_id", "_target"]
>> batch.createdAfter = Date.today.to_time
=> 2016-02-24 00:00:00 -0500
>> batch
=> #<Ezid::BatchDownload column=["_id", "_target"] createdAfter=1456290000 format=:csv>
>> batch.download_url
I, [2016-02-24T18:03:40.828005 #1084] INFO -- : EZID BatchDownload -- success: http://ezid.cdlib.org/download/4a63401e17.csv.gz
=> "http://ezid.cdlib.org/download/4a63401e17.csv.gz"
>> batch.download_file
File successfully download to /current/working/directory/4a63401e17.csv.gz.
=> nil
```

## Metadata handling

Expand Down
131 changes: 131 additions & 0 deletions lib/ezid/batch_download.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
require "hashie"
require "net/http"
require "uri"
require_relative "reserved_metadata"

module Ezid
# Error raised by BatchDownload#download_file when the download file is
# still not available after the maximum number of attempts.
class BatchDownloadError < Error
end

class BatchDownload < Hashie::Dash
include Hashie::Extensions::Coercion

# Download formats (values for the required `format` parameter)
ANVL = "anvl".freeze
CSV = "csv".freeze
XML = "xml".freeze
FORMATS = [ANVL, CSV, XML].freeze

# Values for the yes/no parameters
YES = "yes".freeze
NO = "no".freeze
BOOLEANS = [YES, NO].freeze

# Values for the `permanence` search constraint
TEST = "test".freeze
REAL = "real".freeze
PERMANENCE = [TEST, REAL].freeze

# Values for the `type` search constraint
ARK = "ark".freeze
DOI = "doi".freeze
URN = "urn".freeze
TYPES = [ARK, DOI, URN].freeze

# CSV Columns
ID = "_id".freeze
MAPPED_CREATOR = "_mappedCreator".freeze
MAPPED_TITLE = "_mappedTitle".freeze
MAPPED_PUBLISHER = "_mappedPublisher".freeze
MAPPED_DATE = "_mappedDate".freeze
MAPPED_TYPE = "_mappedType".freeze

# Number of attempts #download_file makes before giving up
MAX_DOWNLOAD_TRIES = 300
# Seconds #download_file sleeps between attempts
DOWNLOAD_RETRY_INTERVAL = 1

# Parameters
property :format, required: true # {anvl|csv|xml}
property :column                 # repeatable
property :notify                 # repeatable
property :convertTimestamps      # {yes|no}

# Search constraints
property :createdAfter
property :createdBefore
property :crossref               # {yes|no}
property :exported               # {yes|no}
property :owner                  # repeatable
property :ownergroup             # repeatable
property :permanence             # {test|real}
property :profile                # (repeatable)
property :status                 # {reserved|public|unavailable} (repeatable)
property :type                   # {ark|doi|urn} (repeatable)
property :updatedAfter
property :updatedBefore

# Value coercions applied to property values:
# booleans become the yes/no strings, DateTime values become UTC ISO 8601
# strings, and Time values are coerced to Integer.
# NOTE(review): DateTime and Time#iso8601 are provided by the "date"/"time"
# standard libraries, which this file does not require itself -- confirm they
# are loaded before this class body is evaluated.
coerce_value FalseClass, ->(_value) { NO }
coerce_value TrueClass, ->(_value) { YES }
coerce_value DateTime, ->(datetime) { datetime.to_time.utc.iso8601 }
coerce_value Time, Integer

# Builds a batch download request description.
#
# @param format the value for the required `format` property ({anvl|csv|xml})
# @param args [Hash] values for any of the other declared properties
def initialize(format, args = {})
  attributes = args.merge(format: format)
  super(attributes)
end

# Returns this object converted with #to_h; #get_response passes the result
# to the client as the batch download request parameters.
def params
to_h
end

# Submits the batch download request through a client and memoizes the
# response, so repeated calls reuse the first response until #reload is
# called.
#
# @return the response returned by the client's #batch_download
def get_response
  return @response if @response

  @response = client.batch_download(params)
end

# Discards the memoized response so that the next #get_response call submits
# a new batch download request.
#
# @return [nil]
def reload
@response = nil
end

# The URL of the download file, as reported by the batch download response.
# Triggers the request if it has not been made yet.
#
# @return the response's download URL
def download_url
  response = get_response
  response.download_url
end

# Downloads the batch file and writes it to the local filesystem.
#
# The download URL is requested repeatedly: while the server answers
# 404 Not Found the file is treated as not yet available, and the request is
# retried after a pause until +max_tries+ attempts have been made. Progress
# and success messages are written to standard output.
#
# @param path [String, nil] destination; if it is an existing directory the
#   file is stored there under the basename of the download URL, otherwise
#   it is used as the full file path. Defaults to the working directory.
# @param max_tries [Integer] maximum number of download attempts
#   (defaults to MAX_DOWNLOAD_TRIES)
# @param retry_interval [Numeric] seconds to sleep between attempts
#   (defaults to DOWNLOAD_RETRY_INTERVAL)
# @return [nil]
# @raise [BatchDownloadError] if the file is still not found after
#   +max_tries+ attempts
# @raise any other HTTP error raised by Net::HTTPResponse#value
def download_file(path: nil,
                  max_tries: MAX_DOWNLOAD_TRIES,
                  retry_interval: DOWNLOAD_RETRY_INTERVAL)
  path ||= Dir.getwd
  fullpath = File.directory?(path) ? File.join(path, download_filename) : path
  tries = 0
  begin
    tries += 1
    download = Net::HTTP.get_response(download_uri)
    # Raises unless the response is a success (2xx) response.
    download.value
  rescue Net::HTTPServerException
    # Only 404 means "not ready yet"; any other client error is re-raised as is.
    raise unless download.is_a?(Net::HTTPNotFound)

    if tries >= max_tries
      raise BatchDownloadError,
            "Maximum download attempts (#{max_tries}) reached unsuccessfully."
    end

    print "Download file not yet available (attempt #{tries} of #{max_tries})."
    puts " Trying again in #{retry_interval} second(s) ..."
    sleep retry_interval
    retry
  end

  # Reached only after a successful response: the rescue above always
  # retries or raises.
  File.open(fullpath, "wb") { |f| f.write(download.body) }
  puts "File successfully download to #{fullpath}."
end

private

# The download URL converted to a URI object.
def download_uri
  url = download_url
  URI(url)
end

# The file name of the download: the last path segment of the download URI.
def download_filename
  uri = download_uri
  File.basename(uri.path)
end

# Returns a newly constructed Client on every call (the instance is not
# memoized here).
def client
Client.new
end

end
end
4 changes: 3 additions & 1 deletion lib/ezid/client.rb
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
require "net/http"

require_relative "error"
require_relative "status"
require_relative "configuration"
require_relative "session"
require_relative "metadata"
require_relative "identifier"
require_relative "proxy_identifier"
require_relative "error"
require_relative "batch_download"

Dir[File.expand_path("../responses/*.rb", __FILE__)].each { |m| require m }
Dir[File.expand_path("../requests/*.rb", __FILE__)].each { |m| require m }
Expand Down
15 changes: 5 additions & 10 deletions lib/ezid/identifier.rb
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,6 @@ class Identifier
# Attributes to display on inspect
INSPECT_ATTRS = %w( id status target created ).freeze

# EZID status terms
PUBLIC = "public".freeze
RESERVED = "reserved".freeze
UNAVAILABLE = "unavailable".freeze

class << self
attr_accessor :defaults

Expand Down Expand Up @@ -151,19 +146,19 @@ def delete
# Is the identifier reserved?
# @return [Boolean]
def reserved?
status == RESERVED
status == Status::RESERVED
end

# Is the identifier public?
# @return [Boolean]
def public?
status == PUBLIC
status == Status::PUBLIC
end

# Is the identifier unavailable?
# @return [Boolean]
def unavailable?
status =~ /^#{UNAVAILABLE}/
status.to_s.start_with? Status::UNAVAILABLE
end

# Is the identifier deletable?
Expand All @@ -182,7 +177,7 @@ def unavailable!(reason = nil)
if unavailable? and reason.nil?
return
end
value = UNAVAILABLE
value = Status::UNAVAILABLE
if reason
value += " | #{reason}"
end
Expand All @@ -192,7 +187,7 @@ def unavailable!(reason = nil)
# Mark the identifier as public
# @return [String] the new status
def public!
self.status = PUBLIC
self.status = Status::PUBLIC
end

protected
Expand Down
7 changes: 2 additions & 5 deletions lib/ezid/metadata.rb
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
require "hashie"
require_relative "reserved_metadata"

module Ezid
#
Expand All @@ -7,6 +8,7 @@ module Ezid
# @api private
#
class Metadata < Hashie::Mash
include ReservedMetadata

# EZID metadata field/value separator
ANVL_SEPARATOR = ": "
Expand All @@ -27,11 +29,6 @@ class Metadata < Hashie::Mash
LINE_CONTINUATION_RE = /\r?\n\s+/
# A line ending
LINE_ENDING_RE = /\r?\n/
# EZID reserved metadata elements that are read-only
# @see http://ezid.cdlib.org/doc/apidoc.html#internal-metadata
READONLY = %w(
_created _datacenter _owner _ownergroup _shadowedby _shadows _updated
).freeze
# @api private
RESERVED_ALIASES = %w(
coowners datacenter export owner ownergroup
Expand Down
26 changes: 26 additions & 0 deletions lib/ezid/reserved_metadata.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
module Ezid
  #
  # Names of the EZID reserved metadata elements, one constant per element.
  #
  # @see http://ezid.cdlib.org/doc/apidoc.html#internal-metadata
  #
  module ReservedMetadata
    COOWNERS   = "_coowners".freeze
    CREATED    = "_created".freeze
    DATACENTER = "_datacenter".freeze
    EXPORT     = "_export".freeze
    OWNER      = "_owner".freeze
    OWNERGROUP = "_ownergroup".freeze
    PROFILE    = "_profile".freeze
    SHADOWEDBY = "_shadowedby".freeze
    SHADOWS    = "_shadows".freeze
    STATUS     = "_status".freeze
    TARGET     = "_target".freeze
    UPDATED    = "_updated".freeze

    # The subset of reserved elements that are read-only
    READONLY = [
      CREATED,
      DATACENTER,
      OWNER,
      OWNERGROUP,
      SHADOWEDBY,
      SHADOWS,
      UPDATED
    ].freeze
  end
end
10 changes: 10 additions & 0 deletions lib/ezid/status.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
module Ezid
  #
  # The EZID identifier status terms, as frozen string constants.
  #
  module Status
    PUBLIC      = "public".freeze
    RESERVED    = "reserved".freeze
    UNAVAILABLE = "unavailable".freeze
  end
end
24 changes: 12 additions & 12 deletions spec/unit/identifier_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ module Ezid

describe "#delete" do
context "when the identifier is reserved" do
subject { described_class.new(id: "id", status: Identifier::RESERVED) }
subject { described_class.new(id: "id", status: Status::RESERVED) }
context "and is persisted" do
before { allow(subject).to receive(:persisted?) { true } }
it "deletes the identifier" do
Expand All @@ -136,7 +136,7 @@ module Ezid
end
end
context "when identifier is not reserved" do
subject { described_class.new(id: "id", status: Identifier::PUBLIC) }
subject { described_class.new(id: "id", status: Status::PUBLIC) }
it "raises an exception" do
expect { subject.delete }.to raise_error(Error)
end
Expand Down Expand Up @@ -193,7 +193,7 @@ module Ezid
it { is_expected.not_to be_unavailable }
end
context "when the identifier is reserved" do
before { subject.status = Identifier::RESERVED }
before { subject.status = Status::RESERVED }
it { is_expected.not_to be_public }
it { is_expected.to be_reserved }
it { is_expected.not_to be_unavailable }
Expand All @@ -218,7 +218,7 @@ module Ezid
subject { described_class.new(id: "id", status: status) }
describe "#unavailable!" do
context "when the status is \"unavailable\"" do
let(:status) { "#{Identifier::UNAVAILABLE} | whatever" }
let(:status) { "#{Status::UNAVAILABLE} | whatever" }
context "and no reason is given" do
it "logs a warning" do
pending "https://github.com/duke-libraries/ezid-client/issues/46"
Expand All @@ -238,12 +238,12 @@ module Ezid
subject.unavailable!("because")
end
it "should change the status" do
expect { subject.unavailable!("because") }.to change(subject, :status).from(status).to("#{Identifier::UNAVAILABLE} | because")
expect { subject.unavailable!("because") }.to change(subject, :status).from(status).to("#{Status::UNAVAILABLE} | because")
end
end
end
context "when the status is \"reserved\"" do
let(:status) { Identifier::RESERVED }
let(:status) { Status::RESERVED }
context "and persisted" do
before { allow(subject).to receive(:persisted?) { true } }
it "raises an exception" do
Expand All @@ -253,28 +253,28 @@ module Ezid
context "and not persisted" do
before { allow(subject).to receive(:persisted?) { false } }
it "changes the status" do
expect { subject.unavailable! }.to change(subject, :status).from(Identifier::RESERVED).to(Identifier::UNAVAILABLE)
expect { subject.unavailable! }.to change(subject, :status).from(Status::RESERVED).to(Status::UNAVAILABLE)
end
end
end
context "when the status is \"public\"" do
let(:status) { Identifier::PUBLIC }
let(:status) { Status::PUBLIC }
context "and no reason is given" do
it "changes the status" do
expect { subject.unavailable! }.to change(subject, :status).from(Identifier::PUBLIC).to(Identifier::UNAVAILABLE)
expect { subject.unavailable! }.to change(subject, :status).from(Status::PUBLIC).to(Status::UNAVAILABLE)
end
end
context "and a reason is given" do
it "changes the status and appends the reason" do
expect { subject.unavailable!("withdrawn") }.to change(subject, :status).from(Identifier::PUBLIC).to("#{Identifier::UNAVAILABLE} | withdrawn")
expect { subject.unavailable!("withdrawn") }.to change(subject, :status).from(Status::PUBLIC).to("#{Status::UNAVAILABLE} | withdrawn")
end
end
end
end
describe "#public!" do
subject { described_class.new(id: "id", status: Identifier::UNAVAILABLE) }
subject { described_class.new(id: "id", status: Status::UNAVAILABLE) }
it "changes the status" do
expect { subject.public! }.to change(subject, :status).from(Identifier::UNAVAILABLE).to(Identifier::PUBLIC)
expect { subject.public! }.to change(subject, :status).from(Status::UNAVAILABLE).to(Status::PUBLIC)
end
end
end
Expand Down

0 comments on commit 164d16d

Please sign in to comment.