Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

cache DataStub data #448

Closed
wants to merge 9 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 8 additions & 3 deletions +io/parseDataset.m
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@

% loading H5T references is required
% unfortunately it is also a bottleneck
isIdCached = false;
if strcmp(datatype.Class, 'H5T_REFERENCE')
tid = H5D.get_type(did);
data = io.parseReference(did, tid, H5D.read(did));
Expand Down Expand Up @@ -56,7 +57,8 @@
elseif any(dataspace.Size == 0)
data = [];
else
data = types.untyped.DataStub(filename, fullpath);
isIdCached = true;
data = types.untyped.DataStub('fileId', fid, 'datasetId', did);
end
H5T.close(tid);
H5P.close(pid);
Expand All @@ -71,6 +73,9 @@
kwargs = io.map2kwargs(props);
parsed = eval([Type.typename '(kwargs{:})']);
end
H5D.close(did);
H5F.close(fid);

if ~isIdCached
H5D.close(did);
H5F.close(fid);
end
end
2 changes: 1 addition & 1 deletion +types/+untyped/+datapipe/BoundPipe.m
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
import types.untyped.datapipe.Configuration;
import types.untyped.datapipe.properties.*;

obj.stub = types.untyped.DataStub(filename, path);
obj.stub = types.untyped.DataStub('filename', filename, 'path', path);

sid = obj.stub.get_space();
[~, h5_dims, h5_maxdims] = H5S.get_simple_extent_dims(sid);
Expand Down
113 changes: 77 additions & 36 deletions +types/+untyped/DataStub.m
Original file line number Diff line number Diff line change
Expand Up @@ -3,29 +3,82 @@
% This class is sealed due to special subsref behavior breaking nargout
% expectations for most properties/methods.

properties (Access = private)
fileId;
datasetId;
end

properties (SetAccess = protected)
filename;
path;
end

properties (Dependent, SetAccess = private)
filename;
dims;
ndims;
dataType;
end

methods
function obj = DataStub(filename, path)
obj.filename = filename;
obj.path = path;
function obj = DataStub(varargin)
    % DATASTUB Construct a stub bound to an HDF5 file and dataset.
    %
    %   Name-value arguments:
    %     'filename'  - scalar text naming the HDF5 file. Opened with
    %                   H5F.open when no 'fileId' is supplied.
    %     'path'      - scalar text path of the dataset inside the file.
    %                   Opened with H5D.open when no 'datasetId' is
    %                   supplied.
    %     'fileId'    - already-open HDF5 file identifier.
    %     'datasetId' - already-open HDF5 dataset identifier. The stored
    %                   path is then taken from H5I.get_name.
    %
    %   One of 'filename'/'fileId' and one of 'path'/'datasetId' is
    %   required. When both members of a pair are given, the identifier
    %   wins and a warning is issued for the ignored text argument.
    %
    %   Errors:
    %     MatNwb:DataStub:MissingRequiredArgument when the file or the
    %     dataset cannot be determined from the arguments.
    isScalarText = @(f)validateattributes(f, {'string', 'char'}, {'scalartext'});

    p = inputParser;
    p.addParameter('filename', '', isScalarText);
    p.addParameter('path', '', isScalarText);
    p.addParameter('fileId', []);
    p.addParameter('datasetId', []);
    p.parse(varargin{:});

    isFileIdSet = ~any(strcmp(p.UsingDefaults, 'fileId'));
    isFileNameSet = ~any(strcmp(p.UsingDefaults, 'filename'));
    isDatasetIdSet = ~any(strcmp(p.UsingDefaults, 'datasetId'));
    isPathSet = ~any(strcmp(p.UsingDefaults, 'path'));

    % Check the arguments up front, before any HDF5 handle is opened.
    % An inclusive "or" is used on purpose: supplying both a file name
    % and a file id is tolerated (with a warning) further below.
    assert(isFileNameSet || isFileIdSet, ...
        'MatNwb:DataStub:MissingRequiredArgument', ...
        'Creating a DataStub requires either a file name or a file id.');
    assert(isPathSet || isDatasetIdSet, ...
        'MatNwb:DataStub:MissingRequiredArgument', ...
        'Creating a DataStub requires either a dataset path or a dataset id.');

    % Resolve the file handle; an explicit identifier takes precedence.
    if isFileIdSet
        obj.fileId = p.Results.fileId;
        if isFileNameSet
            warning('MatNwb:DataStub:IgnoredOptionalArgument', ...
                'File ID was already set. Ignoring File name.');
        end
    else
        % char() normalizes string scalars (accepted by the validator)
        % before they are handed to the HDF5 call.
        obj.fileId = H5F.open(char(p.Results.filename));
    end

    % Resolve the dataset handle; an explicit identifier takes precedence.
    if isDatasetIdSet
        obj.datasetId = p.Results.datasetId;
        obj.path = H5I.get_name(obj.datasetId);
        if isPathSet
            warning('MatNwb:DataStub:IgnoredOptionalArguments', ...
                ['The Dataset ID has already been set. The `path` ' ...
                'argument will be overwritten.']);
        end
    else
        obj.path = char(p.Results.path);
        obj.datasetId = H5D.open(obj.fileId, obj.path);
    end
end

function delete(obj)
    % DELETE Close the HDF5 identifiers held by this stub.
    %   The dataset identifier is closed before the file identifier.
    %   Identifiers that are empty are skipped.
    hasDataset = ~isempty(obj.datasetId);
    if hasDataset
        H5D.close(obj.datasetId);
    end

    hasFile = ~isempty(obj.fileId);
    if hasFile
        H5F.close(obj.fileId);
    end
end

function sid = get_space(obj)
fid = H5F.open(obj.filename);
did = H5D.open(fid, obj.path);
sid = H5D.get_space(did);
H5D.close(did);
H5F.close(fid);
sid = H5D.get_space(obj.datasetId);
end

function dims = get.dims(obj)
Expand All @@ -34,18 +87,18 @@
dims = fliplr(h5_dims);
H5S.close(sid);
end

function fileName = get.filename(obj)
    % Getter for the dependent `filename` property: returns whatever
    % H5F.get_name reports for the stored file identifier.
    fileHandle = obj.fileId;
    fileName = H5F.get_name(fileHandle);
end

function count = get.ndims(obj)
    % Getter for the dependent `ndims` property: the length of the
    % `dims` property.
    extents = obj.dims;
    count = length(extents);
end

function matType = get.dataType(obj)
fid = H5F.open(obj.filename);
did = H5D.open(fid, obj.path);
tid = H5D.get_type(did);
tid = H5D.get_type(obj.datasetId);
matType = io.getMatType(tid);
H5D.close(did);
H5F.close(fid);
end

%can be called without arg, with H5ML.id, or (dims, offset, stride)
Expand Down Expand Up @@ -75,15 +128,11 @@
end

if isstruct(data)
fid = H5F.open(obj.filename);
did = H5D.open(fid, obj.path);
fsid = H5D.get_space(did);
data = H5D.read(did, 'H5ML_DEFAULT', fsid, fsid,...
fsid = H5D.get_space(obj.datasetId);
data = H5D.read(obj.datasetId, 'H5ML_DEFAULT', fsid, fsid,...
'H5P_DEFAULT');
data = io.parseCompound(did, data);
data = io.parseCompound(obj.datasetId, data);
H5S.close(fsid);
H5D.close(did);
H5F.close(fid);
end
end

Expand Down Expand Up @@ -167,11 +216,7 @@
H5S.close(sid);

% read data.
fid = H5F.open(obj.filename);
did = H5D.open(fid, obj.path);
data = H5D.read(did, 'H5ML_DEFAULT', memSid, readSid, 'H5P_DEFAULT');
H5D.close(did);
H5F.close(fid);
data = H5D.read(obj.datasetId, 'H5ML_DEFAULT', memSid, readSid, 'H5P_DEFAULT');
H5S.close(memSid);

expectedSize = dims; %#ok<PROPLC>
Expand Down Expand Up @@ -274,17 +319,15 @@

function refs = export(obj, fid, fullpath, refs)
%Check for compound data type refs
src_fid = H5F.open(obj.filename);
% if filenames are the same, then do nothing
src_filename = H5F.get_name(src_fid);
src_filename = H5F.get_name(obj.fileId);
dest_filename = H5F.get_name(fid);
if strcmp(src_filename, dest_filename)
return;
end

src_did = H5D.open(src_fid, obj.path);
src_tid = H5D.get_type(src_did);
src_sid = H5D.get_space(src_did);
src_tid = H5D.get_type(obj.datasetId);
src_sid = H5D.get_space(obj.datasetId);
ref_i = false;
char_i = false;
member_name = {};
Expand Down Expand Up @@ -316,11 +359,11 @@
%Due to this HDF5 library's inability to delete/update
%dataset data, this is unfortunately required.
ref_tid = ref_tid(~cellfun('isempty', ref_tid));
data = H5D.read(src_did);
data = H5D.read(obj.datasetId);

refNames = member_name(ref_i);
for i=1:length(refNames)
data.(refNames{i}) = io.parseReference(src_did, ref_tid{i}, ...
data.(refNames{i}) = io.parseReference(obj.datasetId, ref_tid{i}, ...
data.(refNames{i}));
end

Expand All @@ -335,14 +378,12 @@
%copy data over and return destination
ocpl = H5P.create('H5P_OBJECT_COPY');
lcpl = H5P.create('H5P_LINK_CREATE');
H5O.copy(src_fid, obj.path, fid, fullpath, ocpl, lcpl);
H5O.copy(obj.fileId, obj.path, fid, fullpath, ocpl, lcpl);
H5P.close(ocpl);
H5P.close(lcpl);
end
H5T.close(src_tid);
H5S.close(src_sid);
H5D.close(src_did);
H5F.close(src_fid);
end

function B = subsref(obj, S)
Expand Down
4 changes: 3 additions & 1 deletion +types/+untyped/ExternalLink.m
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,9 @@
if is_typed || strcmp(LinkedInfo.Datatype.Class, 'H5T_REFERENCE')
data = io.parseDataset(Link.filename, LinkedInfo, Link.path);
else
data = types.untyped.DataStub(Link.filename, Link.path);
data = types.untyped.DataStub( ...
'filename', Link.filename, ...
'path', Link.path);
end
elseif is_group
assert(is_typed,...
Expand Down