Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

bug: Fixes handing large number data files in Metadata Editor #2586

Merged
merged 6 commits into from
Jan 16, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
198 changes: 147 additions & 51 deletions src/js/collections/DataPackage.js
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
"use strict";
"use strict";

define([
"jquery",
Expand Down Expand Up @@ -433,6 +433,145 @@ define([
}
},

/**
* Fetches member models in batches to avoid fetching all members simultaneously.
* @param {Array} models - The array of member models to fetch.
* @param {number} [index] - The current index of the model being fetched.
* @param {number} [batchSize] - The number of models to fetch in each batch.
* @param {number} [timeout] - The timeout for each fetch request in milliseconds.
* @param {number} [maxRetries] - The maximum number of retries for each fetch request.
* @since 0.0.0
*/
fetchMemberModels(
vchendrix marked this conversation as resolved.
Show resolved Hide resolved
models,
index = 0,
batchSize = 10,
timeout = 5000,
maxRetries = 3,
) {
// Update the number of file metadata items being loaded
this.packageModel.set("numLoadingFileMetadata", models.length - index);

// If the index is greater than or equal to the length of the models array, stop fetching
if (index >= models.length) {
this.triggerComplete();
vchendrix marked this conversation as resolved.
Show resolved Hide resolved
return;
}

// If batchSize is 0, set it to the total number of models
let batchSizeAdjust = batchSize;
if (batchSizeAdjust === 0 && index === 0)
batchSizeAdjust = models.length;

const collection = this;
// Slice the models array to get the current batch
const batch = models.slice(index, index + batchSizeAdjust);

// Create an array of promises for fetching each model in the batch
const fetchPromises = batch.map(
(memberModel) =>
new Promise((resolve, reject) => {
const attemptFetch = (retriesLeft) => {
// Create a promise for the fetch request
const fetchPromise = new Promise(
(fetchResolve, fetchReject) => {
memberModel.fetch({
success: () => {
// Once the model is synced, handle the response
memberModel.once("sync", (oldModel) => {
const newModel = collection.getMember(oldModel);

// If the type of the old model is different from the new model
if (oldModel.type !== newModel.type) {
if (newModel.type === "DataPackage") {
// If the new model is a DataPackage, replace the old model with the new one
oldModel.trigger("replace", newModel);
fetchResolve();
} else {
// Otherwise, fetch the new model and replace the old model with the new one
newModel.set("synced", false);
newModel.fetch();
newModel.once("sync", (fetchedModel) => {
fetchedModel.set("synced", true);
collection.remove(oldModel);
collection.add(fetchedModel);
oldModel.trigger("replace", newModel);
if (newModel.type === "EML")
collection.trigger("add:EML");
fetchResolve();
});
}
} else {
// If the type of the old model is the same as the new model, merge the new model into the collection
newModel.set("synced", true);
collection.add(newModel, { merge: true });
if (newModel.type === "EML")
collection.trigger("add:EML");
fetchResolve();
}
});
},
error: (model, response) =>
fetchReject(new Error(response.statusText)),
});
},
);

// Create a promise for the timeout
const timeoutPromise = new Promise((__, timeoutReject) => {
setTimeout(
() => timeoutReject(new Error("Fetch timed out")),
timeout,
);
});

// Race the fetch promise against the timeout promise
Promise.race([fetchPromise, timeoutPromise])
.then(resolve)
.catch((error) => {
if (retriesLeft > 0) {
// Retry the fetch if there are retries left
attemptFetch(retriesLeft - 1);
} else {
// Reject the promise if all retries are exhausted
reject(error);
}
});
};

// Start the fetch attempt with the maximum number of retries
attemptFetch(maxRetries);
}),
);

// Once all fetch promises are resolved, fetch the next batch
Promise.allSettled(fetchPromises)
.then((results) => {
const errors = results.filter(
(result) => result.status === "rejected",
);
if (errors.length > 0) {
/* eslint-disable */
console.error("Error fetching member models:", errors);
/* eslint-enable */
}
// Fetch the next batch of models
this.fetchMemberModels.call(
collection,
models,
index + batchSizeAdjust,
batchSizeAdjust,
timeout,
maxRetries,
);
})
.catch((error) => {
/* eslint-disable */
console.error("Error fetching member models:", error);
/* eslint-enable */
});
},

/**
* Overload fetch calls for a DataPackage
* @param {object} [options] - Optional options for this fetch that get
Expand Down Expand Up @@ -711,56 +850,13 @@ define([
// Don't fetch each member model if the fetchModels property on this
// Collection is set to false
if (this.fetchModels !== false) {
// Add the models to the collection now, silently this.add(models,
// {silent: true});

// Retrieve the model for each member
const collection = this;
models.forEach((model) => {
model.fetch();
model.once("sync", (oldModel) => {
// Get the right model type based on the model values
const newModel = collection.getMember(oldModel);

// If the model type has changed, then mark the model as
// unsynced, since there may be custom fetch() options for the
// new model
if (oldModel.type !== newModel.type) {
// DataPackages shouldn't be fetched until we support nested
// packages better in the UI
if (newModel.type === "DataPackage") {
// Trigger a replace event so other parts of the app know
// when a model has been replaced with a different type
oldModel.trigger("replace", newModel);
} else {
newModel.set("synced", false);

newModel.fetch();
newModel.once("sync", (fetchedModel) => {
fetchedModel.set("synced", true);

// Remove the model from the collection and add it back
collection.remove(oldModel);
collection.add(fetchedModel);

// Trigger a replace event so other parts of the app know
// when a model has been replaced with a different type
oldModel.trigger("replace", newModel);

if (newModel.type === "EML")
collection.trigger("add:EML");
});
}
} else {
newModel.set("synced", true);
collection.add(newModel, {
merge: true,
});

if (newModel.type === "EML") collection.trigger("add:EML");
}
});
});
// Start fetching member models
this.fetchMemberModels.call(
this,
models,
0,
MetacatUI.appModel.get("batchSizeFetch"),
);
}
} catch (error) {
// TODO: Handle the error
Expand Down
34 changes: 34 additions & 0 deletions src/js/models/AppModel.js
Original file line number Diff line number Diff line change
Expand Up @@ -2433,6 +2433,40 @@ define(["jquery", "underscore", "backbone"], function ($, _, Backbone) {
* @example application%2Fbagit-097
*/
packageFormat: "application%2Fbagit-1.0",
/**
* Whether to batch fetch requests to the DataONE API. This is an experimental feature
* and should be used with caution. If set to a number greater than 0, MetacatUI will
* batch requests to the DataONE API and send them in groups of this size. This can
* improve performance when making many requests to the DataONE API, but can also
* cause issues if the requests are too large or if the DataONE API is not able to
* handle the batched requests.
*
* Currently, this feature is only used in the DataPackageModel when fetching the
* list of DataONE member models.
*
* @type {number}
* @default 0
* @example 20
vchendrix marked this conversation as resolved.
Show resolved Hide resolved
* @since 0.0.0
*/
batchSizeFetch: 0,
/**
* Whether to batch uploads to the DataONE API. This is an experimental feature
* and should be used with caution. If set to a number greater than 0, MetacatUI will
* batch uploads to the DataONE API and send them in groups of this size. This can
* improve performance when uploading many files to the DataONE API, but can also
* cause issues if the requests are too large or if the DataONE API is not able to
* handle the batched requests.
*
* Currently, this feature is only used in the DataPackageModel when uploading files
* to the DataONE API.
*
* @type {number}
* @default 0
* @example 20
vchendrix marked this conversation as resolved.
Show resolved Hide resolved
* @since 0.0.0
*/
batchSizeUpload: 0,
},
MetacatUI.AppConfig,
),
Expand Down
Loading
Loading