Skip to content

Commit

Permalink
Merge pull request #46 from VEuPathDB/dataset-upload-fix
Browse files Browse the repository at this point in the history
Dataset upload fix
ryanrdoherty authored Aug 15, 2022
2 parents 53d7291 + 7968d3a commit 755c96f
Showing 14 changed files with 325 additions and 207 deletions.
88 changes: 16 additions & 72 deletions Model/src/main/java/org/gusdb/wdk/model/answer/AnswerValue.java
Original file line number Diff line number Diff line change
@@ -51,6 +51,9 @@
import org.gusdb.wdk.model.query.spec.QueryInstanceSpec;
import org.gusdb.wdk.model.question.Question;
import org.gusdb.wdk.model.record.Field;
import org.gusdb.wdk.model.record.PrimaryKeyDefinition;
import org.gusdb.wdk.model.record.PrimaryKeyIterator;
import org.gusdb.wdk.model.record.ResultSetPrimaryKeyIterator;
import org.gusdb.wdk.model.record.RecordClass;
import org.gusdb.wdk.model.record.RecordInstance;
import org.gusdb.wdk.model.record.TableField;
@@ -897,70 +900,31 @@ public void setSortingMap(Map<String, Boolean> sortingMap) {
_sortedIdSql = null;
}

/**
* This method is redundant with getAllIds(), consider deprecate either one of them.
*
* @return returns a list of all primary key values.
*/
public Object[][] getPrimaryKeyValues() throws WdkModelException {
String[] columns = _answerSpec.getQuestion().getRecordClass().getPrimaryKeyDefinition().getColumnRefs();
List<Object[]> buffer = new ArrayList<>();

Optional<AnswerFilterInstance> legacyFilter = _answerSpec.getLegacyFilter();
try (ResultList resultList =
legacyFilter.isPresent() ?
legacyFilter.get().getResults(this) :
_idsQueryInstance.getResults()) {
while (resultList.next()) {
Object[] pkValues = new String[columns.length];
for (int columnIndex = 0; columnIndex < columns.length; columnIndex++) {
pkValues[columnIndex] = resultList.get(columns[columnIndex]);
}
buffer.add(pkValues);
}
Object[][] ids = new String[buffer.size()][columns.length];
buffer.toArray(ids);
return ids;
}
}

/**
 * Drops state held by this answer value: sets the stored sorted-ID SQL and the
 * checksum to null and calls clear() on the result size factory.
 *
 * NOTE(review): presumably these values are lazily recomputed on next access;
 * confirm against the corresponding getters.
 */
private void reset() {
_sortedIdSql = null;
_checksum = null;
_resultSizeFactory.clear();
}

/**
* Get a list of all the primary key tuples of all the records in the answer. It is a shortcut of iterating
* through all the pages and get the primary keys.
* Creates a closable iterator of IDs for this answer
*
* NOTE! caller must close the return value to avoid resource leaks.
*
* This method is redundant with getPrimaryKeyValues(), consider deprecate either one of them.
* @return an iterator of all the primary key tuples of all the records in the answer
* @throws WdkModelException if unable to execute ID query
*/
public List<String[]> getAllIds() throws WdkModelException {
String idSql = getSortedIdSql();
String[] pkColumns = _answerSpec.getQuestion().getRecordClass().getPrimaryKeyDefinition().getColumnRefs();
List<String[]> pkValues = new ArrayList<>();
WdkModel wdkModel = _answerSpec.getQuestion().getWdkModel();
DataSource dataSource = wdkModel.getAppDb().getDataSource();
ResultSet resultSet = null;
public PrimaryKeyIterator getAllIds() throws WdkModelException {
try {
resultSet = SqlUtils.executeQuery(dataSource, idSql, _idsQueryInstance.getQuery().getFullName() + "__all-ids");
while (resultSet.next()) {
String[] values = new String[pkColumns.length];
for (int i = 0; i < pkColumns.length; i++) {
Object value = resultSet.getObject(pkColumns[i]);
values[i] = (value == null) ? null : value.toString();
}
pkValues.add(values);
}
}
catch (SQLException ex) {
throw new WdkModelException(ex);
PrimaryKeyDefinition pkDef = _answerSpec.getQuestion().getRecordClass().getPrimaryKeyDefinition();
DataSource dataSource = _wdkModel.getAppDb().getDataSource();
String idSql = getSortedIdSql();
String queryDescriptor = _idsQueryInstance.getQuery().getFullName() + "__all-ids";
return new ResultSetPrimaryKeyIterator(pkDef, SqlUtils.executeQuery(dataSource, idSql, queryDescriptor));
}
finally {
SqlUtils.closeResultSetAndStatement(resultSet, null);
catch (SQLException e) {
throw new WdkModelException("Unable to execute ID query", e);
}
return pkValues;
}

public void setPageIndex(int startIndex, int endIndex) {
@@ -988,26 +952,6 @@ public JSONObject getFilterSummaryJson(String filterName) throws WdkUserExceptio
}
}

/**
* Returns one big string containing all IDs in this answer value's result in
* the following format: each '\n'-delimited line contains one record, whose
* primary keys are joined and delimited by a comma.
*
* @return list of all record IDs
*/
public String getAllIdsAsString() throws WdkModelException {
List<String[]> pkValues = getAllIds();
StringBuilder buffer = new StringBuilder();
for (String[] pkValue : pkValues) {
if (buffer.length() > 0) buffer.append("\n");
for (int i = 0; i < pkValue.length; i++) {
if (i > 0) buffer.append(", ");
buffer.append(pkValue[i]);
}
}
return buffer.toString();
}

private final static String ID_QUERY_HANDLE = "pidq";
private final static String QUERY_HANDLE = "inq";

Original file line number Diff line number Diff line change
@@ -4,12 +4,11 @@
import static org.gusdb.fgputil.functional.Functions.mapToList;

import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.NoSuchElementException;

import org.gusdb.fgputil.EncryptionUtil;
import org.gusdb.fgputil.FormatUtil;
import org.gusdb.fgputil.ListBuilder;
import org.gusdb.fgputil.MapBuilder;
import org.gusdb.fgputil.db.platform.DBPlatform;
import org.gusdb.fgputil.validation.ValidObjectFactory.RunnableObj;
@@ -20,6 +19,7 @@
import org.gusdb.wdk.model.answer.ResultSizeFactory;
import org.gusdb.wdk.model.answer.spec.AnswerSpec;
import org.gusdb.wdk.model.record.DynamicRecordInstance;
import org.gusdb.wdk.model.record.PrimaryKeyIterator;
import org.gusdb.wdk.model.record.RecordClass;
import org.gusdb.wdk.model.record.RecordInstance;
import org.gusdb.wdk.model.user.User;
@@ -98,15 +98,35 @@ public String getChecksum() throws WdkModelException {
}

@Override
public List<String[]> getAllIds() throws WdkModelException {
public PrimaryKeyIterator getAllIds() throws WdkModelException {
String[] pkArray = new String[_pkMap.size()];
String[] pkColNames = _recordClass.getPrimaryKeyDefinition().getColumnRefs();
if (pkArray.length != pkColNames.length)
throw new WdkModelException("Incoming primary key array does not match recordclass PK column ref array");
for (int i = 0; i < pkColNames.length; i++) {
pkArray[i] = (String)_pkMap.get(pkColNames[i]);
}
return new ListBuilder<String[]>().add(pkArray).toList();
return new PrimaryKeyIterator() {

private boolean valueReturned = false;

@Override
public boolean hasNext() {
return !valueReturned;
}

@Override
public String[] next() {
if (valueReturned) throw new NoSuchElementException();
valueReturned = true;
return pkArray;
}

@Override
public void close() {
// nothing to do here
}
};
}

@Override
Original file line number Diff line number Diff line change
@@ -7,8 +7,13 @@
import java.io.Reader;

public abstract class DatasetContents {

private static final int BUF_SIZE = 8192;

// constants used to estimate number of records
public static final int ESTIMATED_CHARS_PER_ID = 10;
public static final int ESTIMATED_BYTES_PER_ID = 15;

protected final String fileName;

protected DatasetContents(String fileName) {
@@ -19,6 +24,8 @@ public String getUploadFileName() {
return fileName;
}

public abstract long getEstimatedRowCount();

@SuppressWarnings("ThrowFromFinallyBlock")
public String truncate(final int len) throws WdkModelException {
Reader reader = null;
Original file line number Diff line number Diff line change
@@ -13,7 +13,9 @@
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Date;
import java.util.LinkedList;
import java.util.List;
import java.util.Queue;
import java.util.stream.Collectors;
import java.util.stream.IntStream;

@@ -296,7 +298,7 @@ private Dataset createOrGetDataset(
);

insertDatasetValues(connection, datasetId, parser.iterator(content),
parser.datasetContentWidth(content));
parser.datasetContentWidth(content), content.getEstimatedRowCount());
connection.commit();

// create and insert user dataset.
@@ -533,24 +535,33 @@ private void insertDatasetValues(
final Connection connection,
final long datasetId,
final DatasetIterator data,
final int length
final int numDataColumns,
final long estimatedRowCount
) throws SQLException, WdkModelException, WdkUserException {
String sql = buildDatasetValuesInsertQuery(length);
String sql = buildDatasetValuesInsertQuery(numDataColumns);
LOG.info("Built the following insert SQL: " + sql);
int idAllocationBatchSize = calculateIdAllocationBatchSize(estimatedRowCount);
Queue<Long> datasetValueIdQueue = new LinkedList<>();
try (PreparedStatement psInsert = connection.prepareStatement(sql)) {
long batchRow = 0;
long rowOrderNumber = 1;
while (data.hasNext()) {
String[] value = data.next();

// get a new value id.
long datasetValueId = _userDb.getPlatform()
.getNextId(_userDb.getDataSource(), _userSchema, TABLE_DATASET_VALUES);
// get a new value id
if (datasetValueIdQueue.isEmpty()) {
datasetValueIdQueue.addAll(
_userDb.getPlatform().getNextNIds(
_userDb.getDataSource(),
_userSchema,
TABLE_DATASET_VALUES,
idAllocationBatchSize));
}

psInsert.setLong(1, datasetValueId);
psInsert.setLong(1, datasetValueIdQueue.poll());
psInsert.setLong(2, datasetId);
psInsert.setLong(3, rowOrderNumber);
for (int j = 0; j < length; j++) {
for (int j = 0; j < numDataColumns; j++) {
psInsert.setString(j + 4, value[j]);
}
psInsert.addBatch();
@@ -567,6 +578,17 @@ private void insertDatasetValues(
}
}

/**
 * Picks how many dataset value IDs to request from the database platform at a
 * time, scaling the batch with the estimated number of rows to insert.
 *
 * <pre>
 *   estimate &lt;= 10     -> 1   (includes zero and negative estimates)
 *   estimate &lt;= 100    -> 10
 *   estimate &lt;= 1000   -> 25
 *   estimate &gt;  1000   -> 250
 * </pre>
 *
 * @param estimatedRowCount estimated number of rows in the dataset
 * @return number of IDs to allocate per request
 */
private int calculateIdAllocationBatchSize(long estimatedRowCount) {
  final int batchSize;
  if (estimatedRowCount > 1000) {
    batchSize = 250;
  }
  else if (estimatedRowCount > 100) {
    batchSize = 25;
  }
  else if (estimatedRowCount > 10) {
    batchSize = 10;
  }
  else {
    batchSize = 1;
  }
  return batchSize;
}

private void validateValue(final String[] row) throws WdkUserException {
// check the number of columns
if (row.length > MAX_VALUE_COLUMNS) {
Original file line number Diff line number Diff line change
@@ -33,14 +33,28 @@ public class DatasetFileContents extends DatasetContents {
*/
private String checksum;

/**
* Number of records expected in this dataset file
* (some files are written knowing how many records are contained within)
*/
private final Long numRecords;

public DatasetFileContents(
final String fileName,
final File contents
) {
this(fileName, contents, null);
}

public DatasetFileContents(
final String fileName,
final File contents,
final Long numRecords) {
super(fileName);
LOG.info("Created new DatasetFileContents object pointing at file: " + contents.getAbsolutePath());
this.contents = contents;
this.owned = false;
this.numRecords = numRecords;
}

DatasetFileContents(
@@ -63,6 +77,7 @@ public DatasetFileContents(
tmp.deleteOnExit();
this.owned = true;
this.contents = tmp;
this.numRecords = null;
}

/**
@@ -126,4 +141,11 @@ private static String genChecksum(final File file) {
throw new WdkRuntimeException(e);
}
}

/**
 * Returns the record count supplied at construction when there is one;
 * otherwise estimates it from the file length, assuming
 * ESTIMATED_BYTES_PER_ID bytes per record.
 *
 * @return known or estimated number of rows in the file
 */
@Override
public long getEstimatedRowCount() {
  if (numRecords == null) {
    // no count supplied; integer division truncates, so add one to round up
    return (contents.length() / ESTIMATED_BYTES_PER_ID) + 1;
  }
  return numRecords;
}
}
Original file line number Diff line number Diff line change
@@ -7,6 +7,7 @@
import java.util.List;

public class DatasetListContents extends DatasetContents {

private final List<String> idList;
private String checksum;

@@ -113,4 +114,9 @@ private void inc() {
done = true;
}
}

/**
 * Reports the number of entries in the backing ID list as the row estimate.
 *
 * @return size of the ID list
 */
@Override
public long getEstimatedRowCount() {
  final int idCount = idList.size();
  return idCount;
}
}
Original file line number Diff line number Diff line change
@@ -6,6 +6,7 @@
import org.gusdb.fgputil.EncryptionUtil;

public class DatasetStringContents extends DatasetContents {

private final String contents;

public DatasetStringContents(final String fileName, final String contents) {
@@ -22,4 +23,9 @@ public String getChecksum() {
public Reader getContentReader() {
return new StringReader(contents);
}

/**
 * Estimates the row count from the length of the content string, assuming
 * ESTIMATED_CHARS_PER_ID characters per record.
 *
 * @return estimated number of rows (always at least 1)
 */
@Override
public long getEstimatedRowCount() {
  final int wholeIds = contents.length() / ESTIMATED_CHARS_PER_ID;
  // integer division truncates, so add one to round up
  return wholeIds + 1;
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
package org.gusdb.wdk.model.record;

import java.util.Iterator;

/**
 * An iterator over primary key tuples, each returned as a String[], that is
 * also AutoCloseable.  Callers are expected to close the iterator when done
 * (e.g. via try-with-resources) since implementations may hold open resources
 * such as a database result set.
 *
 * Note that close() is inherited unchanged from AutoCloseable and therefore
 * is declared to throw Exception.
 */
public interface PrimaryKeyIterator extends Iterator<String[]>, AutoCloseable {

// no additional methods

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
package org.gusdb.wdk.model.record;

import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.Optional;

import org.gusdb.fgputil.db.stream.ResultSetIterator;

/**
 * A PrimaryKeyIterator backed by a JDBC ResultSet.  Each row is converted to a
 * String[] holding the values of the primary key columns, in the order given
 * by the primary key definition's column refs.
 */
public class ResultSetPrimaryKeyIterator extends ResultSetIterator<String[]> implements PrimaryKeyIterator {

  /**
   * @param pkDef primary key definition naming the columns to read from each row
   * @param rs result set to iterate over
   */
  public ResultSetPrimaryKeyIterator(PrimaryKeyDefinition pkDef, ResultSet rs) {
    super(rs, createRowConverter(pkDef.getColumnRefs()));
  }

  /**
   * Builds the converter that turns the current result set row into an array
   * of primary key values; non-null values are stringified with toString(),
   * null values stay null.
   */
  private static RowConverter<String[]> createRowConverter(String[] pkColumns) {
    return row -> {
      String[] pkValues = new String[pkColumns.length];
      int index = 0;
      for (String column : pkColumns) {
        Object raw = row.getObject(column);
        pkValues[index++] = (raw != null) ? raw.toString() : null;
      }
      return Optional.of(pkValues);
    };
  }

}
Original file line number Diff line number Diff line change
@@ -85,11 +85,6 @@ public void addEntireResultToBasket(User user, RunnableObj<AnswerSpec> spec) thr
}
}

public void removeEntireResultFromBasket(User user, RunnableObj<AnswerSpec> spec) throws WdkModelException {
List<String[]> pkValues = AnswerValueFactory.makeAnswer(user, spec).getAllIds();
removeFromBasket(user, spec.get().getQuestion().getRecordClass(), pkValues);
}

public void addPksToBasket(User user, RecordClass recordClass, Collection<PrimaryKeyValue> recordsToAdd) throws WdkModelException {
addToBasket(user, recordClass, recordsToAdd.size(), new PrimaryKeyRecordStream(user, recordClass, recordsToAdd));
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
package org.gusdb.wdk.service.request.user.dataset;

import java.util.Map;
import java.util.Optional;
import java.util.stream.Collectors;

import org.gusdb.fgputil.functional.Functions;
import org.gusdb.fgputil.json.JsonType;
import org.gusdb.fgputil.json.JsonUtil;
import org.gusdb.wdk.core.api.JsonKeys;
import org.gusdb.wdk.service.request.exception.RequestMisformatException;
import org.json.JSONObject;

/**
 * Parsed form of a dataset creation request.  Holds the source type, the
 * primary config value for that source type, an optional display name, and
 * any remaining properties found in the source content object.
 */
public class DatasetRequest {

  private final DatasetSourceType _sourceType;
  private final JsonType _configValue;
  private final Optional<String> _displayName;
  private final Map<String,JsonType> _additionalConfig;

  /**
   * Parses the passed JSON into a dataset request.
   *
   * @param input request JSON containing the source type, the source content
   * object, and optionally a display name
   * @throws RequestMisformatException if the source type is not recognized or
   * the config value for the source type has the wrong JSON type
   */
  public DatasetRequest(JSONObject input) throws RequestMisformatException {
    String typeIndicator = input.getString(JsonKeys.SOURCE_TYPE);
    _sourceType = DatasetSourceType.getFromTypeIndicator(typeIndicator);

    JSONObject sourceContent = input.getJSONObject(JsonKeys.SOURCE_CONTENT);
    String configKey = _sourceType.getConfigJsonKey();
    _configValue = new JsonType(sourceContent.get(configKey));

    // the primary config value must have the JSON type this source type expects
    boolean typeMatches = _configValue.getType().equals(_sourceType.getConfigType());
    if (!typeMatches) {
      throw new RequestMisformatException("Value of '" +
          configKey + "' must be a " + _sourceType.getConfigType());
    }

    // every other property of the source content is kept as additional config
    _additionalConfig = Functions.getMapFromKeys(
        JsonUtil.getKeys(sourceContent).stream()
            .filter(key -> !key.equals(configKey))
            .collect(Collectors.toSet()),
        key -> new JsonType(sourceContent.get(key)));

    String displayName = JsonUtil.getStringOrDefault(input, JsonKeys.DISPLAY_NAME, null);
    _displayName = Optional.ofNullable(displayName);
  }

  /** @return the way the dataset content was submitted */
  public DatasetSourceType getSourceType() {
    return _sourceType;
  }

  /** @return value found under the source type's config key */
  public JsonType getConfigValue() {
    return _configValue;
  }

  /** @return display name from the request, if one was present */
  public Optional<String> getDisplayName() {
    return _displayName;
  }

  /** @return source content properties other than the primary config value */
  public Map<String,JsonType> getAdditionalConfig() {
    return _additionalConfig;
  }

}
Original file line number Diff line number Diff line change
@@ -1,29 +1,27 @@
package org.gusdb.wdk.service.request.user;
package org.gusdb.wdk.service.request.user.dataset;

import static org.gusdb.fgputil.FormatUtil.join;
import static org.gusdb.fgputil.json.JsonIterators.arrayStream;

import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.Arrays;
import java.util.List;
import java.util.Iterator;
import java.util.Map;
import java.util.Optional;
import java.util.stream.Collectors;
import java.util.stream.Stream;

import javax.ws.rs.core.MediaType;

import org.apache.log4j.Logger;
import org.gusdb.fgputil.FormatUtil;
import org.gusdb.fgputil.client.ClientUtil;
import org.gusdb.fgputil.functional.Functions;
import org.gusdb.fgputil.iterator.IteratorUtil;
import org.gusdb.fgputil.json.JsonType;
import org.gusdb.fgputil.json.JsonType.ValueType;
import org.gusdb.fgputil.json.JsonUtil;
import org.gusdb.fgputil.validation.ValidObjectFactory.RunnableObj;
import org.gusdb.fgputil.validation.ValidationLevel;
import org.gusdb.fgputil.web.SessionProxy;
@@ -45,89 +43,22 @@
import org.gusdb.wdk.model.query.param.Param;
import org.gusdb.wdk.model.query.spec.ParameterContainerInstanceSpecBuilder.FillStrategy;
import org.gusdb.wdk.model.question.Question;
import org.gusdb.wdk.model.record.PrimaryKeyIterator;
import org.gusdb.wdk.model.record.PrimaryKeyValue;
import org.gusdb.wdk.model.record.RecordClass;
import org.gusdb.wdk.model.record.RecordInstance;
import org.gusdb.wdk.model.user.BasketFactory;
import org.gusdb.wdk.model.user.Strategy;
import org.gusdb.wdk.model.user.User;
import org.gusdb.wdk.service.request.exception.DataValidationException;
import org.gusdb.wdk.service.request.exception.RequestMisformatException;
import org.gusdb.wdk.service.service.TemporaryFileService;
import org.json.JSONArray;
import org.json.JSONObject;

public class DatasetRequestProcessor {

private static Logger LOG = Logger.getLogger(DatasetRequestProcessor.class);

public enum DatasetSourceType {

ID_LIST("idList", "ids", ValueType.ARRAY),
BASKET("basket", "basketName", ValueType.STRING),
FILE("file", "temporaryFileId", ValueType.STRING),
STRATEGY("strategy", JsonKeys.STRATEGY_ID, ValueType.NUMBER),
URL("url", "url", ValueType.STRING);

private final String _typeIndicator;
private final String _configJsonKey;
private final ValueType _configValueType;

DatasetSourceType(String typeIndicator, String configJsonKey, ValueType configValueType) {
_typeIndicator = typeIndicator;
_configJsonKey = configJsonKey;
_configValueType = configValueType;
}

public String getTypeIndicator() {
return _typeIndicator;
}

public String getConfigJsonKey() {
return _configJsonKey;
}

public ValueType getConfigType() {
return _configValueType;
}

public static DatasetSourceType getFromTypeIndicator(String typeIndicator) throws RequestMisformatException {
return Arrays.stream(values())
.filter(val -> val._typeIndicator.equals(typeIndicator))
.findFirst()
.orElseThrow(() -> new RequestMisformatException(
"Invalid source type. Only [" + FormatUtil.join(values(), ", ") + "] allowed."));
}
}

public static class DatasetRequest {

private final DatasetSourceType _sourceType;
private final JsonType _configValue;
private final Optional<String> _displayName;
private final Map<String,JsonType> _additionalConfig;

public DatasetRequest(JSONObject input) throws RequestMisformatException {
_sourceType = DatasetSourceType.getFromTypeIndicator(input.getString(JsonKeys.SOURCE_TYPE));
JSONObject sourceContent = input.getJSONObject(JsonKeys.SOURCE_CONTENT);
_configValue = new JsonType(sourceContent.get(_sourceType.getConfigJsonKey()));
if (!_configValue.getType().equals(_sourceType.getConfigType())) {
throw new RequestMisformatException("Value of '" +
_sourceType.getConfigJsonKey() + "' must be a " + _sourceType.getConfigType());
}
_additionalConfig = Functions.getMapFromKeys(
JsonUtil.getKeys(sourceContent).stream()
.filter(key -> !key.equals(_sourceType.getConfigJsonKey()))
.collect(Collectors.toSet()),
key -> new JsonType(sourceContent.get(key)));
_displayName = Optional.ofNullable(JsonUtil.getStringOrDefault(input, JsonKeys.DISPLAY_NAME, null));
}

public DatasetSourceType getSourceType() { return _sourceType; }
public JsonType getConfigValue() { return _configValue; }
public Optional<String> getDisplayName() { return _displayName; }
public Map<String,JsonType> getAdditionalConfig() { return _additionalConfig; }

}

public static Dataset createFromRequest(
DatasetRequest request,
User user,
@@ -175,52 +106,72 @@ private static Dataset createFromBasket(
final User user,
final DatasetFactory factory
) throws WdkModelException, DataValidationException {
var recordClass = factory.getWdkModel()

RecordClass recordClass = factory.getWdkModel()
.getRecordClassByUrlSegment(recordClassName)
.orElseThrow(() -> new DataValidationException(
"No record class exists with name '" + recordClassName + "'."));

var basketFactory = factory.getWdkModel().getBasketFactory();
var wasEmpty = true;
BasketFactory basketFactory = factory.getWdkModel().getBasketFactory();

try {
var file = Files.createTempFile("dataset-",
"-" + user.getStableId() + "-" + recordClassName).toFile();
long basketSize = basketFactory.getBasketCounts(user).get(recordClass);
if (basketSize == 0)
throw new DataValidationException("Basket '" + recordClassName + "' does "
+ "not contain any records. No dataset can be made.");

file.deleteOnExit();
// write basket records to file (just to parse again :()
File file = null;
try (Stream<RecordInstance> basketStream = basketFactory.getBasket(user, recordClass)) {

try (
var write = new BufferedWriter(new FileWriter(file));
var stream = basketFactory.getBasket(user, recordClass)
) {
var it = stream
Iterator<String[]> recordIterator = basketStream
.map(RecordInstance::getPrimaryKey)
.map(PrimaryKeyValue::getValues)
.map(Map::values)
.map(c -> c.toArray(new String[0]))
.map(a -> join(a, ListDatasetParser.DATASET_COLUMN_DIVIDER))
.iterator();

if (it.hasNext()) {
wasEmpty = false;
while (it.hasNext()) {
write.write(it.next());
write.write('\n');
}
file = createTempFile(user, recordClassName);

write.flush();
}
writeRecordsToFile(file, recordIterator);

return createDataset(user, new ListDatasetParser(),
new DatasetFileContents(null, file, basketSize), factory);
}
catch (IOException e) {
throw new WdkModelException("Could not create dataset from basket", e);
}
finally {
deleteFile(file);
}
}

/**
 * Best-effort removal of a temporary file once it is no longer needed.  Does
 * nothing if the passed file is null (i.e. the file was never created).
 * Failure to delete is logged as a warning, including the underlying cause,
 * and never thrown, since this method is called from finally blocks and must
 * not mask the result or exception of the calling method.
 *
 * @param file file to delete; may be null
 */
private static void deleteFile(File file) {
  if (file == null) {
    return;
  }
  try {
    Files.delete(file.toPath());
  }
  catch (IOException e) {
    // pass the exception to the logger so the reason for the failure is not lost
    LOG.warn("Unable to delete file after use: " + file.getAbsolutePath(), e);
  }
}

if (wasEmpty)
throw new DataValidationException("Basket '" + recordClassName + "' does "
+ "not contain any records. No dataset can be made.");
/**
 * Creates an empty temporary file in the WDK temp directory configured on the
 * user's model.  The generated file name starts with "dataset-" and ends with
 * the user's stable ID and the passed record class name.
 *
 * @param user user the file is created for
 * @param recordClassName record class name appended to the file name
 * @return the newly created file
 * @throws IOException if the file cannot be created
 */
private static File createTempFile(User user, String recordClassName) throws IOException {
  final String prefix = "dataset-";
  final String suffix = "-" + user.getStableId() + "-" + recordClassName;
  return Files
      .createTempFile(user.getWdkModel().getModelConfig().getWdkTempDir(), prefix, suffix)
      .toFile();
}

return createDataset(user, new ListDatasetParser(),
new DatasetFileContents(null, file), factory);
private static void writeRecordsToFile(File file, Iterator<String[]> rows) throws IOException {

} catch (IOException e) {
throw new WdkModelException(e);
try (BufferedWriter writer = new BufferedWriter(new FileWriter(file))) {

for (String[] rowArray : IteratorUtil.toIterable(rows)) {
String row = String.join(ListDatasetParser.DATASET_COLUMN_DIVIDER, rowArray);
writer.write(row);
writer.write('\n');
}

writer.flush();
}
}

@@ -240,20 +191,29 @@ private static Dataset createFromStrategy(
AnswerValue answerValue = AnswerValueFactory.makeAnswer(
Strategy.getRunnableStep(strategy, strategy.get().getRootStepId()).get());

List<String[]> ids = answerValue.getAllIds();

if (ids.isEmpty())
throw new DataValidationException("Strategy '" + strategyId + "' does not"
+ " contain any records. No dataset can be made.");
long resultSize = answerValue.getResultSizeFactory().getResultSize();
if (resultSize == 0)
throw new DataValidationException("Strategy '" + strategyId + "' does "
+ "not contain any records. No dataset can be made.");

return createDataset(user, new ListDatasetParser(),
new DatasetListContents(joinIds(ids)), factory);
}
// write records to file (just to parse again :()
File file = null;
try (PrimaryKeyIterator pkIterator = answerValue.getAllIds()) {

file = createTempFile(user, strategy.get().getRecordClass().get().getUrlSegment());

private static List<String> joinIds(List<String[]> ids) {
return ids.stream()
.map(idArray -> join(idArray, ListDatasetParser.DATASET_COLUMN_DIVIDER))
.collect(Collectors.toList());
writeRecordsToFile(file, pkIterator);

return createDataset(user, new ListDatasetParser(),
new DatasetFileContents(null, file, resultSize), factory);
}
catch (Exception e) {
  // Exception must be caught because PrimaryKeyIterator inherits
  // AutoCloseable.close(), which is declared to throw Exception.  Exceptions
  // that already carry meaning for callers pass through unchanged (as they do
  // in createFromBasket) rather than being re-wrapped as a model error.
  if (e instanceof WdkModelException) {
    throw (WdkModelException) e;
  }
  if (e instanceof DataValidationException) {
    throw (DataValidationException) e;
  }
  throw new WdkModelException("Could not create dataset from strategy", e);
}
finally {
deleteFile(file);
}
}

private static Dataset createDataset(
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
package org.gusdb.wdk.service.request.user.dataset;

import java.util.Arrays;

import org.gusdb.fgputil.FormatUtil;
import org.gusdb.fgputil.json.JsonType.ValueType;
import org.gusdb.wdk.core.api.JsonKeys;
import org.gusdb.wdk.service.request.exception.RequestMisformatException;

/**
 * Contains the possible ways a user can submit a dataset (for use as a dataset param),
 * along with how to parse the config JSON for the submission.  Each source type has a
 * type indicator (the value clients send to select it), the JSON key under which its
 * primary config value is found, and the JSON type that value must have.
 */
public enum DatasetSourceType {

  ID_LIST("idList", "ids", ValueType.ARRAY),
  BASKET("basket", "basketName", ValueType.STRING),
  FILE("file", "temporaryFileId", ValueType.STRING),
  STRATEGY("strategy", JsonKeys.STRATEGY_ID, ValueType.NUMBER),
  URL("url", "url", ValueType.STRING);

  private final String _typeIndicator;
  private final String _configJsonKey;
  private final ValueType _configValueType;

  DatasetSourceType(String typeIndicator, String configJsonKey, ValueType configValueType) {
    _typeIndicator = typeIndicator;
    _configJsonKey = configJsonKey;
    _configValueType = configValueType;
  }

  /** @return value clients send to select this source type */
  public String getTypeIndicator() {
    return _typeIndicator;
  }

  /** @return key of the primary config value within the source content JSON */
  public String getConfigJsonKey() {
    return _configJsonKey;
  }

  /** @return JSON type the primary config value must have */
  public ValueType getConfigType() {
    return _configValueType;
  }

  /**
   * Finds the source type whose type indicator equals the passed value.
   *
   * @param typeIndicator type indicator sent by the client
   * @return matching source type
   * @throws RequestMisformatException if no source type has the passed indicator;
   * the message lists the accepted type indicators
   */
  public static DatasetSourceType getFromTypeIndicator(String typeIndicator) throws RequestMisformatException {
    return Arrays.stream(values())
        .filter(val -> val._typeIndicator.equals(typeIndicator))
        .findFirst()
        .orElseThrow(() -> new RequestMisformatException(
            // list the indicators clients may send, not the enum constant names
            "Invalid source type. Only [" + FormatUtil.join(
                Arrays.stream(values()).map(DatasetSourceType::getTypeIndicator).toArray(),
                ", ") + "] allowed."));
  }
}
Original file line number Diff line number Diff line change
@@ -25,8 +25,8 @@
import org.gusdb.wdk.service.annotation.OutSchema;
import org.gusdb.wdk.service.request.exception.DataValidationException;
import org.gusdb.wdk.service.request.exception.RequestMisformatException;
import org.gusdb.wdk.service.request.user.DatasetRequestProcessor;
import org.gusdb.wdk.service.request.user.DatasetRequestProcessor.DatasetRequest;
import org.gusdb.wdk.service.request.user.dataset.DatasetRequestProcessor;
import org.gusdb.wdk.service.request.user.dataset.DatasetRequest;
import org.json.JSONException;
import org.json.JSONObject;

0 comments on commit 755c96f

Please sign in to comment.