Skip to content

Commit

Permalink
Merge pull request #1539 from libris/feature/lxl-4599
Browse files Browse the repository at this point in the history
Add profile negotiation for EMM dump data.
  • Loading branch information
jannistsiroyannis authored Jan 9, 2025
2 parents e0882c5 + 6e382d4 commit ccfbcfa
Show file tree
Hide file tree
Showing 2 changed files with 51 additions and 20 deletions.
66 changes: 47 additions & 19 deletions emm/src/main/java/whelk/Dump.java
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ public class Dump {
private static final int GZIP_BUF_SIZE = 64 * 1024;
private static final String ND_JSON_LD_GZ_EXT = ".ndjsonld.gz";

public static void sendDumpResponse(Whelk whelk, String apiBaseUrl, HttpServletRequest req, HttpServletResponse res) throws IOException, SQLException {
public static void sendDumpResponse(Whelk whelk, TargetVocabMapper targetVocabMapper, String apiBaseUrl, HttpServletRequest req, HttpServletResponse res) throws IOException {
String selection = req.getParameter("selection");

if (selection == null) {
Expand All @@ -86,6 +86,16 @@ public static void sendDumpResponse(Whelk whelk, String apiBaseUrl, HttpServletR
return;
}

String profile = req.getParameter("profile"); // May be null, meaning default (kbv)
Document profileDoc = null;
if (profile != null) {
profileDoc = whelk.getStorage().getDocumentByIri(profile);
if (profileDoc == null) {
logger.info("Bad profile requested for EMM dump: {}", profile);
profile = null;
}
}

String tmpDir = System.getProperty("java.io.tmpdir");
Path dumpsPath = Paths.get(tmpDir, "dumps");
Files.createDirectories(dumpsPath);
Expand All @@ -97,10 +107,10 @@ public static void sendDumpResponse(Whelk whelk, String apiBaseUrl, HttpServletR
}

if (isDownload) {
sendDumpDownloadResponse(whelk, dumpFilePath, res);
sendDumpDownloadResponse(whelk, targetVocabMapper, profile, profileDoc, dumpFilePath, res);
} else {
long offsetNumeric = Long.parseLong(offset);
sendDumpPageResponse(whelk, apiBaseUrl, selection, dumpFilePath, offsetNumeric, res);
sendDumpPageResponse(whelk, targetVocabMapper, profile, profileDoc, apiBaseUrl, selection, dumpFilePath, offsetNumeric, res);
}
}

Expand Down Expand Up @@ -149,7 +159,7 @@ private static void sendDumpIndexResponse(String apiBaseUrl, HttpServletResponse
HttpTools.sendResponse(res, responseObject, JSON_CONTENT_TYPE);
}

private static void sendDumpPageResponse(Whelk whelk, String apiBaseUrl, String dump, Path dumpFilePath, long offsetLines, HttpServletResponse res) throws IOException {
private static void sendDumpPageResponse(Whelk whelk, TargetVocabMapper targetVocabMapper, String profile, Document profileDoc, String apiBaseUrl, String dump, Path dumpFilePath, long offsetLines, HttpServletResponse res) throws IOException {
ArrayList<String> recordIdsOnPage = new ArrayList<>(EmmChangeSet.TARGET_HITS_PER_PAGE);
Long totalEntityCount = null;

Expand Down Expand Up @@ -212,10 +222,10 @@ private static void sendDumpPageResponse(Whelk whelk, String apiBaseUrl, String

BasicFileAttributes attributes = Files.readAttributes(dumpFilePath, BasicFileAttributes.class);
Instant dumpCreationTime = attributes.creationTime().toInstant();
sendFormattedResponse(whelk, apiBaseUrl, dump, recordIdsOnPage, res, offsetLines, totalEntityCount, dumpCreationTime);
sendFormattedResponse(whelk, targetVocabMapper, profile, profileDoc, apiBaseUrl, dump, recordIdsOnPage, res, offsetLines, totalEntityCount, dumpCreationTime);
}

private static void sendFormattedResponse(Whelk whelk, String apiBaseUrl, String dump, ArrayList<String> recordIdsOnPage, HttpServletResponse res, long offset, Long totalEntityCount, Instant dumpCreationTime) throws IOException{
private static void sendFormattedResponse(Whelk whelk, TargetVocabMapper targetVocabMapper, String profile, Document profileDoc, String apiBaseUrl, String dump, ArrayList<String> recordIdsOnPage, HttpServletResponse res, long offset, Long totalEntityCount, Instant dumpCreationTime) throws IOException{
var responseObject = new LinkedHashMap<>();

responseObject.put(JsonLd.CONTEXT_KEY, "https://www.w3.org/ns/activitystreams");
Expand All @@ -242,7 +252,12 @@ private static void sendFormattedResponse(Whelk whelk, String apiBaseUrl, String
var items = new ArrayList<>(EmmChangeSet.TARGET_HITS_PER_PAGE);
responseObject.put("items", items);

var contextDoc = contextDoc(whelk);
Document contextDoc = null;
if (profileDoc != null)
contextDoc = profileDoc;
else {
contextDoc = contextDoc(whelk);
}
if (offset == 0) {
items.add(wrapContextDoc(contextDoc));
}
Expand Down Expand Up @@ -272,28 +287,33 @@ private static void sendFormattedResponse(Whelk whelk, String apiBaseUrl, String
itemOfPath.add("@graph"); itemOfPath.add(1); itemOfPath.add("itemOf"); // unggh..
doc._set(itemOfPath, instance.getThing(), doc.data);

items.add(wrapDoc(doc, contextDoc));
items.add(formatDoc(doc, contextDoc, targetVocabMapper, profile, profileDoc));
}
// For normal categories
else {
items.add(wrapDoc(doc, contextDoc));
items.add(formatDoc(doc, contextDoc, targetVocabMapper, profile, profileDoc));
}

}

HttpTools.sendResponse(res, responseObject, JSON_CONTENT_TYPE);
}

private static void sendDumpDownloadResponse(Whelk whelk, Path dumpFilePath, HttpServletResponse res) {
private static void sendDumpDownloadResponse(Whelk whelk, TargetVocabMapper targetVocabMapper, String profile, Document profileDoc, Path dumpFilePath, HttpServletResponse res) {
String filename = Unicode.stripSuffix(dumpFilePath.getFileName().toString(), ".dump") + ND_JSON_LD_GZ_EXT;
res.setHeader("Content-Disposition", "attachment; filename=" + filename);
res.setHeader("Content-Type", "application/octet-stream");

int batchSize = EmmChangeSet.TARGET_HITS_PER_PAGE;
try (GZIPOutputStream os = new GZIPOutputStream(new BufferedOutputStream(res.getOutputStream()), GZIP_BUF_SIZE)) {
res.flushBuffer();

var contextDoc = contextDoc(whelk);

Document contextDoc = null;
if (profileDoc != null)
contextDoc = profileDoc;
else {
contextDoc = contextDoc(whelk);
}
writeJsonLdLine(wrapContextDoc(contextDoc), os);

// Has the dump not begun being written yet ?
Expand Down Expand Up @@ -325,26 +345,26 @@ private static void sendDumpDownloadResponse(Whelk whelk, Path dumpFilePath, Htt
batch.add(line.trim());

if (batch.size() >= batchSize) {
writeJsonLdLines(whelk, batch, contextDoc, os);
writeJsonLdLines(whelk, targetVocabMapper, profile, profileDoc, batch, contextDoc, os);
batch = new ArrayList<>(batchSize);
}
}
writeJsonLdLines(whelk, batch, contextDoc, os);
writeJsonLdLines(whelk, targetVocabMapper, profile, profileDoc, batch, contextDoc, os);
res.flushBuffer();
}
} catch (Exception e) {
logger.info("Error sending dump download: {}", e.getMessage());
}
}

private static void writeJsonLdLines(Whelk whelk, Collection<String> ids, Document contextDoc, OutputStream os) throws IOException {
private static void writeJsonLdLines(Whelk whelk, TargetVocabMapper targetVocabMapper, String profile, Document profileDoc, Collection<String> ids, Document contextDoc, OutputStream os) throws IOException {
Map<String, Document> idsAndRecords = whelk.bulkLoad(ids);
for (Document doc : idsAndRecords.values()) {
if (doc.getDeleted()) {
continue;
}

writeJsonLdLine(wrapDoc(doc, contextDoc), os);
writeJsonLdLine(formatDoc(doc, contextDoc, targetVocabMapper, profile, profileDoc), os);
}
os.flush();
}
Expand All @@ -357,15 +377,23 @@ private static void writeJsonLdLine(Object object, OutputStream os) throws IOExc
os.write("\n".getBytes(StandardCharsets.UTF_8));
}

private static Object wrapDoc(Document doc, Document contextDoc) {
private static Object formatDoc(Document doc, Document contextDoc, TargetVocabMapper targetVocabMapper, String profile, Document profileDoc) {
var context = new ArrayList<>();
context.add(null);
context.add(contextDoc.getRecordIdentifiers().getFirst());
return Map.of(

Document formattedDoc = doc; // Will be replaced if there's a profile
if (profile != null && profileDoc != null) {
formattedDoc = new Document((Map) targetVocabMapper.applyTargetVocabularyMap(profile, profileDoc.data, doc.data));
}

Map data = Map.of(
JsonLd.ID_KEY, doc.getRecordIdentifiers().getFirst(),
JsonLd.CONTEXT_KEY, context,
JsonLd.GRAPH_KEY, doc.data.get(JsonLd.GRAPH_KEY)
JsonLd.GRAPH_KEY, formattedDoc.data.get(JsonLd.GRAPH_KEY)
);

return data;
}

private static Object wrapContextDoc(Document contextDoc) {
Expand Down
5 changes: 4 additions & 1 deletion emm/src/main/java/whelk/EmmServlet.java
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,14 @@
public class EmmServlet extends HttpServlet {
private final Logger logger = LogManager.getLogger(this.getClass());
private final Whelk whelk;
private final TargetVocabMapper targetVocabMapper;

public static final String AS2_CONTENT_TYPE = "application/activity+json";

/**
 * Creates the servlet, loading the core Whelk and preparing a
 * TargetVocabMapper from the system context document so that dump
 * requests can negotiate alternative profiles.
 *
 * NOTE(review): assumes the system context document exists in storage;
 * a missing document would NPE here — confirm this is guaranteed at deploy time.
 */
public EmmServlet() {
    whelk = Whelk.createLoadedCoreWhelk();
    Document contextDocument = whelk.getStorage().getDocumentByIri(whelk.getSystemContextUri());
    targetVocabMapper = new TargetVocabMapper(whelk.getJsonld(), contextDocument.data);
}

public void init() {
Expand All @@ -31,7 +34,7 @@ public void doGet(HttpServletRequest req, HttpServletResponse res) {
String apiBaseUrl = req.getRequestURL().toString();

if (req.getServletPath() != null && req.getServletPath().endsWith("/full")) {
Dump.sendDumpResponse(whelk, apiBaseUrl, req, res);
Dump.sendDumpResponse(whelk, targetVocabMapper, apiBaseUrl, req, res);
return;
}
String until = req.getParameter("until");
Expand Down

0 comments on commit ccfbcfa

Please sign in to comment.