From 9ab768082822a21ae625c19917147b4c84707029 Mon Sep 17 00:00:00 2001
From: Tom Najdek <tom@doppnet.com>
Date: Mon, 24 Feb 2025 17:58:21 +0100
Subject: [PATCH] Rename attachment item after metadata retrieval

---
 src/js/actions/items-write.js |  2 +-
 src/js/actions/recognize.js   | 22 +++++++++++----
 src/js/common/format.js       | 52 +++++++++++++++++++++++------------
 src/js/reducers/recognize.js  |  6 ++--
 test/recognize.test.jsx       | 20 ++++++++++++++
 5 files changed, 74 insertions(+), 28 deletions(-)

diff --git a/src/js/actions/items-write.js b/src/js/actions/items-write.js
index 5b822661..89305208 100644
--- a/src/js/actions/items-write.js
+++ b/src/js/actions/items-write.js
@@ -238,7 +238,7 @@ const createItem = (properties, libraryKey) => {
 				otherItems: state.libraries[libraryKey].items,
 				response
 			});
-			return response.getEntityByIndex(0);
+			return item;
 		} catch(error) {
 			dispatch({
 					type: ERROR_CREATE_ITEM,
diff --git a/src/js/actions/recognize.js b/src/js/actions/recognize.js
index 223a6bc2..771fde09 100644
--- a/src/js/actions/recognize.js
+++ b/src/js/actions/recognize.js
@@ -1,28 +1,35 @@
-import { createItem, deleteItem, getAttachmentUrl, updateItem } from '.';
+import { createItem, deleteItem, getAttachmentUrl, updateItem, renameAttachment } from '.';
 import { BEGIN_RECOGNIZE_DOCUMENT, COMPLETE_RECOGNIZE_DOCUMENT, ERROR_RECOGNIZE_DOCUMENT,
 	UPDATE_RECOGNIZE_DOCUMENT, BEGIN_UNRECOGNIZE_DOCUMENT, COMPLETE_UNRECOGNIZE_DOCUMENT,
 	ERROR_UNRECOGNIZE_DOCUMENT, } from '../constants/actions';
 import { PDFWorker } from '../common/pdf-worker.js';
 import { pick } from 'web-common/utils';
 import { getItemFromIdentifier } from '../common/identifiers';
+import { getFileBaseNameFromItem } from '../common/format';
 
 const retrieveMetadata = (itemKey, libraryKey, backgroundTaskId) => {
 	return async (dispatch, getState) => {
 		dispatch({ type: BEGIN_RECOGNIZE_DOCUMENT, itemKey, libraryKey, backgroundTaskId });
 		const state = getState();
 		const attachmentItem = state.libraries[state.current.libraryKey]?.items?.[itemKey];
+		const originalFilename = attachmentItem.filename;
+		const originalTitle = attachmentItem.title;
 		try {
 			const recognizerData = await dispatch(getRecognizerData(itemKey));
 			dispatch({ type: UPDATE_RECOGNIZE_DOCUMENT, itemKey, libraryKey, stage: 1 });
 			const recognizedItem = await dispatch(recognizePDF(recognizerData));
+
 			dispatch({ type: UPDATE_RECOGNIZE_DOCUMENT, itemKey, libraryKey, stage: 2 });
 			delete recognizedItem.key;
 			delete recognizedItem.version;
 			recognizedItem.collections = [...attachmentItem.collections];
 			const item = await dispatch(createItem(recognizedItem, libraryKey));
+
 			dispatch({ type: UPDATE_RECOGNIZE_DOCUMENT, itemKey, libraryKey, stage: 3 });
-			await dispatch(updateItem(itemKey, { parentItem: item.key, collections: [] }, libraryKey));
-			dispatch({ type: COMPLETE_RECOGNIZE_DOCUMENT, itemKey, libraryKey, parentItemKey: item.key });
+			await dispatch(updateItem(itemKey, { parentItem: item.key, title: 'PDF', collections: [] }, libraryKey));
+			const newFileName = `${getFileBaseNameFromItem(item, state.meta.mappings)}.pdf`;
+			await dispatch(renameAttachment(itemKey, newFileName, libraryKey));
+			dispatch({ type: COMPLETE_RECOGNIZE_DOCUMENT, itemKey, libraryKey, parentItemKey: item.key, originalFilename, originalTitle });
 		} catch (error) {
 			dispatch({
 				type: ERROR_RECOGNIZE_DOCUMENT,
@@ -164,6 +171,7 @@ const recognizePDF = (recognizerData) => {
 }
 
 const undoRetrieveMetadata = (itemKey, libraryKey) => {
+	// itemKey is the recognized (parent) item, which is deleted; originalItemKey is the attachment item, which is restored as a top-level item
 	return async (dispatch, getState) => {
 		const state = getState();
 		dispatch({
@@ -172,17 +180,19 @@ const undoRetrieveMetadata = (itemKey, libraryKey) => {
 			libraryKey,
 		});
 		try {
-			const originalItemKey = state.recognize.lookup[`${libraryKey}-${itemKey}`];
-			if(!originalItemKey) {
+			const originalItemData = state.recognize.lookup[`${libraryKey}-${itemKey}`];
+			if(!originalItemData) {
 				throw new Error('Original item not found');
 			}
+			const { originalItemKey, originalFilename, originalTitle } = originalItemData;
 			const item = state.libraries[libraryKey].items?.[itemKey];
 			if(!item) {
 				throw new Error('Item not found');
 			}
 
 			const collections = item.collections;
-			await dispatch(updateItem(originalItemKey, { parentItem: false, collections }, libraryKey));
+			await dispatch(updateItem(originalItemKey, { parentItem: false, title: originalTitle, collections }, libraryKey));
+			await dispatch(renameAttachment(originalItemKey, originalFilename, libraryKey));
 			await dispatch(deleteItem(item));
 			dispatch({
 				type: COMPLETE_UNRECOGNIZE_DOCUMENT,
diff --git a/src/js/common/format.js b/src/js/common/format.js
index c65aba12..dbdd6bd4 100644
--- a/src/js/common/format.js
+++ b/src/js/common/format.js
@@ -1,4 +1,5 @@
 import { unescapeHTML } from '../utils';
+import { getItemTitle } from './item';
 
 const entityToChar = str => {
 	const textarea = document.createElement('textarea');
@@ -13,7 +14,7 @@ const noteSummary = note => {
 		note
 			.replace(/<\s*\/?br\s*[/]?>/gi, ' ') // replace <br /> to spaces
 			.replace(/<(?:.|\n)*?>/gm, '') // remove html tags. This is still going to be sanitized by React.
-		).replace(/[\u202F\u00A0]/g, ' ') // replace no-break spaces to normal spaces
+	).replace(/[\u202F\u00A0]/g, ' ') // replace no-break spaces to normal spaces
 		.replace(/ +/g, " ") // remove series of spaces with just one
 		.substring(0, 180); // truncate to 180 chars
 };
@@ -49,7 +50,7 @@ const dateLocalized = date => (date instanceof Date && !isNaN(date)) ?
 
 //@TODO: figure out better place for this
 const itemsSourceLabel = itemsSource => {
-	switch(itemsSource) {
+	switch (itemsSource) {
 		case 'trash':
 			return "Trash";
 		case 'publications':
@@ -69,14 +70,14 @@ const pluralize = (word, count) => count === 1 ? word : `${word}s`;
 
 //@NOTE: should only be used for trusted/sanitized input
 const stripTagsUsingDOM = html => {
-   const tmp = document.createElement("DIV");
-   tmp.innerHTML = html;
-   return tmp.textContent || tmp.innerText || "";
+	const tmp = document.createElement("DIV");
+	tmp.innerHTML = html;
+	return tmp.textContent || tmp.innerText || "";
 }
 
 const lpad = (string, pad, length) => {
 	string = string ? string + '' : '';
-	while(string.length < length) {
+	while (string.length < length) {
 		string = pad + string;
 	}
 	return string;
@@ -94,7 +95,7 @@ const formatDateTime = date =>
 
 const parseDescriptiveString = str => {
 	var lc = str.toLowerCase().trim();
-	switch(lc) {
+	switch (lc) {
 		case 'yesterday':
 			return formatDate(new Date(new Date().getTime() - 86400000));
 		case 'today':
@@ -199,16 +200,31 @@ const renderItemTitle = (title, targetNode) => {
 	return textContent;
 }
 
+// Builds an attachment base name (no extension) mirroring the template
+// {{ firstCreator suffix=" - " }}{{ year suffix=" - " }}{{ title truncate="100" }}
+const getFileBaseNameFromItem = (item, mappings) => {
+	// creatorSummary and parsedDate are read from the item's Symbol.for('meta') entry
+	const meta = item[Symbol.for('meta')] || {};
+	const creator = meta.creatorSummary || '';
+	// the year is taken as the first four characters of parsedDate
+	const year = (meta.parsedDate || '').substring(0, 4);
+	const title = getItemTitle(mappings, item) || '';
+	const prefix = [creator, year].filter(Boolean).map(part => `${part} - `).join('');
+
+	return `${prefix}${title.substring(0, 100)}`;
+}
+
 export {
-	creator,
-	dateLocalized,
-	formatDate,
-	formatDateTime,
-	itemsSourceLabel,
-	noteSummary,
-	noteAsTitle,
-	parseDescriptiveString,
-	pluralize,
-	renderItemTitle,
-	stripTagsUsingDOM,
+	creator,
+	dateLocalized,
+	formatDate,
+	formatDateTime,
+	getFileBaseNameFromItem,
+	itemsSourceLabel,
+	noteAsTitle,
+	noteSummary,
+	parseDescriptiveString,
+	pluralize,
+	renderItemTitle,
+	stripTagsUsingDOM,
 };
diff --git a/src/js/reducers/recognize.js b/src/js/reducers/recognize.js
index 6355c7fd..c4999c32 100644
--- a/src/js/reducers/recognize.js
+++ b/src/js/reducers/recognize.js
@@ -1,5 +1,5 @@
 import { getBaseMappedValue } from '../common/item';
-import { omit } from 'web-common/utils';
+import { omit, pick } from 'web-common/utils';
 import { BEGIN_RECOGNIZE_DOCUMENT, CLEAR_RECOGNIZE_DOCUMENT, CLEAR_RECOGNIZE_DOCUMENTS, COMPLETE_RECOGNIZE_DOCUMENT,
 	ERROR_RECOGNIZE_DOCUMENT, UPDATE_RECOGNIZE_DOCUMENT, COMPLETE_UNRECOGNIZE_DOCUMENT, ERROR_UNRECOGNIZE_DOCUMENT } from '../constants/actions';
 
@@ -21,7 +21,7 @@ const getDefaultState = () => ({
 	backgroundTaskId: null, // id of the background task for all recognition processes, can only be updated by BEGIN_RECOGNIZE_DOCUMENT action
 	progress: 0,
 	entries: [], // items being recognized: { itemKey, itemTitle, libraryKey, stage, error, completed },
-	lookup: {}, // items previously recognized: { libraryKey-itemKey: parentItemKey }
+	lookup: {}, // items previously recognized: { [libraryKey-parentItemKey]: { originalItemKey, originalTitle, originalFilename } }
 });
 
 const recognize = (state = getDefaultState(), action, globalState) => {
@@ -75,7 +75,7 @@ const recognize = (state = getDefaultState(), action, globalState) => {
 				}),
 				lookup: {
 					...state.lookup,
-					[`${action.libraryKey}-${action.parentItemKey}`]: action.itemKey,
+					[`${action.libraryKey}-${action.parentItemKey}`]: { originalItemKey: action.itemKey, ...pick(action, ['originalTitle', 'originalFilename']) },
 				}
 			});
 		case ERROR_RECOGNIZE_DOCUMENT:
diff --git a/test/recognize.test.jsx b/test/recognize.test.jsx
index 971d7bfd..4ddc45ef 100644
--- a/test/recognize.test.jsx
+++ b/test/recognize.test.jsx
@@ -75,6 +75,7 @@ describe('Metadata Retrieval', () => {
 		let hasPatchedAttachmentItem = false;
 		let hasDeleted = false;
 		let patchCounter = 0;
+		let renameCounter = 0;
 		let version = state.libraries.u1.sync.version;
 
 		server.use(
@@ -112,9 +113,11 @@ describe('Metadata Retrieval', () => {
 				const item = await request.json();
 				if (patchCounter === 0) {
 					expect(item.parentItem).toBe('S8CIV6VJ');
+					expect(item.title).toBe('PDF');
 					expect(item.collections).toEqual([]);
 				} else {
 					expect(item.parentItem).toBe(false);
+					expect(item.title).toBe('attention-is-all-you-need.pdf');
 					expect(item.collections).toEqual(['CSB4KZUU']);
 				}
 				hasPatchedAttachmentItem = true;
@@ -123,6 +126,19 @@ describe('Metadata Retrieval', () => {
 				await delay(100);
 				return new HttpResponse(null, { status: 204, headers: { 'Last-Modified-Version': version } });
 			}),
+			// rename attachment file
+			http.post('https://api.zotero.org/users/1/items/UMPPCXU4/file', async ({ request }) => {
+				const bodyParams = (await request.text()).split('&');
+				if(renameCounter === 0) {
+					expect(bodyParams).toContain('filename=Vaswani et al. - 2023 - Attention Is All You Need.pdf');
+					expect(bodyParams).toContain('md5=18e1b007a1dab45b30cc861ba2dfda25');
+				} else {
+					expect(bodyParams).toContain('filename=attention-is-all-you-need.pdf');
+					expect(bodyParams).toContain('md5=18e1b007a1dab45b30cc861ba2dfda25');
+				}
+				renameCounter++;
+				return HttpResponse.json({ 'exists': 1 });
+			}),
 			http.delete('https://api.zotero.org/users/1/items/S8CIV6VJ', async () => {
 				hasDeleted = true;
 				version++;
@@ -187,6 +203,8 @@ describe('Metadata Retrieval', () => {
 		expect(screen.queryByRole('row', { name: 'attention-is-all-you-need.pdf' })).not.toBeInTheDocument();
 
 		await user.click(screen.getByRole('row', { name: 'Attention Is All You Need' }));
+		expect(patchCounter).toBe(1);
+		expect(renameCounter).toBe(1);
 
 		// unregonize
 		const toolbar = screen.getByRole('toolbar', { name: 'items toolbar' });
@@ -200,6 +218,8 @@ describe('Metadata Retrieval', () => {
 		await waitFor(() => {
 			expect(screen.queryByRole('row', { name: 'Attention Is All You Need' })).not.toBeInTheDocument();
 		});
+		expect(patchCounter).toBe(2);
+		expect(renameCounter).toBe(2);
 		expect(hasDeleted).toBe(true);
 	});
 });