Skip to content

Commit

Permalink
Merge pull request #460 from cidgoh/csv-save-fix
Browse files Browse the repository at this point in the history
Csv save fix
  • Loading branch information
ddooley authored Feb 4, 2025
2 parents 1dd4766 + 3fda8d8 commit 41f24b7
Show file tree
Hide file tree
Showing 3 changed files with 112 additions and 91 deletions.
3 changes: 2 additions & 1 deletion lib/Toolbar.js
Original file line number Diff line number Diff line change
Expand Up @@ -313,7 +313,8 @@ class Toolbar {

updateTemplateOptions() {
this.$selectTemplate.empty();
for (const [schema_name, schema_obj] of Object.entries(this.menu)) {
//for (const [schema_name, schema_obj] of Object.entries(this.menu)) {
for (const schema_obj of Object.values(this.menu)) {
const templates = schema_obj['templates'];
for (const [template_name, template_obj] of Object.entries(templates)) {
let path = schema_obj.folder + '/' + template_name;
Expand Down
196 changes: 108 additions & 88 deletions lib/utils/files.js
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { utils as XlsxUtils, writeFile } from 'xlsx/xlsx.js';
import { utils as XlsxUtils, writeFile} from 'xlsx/xlsx.js';
import { saveAs } from 'file-saver';

/**
Expand Down Expand Up @@ -120,6 +120,7 @@ export function createWorkbookFromJSON(jsonData) {
return workbook;
}

/*
function writeWorkbook(workbook, baseName, ext, opt = {}) {
if (ext === 'xlsx' || ext === 'xls') {
// can support multiple sheets in one file
Expand All @@ -144,122 +145,141 @@ function writeWorkbook(workbook, baseName, ext, opt = {}) {
});
}
}
*/

/**
* Download matrix to file.
* Note that BOM and UTF-8 can create problems on some systems when importing
* file. See "Supported Output Formats" and "UTF-16 Unicode Text" sections of
* https://reactian.com/sheetjs-community-edition-spreadsheet-data-toolkit/
* and https://github.com/SheetJS/sheetjs
* Solution at bottom of: https://github.com/SheetJS/sheetjs/issues/943
* The "Comma Separated Values" format is actually UTF-8 with BOM prefix.
* A “U+” designation refers to a character, and in this case that would be
* U+FEFF; the start of the file is the three-byte sequence EF BB BF which is
* how U+FEFF is represented in UTF-8.
* @param {Workbook} workbook Workbook to download.
* @param {String} baseName Basename of downloaded file.
* @param {String} ext Extension of downloaded file.
*/
export function exportWorkbook(workbook, baseName, ext) {
switch (ext) {
case 'xlsx':
case 'xls':
writeWorkbook(workbook, baseName, ext);
break;
case 'csv':
processAndSave(
workbook,
baseName,
'csv',
',',
'text/plain;charset=UTF-8'
);
break;

case 'csv (UTF-16)':
processAndSave(
workbook,
baseName,
'csv',
',',
'text/plain;charset=UTF-16LE'
);
break;

case 'tsv':
processAndSave(
workbook,
baseName,
'tsv',
'\t',
'text/plain;charset=UTF-8'
);
break;

case 'tsv (UTF-16)':
processAndSave(
workbook,
baseName,
'tsv',
'\t',
'text/plain;charset=UTF-16LE'
);
break;

case 'csv (UTF-8, no BOM)':
processAndSave(
workbook,
baseName,
'csv',
',',
'text/plain;charset=UTF-8',
false
);
break;

case 'csv (ASCII)':
processAndSave(
workbook,
baseName,
'csv',
',',
'text/plain;charset=us-ascii',
false
);
break;
}
}

function processAndSave(
workbook,
baseName,
ext,
delimiter,
mimeType,
includeBOM = true
) {
// Often just one sheet, but if multiple, then each gets file name + _ + template (class) name
const sheets = workbook.SheetNames;
sheets.forEach((sheetName) => {
let data = '';

const worksheet = workbook.Sheets[sheetName];
const fileName = `${baseName}${sheets.length > 1 ? `_${sheetName}` : ''}.${ext.split(' ')[0]}`;
var data = '';
switch (ext) {
case 'xlsx':
case 'xls':
// Note, mimeType always set to application/zip in these cases.
writeFile(workbook, `${baseName}.${ext}`); //, opt
break;

/* Notes:
See
- https://docs.sheetjs.com/docs/api/write-options/
- https://docs.sheetjs.com/docs/api/utilities/csv#csv-output
saveBlob() enables more accurate mimeTypes?
* writeFile(bookType: 'csv'...) output includes the UTF-8 byte order
* mark ("BOM").
* sheet_to_csv() will return JavaScript strings without the UTF-8 BOM.
*/

/* Phasing this out. UTF-8 doesn't need a BOM
case 'csv': // UTF-8
// writeFile(workbook, fileName, {bookType: 'csv', FS: ','});
data = XlsxUtils.sheet_to_csv(worksheet, {FS: ','});
data = '\uFEFF' + data; //BOM
saveBlob(data, fileName, 'text/plain;charset=UTF-8');
break;
*/

/* This case won't work until we convert data to UTF-16
case 'csv (UTF-16)':
//writeFile(workbook, fileName, {bookType: 'txt', FS: ','});
data = XlsxUtils.sheet_to_csv(worksheet, {FS: ','});
data = '\uFEFF' + data; //BOM
saveBlob(data, fileName, 'text/plain;charset=UTF-16LE');
break;
*/

case 'csv':
case 'csv (UTF-8, no BOM)':
data = XlsxUtils.sheet_to_csv(worksheet, {FS: ','});
saveBlob(data, fileName, 'text/plain;charset=UTF-8');
break;

/* This case won't work until we convert data to ASCII
case 'csv (ASCII)': // no BOM
data = XlsxUtils.sheet_to_csv(worksheet, {FS: ','});
saveBlob(data, fileName, 'text/plain;charset=us-ascii');
break;
*/

/*
* https://stackoverflow.com/questions/8336355/what-exactly-is-unicode-codepage-1200
* sheet_to_txt(): sheetjs notes: "If encoding support is available, the
* output will be encoded in CP1200 and the UTF-16 BOM will be added. If
* encoding support is not available, the output will be encoded as a
* standard string." In DH tests it seems "encoding support" is not
* available, and resulting file is UTF-8 +BOM anyways.
*/
case 'tsv': // UTF-8 BOM version
// SheetJS note: For compatibility with Excel, writeFile() csv output
// will always include the UTF-8 byte order mark ("BOM").
//writeFile(workbook, fileName, {bookType: 'csv', FS: '\t'});
data = XlsxUtils.sheet_to_csv(worksheet, {FS: '\t'});
//data = '\uFEFF' + data; //BOM
saveBlob(data, fileName, 'text/plain;charset=UTF-8');
break;

/* Not working, produces hexadecimal file - is charset="UTF-16LE" recognized?
* See Table 2-4: unicode.org/versions/Unicode6.0.0/ch02.pdf
* UTF-16 little endian, aka code page 1200, is not permitted to have a BOM,
* according to the Unicode standard.
* DATA NEEDS TO BE CONVERTED TO UTF-16
*
case 'tsv (UTF-16)': // no BOM
// See: https://localizely.com/character-encodings/utf16le/
//writeFile(workbook, fileName, {bookType: 'tsv', FS: '\t'});
data = XlsxUtils.sheet_to_txt(worksheet, {FS: '\t'});
saveBlob(data, fileName, 'text/plain;charset=UTF-16LE');
break;
*/
}
})
};

// Saves workbook which may have multiple sheets into one or more files.
// ext: csv, csv (UTF-16), tsv, tsv (UTF-16)
// ext no BOM: csv (UTF-8, no BOM), csv (ASCII)
// This script can enhance file type with mimeType - but is that something sheetJS can't do?
function saveBlob(
data,
fileName,
mimeType
) {

/*
const sheetData = XlsxUtils.sheet_to_json(worksheet, { header: 1 });
const formattedData = sheetData
.map((row) => row.join(delimiter))
.join('\n');
data += formattedData + '\n';
// Insert BOM character.
if (includeBOM && mimeType.includes('UTF-8')) {
data = '\uFEFF' + data;
}
*/

// Enhancing with mimeType
const blob = new Blob([data], { type: mimeType });
saveAs(
blob,
`${baseName}${sheets.length > 1 ? `_${sheetName}` : ''}.${
ext.split(' ')[0]
}`
);
});
}
saveAs(blob, fileName);
};

// TODO: refactor to export matrix
export function exportFile(matrix, baseName, ext) {
Expand Down
4 changes: 2 additions & 2 deletions lib/utils/templates.js
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,8 @@ const schemaFromChildPath = (childPath) =>

// Returns default template built from schema
async function compileSchema(schema_folder) { // e.g. canada_covid19

for (const [schema_name, schema_obj] of Object.entries(menu)) {
//for (const [schema_name, schema_obj] of Object.entries(menu)) {
for (const schema_obj of Object.values(menu)) {
if (schema_obj.folder === schema_folder) {
var schema = await fetchSchema(`/templates/${schema_folder}/schema.json`);
const template = {
Expand Down

0 comments on commit 41f24b7

Please sign in to comment.