diff --git a/.vscode/settings.json b/.vscode/settings.json index 00ad71f..ead151b 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -1,3 +1,9 @@ { - "typescript.tsdk": "node_modules\\typescript\\lib" + "typescript.tsdk": "node_modules\\typescript\\lib", + "files.exclude": { + "array": true, + "docs": true, + "string": true, + "utils": true + } } \ No newline at end of file diff --git a/package.json b/package.json index a7d86ee..c42c1f2 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "datapipe-js", - "version": "0.2.14", + "version": "0.2.16", "description": "dataPipe is a JavaScript library for data manipulations, data transformations and data wrangling library inspired by LINQ (C#) and Pandas (Python)", "main": "dist/data-pipe.min.js", "module": "dist/data-pipe.esm.js", diff --git a/src/tests/dsv-parser.spec.ts b/src/tests/dsv-parser.spec.ts index c09b029..9eb5316 100644 --- a/src/tests/dsv-parser.spec.ts +++ b/src/tests/dsv-parser.spec.ts @@ -108,6 +108,15 @@ describe('Dsv Parser specification', () => { expect(result[0].F2).toBe(2); }) + it('simple numders and zeros', () => { + const csv = ["F1,F2,F3", `0,2,0`].join('\n') + const result = parseCsv(csv); + expect(result.length).toBe(1); + expect(result[0].F1).toBe(0); + expect(result[0].F2).toBe(2); + expect(result[0].F3).toBe(0); + }) + it('Empty should be null', () => { const csv = ["F1,F2,F3", `1,,"Test, comma"`].join('\n') const result = parseCsv(csv); diff --git a/src/types.ts b/src/types.ts index 2c871b0..b7b9f74 100644 --- a/src/types.ts +++ b/src/types.ts @@ -31,16 +31,6 @@ export type ScalarObject = Record; */ export type PrimitivesObject = Record; -/** - * A simple data table structure what provides a most efficient way - * to send data across the wire - */ -export interface TableDto { - fieldDataTypes?: DataTypeName[]; - fieldNames: string[]; - rows: PrimitiveType[][]; -} - /** * Commonly used and recognized types */ @@ -54,6 +44,21 @@ export enum DataTypeName { Boolean = 'Boolean' } +export interface Table { + fieldDataTypes?: DataTypeName[]; + fieldNames: string[]; + rows: T[][]; +} + + +/** + * A simple data table structure what provides a most efficient way + * to send data across the wire + */ +export type TableDto = Table; + +export type ScallarTable = Table; + export interface FieldDescription { fieldName: string; isNullable: boolean; diff --git a/src/utils/dsv-parser.ts b/src/utils/dsv-parser.ts index 6b914d5..fb7ba7a 100644 --- a/src/utils/dsv-parser.ts +++ b/src/utils/dsv-parser.ts @@ -1,5 +1,6 @@ import { parseNumberOrNull, parseDatetimeOrNull } from "./utils"; -import { ParsingOptions, ScalarType, ScalarObject } from "../types"; +import { ParsingOptions, ScalarType, ScalarObject, TableDto, ScallarTable } from "../types"; +import { toTable } from "./table"; type ParsingContext = { content: string; @@ -23,7 +24,7 @@ function getObjectElement(fieldNames: string[], tokens: string[], options: Parsi value = !!value; } else { const num = parseNumberOrNull(value as string); - value = num || value; + value = (num === null || num === undefined) ? value : num; } } obj[fieldName] = value === EmptySymbol ? '' : value; @@ -118,7 +119,19 @@ function getLineTokens(content: string, options: ParsingOptions): ScalarObject[] if (!fieldNames) { // fieldName is used as indicator on whether data rows handling started - fieldNames = tokens.map(t => t.trim()); // field names can't have spaces + fieldNames = []; + + for (let i = 0; i < tokens.length; i++) { + // if empty then _ + const token = tokens[i].trim().length ? tokens[i].trim() : '_'; + if (fieldNames.indexOf(token) >= 0) { + // need to make sure fieldNames are unique + fieldNames.push(token + i) + } else { + fieldNames.push(token) + } + } + lineNumber++; continue; } @@ -142,6 +155,80 @@ function getLineTokens(content: string, options: ParsingOptions): ScalarObject[] return result; } +function parseLineTokens(content: string, options: ParsingOptions): ScallarTable { + const ctx = { + content: content, + currentIndex: 0 + } as ParsingContext; + content = content || ''; + const delimiter = options.delimiter || ','; + + const result = {} as ScallarTable; + let lineNumber = 0; + let fieldNames: string[] | null = null; + let isEmpty = true; + + do { + const tokens = nextLineTokens(ctx, delimiter); + + isEmpty = tokens.filter(f => !f || !f.length).length === tokens.length; + + if (isEmpty) { + lineNumber++; + continue; + } + + // skip rows based skipRows value + if (lineNumber < options.skipRows) { + lineNumber++; + continue; + } + + // skip rows based on skipUntil call back + if (!fieldNames && typeof options.skipUntil === "function" && !options.skipUntil(tokens)) { + lineNumber++; + continue; + } + + if (!fieldNames) { + // fieldName is used as indicator on whether data rows handling started + fieldNames = []; + + for (let i = 0; i < tokens.length; i++) { + // if empty then _ + const token = tokens[i].trim().length ? tokens[i].trim() : '_'; + if (fieldNames.indexOf(token) >= 0) { + // need to make sure fieldNames are unique + fieldNames.push(token + i) + } else { + fieldNames.push(token) + } + } + + result.fieldNames = fieldNames; + + lineNumber++; + continue; + } + + if (typeof options.takeWhile === "function" && fieldNames && !options.takeWhile(tokens)) { + break; + } + + const row = getObjectElement(fieldNames, tokens, options) + + if (row) { + // no need for null or empty objects + result.rows.push(tokens); + } + lineNumber++; + } + while (++ctx.currentIndex < ctx.content.length) + + return result; +} + + export function parseCsv(content: string, options?: ParsingOptions): ScalarObject[] { content = content || ''; @@ -152,6 +239,18 @@ export function parseCsv(content: string, options?: ParsingOptions): ScalarObjec return getLineTokens(content, options || new ParsingOptions()); } +export function parseCsvToTable(content: string, options?: ParsingOptions): ScallarTable { + content = content || ''; + + if (!content.length) { + return {} as TableDto; + } + + const items = getLineTokens(content, options || new ParsingOptions()); + const table = toTable(items) + return table; +} + export function toCsv(array: ScalarObject[], delimiter = ','): string { array = array || []; @@ -184,4 +283,3 @@ export function toCsv(array: ScalarObject[], delimiter = ','): string { return lines.join('\n') } - diff --git a/src/utils/utils.ts b/src/utils/utils.ts index 9ed733a..19a75e2 100644 --- a/src/utils/utils.ts +++ b/src/utils/utils.ts @@ -269,7 +269,11 @@ export function getFieldDescriptions(items: Record[]): Field if (value === null || value === undefined) { fDesc.isNullable = true } else { - fDesc.dataTypeName = workoutDataType(value, fDesc.dataTypeName) + const newType = workoutDataType(value, fDesc.dataTypeName); + if(newType !== fDesc.dataTypeName) { + fDesc.dataTypeName = newType; + } + if ((fDesc.dataTypeName == DataTypeName.String || fDesc.dataTypeName == DataTypeName.LargeString) && String(value).length > (fDesc.maxSize || 0)) { fDesc.maxSize = String(value).length; } diff --git a/typedoc.json b/typedoc.json new file mode 100644 index 0000000..3f17957 --- /dev/null +++ b/typedoc.json @@ -0,0 +1,4 @@ +{ + "name": "datapipe js doc", + "inputFiles": ["./src/data-pipe.ts"] + } diff --git a/types/array b/types/array deleted file mode 100644 index c043056..0000000 --- a/types/array +++ /dev/null @@ -1 +0,0 @@ -export * from './array'