Skip to content

Commit

Permalink
fixed parsing issue
Browse files Browse the repository at this point in the history
  • Loading branch information
ppaska committed Jun 14, 2020
1 parent b2c2aa9 commit 17c8083
Show file tree
Hide file tree
Showing 8 changed files with 143 additions and 18 deletions.
8 changes: 7 additions & 1 deletion .vscode/settings.json
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
{
"typescript.tsdk": "node_modules\\typescript\\lib"
"typescript.tsdk": "node_modules\\typescript\\lib",
"files.exclude": {
"array": true,
"docs": true,
"string": true,
"utils": true
}
}
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "datapipe-js",
"version": "0.2.14",
"version": "0.2.16",
"description": "dataPipe is a JavaScript library for data manipulations, data transformations and data wrangling library inspired by LINQ (C#) and Pandas (Python)",
"main": "dist/data-pipe.min.js",
"module": "dist/data-pipe.esm.js",
Expand Down
9 changes: 9 additions & 0 deletions src/tests/dsv-parser.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,15 @@ describe('Dsv Parser specification', () => {
expect(result[0].F2).toBe(2);
})

// Zero-valued cells must come back as the number 0, alongside ordinary numbers.
it('simple numders and zeros', () => {
  const headerLine = "F1,F2,F3";
  const dataLine = `0,2,0`;

  const rows = parseCsv([headerLine, dataLine].join('\n'));
  expect(rows.length).toBe(1);

  const firstRow = rows[0];
  expect(firstRow.F1).toBe(0);
  expect(firstRow.F2).toBe(2);
  expect(firstRow.F3).toBe(0);
})

it('Empty should be null', () => {
const csv = ["F1,F2,F3", `1,,"Test, comma"`].join('\n')
const result = parseCsv(csv);
Expand Down
25 changes: 15 additions & 10 deletions src/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -31,16 +31,6 @@ export type ScalarObject = Record<string, ScalarType>;
*/
export type PrimitivesObject = Record<string, PrimitiveType>;

/**
* A simple data table structure that provides an efficient way
* to send data across the wire.
*/
export interface TableDto {
/** Optional list of data type names; presumably one entry per field, in `fieldNames` order (confirm against producers). */
fieldDataTypes?: DataTypeName[];
/** Names of the table's fields. */
fieldNames: string[];
/** Table rows; each row is an array of primitive values. */
rows: PrimitiveType[][];
}

/**
* Commonly used and recognized types
*/
Expand All @@ -54,6 +44,21 @@ export enum DataTypeName {
Boolean = 'Boolean'
}

/**
* Generic table structure: a list of field names plus rows of values of type `T`.
*/
export interface Table<T> {
/** Optional list of data type names; presumably one entry per field, in `fieldNames` order (confirm against producers). */
fieldDataTypes?: DataTypeName[];
/** Names of the table's fields. */
fieldNames: string[];
/** Table rows; each row is an array of `T` values. */
rows: T[][];
}


/**
* A simple data table structure that provides an efficient way
* to send data across the wire. Its rows hold `PrimitiveType` values.
*/
export type TableDto = Table<PrimitiveType>;

/**
* A table whose rows hold `ScalarType` values.
* NOTE(review): "Scallar" looks like a misspelling of "Scalar"; renaming would affect every importer, so confirm before changing.
*/
export type ScallarTable = Table<ScalarType>;

export interface FieldDescription {
fieldName: string;
isNullable: boolean;
Expand Down
106 changes: 102 additions & 4 deletions src/utils/dsv-parser.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import { parseNumberOrNull, parseDatetimeOrNull } from "./utils";
import { ParsingOptions, ScalarType, ScalarObject } from "../types";
import { ParsingOptions, ScalarType, ScalarObject, TableDto, ScallarTable } from "../types";
import { toTable } from "./table";

type ParsingContext = {
content: string;
Expand All @@ -23,7 +24,7 @@ function getObjectElement(fieldNames: string[], tokens: string[], options: Parsi
value = !!value;
} else {
const num = parseNumberOrNull(value as string);
value = num || value;
value = (num === null || num === undefined) ? value : num;
}
}
obj[fieldName] = value === EmptySymbol ? '' : value;
Expand Down Expand Up @@ -118,7 +119,19 @@ function getLineTokens(content: string, options: ParsingOptions): ScalarObject[]

if (!fieldNames) {
// fieldName is used as indicator on whether data rows handling started
fieldNames = tokens.map(t => t.trim()); // field names can't have spaces
fieldNames = [];

for (let i = 0; i < tokens.length; i++) {
// if empty then _
const token = tokens[i].trim().length ? tokens[i].trim() : '_';
if (fieldNames.indexOf(token) >= 0) {
// need to make sure fieldNames are unique
fieldNames.push(token + i)
} else {
fieldNames.push(token)
}
}

lineNumber++;
continue;
}
Expand All @@ -142,6 +155,80 @@ function getLineTokens(content: string, options: ParsingOptions): ScalarObject[]
return result;
}

/**
 * Parses delimited text into a table: field names taken from the header line,
 * plus one array of values per data line.
 *
 * Processing order for every line returned by `nextLineTokens`:
 *  1. lines whose tokens are all empty are ignored;
 *  2. lines with a line number below `options.skipRows` are ignored;
 *  3. until a header is found, lines rejected by `options.skipUntil` are ignored;
 *  4. the first remaining line becomes the header (empty names become `_`,
 *     repeated names get their column index appended);
 *  5. data lines are read until `options.takeWhile` returns false.
 *
 * @param content - raw delimited text; null/undefined is treated as an empty string
 * @param options - parsing options; `delimiter`, `skipRows`, `skipUntil` and
 *   `takeWhile` are read here, and the whole object is forwarded to `getObjectElement`
 * @returns a table whose `fieldNames` and `rows` are always arrays (both empty
 *   when no header line was found)
 */
function parseLineTokens(content: string, options: ParsingOptions): ScallarTable {
  // Normalise BEFORE capturing in the context, so the loop condition below
  // never reads `.length` of null/undefined.
  const ctx = {
    content: content || '',
    currentIndex: 0
  } as ParsingContext;
  const delimiter = options.delimiter || ',';

  // Both arrays exist up front: previously `rows` was never created and the
  // first `rows.push` threw a TypeError.
  const result: ScallarTable = { fieldNames: [], rows: [] };
  let lineNumber = 0;
  let fieldNames: string[] | null = null;

  do {
    const tokens = nextLineTokens(ctx, delimiter);

    // a line is empty when every token is empty
    if (tokens.every(t => !t || !t.length)) {
      lineNumber++;
      continue;
    }

    // skip rows based on skipRows value
    if (lineNumber < options.skipRows) {
      lineNumber++;
      continue;
    }

    // skip rows based on skipUntil callback (only applies before the header is found)
    if (!fieldNames && typeof options.skipUntil === "function" && !options.skipUntil(tokens)) {
      lineNumber++;
      continue;
    }

    if (!fieldNames) {
      // fieldNames is used as the indicator of whether data rows handling has started
      const names: string[] = [];

      for (let i = 0; i < tokens.length; i++) {
        // if empty then _
        const trimmed = tokens[i].trim();
        const name = trimmed.length ? trimmed : '_';
        // field names must be unique, so a repeated name gets its column index appended
        names.push(names.indexOf(name) >= 0 ? name + i : name);
      }

      fieldNames = names;
      result.fieldNames = names;

      lineNumber++;
      continue;
    }

    if (typeof options.takeWhile === "function" && !options.takeWhile(tokens)) {
      break;
    }

    const header: string[] = fieldNames;
    const row = getObjectElement(header, tokens, options);

    if (row) {
      // no need for null or empty objects;
      // store the parsed values (in header order) rather than the raw string tokens
      result.rows.push(header.map(name => row[name]));
    }
    lineNumber++;
  }
  // `continue` inside do/while still evaluates this condition, so the index always advances
  while (++ctx.currentIndex < ctx.content.length)

  return result;
}


export function parseCsv(content: string, options?: ParsingOptions): ScalarObject[] {
content = content || '';

Expand All @@ -152,6 +239,18 @@ export function parseCsv(content: string, options?: ParsingOptions): ScalarObjec
return getLineTokens(content, options || new ParsingOptions());
}

/**
 * Parses CSV text and returns the result as a table (field names plus rows).
 *
 * Non-empty content is parsed into objects with `getLineTokens` and then
 * converted with `toTable`.
 *
 * @param content - raw CSV text; null/undefined is treated as an empty string
 * @param options - optional parsing options; a default `ParsingOptions` instance is used when omitted
 * @returns the parsed table; for empty content, a table with empty `fieldNames` and `rows`
 */
export function parseCsvToTable(content: string, options?: ParsingOptions): ScallarTable {
  content = content || '';

  if (!content.length) {
    // Return a well-formed empty table instead of `{}` cast to a table type,
    // so callers can safely read `fieldNames` and `rows`.
    const emptyTable: ScallarTable = { fieldNames: [], rows: [] };
    return emptyTable;
  }

  const items = getLineTokens(content, options || new ParsingOptions());
  return toTable(items);
}

export function toCsv(array: ScalarObject[], delimiter = ','): string {
array = array || [];

Expand Down Expand Up @@ -184,4 +283,3 @@ export function toCsv(array: ScalarObject[], delimiter = ','): string {

return lines.join('\n')
}

6 changes: 5 additions & 1 deletion src/utils/utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -269,7 +269,11 @@ export function getFieldDescriptions(items: Record<string, ScalarType>[]): Field
if (value === null || value === undefined) {
fDesc.isNullable = true
} else {
fDesc.dataTypeName = workoutDataType(value, fDesc.dataTypeName)
const newType = workoutDataType(value, fDesc.dataTypeName);
if(newType !== fDesc.dataTypeName) {
fDesc.dataTypeName = newType;
}

if ((fDesc.dataTypeName == DataTypeName.String || fDesc.dataTypeName == DataTypeName.LargeString) && String(value).length > (fDesc.maxSize || 0)) {
fDesc.maxSize = String(value).length;
}
Expand Down
4 changes: 4 additions & 0 deletions typedoc.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
{
"name": "datapipe js doc",
"inputFiles": ["./src/data-pipe.ts"]
}
1 change: 0 additions & 1 deletion types/array

This file was deleted.

0 comments on commit 17c8083

Please sign in to comment.