diff --git a/.eslintrc b/.eslintrc index 11643ef..63611ad 100644 --- a/.eslintrc +++ b/.eslintrc @@ -8,5 +8,8 @@ "eslint:recommended", "plugin:@typescript-eslint/eslint-recommended", "plugin:@typescript-eslint/recommended" - ] -} + ], + "rules": { + "@typescript-eslint/no-explicit-any": "off" + } +} \ No newline at end of file diff --git a/package.json b/package.json index 4147237..168acb7 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "datapipe-js", - "version": "0.2.8", + "version": "0.2.9", "description": "dataPipe is a JavaScript library for data manipulations, data transformations and data wrangling library inspired by LINQ (C#) and Pandas (Python)", "main": "dist/data-pipe.min.js", "module": "dist/data-pipe.esm.js", @@ -21,7 +21,7 @@ "docs:md": "npx typedoc src --out md-docs --plugin typedoc-plugin-markdown", "deploy": "npm run docs && npx gh-pages -d docs", "dev": "npx rollup --config rollup.config.dev.js --watch", - "lint": "eslint . --ext .ts", + "lint": "npx eslint . --ext .ts", "lint-fix": "eslint . --ext .ts --fix" }, "repository": { diff --git a/src/_internals.ts b/src/_internals.ts index 2244ee8..c2dc66f 100644 --- a/src/_internals.ts +++ b/src/_internals.ts @@ -4,9 +4,9 @@ export function fieldSelector(input: string | string[] | Selector): if (typeof input === "function") { return input; } else if (typeof input === "string") { - return (item) => item[input]; + return (item): any => item[input]; } else if (Array.isArray(input)) { - return (item) => input.map(r => item[r]).join('|'); + return (item): any => input.map(r => item[r]).join('|'); } else { throw Error(`Unknown input. Can't create a fieldSelector`) } diff --git a/src/array/joins.ts b/src/array/joins.ts index c60f6ba..8175ebc 100644 --- a/src/array/joins.ts +++ b/src/array/joins.ts @@ -1,6 +1,69 @@ import { Selector } from "../types"; import { fieldSelector } from "../_internals"; +function verifyJoinArgs( + leftArray: any[], + rightArray: any[], + leftKeySelector: (item: any) => string, + rightKeySelector: (item: any) => string, + resultSelector: (leftItem: any, rightItem: any) => any +): void { + if (!leftArray || !Array.isArray(leftArray)) { + throw Error('leftArray is not provided or not a valid') + } + if (!rightArray || !Array.isArray(rightArray)) { + throw Error('rightArray is not provided or not a valid') + } + + if (typeof leftKeySelector !== 'function') { + throw Error('leftKeySelector is not provided or not a valid function') + } + + if (typeof rightKeySelector !== 'function') { + throw Error('rightKeySelector is not provided or not a valid function') + } + + if (typeof resultSelector !== 'function') { + throw Error('resultSelector is not provided or not a valid function') + } +} + +function leftOrInnerJoin( + isInnerJoin: boolean, + leftArray: any[], + rightArray: any[], + leftKey: string | string[] | Selector, + rightKey: string | string[] | Selector, + resultSelector: (leftItem: any, rightItem: any) => any +): any[] { + const leftKeySelector = fieldSelector(leftKey); + const rightKeySelector = fieldSelector(rightKey); + + verifyJoinArgs(leftArray, rightArray, leftKeySelector, rightKeySelector, resultSelector); + + // build a lookup map + const rightArrayMap = Object.create(null); + for (const item of rightArray) { + rightArrayMap[rightKeySelector(item)] = item; + } + + const result: any[] = []; + for (const leftItem of leftArray) { + const leftKey = leftKeySelector(leftItem); + const rightItem = rightArrayMap[leftKey] || null; + + if (isInnerJoin && !rightItem) { continue; } + + const resultItem = resultSelector(leftItem, rightItem); + + // if result is null then probably a left item was modified + result.push(resultItem || leftItem); + } + + return result; +} + + /** * leftJoin returns all elements from the left array (leftArray), and the matched elements from the right array (rightArray). * The result is NULL from the right side, if there is no match. @@ -114,7 +177,7 @@ export function merge( const targetKeySelector = fieldSelector(targetKey); const sourceKeySelector = fieldSelector(sourceKey); - verifyJoinArgs(targetArray, sourceArray, targetKeySelector, sourceKeySelector, () => { }); + verifyJoinArgs(targetArray, sourceArray, targetKeySelector, sourceKeySelector, () => false); // build a lookup maps for both arrays. // so, both of them have to be unique, otherwise it will flattern result @@ -140,66 +203,3 @@ export function merge( return targetArray; } - - -function verifyJoinArgs( - leftArray: any[], - rightArray: any[], - leftKeySelector: (item: any) => string, - rightKeySelector: (item: any) => string, - resultSelector: (leftItem: any, rightItem: any) => any -): void { - if (!leftArray || !Array.isArray(leftArray)) { - throw Error('leftArray is not provided or not a valid') - } - if (!rightArray || !Array.isArray(rightArray)) { - throw Error('rightArray is not provided or not a valid') - } - - if (typeof leftKeySelector !== 'function') { - throw Error('leftKeySelector is not provided or not a valid function') - } - - if (typeof rightKeySelector !== 'function') { - throw Error('rightKeySelector is not provided or not a valid function') - } - - if (typeof resultSelector !== 'function') { - throw Error('resultSelector is not provided or not a valid function') - } -} - -function leftOrInnerJoin( - isInnerJoin: boolean, - leftArray: any[], - rightArray: any[], - leftKey: string | string[] | Selector, - rightKey: string | string[] | Selector, - resultSelector: (leftItem: any, rightItem: any) => any -): any[] { - const leftKeySelector = fieldSelector(leftKey); - const rightKeySelector = fieldSelector(rightKey); - - verifyJoinArgs(leftArray, rightArray, leftKeySelector, rightKeySelector, resultSelector); - - // build a lookup map - const rightArrayMap = Object.create(null); - for (const item of rightArray) { - rightArrayMap[rightKeySelector(item)] = item; - } - - const result: any[] = []; - for (const leftItem of leftArray) { - const leftKey = leftKeySelector(leftItem); - const rightItem = rightArrayMap[leftKey] || null; - - if (isInnerJoin && !rightItem) { continue; } - - const resultItem = resultSelector(leftItem, rightItem); - - // if result is null then probably a left item was modified - result.push(resultItem || leftItem); - } - - return result; -} diff --git a/src/array/stats.ts b/src/array/stats.ts index 5a1c12b..e014550 100644 --- a/src/array/stats.ts +++ b/src/array/stats.ts @@ -2,6 +2,35 @@ import { Selector, Predicate } from "../types"; import { parseNumber } from "../utils"; import { isArrayEmptyOrNull } from "./utils"; +function fieldSelector(field?: string | Selector): Selector { + if (!field) { + return (item: any): any => item; + } + return typeof field === 'function' ? field as Selector : (item: any): any => item[String(field)]; +} + +function fieldComparator(field?: string | Selector): (a: any, b: any) => number { + return (a: any, b: any): number => { + const aVal = parseNumber(a, fieldSelector(field)); + const bVal = parseNumber(b, fieldSelector(field)); + + if (bVal === undefined) { + return 1; + } + + if (aVal === undefined) { + return -1; + } + + return aVal - bVal >= 0 ? 1 : -1; + } +} + +function getNumberValuesArray(array: any[], field?: string | Selector): number[] { + const elementSelector = fieldSelector(field); + return array.map(item => parseNumber(item, elementSelector)).filter(v => v !== undefined) as number[]; +} + /** * Sum of items in array. * @param array The array to process. @@ -246,32 +275,3 @@ export function median(array: any[], field?: Selector | string): number | null { array.sort(fieldComparator(field)); return quantile(getNumberValuesArray(array, field), 0.5); } - -function fieldComparator(field?: string | Selector): (a: any, b: any) => number { - return (a: any, b: any) => { - const aVal = parseNumber(a, fieldSelector(field)); - const bVal = parseNumber(b, fieldSelector(field)); - - if (bVal === undefined) { - return 1; - } - - if (aVal === undefined) { - return -1; - } - - return aVal - bVal >= 0 ? 1 : -1; - } -} - -function fieldSelector(field?: string | Selector): Selector { - if (!field) { - return (item: any) => item; - } - return typeof field === 'function' ? field as Selector : (item: any) => item[String(field)]; -} - -function getNumberValuesArray(array: any[], field?: string | Selector): number[] { - const elementSelector = fieldSelector(field); - return array.map(item => parseNumber(item, elementSelector)).filter(v => v !== undefined) as number[]; -} diff --git a/src/array/transform.ts b/src/array/transform.ts index 345c75e..4fda786 100644 --- a/src/array/transform.ts +++ b/src/array/transform.ts @@ -70,7 +70,7 @@ export function pivot(array: any, rowFields: string | string[],columnField: stri const groups: { [key: string]: any[] } = Object.create(null); columnValues = columnValues || []; - aggFunction = aggFunction || ((a: any[]) => sum(a)); + aggFunction = aggFunction || ((a: any[]): number | null => sum(a)); const elementSelector = fieldSelector(rowFields); diff --git a/src/array/utils.ts b/src/array/utils.ts index c45e8e9..007de62 100644 --- a/src/array/utils.ts +++ b/src/array/utils.ts @@ -1,40 +1,35 @@ import { parseNumber, parseDatetimeOrNull } from "../utils"; -/** - * Checks if array is empty or null or array at all - * @param array - */ -export function isArrayEmptyOrNull(array: any[]): boolean { - return !array || !Array.isArray(array) || !array.length; +function compareStrings(a: string, b: any): number { + return a.localeCompare(b); } -/** - * Sorts array. - * @param array The array to process. - * @param fields sorts order. - * @public - * @example - * sort(array, 'name ASC', 'age DESC'); - */ -export function sort(array: any[], ...fields: string[]) { +function compareNumbers(a: number, b: any): number { + const bNumVal = parseNumber(b); + if (bNumVal === undefined) { + return 1; + } - if (!array || !Array.isArray(array)) { throw Error('Array is not provided'); } + return a - bNumVal; +} - if(!fields?.length) { - // just a default sort - return array.sort(); +function compareObjects(a: any, b: any): number { + const aDate = parseDatetimeOrNull(a); + const bDate = parseDatetimeOrNull(b); + + if (!aDate && !bDate) { + return 0; } - const sortFields = fields.map(field => { - const asc = !field.endsWith(' DESC'); - return { - asc, - field: field.replace(asc ? /\sASC$/ : /\sDESC$/, '') - }; - }); + if (!aDate) { + return -1; + } - array.sort(comparator(sortFields)); - return array; + if (!bDate) { + return 1; + } + + return aDate.getTime() - bDate.getTime(); } function compare(a: any, b: any, { field, asc }: any): number { @@ -55,9 +50,9 @@ function compare(a: any, b: any, { field, asc }: any): number { return 0; } -function comparator(sortFields: any[]) { +function comparator(sortFields: any[]): (a: any, b: any) => number { if (sortFields.length) { - return (a: any, b: any) => { + return (a: any, b: any): number => { for (let i = 0, len = sortFields.length; i < len; i++) { const res = compare(a, b, sortFields[i]); @@ -68,36 +63,44 @@ function comparator(sortFields: any[]) { return 0; }; } -} -function compareStrings(a: string, b: any): number { - return a.localeCompare(b); + return (): number => 0; } -function compareNumbers(a: number, b: any): number { - const bNumVal = parseNumber(b); - if (bNumVal === undefined) { - return 1; - } - - return a - bNumVal; +/** + * Checks if array is empty or null or array at all + * @param array + */ +export function isArrayEmptyOrNull(array: any[]): boolean { + return !array || !Array.isArray(array) || !array.length; } -function compareObjects(a: any, b: any): number { - const aDate = parseDatetimeOrNull(a); - const bDate = parseDatetimeOrNull(b); +/** + * Sorts array. + * @param array The array to process. + * @param fields sorts order. + * @public + * @example + * sort(array, 'name ASC', 'age DESC'); + */ +export function sort(array: any[], ...fields: string[]): any[] { - if (!aDate && !bDate) { - return 0; - } + if (!array || !Array.isArray(array)) { throw Error('Array is not provided'); } - if (!aDate) { - return -1; + if (!fields?.length) { + // just a default sort + return array.sort(); } - if (!bDate) { - return 1; - } + const sortFields = fields.map(field => { + const asc = !field.endsWith(' DESC'); + return { + asc, + field: field.replace(asc ? /\sASC$/ : /\sDESC$/, '') + }; + }); - return aDate.getTime() - bDate.getTime(); + array.sort(comparator(sortFields)); + return array; } + diff --git a/src/tests/dsv-parser.spec.ts b/src/tests/dsv-parser.spec.ts index 195df15..f2b7aeb 100644 --- a/src/tests/dsv-parser.spec.ts +++ b/src/tests/dsv-parser.spec.ts @@ -18,6 +18,13 @@ describe('Dsv Parser specification', () => { expect(result[0].F3).toBe('Test, comma'); }) + it('simple numders and strings with spaces', () => { + const csv = ["F1,F2 ,F3", `1,2,"Test, comma"`].join('\n') + const result = parseCsv(csv); + expect(result.length).toBe(1); + expect(result[0].F2).toBe(2); + }) + it('Empty should be null', () => { const csv = ["F1,F2,F3", `1,,"Test, comma"`].join('\n') const result = parseCsv(csv); @@ -40,27 +47,27 @@ describe('Dsv Parser specification', () => { multi-line string`; const csv = ["F1\tF2\tF3", `1\t"${multiLineString}"\t"Test, comma"`].join('\n') - const result = parseCsv(csv, { delimiter: '\t' }); + const result = parseCsv(csv, { delimiter: '\t' } as ParsingOptions); expect(result.length).toBe(1); expect(result[0].F2).toBe(multiLineString); }) it('DSV with comma numbers', () => { const csv = ["F1\tF2\tF3", `1\t1,000.32\t"Test, comma"`].join('\n') - const result = parseCsv(csv, { delimiter: '\t' }); + const result = parseCsv(csv, { delimiter: '\t' } as ParsingOptions); expect(result.length).toBe(1); expect(result[0].F2).toBe(1000.32); }) it('skip rows', () => { const csv = ["", "", "F1\tF2\tF3", `1\t1,000.32\t"Test, comma"`].join('\n') - const result = parseCsv(csv, { delimiter: '\t', skipRows: 2 }); + const result = parseCsv(csv, { delimiter: '\t', skipRows: 2 } as ParsingOptions); expect(result.length).toBe(1); expect(result[0].F2).toBe(1000.32); }) it('skip rows not empty rows', () => { const csv = ["", " * not Empty *", "F1\tF2\tF3", `1\t1,000.32\t"Test, comma"`].join('\n') - const result = parseCsv(csv, { delimiter: '\t', skipRows: 2 }); + const result = parseCsv(csv, { delimiter: '\t', skipRows: 2 } as ParsingOptions); expect(result.length).toBe(1); expect(result[0].F2).toBe(1000.32); }) @@ -69,7 +76,7 @@ describe('Dsv Parser specification', () => { const csv = ["", " * not Empty *", "F1\tF2\tF3", `1\t1,000.32\t"Test, comma"`].join('\n') const options = new ParsingOptions(); options.delimiter = '\t'; - options.skipUntil = t => t && t.length > 1; + options.skipUntil = (t: string[]): boolean => t && t.length > 1; const result = parseCsv(csv, options); expect(result.length).toBe(1); @@ -77,7 +84,7 @@ describe('Dsv Parser specification', () => { }) it('empty values', () => { - const csv = ["", "", , "\t\t\t", "F1\tF2\tF3", `1\t1,000.32\t"Test, comma"`, "\t\t"].join('\n') + const csv = ["", "", "\t\t\t", "F1\tF2\tF3", `1\t1,000.32\t"Test, comma"`, "\t\t"].join('\n') const options = new ParsingOptions(); options.delimiter = '\t'; diff --git a/src/utils/dsv-parser.ts b/src/utils/dsv-parser.ts index 0b6a2c1..e3393dd 100644 --- a/src/utils/dsv-parser.ts +++ b/src/utils/dsv-parser.ts @@ -1,51 +1,6 @@ import { parseNumberOrNull, parseDatetimeOrNull } from "./utils"; import { ParsingOptions } from "../types"; -export function parseCsv(content: string, options?: ParsingOptions): any[] { - const result: any[] = []; - content = content || ''; - - if (!content.length) { - return result; - } - - return getLineTokens(content, options || new ParsingOptions()); -} - -export function toCsv(array: any[], delimiter = ','): string { - array = array || []; - - const result = ""; - const headers: string[] = []; - - // workout all headers - for (const item of array) { - for (const name in item) { - if (headers.indexOf(name) < 0) { headers.push(name); } - } - } - - // create a csv string - const lines = array.map(item => { - const values: string[] = []; - for (const name of headers) { - let value: any = item[name]; - if (value instanceof Date) { - value = parseDatetimeOrNull(value); - } else if (typeof value === "string" && value.indexOf(delimiter) >= 0) { - value = '"' + value + '"'; - } - - values.push(value || '') - } - return values.join(delimiter); - - }); - lines.unshift(headers.join(delimiter)) - - return lines.join('\n') -} - type ParsingContext = { content: string; currentIndex: number; @@ -65,7 +20,7 @@ function getObjectElement(fieldNames: string[], tokens: string[], options: Parsi } else if (options.booleanFields && options.booleanFields.indexOf(fieldName) >= 0) { value = !!value; } else { - const num = parseNumberOrNull(value); + const num = parseNumberOrNull(value as string); value = num || value; } } @@ -74,11 +29,40 @@ function getObjectElement(fieldNames: string[], tokens: string[], options: Parsi return obj; } +function nextLineTokens(context: ParsingContext, delimiter = ','): string[] { + const tokens: string[] = []; + let token = ''; + + do { + const currentChar = context.content[context.currentIndex]; + if (currentChar === '\n') { + if (context.content[context.currentIndex + 1] === '\r') { context.currentIndex++; } + break; + } + + if (token.length === 0 && currentChar === '"') { + while (context.content[++context.currentIndex] !== '"') { + token += context.content[context.currentIndex]; + } + + } else if (currentChar === delimiter) { + tokens.push(token); + token = ''; + } else { + token += currentChar; + } + } + while (++context.currentIndex < context.content.length) + + tokens.push(token); + return tokens; +} + function getLineTokens(content: string, options: ParsingOptions): string[][] { - const ctx = { + const ctx = { content: content, currentIndex: 0 - }; + } as ParsingContext; content = content || ''; const delimiter = options.delimiter || ','; @@ -111,7 +95,7 @@ function getLineTokens(content: string, options: ParsingOptions): string[][] { if (!fieldNames) { // fieldName is used as indicator on whether data rows handling started - fieldNames = tokens; + fieldNames = tokens.map(t => t.trim()); // field names can't have spaces lineNumber++; continue; } @@ -135,32 +119,46 @@ function getLineTokens(content: string, options: ParsingOptions): string[][] { return result; } -function nextLineTokens(context: ParsingContext, delimiter = ','): string[] { - const tokens: string[] = []; - let token = ''; +export function parseCsv(content: string, options?: ParsingOptions): any[] { + content = content || ''; - do { - const currentChar = context.content[context.currentIndex]; - if (currentChar === '\n') { - if (context.content[context.currentIndex + 1] === '\r') { context.currentIndex++; } - break; + if (!content.length) { + return []; + } + + return getLineTokens(content, options || new ParsingOptions()); +} + +export function toCsv(array: any[], delimiter = ','): string { + array = array || []; + + const headers: string[] = []; + + // workout all headers + for (const item of array) { + for (const name in item) { + if (headers.indexOf(name) < 0) { headers.push(name); } } + } - if (token.length === 0 && currentChar === '"') { - while (context.content[++context.currentIndex] !== '"') { - token += context.content[context.currentIndex]; + // create a csv string + const lines = array.map(item => { + const values: string[] = []; + for (const name of headers) { + let value: any = item[name]; + if (value instanceof Date) { + value = parseDatetimeOrNull(value); + } else if (typeof value === "string" && value.indexOf(delimiter) >= 0) { + value = '"' + value + '"'; } - } else if (currentChar === delimiter) { - tokens.push(token); - token = ''; - } else { - token += currentChar; + values.push(value || '') } - } - while (++context.currentIndex < context.content.length) + return values.join(delimiter); - tokens.push(token); - return tokens; + }); + lines.unshift(headers.join(delimiter)) + + return lines.join('\n') } diff --git a/src/utils/utils.ts b/src/utils/utils.ts index 95c4864..ad14f2c 100644 --- a/src/utils/utils.ts +++ b/src/utils/utils.ts @@ -89,7 +89,7 @@ export function parseDatetimeOrNull(value: string | Date): Date | null { return NaN; }; - const correctYear = (yy: number) => { + const correctYear = (yy: number): number => { if (yy < 100) { return yy < 68 ? yy + 2000 : yy + 1900; } else { @@ -163,7 +163,7 @@ export function deepClone(obj: any): any { } if (typeof obj === 'object') { - const clone = {}; + const clone = {} as any; for (const propName in obj) { const propValue = obj[propName];