Skip to content

Commit

Permalink
added method for createFieldDescriptions
Browse files Browse the repository at this point in the history
  • Loading branch information
ppaska committed Jan 2, 2021
1 parent c99e90f commit 453df18
Show file tree
Hide file tree
Showing 8 changed files with 80 additions and 38 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -47,3 +47,4 @@ Thumbs.db

package-lock.json
*.tgz
debug.log
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,7 @@ when in browser.
- **parseNumberOrNull**(value: string | number): converts the value to a number or returns null
- **parseBooleanOrNull**(val: boolean | string): converts the value to a Boolean or returns null. It treats `['1', 'yes', 'true', 'on']` as true and `['0', 'no', 'false', 'off']` as false
- **deepClone** returns a deep copy of your object or array.
- **createFieldDescriptions**(items: Record<string, ScalarType>[]): FieldDescription[] : Generates field descriptions (first level only) from an array of items, which can then be used for a relational table definition. If any property is an object, JSON.stringify is used to calculate the maxSize field.

## License
A permissive [MIT](https://github.com/FalconSoft/dataPipe/blob/master/LICENSE) (c) - FalconSoft Ltd
Expand Down
24 changes: 16 additions & 8 deletions index.html
Original file line number Diff line number Diff line change
Expand Up @@ -9,22 +9,30 @@
<body>
<h4>Data Pipe testing page. Do not expect anything here! </h4>

<button onclick="onButtonClick()">++ Click ++ </button onclick="myFunction()">
<button onclick="onButtonClick()">++ Click ++ </button>

<script>
function onButtonClick() {
const csv1 = ["F1,F2,F3", `1,"T ""k"" c",32`].join('\n');
console.log('db', dp.dataPipe().fromCsv(csv1).toArray());
}

fetch("https://raw.githubusercontent.com/FalconSoft/sample-data/master/CSV/sample-testing-data-100.csv")
fetch("https://restcountries.eu/rest/v2/regionalbloc/eu")
.then(r => {
r.text().then(text => {
const items = dp.dataPipe().fromCsv(text).toArray();
console.log('==>', items, dp.dataPipe(items).getFieldDescriptions());
r.json().then(items => {
console.log('==>', items, dp.dataPipe(items).createFieldDescriptions());
});
})

}

onButtonClick();

// fetch("https://raw.githubusercontent.com/FalconSoft/sample-data/master/CSV/sample-testing-data-100.csv")
// .then(r => {
// r.text().then(text => {
// const items = dp.dataPipe().fromCsv(text).toArray();
// console.log('==>', items, dp.dataPipe(items).getFieldDescriptions());
// });
// })

// onButtonClick();

</script>
Expand Down
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "datapipe-js",
"version": "0.3.6",
"version": "0.3.7",
"description": "dataPipe is a JavaScript library for data manipulations, data transformations and data wrangling library inspired by LINQ (C#) and Pandas (Python)",
"main": "dist/cjs/data-pipe.js",
"module": "dist/esm/data-pipe.mjs",
Expand Down
15 changes: 9 additions & 6 deletions src/data-pipe.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import { sum, avg, count, min, max, first, last, countBy, mean, quantile, variance, median, stdev } from './array/stats';
import { sum, avg, count, min, max, first, last, mean, quantile, variance, median, stdev } from './array/stats';
import { Selector, Predicate, ParsingOptions, FieldDescription, PrimitiveType, TableDto, DataTypeName } from './types';
import { parseCsv, fromTable, toTable, getFieldDescriptions, toCsv } from './utils';
import { leftJoin, innerJoin, fullJoin, merge, groupBy, flatten, sort, pivot, transpose, toObject } from './array';
import { parseCsv, fromTable, toTable, createFieldDescriptions, toCsv } from './utils';
import { leftJoin, innerJoin, fullJoin, merge, groupBy, sort, pivot, transpose, toObject } from './array';


export class DataPipe {
Expand Down Expand Up @@ -298,8 +298,11 @@ export class DataPipe {

// end of transformation functions

getFieldDescriptions(): FieldDescription[] {
return getFieldDescriptions(this.data)
/**
* Generates field descriptions (first level only) for `this.data`,
* which can be used for a relational table definition.
* If any property is an object, JSON.stringify is used to calculate the maxSize field.
* Delegates to the `createFieldDescriptions` utility imported from './utils'.
* @returns the field descriptions returned by the utility.
*/
createFieldDescriptions(): FieldDescription[] {
return createFieldDescriptions(this.data)
}

}
22 changes: 11 additions & 11 deletions src/tests/utils-pipe.spec.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { parseDatetimeOrNull, parseNumberOrNull, getFieldDescriptions, dateToString } from "../utils";
import { parseDatetimeOrNull, parseNumberOrNull, createFieldDescriptions, dateToString } from "../utils";
import { FieldDescription, DataTypeName } from "../types";


Expand Down Expand Up @@ -44,27 +44,27 @@ describe('Test dataUtils', () => {
expect(parseNumberOrNull(NaN)).toBe(NaN);
})

it('getFieldDescriptions', () => {
it('createFieldDescriptions', () => {
const arr = [2, 4, 5].map(r => ({ val1: r }));
const ff = getFieldDescriptions(arr);
const ff = createFieldDescriptions(arr);
expect(ff.length).toBe(1);
expect(ff[0].fieldName).toBe('val1');
expect(ff[0].dataTypeName).toBe(DataTypeName.WholeNumber);
expect(ff[0].isNullable).toBe(false);
});

it('getFieldDescriptions2', () => {
it('createFieldDescriptions2', () => {
const arr = [2, '4', 5].map(r => ({ val1: r }));
const ff = getFieldDescriptions(arr);
const ff = createFieldDescriptions(arr);
expect(ff.length).toBe(1);
expect(ff[0].fieldName).toBe('val1');
expect(ff[0].dataTypeName).toBe(DataTypeName.WholeNumber);
expect(ff[0].isNullable).toBe(false);
});

it('getFieldDescriptions numbers check', () => {
it('createFieldDescriptions numbers check', () => {
const mapFn = (r: any): any => ({ val1: r });
const fdFn = (arr: any[]): FieldDescription => getFieldDescriptions(arr.map(mapFn))[0];
const fdFn = (arr: any[]): FieldDescription => createFieldDescriptions(arr.map(mapFn))[0];

expect(fdFn([2, 4, 5]).dataTypeName).toBe(DataTypeName.WholeNumber);
expect(fdFn([2, 4, 5]).isNullable).toBe(false);
Expand All @@ -77,9 +77,9 @@ describe('Test dataUtils', () => {
expect(fdFn([2, '4', 5]).dataTypeName).toBe(DataTypeName.WholeNumber);
});

it('getFieldDescriptions DateTime check', () => {
it('createFieldDescriptions DateTime check', () => {
const mapFn = (r: any): any => ({ val1: r });
const fdFn = (arr: any[]): FieldDescription => getFieldDescriptions(arr.map(mapFn))[0];
const fdFn = (arr: any[]): FieldDescription => createFieldDescriptions(arr.map(mapFn))[0];

expect(fdFn(['2019-01-01', '2019-01-02']).dataTypeName).toBe(DataTypeName.DateTime);
expect(fdFn(['2019-01-01', '2019-01-02']).isNullable).toBe(false);
Expand All @@ -94,9 +94,9 @@ describe('Test dataUtils', () => {
expect(fdFn([new Date(2001, 1, 1), 'NOT A DATE', new Date()]).dataTypeName).toBe(DataTypeName.String);
});

it('getFieldDescriptions size check', () => {
it('createFieldDescriptions size check', () => {
const mapFn = (r: any): any => ({ val1: r });
const fdFn = (arr: any[]): FieldDescription => getFieldDescriptions(arr.map(mapFn))[0];
const fdFn = (arr: any[]): FieldDescription => createFieldDescriptions(arr.map(mapFn))[0];

const longestText = 'Longest Text';
expect(fdFn(['Test1', 'Longer', longestText]).maxSize).toBe(longestText.length);
Expand Down
1 change: 1 addition & 0 deletions src/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ export interface FieldDescription {
fieldName: string;
isNullable: boolean;
isUnique: boolean;
isObject: boolean;
maxSize?: number;
dataTypeName?: DataTypeName;
}
52 changes: 40 additions & 12 deletions src/utils/helpers.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { Selector, FieldDescription, DataTypeName, ScalarType } from "../types";
import { Selector, FieldDescription, DataTypeName, ScalarType, PrimitiveType } from "../types";

/**
* Formats selected value to number.
Expand Down Expand Up @@ -129,7 +129,7 @@ export function parseDatetimeOrNull(value: string | Date, format: string | null
return validDateOrNull(correctYear(dt[2]), parseMonth(strTokens[1]), dt[0], dt[3] || 0, dt[4] || 0, dt[5] || 0, dt[6] || 0);
} else if (format.startsWith('yyyy-mm')) {
return validDateOrNull(dt[0], parseMonth(strTokens[1]), dt[2] || 1, dt[3] || 0, dt[4] || 0, dt[5] || 0, dt[6] || 0);
} else if(format.length){
} else if (format.length) {
throw new Error(`Unrecognized format '${format}'`);
}

Expand Down Expand Up @@ -283,21 +283,45 @@ export function workoutDataType(value: ScalarType, inType: DataTypeName | undefi
return undefined;
}

export function getFieldDescriptions(items: Record<string, ScalarType>[]): FieldDescription[] {
/**
* Generates field descriptions (first level only) that can be used for a relational table definition.
* If any property is an object, JSON.stringify is used to calculate the maxSize field.
* @param items array of items to generate the field descriptions from
*/
export function createFieldDescriptions(items: Record<string, ScalarType>[]): FieldDescription[] {

const resultMap: Record<string, FieldDescription> = Object.create(null);
const valuesMap: Record<string, Set<string>> = Object.create(null);

let index = 0;
for (const item of items) {

for (const [name, value] of Object.entries(item)) {
let fDesc = resultMap[name];
let valuesSet = valuesMap[name];

if (valuesSet === undefined) {
valuesSet = valuesMap[name] = new Set<string>();
}

if (fDesc === undefined) {
fDesc = {
index: index++,
fieldName: name,
isNullable: false
} as FieldDescription;
resultMap[name] = fDesc;
}

const strValue: PrimitiveType =
value instanceof Date ? dateToString(value)
: typeof value === 'object' ? JSON.stringify(value)
: String(value);

if (!fDesc.isObject) {
fDesc.isObject = typeof value === 'object';
}

if (value === null || value === undefined) {
fDesc.isNullable = true
} else {
Expand All @@ -306,19 +330,23 @@ export function getFieldDescriptions(items: Record<string, ScalarType>[]): Field
fDesc.dataTypeName = newType;
}

if ((fDesc.dataTypeName == DataTypeName.String || fDesc.dataTypeName == DataTypeName.LargeString) && String(value).length > (fDesc.maxSize || 0)) {
fDesc.maxSize = String(value).length;
if ((fDesc.dataTypeName == DataTypeName.String || fDesc.dataTypeName == DataTypeName.LargeString) && strValue.length > (fDesc.maxSize || 0)) {
fDesc.maxSize = strValue.length;
}
}

// const pValue: PrimitiveType = value instanceof Date ? dateToString(value) : value;
// if (fDesc.valuesMap.get(pValue) === undefined) {
// fDesc.valuesMap.set(pValue, 0);
// }

// fDesc.valuesMap?.set(pValue, (fDesc.valuesMap.get(pValue) || 0) + 1);
if (!valuesSet.has(strValue)) {
valuesSet.add(strValue);
}
}
}

return Object.values(resultMap);
const fields = Object.values(resultMap)
.sort((a: FieldDescription, b: FieldDescription) => (a.index > b.index) ? 1 : ((b.index > a.index) ? -1 : 0))

return fields
.map(r => {
r.isUnique = valuesMap[r.fieldName].size === items.length;
return r;
});
}

0 comments on commit 453df18

Please sign in to comment.