Commit: ref

ddecrulle committed Jan 20, 2025
1 parent 895fe8d commit f4be0ab
Showing 5 changed files with 103 additions and 36 deletions.
33 changes: 27 additions & 6 deletions web/src/core/adapters/sqlOlap/sqlOlap.ts
@@ -14,7 +14,7 @@ import { assert } from "tsafe/assert";
import memoize from "memoizee";
import { same } from "evt/tools/inDepth/same";
import type { ReturnType } from "tsafe";
import { arrowTableToRowsAndColumns } from "./utils/arrowTableToRowsAndColumns";
import { arrowTableToColumns, arrowTableToRows } from "./utils/arrowTable";

export const createDuckDbSqlOlap = (params: {
getS3Config: () => Promise<
@@ -108,7 +108,6 @@ export const createDuckDbSqlOlap = (params: {
})(),
getRowsAndColumns: async ({ sourceUrl, fileType, rowsPerPage, page }) => {
const db = await sqlOlap.getConfiguredAsyncDuckDb();

const conn = await db.connect();

const sqlQuery = `SELECT * FROM ${(() => {
@@ -123,17 +122,39 @@
})()} LIMIT ${rowsPerPage} OFFSET ${rowsPerPage * (page - 1)}`;

const stmt = await conn.prepare(sqlQuery);

const res = await stmt.query();

const { rows, columns } = await arrowTableToRowsAndColumns({
table: res
});
const columns = await arrowTableToColumns({ table: res });
const rows = arrowTableToRows({ table: res, columns });

await conn.close();

return { rows, columns };
},
getRows: async ({ sourceUrl, fileType, rowsPerPage, page, columns }) => {
const db = await sqlOlap.getConfiguredAsyncDuckDb();
const conn = await db.connect();

const sqlQuery = `SELECT * FROM ${(() => {
switch (fileType) {
case "csv":
return `read_csv('${sourceUrl}')`;
case "parquet":
return `read_parquet('${sourceUrl}')`;
case "json":
return `read_json('${sourceUrl}')`;
}
})()} LIMIT ${rowsPerPage} OFFSET ${rowsPerPage * (page - 1)}`;

const stmt = await conn.prepare(sqlQuery);
const res = await stmt.query();

const rows = arrowTableToRows({ table: res, columns });

await conn.close();

return { rows };
},
getRowCount: memoize(
async ({ sourceUrl, fileType }) => {
if (fileType !== "parquet") {
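Aside, not part of the diff: a minimal sketch of the paged query string the adapter builds above. The read_csv/read_parquet/read_json dispatch and the LIMIT/OFFSET arithmetic mirror the hunk; the standalone helper name is hypothetical.

const buildPagedQuery = (params: {
    sourceUrl: string;
    fileType: "parquet" | "csv" | "json";
    rowsPerPage: number;
    page: number;
}) => {
    const { sourceUrl, fileType, rowsPerPage, page } = params;

    // Pick the DuckDB table function that matches the file type, as in the adapter.
    const from = (() => {
        switch (fileType) {
            case "csv":
                return `read_csv('${sourceUrl}')`;
            case "parquet":
                return `read_parquet('${sourceUrl}')`;
            case "json":
                return `read_json('${sourceUrl}')`;
        }
    })();

    // page is 1-based: page 1 -> OFFSET 0, page 2 -> OFFSET rowsPerPage, and so on.
    return `SELECT * FROM ${from} LIMIT ${rowsPerPage} OFFSET ${rowsPerPage * (page - 1)}`;
};
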
53 changes: 36 additions & 17 deletions web/src/core/adapters/sqlOlap/utils/arrowTable.ts
@@ -14,9 +14,10 @@ const getColumnType = async (type: DataType): Promise<Column["type"]> => {
return "number";
}

case Type.Float: {
case Type.Decimal:
case Type.Float:
return "number";
}

case Type.Utf8:
case Type.LargeUtf8:
return "string";
@@ -36,44 +37,62 @@ const getColumnType = async (type: DataType): Promise<Column["type"]> => {
case Type.FixedSizeBinary:
return "binary";

case Type.Duration:
case Type.FixedSizeList:
case Type.Map:
case Type.Union:
case Type.Struct:
case Type.List:
return "string";

case Type.Interval:
default:
throw new Error(
`Unsupported Arrow DataType: ${Type[type.typeId] || "Unknown"} (${type.typeId})`
);
}
};

export const arrowTableToRowsAndColumns = async (params: { table: Table<any> }) => {
export const arrowTableToColumns = async (params: { table: Table<any> }) => {
const { table } = params;

const rows: Record<string, any>[] = Array.from({ length: table.numRows }, () => ({}));
const columns: Column[] = [];
const columns = await Promise.all(
table.schema.fields.map(async field => {
const columnType = await getColumnType(field.type);
return {
name: field.name,
type: columnType,
rowType: field.type.toString()
};
})
);

return columns;
};

export const arrowTableToRows = (params: { table: Table<any>; columns: Column[] }) => {
const { table, columns } = params;

for (const field of table.schema.fields) {
const column = table.getChild(field.name);
assert(column !== null, `Column for field "${field.name}" not found.`);
const rows: Record<string, any>[] = Array.from({ length: table.numRows }, () => ({}));

const columnType = await getColumnType(field.type);
for (const column of columns) {
const field = table.schema.fields.find(field => field.name === column.name);
assert(field !== undefined, `Field "${column.name}" not found in schema.`);

columns.push({
name: field.name,
type: columnType
});
const vector = table.getChild(column.name);
assert(vector !== null, `Column vector for "${column.name}" not found.`);

const transformedColumn = convertVector({
vector: column,
expectedType: columnType
vector,
expectedType: column.type
});

for (let rowIndex = 0; rowIndex < table.numRows; rowIndex++) {
rows[rowIndex][field.name] = transformedColumn[rowIndex];
rows[rowIndex][column.name] = transformedColumn[rowIndex];
}
}

return { rows, columns };
return rows;
};

const convertVector = (params: { vector: Vector<any>; expectedType: Column["type"] }) => {
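Aside, not part of the diff: what the two split helpers yield for a small table. The `table` value is an assumed apache-arrow Table, the rowType strings are hedged examples of what DataType.toString() might print, and the sample row is purely illustrative.

const columns = await arrowTableToColumns({ table });
// e.g. [ { name: "city", type: "string", rowType: "Utf8" },
//        { name: "population", type: "number", rowType: "Int32" } ]

const rows = arrowTableToRows({ table, columns });
// e.g. [ { city: "Paris", population: 2148000 }, ... ]
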
8 changes: 8 additions & 0 deletions web/src/core/ports/SqlOlap.ts
@@ -12,9 +12,17 @@ export type SqlOlap = {
rowsPerPage: number;
page: number;
}) => Promise<{ rows: unknown[]; columns: Column[] }>;
getRows: (params: {
sourceUrl: string;
fileType: "parquet" | "csv" | "json";
rowsPerPage: number;
page: number;
columns: Column[];
}) => Promise<{ rows: unknown[] }>;
};

export type Column = {
name: string;
type: "string" | "number" | "bigint" | "boolean" | "date" | "dateTime" | "binary";
rowType: string;
};
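Aside, not part of the diff: a hedged usage sketch of the extended port. getRowsAndColumns infers the schema on the first query; getRows then reuses the cached columns so later pages skip column inference. The URL and page size below are placeholders.

const { rows: firstPage, columns } = await sqlOlap.getRowsAndColumns({
    sourceUrl: "https://example.org/data.parquet",
    fileType: "parquet",
    rowsPerPage: 25,
    page: 1
});

// Subsequent pages pass the cached columns back instead of re-deriving them.
const { rows: secondPage } = await sqlOlap.getRows({
    sourceUrl: "https://example.org/data.parquet",
    fileType: "parquet",
    rowsPerPage: 25,
    page: 2,
    columns
});
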
39 changes: 29 additions & 10 deletions web/src/core/usecases/dataExplorer/thunks.ts
@@ -133,18 +133,37 @@ const privateThunks = {
if (!getIsActive()) {
return;
}

const rowsOrErrorMessage = await sqlOlap
.getRowsAndColumns({
sourceUrl,
rowsPerPage: rowsPerPage + 1,
page,
fileType
})
.catch(error => {
const rowsOrErrorMessage = await (async () => {
try {
return isSourceUrlChanged
? await sqlOlap.getRowsAndColumns({
sourceUrl,
rowsPerPage: rowsPerPage + 1,
page,
fileType
})
: await (async () => {
assert(
data.state === "loaded",
"Data must be loaded to reuse columns"
);
const { rows } = await sqlOlap.getRows({
sourceUrl,
rowsPerPage: rowsPerPage + 1,
page,
fileType,
columns: data.columns
});
return {
rows,
columns: data.columns
};
})();
} catch (error) {
console.error(error);
return String(error);
});
}
})();

if (typeof rowsOrErrorMessage === "string") {
dispatch(actions.queryFailed({ errorMessage: rowsOrErrorMessage }));
6 changes: 3 additions & 3 deletions web/src/ui/shared/Datagrid/CustomDataGrid.tsx
@@ -64,10 +64,10 @@ export const CustomDataGrid = <R extends GridValidRowModel = any>(
{originalRenderCell ? (
originalRenderCell(params)
) : (
<div>{params.value.toString()}</div>
<span>{params.formattedValue}</span>
)}
<CopyToClipboardIconButton
textToCopy={params.value}
textToCopy={params.formattedValue}
className={css({
visibility: params.hasFocus
? "visible"
@@ -115,7 +115,7 @@ const { i18n } = declareComponentKeys<

export type I18n = typeof i18n;
const useStyles = tss.withName({ CustomDataGrid }).create(({ theme }) => ({
columnSeparator: { "&&&&&": { opacity: "1" } }, //Ensures the column separator remains visible (opacity 1) when a column header is selected. By default, MUI reduces the opacity to 0 because an outline is applied to the selected column header
columnSeparator: { "&&&&&&&": { opacity: "1" } }, //Ensures the column separator remains visible (opacity 1) when a column header is selected. By default, MUI reduces the opacity to 0 because an outline is applied to the selected column header
iconSeparator: {
"&&": { color: theme.colors.useCases.typography.textDisabled }
}
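Aside, not part of the diff: the repeated "&" is an emotion/tss-react pattern in which each ampersand repeats the rule's own generated class in the emitted selector, so "&&&&&&&" chains the class seven times, presumably to outrank a more specific MUI rule than the five-ampersand version did. A minimal illustration, with a hypothetical class hash:

import { css } from "@emotion/css";

// Compiles to roughly `.css-abc123.css-abc123.css-abc123 { opacity: 1; }`,
// i.e. the same class chained three times, raising specificity to (0,3,0).
const className = css({ "&&&": { opacity: 1 } });
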
