Skip to content

Commit

Permalink
chore(parquet): GeoParquetLoader, collect the parquet loader zoo in o…
Browse files Browse the repository at this point in the history
…ne file (#2809)
  • Loading branch information
ibgreen authored Nov 28, 2023
1 parent 617d83d commit 77f3c1c
Show file tree
Hide file tree
Showing 14 changed files with 249 additions and 107 deletions.
25 changes: 13 additions & 12 deletions examples/website/geospatial/examples.ts
Original file line number Diff line number Diff line change
Expand Up @@ -47,18 +47,6 @@ export const EXAMPLES: Record<string, Record<string, Example>> = {
format: 'geoparquet',
data: `${LOADERS_URL}/modules/parquet/test/data/geoparquet/airports.parquet`,
viewState: {...VIEW_STATE, longitude: -4.65, latitude: -29.76, zoom: 1.76}
},
NZBuildingFootprints: {
format: 'geoparquet',
data: 'https://storage.googleapis.com/open-geodata/linz-examples/nz-building-outlines.parquet',
viewState: {
latitude: 47.65,
longitude: 7,
zoom: 4.5,
maxZoom: 20,
maxPitch: 89,
bearing: 0
}
}
},
GeoJSON: {
Expand Down Expand Up @@ -220,5 +208,18 @@ function getGeoParquetTestExamples() {
}
}

// Example entry: GeoParquet building outlines loaded from a remote URL.
// NOTE(review): latitude 47.65 / longitude 7 does not look like a New Zealand location,
// although the data URL names NZ building outlines — confirm the intended initial view.
GeoParquet.NZBuildingFootprints = {
format: 'geoparquet',
data: 'https://storage.googleapis.com/open-geodata/linz-examples/nz-building-outlines.parquet',
viewState: {
latitude: 47.65,
longitude: 7,
zoom: 4.5,
maxZoom: 20,
maxPitch: 89,
bearing: 0
}
}

return GeoParquet;
}
67 changes: 7 additions & 60 deletions modules/parquet/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,74 +3,21 @@

export {Buffer} from './polyfills/buffer/install-buffer-polyfill';

import type {LoaderWithParser} from '@loaders.gl/loader-utils';
import type {
ObjectRowTable,
ObjectRowTableBatch,
ColumnarTable,
ColumnarTableBatch,
GeoJSONTable,
GeoJSONTableBatch
} from '@loaders.gl/schema';

// import {ArrowTable, ArrowTableBatch} from '@loaders.gl/arrow';

// ParquetLoader

import {BlobFile} from '@loaders.gl/loader-utils';
import {
ParquetLoader as ParquetWorkerLoader,
ParquetColumnarLoader as ParquetColumnarWorkerLoader,
ParquetLoaderOptions
export {
ParquetWorkerLoader,
ParquetLoader,
GeoParquetWorkerLoader,
GeoParquetLoader,
ParquetColumnarWorkerLoader,
ParquetColumnarLoader
} from './parquet-loader';
import {parseParquetFile, parseParquetFileInBatches} from './lib/parsers/parse-parquet-to-rows';
import {
parseParquetFileInColumns,
parseParquetFileInColumnarBatches
} from './lib/parsers/parse-parquet-to-columns';

// import type {ParquetWasmLoaderOptions} from './lib/wasm/parse-parquet-wasm';
// import {parseParquetWasm} from './lib/wasm/parse-parquet-wasm';
// import {ParquetWasmLoader as ParquetWasmWorkerLoader} from './parquet-wasm-loader';

export {ParquetWorkerLoader};
// export {ParquetWasmWorkerLoader};

/** ParquetJS table loader */
export const ParquetLoader: LoaderWithParser<
ObjectRowTable | GeoJSONTable,
ObjectRowTableBatch | GeoJSONTableBatch,
ParquetLoaderOptions
> = {
...ParquetWorkerLoader,
parse(arrayBuffer: ArrayBuffer, options?: ParquetLoaderOptions) {
return parseParquetFile(new BlobFile(arrayBuffer), options);
},
parseFile: parseParquetFile,
parseFileInBatches: parseParquetFileInBatches
};

/** ParquetJS table loader */
export const ParquetColumnarLoader: LoaderWithParser<
ColumnarTable,
ColumnarTableBatch,
ParquetLoaderOptions
> = {
...ParquetColumnarWorkerLoader,
parse(arrayBuffer: ArrayBuffer, options?: ParquetLoaderOptions) {
return parseParquetFileInColumns(new BlobFile(arrayBuffer), options);
},
parseFile: parseParquetFileInColumns,
parseFileInBatches: parseParquetFileInColumnarBatches
};

// export const ParquetWasmLoader: LoaderWithParser<ArrowTable, never, ParquetWasmLoaderOptions> = {
// ...ParquetWasmWorkerLoader,
// parse: parseParquetWasm
// };

// ParquetWriter

export {ParquetWriter as _ParquetWriter} from './parquet-writer';
// export {ParquetWasmWriter} from './parquet-wasm-writer';

Expand Down
3 changes: 3 additions & 0 deletions modules/parquet/src/lib/parsers/get-parquet-schema.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
// loaders.gl, MIT license
// Copyright (c) vis.gl contributors

// loaders.gl
import {Schema} from '@loaders.gl/schema';
import {ParquetReader} from '../../parquetjs/parser/parquet-reader';
Expand Down
87 changes: 87 additions & 0 deletions modules/parquet/src/lib/parsers/parse-geoparquet.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
// loaders.gl, MIT license
// Copyright (c) vis.gl contributors

import type {ReadableFile} from '@loaders.gl/loader-utils';
import type {
GeoJSONTable,
GeoJSONTableBatch,
ObjectRowTable,
ObjectRowTableBatch
} from '@loaders.gl/schema';
import {convertWKBTableToGeoJSON} from '@loaders.gl/gis';
import {WKTLoader, WKBLoader} from '@loaders.gl/wkt';

import type {ParquetLoaderOptions} from '../../parquet-loader';

import {parseParquetFile, parseParquetFileInBatches} from './parse-parquet';

/**
 * Parse a (Geo)Parquet file into a single table.
 *
 * The file is first parsed into an object-row table by `parseParquetFile`; that table is
 * then converted to the shape requested in `options.parquet.shape`.
 *
 * @param file - file to read
 * @param options - loader options; `options.parquet.shape` selects the output shape
 * @returns the object-row table, or a GeoJSON table when 'geojson-table' is requested and
 *   the geometry conversion succeeds
 * @throws Error if the requested shape is neither 'object-row-table' nor 'geojson-table'
 */
export async function parseGeoParquetFile(
  file: ReadableFile,
  options?: ParquetLoaderOptions
): Promise<ObjectRowTable | GeoJSONTable> {
  // Always ask the underlying parser for plain rows; the shape the caller requested is
  // applied below. NOTE(review): the row parser's own shape conversion appears to accept
  // only 'object-row-table', so forwarding 'geojson-table' unchanged would make it throw
  // before the geometry conversion can run — confirm against './parse-parquet'.
  const table = await parseParquetFile(file, {
    ...options,
    parquet: {...options?.parquet, shape: 'object-row-table'}
  });
  return convertTable(table, options?.parquet?.shape);
}

/**
 * Parse a (Geo)Parquet file in batches.
 *
 * Batches are produced by `parseParquetFileInBatches` as object-row batches; each batch is
 * then converted to the shape requested in `options.parquet.shape`.
 *
 * @param file - file to read
 * @param options - loader options; `options.parquet.shape` selects the output shape
 * @returns an async iterable of object-row batches, or GeoJSON batches when
 *   'geojson-table' is requested and the geometry conversion succeeds
 * @throws Error if the requested shape is neither 'object-row-table' nor 'geojson-table'
 */
export async function* parseGeoParquetFileInBatches(
  file: ReadableFile,
  options?: ParquetLoaderOptions
): AsyncIterable<ObjectRowTableBatch | GeoJSONTableBatch> {
  // The requested shape does not change between batches, so read it once.
  const shape = options?.parquet?.shape;

  // Always ask the underlying parser for plain rows; the requested shape is applied per
  // batch below. NOTE(review): the row parser's own shape conversion appears to accept
  // only 'object-row-table', so forwarding 'geojson-table' unchanged would make it throw
  // — confirm against './parse-parquet'.
  const tableBatches = parseParquetFileInBatches(file, {
    ...options,
    parquet: {...options?.parquet, shape: 'object-row-table'}
  });

  for await (const batch of tableBatches) {
    yield convertBatch(batch, shape);
  }
}

/**
 * Convert a parsed object-row table to the requested output shape.
 *
 * @param objectRowTable - table produced by the row parser
 * @param shape - requested output shape
 * @returns the input table for 'object-row-table'; for 'geojson-table' the converted
 *   GeoJSON table, or the unmodified input table if the conversion throws
 * @throws Error if `shape` is missing or not one of the supported values
 */
function convertTable(
  objectRowTable: ObjectRowTable,
  shape?: 'object-row-table' | 'geojson-table'
): ObjectRowTable | GeoJSONTable {
  switch (shape) {
    case 'object-row-table':
      return objectRowTable;

    case 'geojson-table':
      try {
        return convertWKBTableToGeoJSON(objectRowTable, objectRowTable.schema!, [
          WKTLoader,
          WKBLoader
        ]);
      } catch {
        // Deliberate best-effort: if the geometry conversion fails, hand back the plain
        // rows instead of failing the whole load.
        return objectRowTable;
      }

    default:
      // Include the offending value so the message is not empty when shape is undefined.
      throw new Error(`Unsupported GeoParquet table shape: ${String(shape)}`);
  }
}

/**
 * Convert a parsed object-row batch to the requested output shape.
 *
 * @param objectRowBatch - batch produced by the batched row parser
 * @param shape - requested output shape
 * @returns the input batch for 'object-row-table'; for 'geojson-table' a batch that keeps
 *   the input batch's fields and overlays the converted GeoJSON table's fields, or the
 *   unmodified input batch if the conversion throws
 * @throws Error if `shape` is missing or not one of the supported values
 */
function convertBatch(
  objectRowBatch: ObjectRowTableBatch,
  shape?: 'object-row-table' | 'geojson-table'
): ObjectRowTableBatch | GeoJSONTableBatch {
  switch (shape) {
    case 'object-row-table':
      return objectRowBatch;

    case 'geojson-table':
      try {
        const geojsonTable = convertWKBTableToGeoJSON(objectRowBatch, objectRowBatch.schema!, [
          WKTLoader,
          WKBLoader
        ]);
        // Keep the batch's own fields; fields of the converted table win on conflict.
        return {
          ...objectRowBatch,
          ...geojsonTable
        };
      } catch {
        // Deliberate best-effort: if the geometry conversion fails, hand back the plain
        // batch instead of failing the whole stream.
        return objectRowBatch;
      }

    default:
      // Include the offending value so the message is not empty when shape is undefined.
      throw new Error(`Unsupported GeoParquet batch shape: ${String(shape)}`);
  }
}
6 changes: 6 additions & 0 deletions modules/parquet/src/lib/parsers/parse-parquet-to-columns.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,9 @@ import {materializeColumns} from '../../parquetjs/schema/shred';
import {getSchemaFromParquetReader} from './get-parquet-schema';
import {installBufferPolyfill} from '../../polyfills/buffer';

/**
* @deprecated
*/
export async function parseParquetFileInColumns(
file: ReadableFile,
options?: ParquetLoaderOptions
Expand All @@ -26,6 +29,9 @@ export async function parseParquetFileInColumns(
throw new Error('empty table');
}

/**
* @deprecated
*/
export async function* parseParquetFileInColumnarBatches(
file: ReadableFile,
options?: ParquetLoaderOptions
Expand Down
Original file line number Diff line number Diff line change
@@ -1,25 +1,25 @@
// import type {LoaderWithParser, Loader, LoaderOptions} from '@loaders.gl/loader-utils';
// import {ColumnarTableBatch} from '@loaders.gl/schema';
// loaders.gl, MIT license
// Copyright (c) vis.gl contributors

import type {ReadableFile} from '@loaders.gl/loader-utils';
import type {
GeoJSONTable,
GeoJSONTableBatch,
ObjectRowTable,
ObjectRowTableBatch
} from '@loaders.gl/schema';
import {convertWKBTableToGeoJSON} from '@loaders.gl/gis';
import {WKTLoader, WKBLoader} from '@loaders.gl/wkt';
import type {ObjectRowTable, ObjectRowTableBatch} from '@loaders.gl/schema';

import type {ParquetLoaderOptions} from '../../parquet-loader';
import type {ParquetRow} from '../../parquetjs/schema/declare';
import {ParquetReader} from '../../parquetjs/parser/parquet-reader';
import {getSchemaFromParquetReader} from './get-parquet-schema';
import {installBufferPolyfill} from '../../polyfills/buffer';

/**
* Parse a parquet file using parquetjs
* @param file
* @param options
* @returns
*/
export async function parseParquetFile(
file: ReadableFile,
options?: ParquetLoaderOptions
): Promise<ObjectRowTable | GeoJSONTable> {
): Promise<ObjectRowTable> {
installBufferPolyfill();

const reader = new ParquetReader(file, {
Expand Down Expand Up @@ -47,10 +47,15 @@ export async function parseParquetFile(
return convertTable(objectRowTable, shape);
}

/**
* Parse a parquet file in batches using parquetjs
* @param file
* @param options
*/
export async function* parseParquetFileInBatches(
file: ReadableFile,
options?: ParquetLoaderOptions
): AsyncIterable<ObjectRowTableBatch | GeoJSONTableBatch> {
): AsyncIterable<ObjectRowTableBatch> {
const reader = new ParquetReader(file, {
preserveBinary: options?.parquet?.preserveBinary
});
Expand Down Expand Up @@ -78,21 +83,11 @@ export async function* parseParquetFileInBatches(
function convertTable(
objectRowTable: ObjectRowTable,
shape?: 'object-row-table' | 'geojson-table'
): ObjectRowTable | GeoJSONTable {
): ObjectRowTable {
switch (shape) {
case 'object-row-table':
return objectRowTable;

case 'geojson-table':
try {
return convertWKBTableToGeoJSON(objectRowTable, objectRowTable.schema!, [
WKTLoader,
WKBLoader
]);
} catch (error) {
return objectRowTable;
}

default:
throw new Error(shape);
}
Expand Down
3 changes: 3 additions & 0 deletions modules/parquet/src/lib/wasm/encode-parquet-wasm.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
// loaders.gl, MIT license
// Copyright (c) vis.gl contributors

import type {WriterOptions} from '@loaders.gl/loader-utils';
import type {ArrowTable} from '@loaders.gl/arrow';

Expand Down
3 changes: 3 additions & 0 deletions modules/parquet/src/lib/wasm/load-wasm-browser.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
// loaders.gl, MIT license
// Copyright (c) vis.gl contributors

import * as wasmEsm from 'parquet-wasm/esm2/arrow1';

let cached: typeof wasmEsm | null = null;
Expand Down
3 changes: 3 additions & 0 deletions modules/parquet/src/lib/wasm/load-wasm-node.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
// loaders.gl, MIT license
// Copyright (c) vis.gl contributors

import * as wasmNode from 'parquet-wasm/node/arrow1';

export async function loadWasm(wasmUrl?: string) {
Expand Down
3 changes: 3 additions & 0 deletions modules/parquet/src/lib/wasm/load-wasm.ts
Original file line number Diff line number Diff line change
@@ -1 +1,4 @@
// loaders.gl, MIT license
// Copyright (c) vis.gl contributors

export {loadWasm} from './load-wasm-node';
3 changes: 3 additions & 0 deletions modules/parquet/src/lib/wasm/parse-parquet-wasm.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
// loaders.gl, MIT license
// Copyright (c) vis.gl contributors

// eslint-disable
import type {LoaderOptions} from '@loaders.gl/loader-utils';
import type {ArrowTable} from '@loaders.gl/arrow';
Expand Down
Loading

0 comments on commit 77f3c1c

Please sign in to comment.