Skip to content

Commit

Permalink
feat(import): add support for openaddresses geojsons format
Browse files Browse the repository at this point in the history
  • Loading branch information
Joxit committed Jan 15, 2021
1 parent d89f586 commit a838dce
Show file tree
Hide file tree
Showing 4 changed files with 43 additions and 13 deletions.
2 changes: 1 addition & 1 deletion lib/isValidCsvRecord.js
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ function streetContainsExclusionaryWord(record) {

/**
 * Check that a record carries every field required for import.
 *
 * A field counts as present when it is either:
 *  - a finite number (GeoJSON sources supply numeric values, and 0 is a
 *    legitimate coordinate, so truthiness must not be used here), or
 *  - any other value with a positive `length`, e.g. a non-empty string
 *    coming from a CSV row.
 *
 * @param {Object} record - parsed row keyed by upper-case column name
 * @returns {boolean} true when LON, LAT, NUMBER and STREET are all present
 */
function hasAllProperties(record) {
  return [ 'LON', 'LAT', 'NUMBER', 'STREET' ].every(function(prop) {
    const value = record[ prop ];

    // numeric values have no `length`; accept them unless they are NaN/Infinity
    if (typeof value === 'number') {
      return Number.isFinite(value);
    }

    // null/undefined/'' are rejected; non-empty strings are accepted
    return Boolean(value) && value.length > 0;
  });
}

Expand Down
4 changes: 2 additions & 2 deletions lib/parameters.js
Original file line number Diff line number Diff line change
Expand Up @@ -95,8 +95,8 @@ function getFullFileList(peliasConfig, args) {
const files = _.get(peliasConfig.imports.openaddresses, 'files', []);

if (_.isEmpty(files)) {
// no specific files listed, so return all .csv files
return glob.sync( args.dirPath + '/**/*.csv' );
// no specific files listed, so return all .csv and .geojson files
return glob.sync( args.dirPath + '/**/*.csv' ).concat(glob.sync( args.dirPath + '/**/*.geojson' ));
} else {
// otherwise return the requested files with full path
return files.map(function(file) {
Expand Down
49 changes: 39 additions & 10 deletions lib/streams/recordStream.js
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@ const path = require( 'path' );
const csvParse = require( 'csv-parse' );
const combinedStream = require( 'combined-stream' );
const _ = require( 'lodash' );
const through = require('through2');
const split = require('split2');

const logger = require( 'pelias-logger' ).get( 'openaddresses' );
const config = require('pelias-config').generate();
Expand Down Expand Up @@ -47,14 +49,6 @@ function createRecordStream( filePath, dirPath ){
badRecordCount: 0
};

const csvParser = csvParse({
trim: true,
skip_empty_lines: true,
relax_column_count: true,
relax: true,
columns: true
});

const contentHashStream = ContentHashStream.create();
const validRecordFilterStream = ValidRecordFilterStream.create();
const cleanupStream = CleanupStream.create();
Expand All @@ -65,14 +59,49 @@ function createRecordStream( filePath, dirPath ){
done();
};

return fs.createReadStream( filePath )
.pipe( csvParser )
return fileStreamDispatcher(fs.createReadStream( filePath ), filePath)
.pipe( contentHashStream )
.pipe( validRecordFilterStream )
.pipe( cleanupStream )
.pipe( documentStream );
}

/**
 * Turn a raw stream of line-delimited GeoJSON into a stream of record objects.
 *
 * The input is split into lines and each line is parsed as JSON. A record
 * with the keys NUMBER, STREET, LON, LAT, POSTCODE and UNIT is built from the
 * parsed object's `properties` and `geometry.coordinates`. A line that fails
 * to parse is logged through `logger.error` and produces no record.
 *
 * @param {Object} stream - readable stream to pipe from
 * @returns {Object} the object-mode stream at the end of the pipeline
 */
function geojsonStream(stream) {
  const lineToRecord = through.obj((line, _enc, next) => {
    let record;

    try {
      const feature = JSON.parse(line);
      record = {
        NUMBER: _.get(feature, 'properties.number'),
        STREET: _.get(feature, 'properties.street'),
        LON: _.get(feature, 'geometry.coordinates[0]'),
        LAT: _.get(feature, 'geometry.coordinates[1]'),
        POSTCODE: _.get(feature, 'properties.postcode'),
        UNIT: _.get(feature, 'properties.unit')
      };
    } catch (err) {
      logger.error(err);
    }

    // `record` stays undefined when parsing failed
    next(null, record);
  });

  const lines = stream.pipe(split());
  return lines.pipe(lineToRecord);
}

/**
 * Choose the parser for a file based on its extension.
 *
 * Paths ending in `.geojson` are handed to `geojsonStream`; every other path
 * is piped through a `csvParse` parser.
 *
 * @param {Object} stream - readable stream of the file's contents
 * @param {string} filePath - path of the file being read
 * @returns {Object} stream produced by the selected parser
 */
function fileStreamDispatcher(stream, filePath) {
  const isGeojson = filePath.endsWith('.geojson');

  if (!isGeojson) {
    const csvOptions = {
      trim: true,
      skip_empty_lines: true,
      relax_column_count: true,
      relax: true,
      columns: true
    };
    return stream.pipe(csvParse(csvOptions));
  }

  return geojsonStream(stream);
}

/*
* Create a single stream from many CSV or GeoJSON files
*/
Expand Down
1 change: 1 addition & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
"pelias-logger": "^1.2.1",
"pelias-model": "^7.1.0",
"pelias-wof-admin-lookup": "^7.3.0",
"split2": "^3.2.2",
"temp": "^0.9.1",
"through2": "^3.0.0",
"through2-filter": "^3.0.0",
Expand Down

0 comments on commit a838dce

Please sign in to comment.