From dc47406d3a21aeed24b0540b9108d2a4bc0d3a9f Mon Sep 17 00:00:00 2001 From: Joxit Date: Tue, 30 Apr 2019 00:19:42 +0200 Subject: [PATCH] feat(extract.sh): Add the parameter sqlite This can extract and build at the same time with the parameter `build` to `wof_extract_sqlite.js`. Add max_old_space_size for node --- cmd/extract.sh | 6 +++- cmd/wof_extract_sqlite.js | 74 ++++++++++++++++++++++----------------- 2 files changed, 46 insertions(+), 34 deletions(-) mode change 100755 => 100644 cmd/wof_extract_sqlite.js diff --git a/cmd/extract.sh b/cmd/extract.sh index 2816b56d..4e001947 100755 --- a/cmd/extract.sh +++ b/cmd/extract.sh @@ -8,6 +8,10 @@ mkdir -p ${PLACEHOLDER_DATA}; echo "Creating extract at ${PLACEHOLDER_DATA}/wof.extract" -${DIR}/wof_extract.sh > ${PLACEHOLDER_DATA}/wof.extract; +if [ "$1" = "sqlite" ]; then + exec node --max_old_space_size=4096 ${DIR}/wof_extract_sqlite.js > ${PLACEHOLDER_DATA}/wof.extract; +else + ${DIR}/wof_extract.sh > ${PLACEHOLDER_DATA}/wof.extract; +fi echo 'Done!' diff --git a/cmd/wof_extract_sqlite.js b/cmd/wof_extract_sqlite.js old mode 100755 new mode 100644 index 620e0f87..849072fd --- a/cmd/wof_extract_sqlite.js +++ b/cmd/wof_extract_sqlite.js @@ -1,45 +1,53 @@ -#!/usr/bin/env node const path = require('path'); +const fs = require('fs'); const whosonfirst = require('pelias-whosonfirst'); const SQLiteStream = whosonfirst.SQLiteStream; const through = require('through2'); const Placeholder = require('../Placeholder'); -const WOF_DIR = process.env.WOF_DIR || '/data/whosonfirst-data/data'; -const layers = [ - 'ocean', - 'continent', - 'marinearea', - 'empire', - 'country', - 'dependency', - 'disputed', - 'macroregion', - 'region', - 'macrocounty', - 'county', - 'localadmin', - 'locality', - 'borough', - 'macrohood', - 'neighbourhood' -]; -const ph = new Placeholder(); -ph.load({ reset: true }); +const WOF_DIR = process.env.WOF_DIR || '/data/whosonfirst-data/sqlite'; + +const layers = fs.readFileSync(path.join(__dirname, 'placetype.filter'), 'utf-8') + .replace(/^.*\(/, '') // Removes all characters before the first parenthesis + .match(/[a-z]+/g); // Get the layer list + +const jq_filter = fs.readFileSync(path.join(__dirname, 'jq.filter'), 'utf-8') + .match(/test\("(.*)"\)/g) // Get all tests + .map(s => s.replace(/^[^"]+"/, '').replace(/"[^"]+$/, '')) // Get only regex part + .map(s => new RegExp(s)); // Transform it into JS RegExp + +const output = () => { + if (process.argv.length > 2 && process.argv[2] === 'build') { + const ph = new Placeholder(); + ph.load({ reset: true }); + return through.obj((row, _, next) => { + ph.insertWofRecord(row, next); + }, done => { + console.error('populate fts...'); + ph.populate(); + console.error('optimize...'); + ph.optimize(); + console.error('close...'); + ph.close(); + done(); + }); + } else { + return through.obj((row, _, next) => { + console.log(JSON.stringify(row)); + next(); + }); + } +}; new SQLiteStream( - path.join(WOF_DIR, 'sqlite', 'whosonfirst-data-latest.db'), + path.join(WOF_DIR, 'whosonfirst-data-latest.db'), SQLiteStream.findGeoJSONByPlacetype(layers) ) .pipe(whosonfirst.toJSONStream()) .pipe(through.obj((row, _, next) => { - ph.insertWofRecord(row.properties, next); - }, done => { - console.error('populate fts...'); - ph.populate(); - console.error('optimize...'); - ph.optimize(); - console.error('close...'); - ph.close(); - done(); - })); + Object.keys(row.properties) + .filter(key => !jq_filter.some(regex => regex.test(key))) + .forEach(key => delete row.properties[key]); + next(null, row.properties); + })) + .pipe(output());