Skip to content

Commit

Permalink
feat(extract.sh): Add the parameter sqlite
Browse files Browse the repository at this point in the history
Passing the `build` parameter to `wof_extract_sqlite.js` extracts and builds
at the same time.
Add `--max_old_space_size` for node.
  • Loading branch information
Joxit committed Apr 29, 2019
1 parent cbb50a5 commit dc47406
Show file tree
Hide file tree
Showing 2 changed files with 46 additions and 34 deletions.
6 changes: 5 additions & 1 deletion cmd/extract.sh
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,10 @@ mkdir -p ${PLACEHOLDER_DATA};

echo "Creating extract at ${PLACEHOLDER_DATA}/wof.extract"

${DIR}/wof_extract.sh > ${PLACEHOLDER_DATA}/wof.extract;
# Pick the extractor: the node/sqlite one when the first argument is "sqlite",
# the shell one otherwise. In both cases stdout is redirected to wof.extract.
# ${1:-} keeps the comparison valid when no argument is passed, even under `set -u`.
if [ "${1:-}" = "sqlite" ]; then
  # Deliberately not exec'd: exec replaces this shell, so the closing
  # "Done!" message would never be printed. `|| exit $?` still propagates
  # node's failure status to the caller, as exec did.
  # --max_old_space_size=4096 raises node's old-space heap limit (value in MB).
  node --max_old_space_size=4096 "${DIR}/wof_extract_sqlite.js" > "${PLACEHOLDER_DATA}/wof.extract" || exit $?;
else
  "${DIR}/wof_extract.sh" > "${PLACEHOLDER_DATA}/wof.extract";
fi

echo 'Done!'
74 changes: 41 additions & 33 deletions cmd/wof_extract_sqlite.js
100755 → 100644
Original file line number Diff line number Diff line change
@@ -1,45 +1,53 @@
#!/usr/bin/env node
const path = require('path');
const fs = require('fs');
const whosonfirst = require('pelias-whosonfirst');
const SQLiteStream = whosonfirst.SQLiteStream;
const through = require('through2');
const Placeholder = require('../Placeholder');

// Base directory of the whosonfirst data: the WOF_DIR environment variable,
// or the default path below when that variable is unset or empty.
const WOF_DIR = process.env.WOF_DIR || '/data/whosonfirst-data/data';
// Placetype names passed to SQLiteStream.findGeoJSONByPlacetype() further down.
const layers = [
'ocean',
'continent',
'marinearea',
'empire',
'country',
'dependency',
'disputed',
'macroregion',
'region',
'macrocounty',
'county',
'localadmin',
'locality',
'borough',
'macrohood',
'neighbourhood'
];
// Module-level Placeholder instance, loaded with `reset: true`.
const ph = new Placeholder();
ph.load({ reset: true });
// NOTE(review): second declaration of WOF_DIR in this span — it looks like the
// added side of the diff replacing the declaration above (default directory
// changes from .../data to .../sqlite); confirm which one the file keeps.
const WOF_DIR = process.env.WOF_DIR || '/data/whosonfirst-data/sqlite';

// Placetypes to extract, parsed out of the sibling `placetype.filter` file.
// The greedy replace drops everything up to and including the last "(" of the
// first line (when that line has one); every remaining run of lowercase
// letters is then taken as one layer name.
const placetypeFilterFile = path.join(__dirname, 'placetype.filter');
const placetypeFilterSource = fs.readFileSync(placetypeFilterFile, 'utf-8');
const placetypeAlternation = placetypeFilterSource.replace(/^.*\(/, '');
const layers = placetypeAlternation.match(/[a-z]+/g);

// One RegExp per `test("...")` call found in the sibling `jq.filter` file.
// Property keys matching none of these are deleted from each row further down.
const jqFilterSource = fs.readFileSync(path.join(__dirname, 'jq.filter'), 'utf-8');
// Strips the leading `test("` and the trailing `")`, leaving only the pattern text.
const stripTestWrapper = call => call.replace(/^[^"]+"/, '').replace(/"[^"]+$/, '');
const jq_filter = jqFilterSource
  .match(/test\("(.*)"\)/g)
  .map(call => new RegExp(stripTestWrapper(call)));

/**
 * Creates the last stream of the pipeline.
 *
 * When the first CLI argument is `build`, every incoming row is inserted into
 * a freshly loaded Placeholder instance, and once the input is exhausted the
 * instance is populated, optimized and closed (each step logged to stderr).
 * Otherwise every row is printed to stdout as one JSON document per line.
 *
 * @returns {object} a through2 object-mode stream
 */
const output = () => {
  const isBuildMode = process.argv[2] === 'build';

  if (!isBuildMode) {
    // Extract mode: serialize each row to stdout.
    return through.obj((record, _encoding, callback) => {
      console.log(JSON.stringify(record));
      callback();
    });
  }

  const ph = new Placeholder();
  ph.load({ reset: true });

  // Per-row handler: insertWofRecord signals completion through `callback`.
  const insertRecord = (record, _encoding, callback) => {
    ph.insertWofRecord(record, callback);
  };

  // End-of-input handler: finish the store, then release the stream.
  const finalize = finished => {
    console.error('populate fts...');
    ph.populate();
    console.error('optimize...');
    ph.optimize();
    console.error('close...');
    ph.close();
    finished();
  };

  return through.obj(insertRecord, finalize);
};

// Pipeline: read records from the SQLite database for the selected placetypes,
// convert them with whosonfirst.toJSONStream(), then process each row in a
// through2 object stream.
// NOTE(review): this span appears to interleave the removed and the added
// lines of the diff (two database-path arguments, two through.obj bodies);
// confirm which lines belong to the committed file before relying on it.
new SQLiteStream(
path.join(WOF_DIR, 'sqlite', 'whosonfirst-data-latest.db'),
path.join(WOF_DIR, 'whosonfirst-data-latest.db'),
SQLiteStream.findGeoJSONByPlacetype(layers)
)
.pipe(whosonfirst.toJSONStream())
.pipe(through.obj((row, _, next) => {
// First body (presumably the removed one): insert the row's properties into
// the module-level Placeholder instance `ph`.
ph.insertWofRecord(row.properties, next);
}, done => {
// Second through.obj callback (presumably run once the input has ended):
// populate, optimize and close `ph`, logging each step to stderr.
console.error('populate fts...');
ph.populate();
console.error('optimize...');
ph.optimize();
console.error('close...');
ph.close();
done();
}));
// Second body (presumably the added one): delete every property whose key
// matches none of the jq_filter regexes, then pass the trimmed properties on.
Object.keys(row.properties)
.filter(key => !jq_filter.some(regex => regex.test(key)))
.forEach(key => delete row.properties[key]);
next(null, row.properties);
}))
// The final stage is whatever stream output() returns.
.pipe(output());

0 comments on commit dc47406

Please sign in to comment.