Skip to content

Commit

Permalink
Merge pull request #225 from pelias/filter-out-urls
Browse files Browse the repository at this point in the history
feat(parser): Filter out URLs before sending to pelias/model
  • Loading branch information
orangejulius authored Jul 3, 2019
2 parents 26caeec + d4c5305 commit 68e6496
Show file tree
Hide file tree
Showing 2 changed files with 51 additions and 5 deletions.
18 changes: 14 additions & 4 deletions stream/parser.js
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,15 @@ function parser( precision ){
// decode polyline
var geojson = polyline.toGeoJSON(cols[0], precision);

const name = selectName(cols.slice(1));

// skip record if there is no valid name
if (!name) {
return next();
}

// select name
geojson.properties = { name: selectName(cols.slice(1)) };
geojson.properties = { name: name };

// compute bbox
geojson = extent.bboxify( geojson );
Expand All @@ -47,11 +54,14 @@ function parser( precision ){
// each connected road can have one or more names
// we select one name to be the default.
function selectName( names ){
// return the longest name
// filter out URLs
// then return the longest name
// @todo: can we improve this logic?
return names.reduce( function( a, b ){
return names.filter( function ( name) {
return !name.match(/^http(s)?:\/\//);
}).reduce( function( a, b ){
return a.length > b.length ? a : b;
});
}, '');
}

module.exports = parser;
38 changes: 37 additions & 1 deletion test/stream/parser.js
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ module.exports.tests.parse_simple = function(test, common) {
});
};

// row does not contain a valid name
// row does not follow polyline schema
module.exports.tests.parse_invalid = function(test, common) {
test('parse: invalid row', function(t) {

Expand Down Expand Up @@ -94,6 +94,42 @@ module.exports.tests.select_name = function(test, common) {
});
};

// URLs are filtered out
module.exports.tests.filter_url = function(test, common) {
test('parse: filter URL', function(t) {

var stream = parser(6);
var row = ['a','foo','foooooooo','https://this-website-should-not-be-the-name.com'].join('\0');
var expected = 'foooooooo';

function assert( actual, enc, next ){
t.deepEqual( actual.properties.name, expected, 'longest non-URL name selected' );
next();
}

stream.pipe( through.obj( assert, function(){ t.end(); } ));
stream.write(row);
stream.end();
});
};

module.exports.tests.filter_only_url = function(test, common) {
test('parse:document with only URL name is skipped', function(t) {

var stream = parser(6);
var row = ['polylineEncodedString','https://this-website-should-not-be-the-name.com'].join('\0');

function assert( actual, enc, next ){
t.fail('no valid rows'); // should not execute
next();
}

stream.pipe( through.obj( assert, function(){ t.end(); } ));
stream.write(row);
stream.end();
});
};

module.exports.all = function (tape, common) {

function test(name, testFunction) {
Expand Down

0 comments on commit 68e6496

Please sign in to comment.