// https://github.com/whosonfirst/whosonfirst-properties/blob/main/properties/wof/concordances.json

/**
 * Normalise a single concordance identifier.
 *
 * Strings are trimmed; integers must be >= 1. Anything else (null, undefined,
 * arrays, objects, floats, zero/negative integers, blank strings) is rejected.
 *
 * @param {*} value - raw value read from the record properties
 * @returns {string|number|undefined} the cleaned value, or undefined when invalid
 */
function normalizeConcordanceValue(value) {
  if (_.isString(value)) {
    const trimmed = value.trim();
    // a blank identifier carries no information, so treat it as absent
    return trimmed.length > 0 ? trimmed : undefined;
  }
  if (_.isInteger(value) && value >= 1) { return value; }
  return undefined;
}

/**
 * Build the concordances map for a record.
 *
 * Values are taken from the 'wof:concordances' property first; the legacy
 * 'qs_pg:'-prefixed properties are only used to fill in keys which are not
 * already present.
 *
 * @param {Object} properties - the 'properties' member of a WOF record
 * @returns {Object} map of concordance key => string or positive integer id
 *                   (empty object when nothing valid was found)
 */
function getConcordances(properties) {
  const concordances = {};

  // validate types, map 'wof:concordances'
  const declared = _.get(properties, 'wof:concordances');
  if (_.isPlainObject(declared)) {
    // _.forOwn instead of _.each: _.each iterates any object which has a
    // numeric 'length' property as if it were an array.
    _.forOwn(declared, (raw, key) => {
      const name = key.trim();
      const id = normalizeConcordanceValue(raw);
      if (name.length === 0 || id === undefined) { return; }
      concordances[name] = id;
    });
  }

  // map other concordances which may exist with the 'qs_pg' prefix
  // but only when not present in 'wof:concordances'.
  // note: take care with underscore vs. colon delimiters
  const legacyProperties = {
    'qs_pg:gn_id': 'gn:id',
    'qs_pg:qs_id': 'qs:id'
  };
  _.forOwn(legacyProperties, (name, prop) => {
    if (_.has(concordances, name)) { return; }
    const id = normalizeConcordanceValue(_.get(properties, prop));
    if (id === undefined) { return; }
    concordances[name] = id;
  });

  return concordances;
}
// Verifies the `concordances` field produced by the extractFields stream.
tape('concordances', (test) => {
  // One entry per sub-test: its name, the `properties` of the single input
  // record, the expected `concordances` value and the assertion message.
  const cases = [
    {
      name: 'missing concordances',
      properties: {},
      expected: {},
      message: 'no-op'
    },
    {
      name: 'empty concordances',
      properties: { 'wof:concordances': {} },
      expected: {},
      message: 'no-op'
    },
    {
      name: 'wrong type concordances',
      properties: { 'wof:concordances': 'string' },
      expected: {},
      message: 'no-op'
    },
    {
      name: 'map valid concordances',
      properties: {
        'wof:concordances': {
          'alpha': 'bar',
          'beta': 2,
          'gamma': null,
          'delta': undefined,
          'epsilon': [{ 'foo': 'bar' }],
          'zeta': [ 'foo', 'bar' ],
          'eta': 2.2,
          'theta': 0
        }
      },
      expected: { alpha: 'bar', beta: 2 },
      message: 'mapped valid values'
    },
    {
      name: 'trim concordances',
      properties: { 'wof:concordances': { ' alpha ': ' bar ' } },
      expected: { alpha: 'bar' },
      message: 'trim keys/values'
    },
    {
      name: 'qs_pg prefixed concordances',
      properties: {
        'qs_pg:gn_id': ' bar ',
        'qs_pg:qs_id': 100,
        'qs_pg:qs_nn': ' bat '
      },
      expected: { 'gn:id': 'bar', 'qs:id': 100 },
      message: 'map qs_pg props'
    },
    {
      name: 'qs_pg prefer wof:concordances',
      properties: {
        'qs_pg:qs_id': 100,
        'wof:concordances': { 'qs:id': 200 }
      },
      expected: { 'qs:id': 200 },
      message: 'prefer wof:concordances'
    }
  ];

  cases.forEach(({ name, properties, expected, message }) => {
    test.test(name, function (t) {
      const input = [{ id: 54321, properties: properties }];

      test_stream(input, extractFields.create(), function (err, actual) {
        t.deepEqual(actual[0].concordances, expected, message);
        t.end();
      });
    });
  });

  test.end();
});
- hierarchies: [ { 'region_id': 421302191 } ] + hierarchies: [ { 'region_id': 421302191 } ], + concordances: {} } }); t.deepEqual(logger.getDebugMessages().length, 17);