From 18b3c138c7b62c449641b165542ce0066aad5dcc Mon Sep 17 00:00:00 2001 From: Julian Simioni Date: Tue, 21 Jan 2025 17:13:06 -0500 Subject: [PATCH 1/6] Rename test suite We don't have any firm conventions, but having an endpoint name in the suite name seems to make sense. --- ...ic_housenumber.json => search_alphanumeric_housenumber.json} | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) rename test_cases/{alphanumeric_housenumber.json => search_alphanumeric_housenumber.json} (98%) diff --git a/test_cases/alphanumeric_housenumber.json b/test_cases/search_alphanumeric_housenumber.json similarity index 98% rename from test_cases/alphanumeric_housenumber.json rename to test_cases/search_alphanumeric_housenumber.json index 58fb8da..62d8c36 100644 --- a/test_cases/alphanumeric_housenumber.json +++ b/test_cases/search_alphanumeric_housenumber.json @@ -1,5 +1,5 @@ { - "name": "Alphanumeric housenumbers", + "name": "Search Alphanumeric housenumbers", "priorityThresh": 5, "normalizers": { "name": [ From 031d26b9d0ddc84fe932ec541c54344b72f10ba6 Mon Sep 17 00:00:00 2001 From: Julian Simioni Date: Tue, 21 Jan 2025 17:14:42 -0500 Subject: [PATCH 2/6] Set priorityThresh 1 For most search queries, the client reading the results may be doing so programmatically. Not getting the right result sorted first is almost as bad as not returning it at all. 
--- test_cases/search_alphanumeric_housenumber.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test_cases/search_alphanumeric_housenumber.json b/test_cases/search_alphanumeric_housenumber.json index 62d8c36..047c86b 100644 --- a/test_cases/search_alphanumeric_housenumber.json +++ b/test_cases/search_alphanumeric_housenumber.json @@ -1,6 +1,6 @@ { "name": "Search Alphanumeric housenumbers", - "priorityThresh": 5, + "priorityThresh": 1, "normalizers": { "name": [ "toLowerCase" From 7e9812c1878e05545b682ec140f9fdeb8b13e378 Mon Sep 17 00:00:00 2001 From: Julian Simioni Date: Tue, 21 Jan 2025 17:22:25 -0500 Subject: [PATCH 3/6] Strip punctuation in housenumber for more robust matching --- test_cases/search_alphanumeric_housenumber.json | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/test_cases/search_alphanumeric_housenumber.json b/test_cases/search_alphanumeric_housenumber.json index 047c86b..04e877e 100644 --- a/test_cases/search_alphanumeric_housenumber.json +++ b/test_cases/search_alphanumeric_housenumber.json @@ -6,7 +6,8 @@ "toLowerCase" ], "housenumber": [ - "toLowerCase" + "toLowerCase", + "stripPunctuation" ], "street": [ "toLowerCase" From 9bef42ad51859d480fa18b771247fd89b4908afe Mon Sep 17 00:00:00 2001 From: Julian Simioni Date: Tue, 21 Jan 2025 17:23:19 -0500 Subject: [PATCH 4/6] Fix street name in Belarusian address query I don't understand any Cyrillic, but it looks like this test has not had a streetname that matches the name in OSM for a while. 
This appears to be the expected record: https://www.openstreetmap.org/way/431615462 --- test_cases/search_alphanumeric_housenumber.json | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/test_cases/search_alphanumeric_housenumber.json b/test_cases/search_alphanumeric_housenumber.json index 04e877e..63c58f8 100644 --- a/test_cases/search_alphanumeric_housenumber.json +++ b/test_cases/search_alphanumeric_housenumber.json @@ -90,7 +90,7 @@ "id": 3, "status": "pass", "in": { - "text": "15А Комсомольская улица minsk belarus" + "text": "15А Камсамольская вуліца minsk belarus" }, "description": "Belarusian address written the localized way. Note the Cyrillic character in the query and expected result", "issue": "https://github.com/pelias/pelias/issues/833", @@ -98,7 +98,7 @@ "properties": [ { "housenumber": "15А", - "street": "Комсомольская улица", + "street": "Камсамольская вуліца", "region": "Minsk", "country_a": "BLR" } @@ -109,7 +109,7 @@ "id": "3.1", "status": "pass", "in": { - "text": "Комсомольская улица 15А minsk belarus" + "text": "Камсамольская вуліца 15А minsk belarus" }, "description": "Belarusian address written the 'American' way. 
Note the Cyrillic character in the query and expected result", "issue": "https://github.com/pelias/pelias/issues/833", @@ -117,7 +117,7 @@ "properties": [ { "housenumber": "15А", - "street": "Комсомольская улица", + "street": "Камсамольская вуліца", "region": "Minsk", "country_a": "BLR" } From eaf63740b4fd93e3e52522d797a7a6b8a732be2d Mon Sep 17 00:00:00 2001 From: Julian Simioni Date: Tue, 21 Jan 2025 17:29:31 -0500 Subject: [PATCH 5/6] Add another alphanumeric housenumber test https://github.com/pelias/pelias/issues/810 --- .../search_alphanumeric_housenumber.json | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/test_cases/search_alphanumeric_housenumber.json b/test_cases/search_alphanumeric_housenumber.json index 63c58f8..ab702ab 100644 --- a/test_cases/search_alphanumeric_housenumber.json +++ b/test_cases/search_alphanumeric_housenumber.json @@ -178,6 +178,24 @@ } ] } + }, + { + "id": 6, + "status": "pass", + "in": { + "text": "kinkerstraat 175F, amsterdam" + }, + "description": "Dutch address in a building with housenumbers like 175A, 175B, 175C, etc. 
Without proper sorting the desired result can easily be drowned out", + "expected": { + "properties": [ + { + "housenumber": "175F", + "street": "Kinkerstraat", + "locality": "Amsterdam", + "country_a": "NLD" + } + ] + } } ] } From 7807a4d334d40c5c0ddf0e1d4e345f48622a049e Mon Sep 17 00:00:00 2001 From: Peter Johnson Date: Wed, 22 Jan 2025 11:32:31 +0100 Subject: [PATCH 6/6] additional alphanumeric housenumber tests https://github.com/pelias/pelias/issues/810#issuecomment-2606560338 --- .../search_alphanumeric_housenumber.json | 89 +++++++++++++++++++ 1 file changed, 89 insertions(+) diff --git a/test_cases/search_alphanumeric_housenumber.json b/test_cases/search_alphanumeric_housenumber.json index ab702ab..7d07fd8 100644 --- a/test_cases/search_alphanumeric_housenumber.json +++ b/test_cases/search_alphanumeric_housenumber.json @@ -196,6 +196,95 @@ } ] } + }, + { + "id": "6.1", + "status": "fail", + "in": { + "text": "kinkerstraat 175 F, amsterdam" + }, + "description": "As above, we currently don't support permutations of the unit number, as in this example with a space", + "expected": { + "properties": [ + { + "housenumber": "175F", + "street": "Kinkerstraat", + "locality": "Amsterdam", + "country_a": "NLD" + } + ] + } + }, + { + "id": 7, + "status": "pass", + "in": { + "text": "Vanadiumweg 11C, Amersfoort" + }, + "description": "Dutch address in a building with housenumbers like 11A, 11B, 11C, etc. 
Without proper sorting the desired result can easily be drowned out", + "expected": { + "properties": [ + { + "housenumber": "11C", + "street": "Vanadiumweg", + "locality": "Amersfoort", + "country_a": "NLD" + } + ] + } + }, + { + "id": "7.1", + "status": "pass", + "in": { + "text": "Vanadiumweg 11, Amersfoort" + }, + "description": "Expect the housenumber 11 (with no unit suffix) to appear before those with a unit suffix", + "expected": { + "properties": [ + { + "housenumber": "11", + "street": "Vanadiumweg", + "locality": "Amersfoort", + "country_a": "NLD" + } + ] + } + }, + { + "id": 8, + "status": "pass", + "in": { + "text": "봉화로167번길 35-7" + }, + "description": "Korean address, there are multiple units at 35-* but none at 35", + "expected": { + "properties": [ + { + "housenumber": "35-7", + "street": "봉화로167번길", + "country_a": "KOR" + } + ] + } + }, + { + "id": "8.1", + "status": "pass", + "in": { + "text": "봉화로167번길 35" + }, + "description": "As above, query is for 35 without unit suffix. Sorting is arbitrary.", + "expected": { + "priorityThresh": 10, + "properties": [ + { + "housenumber": "35-7", + "street": "봉화로167번길", + "country_a": "KOR" + } + ] + } } ] }