From 1d1afc5747d9688ca412e4f45903221f90b9ce72 Mon Sep 17 00:00:00 2001
From: jlinn
Date: Fri, 20 May 2016 22:59:22 -0700
Subject: [PATCH] Make allow_malformed option apply to url decoding

---
 .../index/analysis/url/URLTokenizer.java      |  8 +++++-
 .../index/analysis/url/URLTokenizerTest.java  | 28 +++++++++++++++++++
 2 files changed, 35 insertions(+), 1 deletion(-)

diff --git a/src/main/java/org/elasticsearch/index/analysis/url/URLTokenizer.java b/src/main/java/org/elasticsearch/index/analysis/url/URLTokenizer.java
index 4a022b5..c44f1ca 100644
--- a/src/main/java/org/elasticsearch/index/analysis/url/URLTokenizer.java
+++ b/src/main/java/org/elasticsearch/index/analysis/url/URLTokenizer.java
@@ -196,7 +196,13 @@ private List tokenize(URL url, URLPart part) throws IOException {
         int start = 0;
         int end = 0;
         if (urlDecode) {
-            partString = URLDecoder.decode(partString, "UTF-8");
+            try {
+                partString = URLDecoder.decode(partString, "UTF-8");
+            } catch (IllegalArgumentException e) {
+                if (!allowMalformed) {
+                    throw new IOException("Error performing URL decoding on string: " + partString, e);
+                }
+            }
         }
         switch (part) {
             case HOST:
diff --git a/src/test/java/org/elasticsearch/index/analysis/url/URLTokenizerTest.java b/src/test/java/org/elasticsearch/index/analysis/url/URLTokenizerTest.java
index c457646..7019548 100644
--- a/src/test/java/org/elasticsearch/index/analysis/url/URLTokenizerTest.java
+++ b/src/test/java/org/elasticsearch/index/analysis/url/URLTokenizerTest.java
@@ -147,6 +147,34 @@ public void testAllowMalformed() throws IOException {
     }
 
 
+    @Test
+    public void testUrlDecode() throws Exception {
+        String url = "http://foo.com?baz=foo%20bat";
+        URLTokenizer tokenizer = createTokenizer(url, URLPart.QUERY);
+        tokenizer.setUrlDecode(true);
+        assertTokenStreamContents(tokenizer, stringArray("baz=foo bat"));
+    }
+
+
+    @Test(expected = IOException.class)
+    public void testUrlDecodeIllegalCharacters() throws Exception {
+        String url = "http://foo.com?baz=foo%2vbat";
+        URLTokenizer tokenizer = createTokenizer(url, URLPart.QUERY);
+        tokenizer.setUrlDecode(true);
+        assertTokenStreamContents(tokenizer, "");
+    }
+
+
+    @Test
+    public void testUrlDecodeAllowMalformed() throws Exception {
+        String url = "http://foo.com?baz=foo%2vbat";
+        URLTokenizer tokenizer = createTokenizer(url, URLPart.QUERY);
+        tokenizer.setUrlDecode(true);
+        tokenizer.setAllowMalformed(true);
+        assertTokenStreamContents(tokenizer, "baz=foo%2vbat");
+    }
+
+
+
     private URLTokenizer createTokenizer(String input, URLPart part) throws IOException {
         URLTokenizer tokenizer = new URLTokenizer(part);
         tokenizer.setReader(new StringReader(input));