From 893e7264c7ddca67be75df47c440cdfabe681520 Mon Sep 17 00:00:00 2001 From: Stanislav Jordanov Date: Wed, 19 Sep 2018 18:43:01 +0300 Subject: [PATCH] Allow more elaborate XPath expressions in the Lucene index spec in collection.xconf (original PR #2169) --- README | 2 +- .../exist/indexing/lucene/LuceneConfig.java | 8 +- .../indexing/lucene/LuceneIndexConfig.java | 22 +- .../indexing/lucene/LuceneIndexWorker.java | 10 +- .../indexing/lucene/LuceneMatchListener.java | 7 +- .../indexing/lucene/NodePathPattern.java | 230 ++++++++++++++++++ .../indexing/lucene/LuceneIndexTest.java | 104 +++++++- .../indexing/range/RangeIndexWorker.java | 14 +- src/org/exist/Indexer.java | 10 +- src/org/exist/dom/persistent/StoredNode.java | 18 +- src/org/exist/storage/NativeBroker.java | 10 +- src/org/exist/storage/NodePath.java | 18 +- src/org/exist/storage/NodePath2.java | 145 +++++++++++ 13 files changed, 531 insertions(+), 67 deletions(-) create mode 100644 extensions/indexes/lucene/src/org/exist/indexing/lucene/NodePathPattern.java create mode 100644 src/org/exist/storage/NodePath2.java diff --git a/README b/README index f44507b7848..35b147a7534 100644 --- a/README +++ b/README @@ -21,4 +21,4 @@ bin/startup.sh or bin/startup.bat If these scripts don't work for you, try to call Java manually: java -jar start.jar jetty - + diff --git a/extensions/indexes/lucene/src/org/exist/indexing/lucene/LuceneConfig.java b/extensions/indexes/lucene/src/org/exist/indexing/lucene/LuceneConfig.java index 12e925bc211..ad76ad3472e 100644 --- a/extensions/indexes/lucene/src/org/exist/indexing/lucene/LuceneConfig.java +++ b/extensions/indexes/lucene/src/org/exist/indexing/lucene/LuceneConfig.java @@ -124,7 +124,7 @@ protected LuceneIndexConfig getWildcardConfig(NodePath path) { public Analyzer getAnalyzer(QName qname) { LuceneIndexConfig idxConf = paths.get(qname); while (idxConf != null) { - if (!idxConf.isNamed() && idxConf.getNodePath().match(qname)) + if (!idxConf.isNamed() && 
idxConf.getNodePathPattern().match(qname)) break; idxConf = idxConf.getNext(); } @@ -274,12 +274,12 @@ protected void parseConfig(NodeList configNodes, Map namespaces) if (config.getName() != null) { namedIndexes.put(config.getName(), config); } // register index either by QName or path - if (config.getNodePath().hasWildcard()) { + if (config.getNodePathPattern().hasWildcard()) { wildcardPaths.add(config); } else { - LuceneIndexConfig idxConf = paths.get(config.getNodePath().getLastComponent()); + LuceneIndexConfig idxConf = paths.get(config.getNodePathPattern().getLastComponent()); if (idxConf == null) { - paths.put(config.getNodePath().getLastComponent(), config); + paths.put(config.getNodePathPattern().getLastComponent(), config); } else { idxConf.add(config); } diff --git a/extensions/indexes/lucene/src/org/exist/indexing/lucene/LuceneIndexConfig.java b/extensions/indexes/lucene/src/org/exist/indexing/lucene/LuceneIndexConfig.java index 6a45d2913cd..784db1f8d95 100644 --- a/extensions/indexes/lucene/src/org/exist/indexing/lucene/LuceneIndexConfig.java +++ b/extensions/indexes/lucene/src/org/exist/indexing/lucene/LuceneIndexConfig.java @@ -58,16 +58,16 @@ public class LuceneIndexConfig { private String name = null; - private NodePath path = null; + private NodePathPattern path = null; private boolean isQNameIndex = false; private Map specialNodes = null; private LuceneIndexConfig nextConfig = null; - + private FieldType type = null; - + // This is for the @attr match boosting // and the intention is to do a proper predicate check instead in the future. 
/ljo private MultiMap matchAttrs; @@ -78,13 +78,13 @@ public LuceneIndexConfig(Element config, Map namespaces, Analyze Map fieldTypes) throws DatabaseConfigurationException { if (config.hasAttribute(QNAME_ATTR)) { QName qname = parseQName(config, namespaces); - path = new NodePath(qname); + path = new NodePathPattern(qname); isQNameIndex = true; } else { String matchPath = config.getAttribute(MATCH_ATTR); try { - path = new NodePath(namespaces, matchPath); + path = new NodePathPattern(namespaces, matchPath); if (path.length() == 0) throw new DatabaseConfigurationException("Lucene module: Invalid match path in collection config: " + matchPath); @@ -96,10 +96,10 @@ public LuceneIndexConfig(Element config, Map namespaces, Analyze String name = config.getAttribute(FIELD_ATTR); if (name != null && name.length() > 0) setName(name); - + String fieldType = config.getAttribute(TYPE_ATTR); if (fieldType != null && fieldType.length() > 0) - type = fieldTypes.get(fieldType); + type = fieldTypes.get(fieldType); if (type == null) type = new FieldType(config, analyzers); @@ -200,7 +200,7 @@ public QName getQName() { return path.getLastComponent(); } - public NodePath getNodePath() { + public NodePathPattern getNodePathPattern() { return path; } @@ -252,14 +252,14 @@ public void setName(String name) { public String getName() { return name; } - + public void add(LuceneIndexConfig config) { if (nextConfig == null) nextConfig = config; else nextConfig.add(config); } - + public LuceneIndexConfig getNext() { return nextConfig; } @@ -267,7 +267,7 @@ public LuceneIndexConfig getNext() { private boolean isAttributeNode() { return path.getLastComponent().getNameType() == ElementValue.ATTRIBUTE; } - + /** * @return true if this index can be queried by name */ diff --git a/extensions/indexes/lucene/src/org/exist/indexing/lucene/LuceneIndexWorker.java b/extensions/indexes/lucene/src/org/exist/indexing/lucene/LuceneIndexWorker.java index 696df30384a..022766df8db 100644 --- 
a/extensions/indexes/lucene/src/org/exist/indexing/lucene/LuceneIndexWorker.java +++ b/extensions/indexes/lucene/src/org/exist/indexing/lucene/LuceneIndexWorker.java @@ -261,20 +261,20 @@ public IStoredNode getReindexRoot(IStoredNode node, N return null; } - NodePath p = new NodePath(path); + NodePath2 p = new NodePath2((NodePath2)path); boolean reindexRequired = false; if (node.getNodeType() == Node.ELEMENT_NODE && !includeSelf) - p.removeLastComponent(); + p.removeLastNode(); for (int i = 0; i < p.length(); i++) { if (config.matches(p)) { reindexRequired = true; break; } - p.removeLastComponent(); + p.removeLastNode(); } if (reindexRequired) { - p = new NodePath(path); + p = new NodePath2((NodePath2)path); IStoredNode topMost = null; IStoredNode currentNode = node; if (currentNode.getNodeType() != Node.ELEMENT_NODE) @@ -283,7 +283,7 @@ public IStoredNode getReindexRoot(IStoredNode node, N if (config.matches(p)) topMost = currentNode; currentNode = currentNode.getParentStoredNode(); - p.removeLastComponent(); + p.removeLastNode(); } return topMost; } diff --git a/extensions/indexes/lucene/src/org/exist/indexing/lucene/LuceneMatchListener.java b/extensions/indexes/lucene/src/org/exist/indexing/lucene/LuceneMatchListener.java index 29aefebc9ac..6bc59dc213a 100644 --- a/extensions/indexes/lucene/src/org/exist/indexing/lucene/LuceneMatchListener.java +++ b/extensions/indexes/lucene/src/org/exist/indexing/lucene/LuceneMatchListener.java @@ -39,6 +39,7 @@ import org.exist.storage.DBBroker; import org.exist.storage.IndexSpec; import org.exist.storage.NodePath; +import org.exist.storage.NodePath2; import org.exist.util.serializer.AttrList; import org.xml.sax.SAXException; @@ -313,19 +314,19 @@ private void scanMatches(final NodeProxy p) { } private NodePath getPath(final NodeProxy proxy) { - final NodePath path = new NodePath(); + final NodePath2 path = new NodePath2(); final IStoredNode node = (IStoredNode) proxy.getNode(); walkAncestor(node, path); return path; } - 
private void walkAncestor(final IStoredNode node, final NodePath path) { + private void walkAncestor(final IStoredNode node, final NodePath2 path) { if (node == null) { return; } final IStoredNode parent = node.getParentStoredNode(); walkAncestor(parent, path); - path.addComponent(node.getQName()); + path.addNode(node); } /** diff --git a/extensions/indexes/lucene/src/org/exist/indexing/lucene/NodePathPattern.java b/extensions/indexes/lucene/src/org/exist/indexing/lucene/NodePathPattern.java new file mode 100644 index 00000000000..9b71059a4a4 --- /dev/null +++ b/extensions/indexes/lucene/src/org/exist/indexing/lucene/NodePathPattern.java @@ -0,0 +1,230 @@ +/* + * eXist Open Source Native XML Database + * Copyright (C) 2001-2018 The eXist Project + * http://exist-db.org + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +package org.exist.indexing.lucene; + +//import org.apache.logging.log4j.LogManager; +//import org.apache.logging.log4j.Logger; + +import org.exist.dom.QName; +import org.exist.storage.NodePath; +import org.exist.storage.NodePath2; +import org.exist.util.FastStringBuffer; + +import java.util.ArrayList; +import java.util.Map; + +/** + * @author Stanislav Jordanov + * @version 1.0 + *

+ * Class NodePathPattern replaces class NodePath + * in cases where NodePath was used not as a path, but as a path pattern. + * Most notably, this design flaw was present in LuceneConfig and LuceneIndexConfig. + *

+ * This is required in order to implement the feature requested and discussed in the following exist-open mailing-list thread: + * @see + * [Exist-open] Are more elaborate xpath expressions allowed in Lucene's index config + * + *

+ * After class NodePath2 was introduced and replaced NodePath in all cases related to Lucene index + * element walking and matching, now all that is left in order to have the desired feature implemented + * is implementing properly NodePathPattern.match methods, w/o modifying the originally used NodePath. + */ +public class NodePathPattern { + + private final NodePath qnPath; + private final ArrayList predicates = new ArrayList<>(); + + + interface Predicate { + boolean evaluate(NodePath2 nodePath, int elementIdx); + } + + private final static Predicate CONST_TRUE_PREDICATE = new Predicate() { + @Override + public boolean evaluate(NodePath2 nodePath, int elementIdx) { + return true; + } + }; + + static class SimpleAttrEqValuePredicate implements Predicate { + private final String attrName; + private final String attrVal; + + SimpleAttrEqValuePredicate(String attrName, String attrVal) { + this.attrName = attrName; + this.attrVal = attrVal; + } + + @Override + public boolean evaluate(NodePath2 nodePath, int elementIdx) { + String val = nodePath.attribs(elementIdx).get(attrName); + return val != null ? 
val.equals(attrVal) : attrVal == null; + } + } + + + public NodePathPattern(Map namespaces, String matchPattern) { + qnPath = new NodePath(); + parseXPathExpression(namespaces, matchPattern); + } + + public NodePathPattern(final QName qname) { + qnPath = new NodePath(qname); + } + + private void parseXPathExpression(final Map namespaces, final String matchPattern) { + final FastStringBuffer token = new FastStringBuffer(matchPattern.length()); + int pos = 0; + while (pos < matchPattern.length()) { + final char ch = matchPattern.charAt(pos); + switch (ch) { + case '/': + final String next = token.toString(); + token.setLength(0); + if (next.length() > 0) { + addSegment(namespaces, next); + } + if (matchPattern.charAt(++pos) == '/') { + qnPath.addComponent(NodePath.SKIP); + predicates.add(CONST_TRUE_PREDICATE); + } + break; + default: + token.append(ch); + pos++; + break; + } + } + if (token.length() > 0) { + addSegment(namespaces, token.toString()); + } + } + + private void addSegment(final Map namespaces, final String segment) { + String qname; + int predBeg = segment.indexOf('['); + Predicate pred = null; + if (predBeg >= 0) { + qname = segment.substring(0, predBeg); + pred = parsePredicate(segment.substring(predBeg)); + } else { + qname = segment.trim(); + pred = CONST_TRUE_PREDICATE; + } + + if ("*".equals(qname)) { + qnPath.addComponent(NodePath.WILDCARD); + } else { + qnPath.addComponent(namespaces, qname); + } + predicates.add(pred); + } + + private Predicate parsePredicate(String input) { + if (!input.startsWith("[") || !input.endsWith("]") || input.charAt(1) != '@') { + throw new IllegalArgumentException("Bad predicate spec: " + input + "\nOnly [@attr=value] is supported"); + } + + // So far we're supporting only [@attr=value] predicates: + int eqIdx = input.indexOf('='); + if (eqIdx < 0) { + throw new IllegalArgumentException("Bad predicate spec: " + input + "\nOnly [@attr=value] is supported"); + } + String name = input.substring(2, eqIdx).trim(); // 2 is 
to skip the leading [@ + String val = input.substring(eqIdx + 1, input.length() - 1).trim(); // -1 is to skip the trailing ] + + if (!(val.startsWith("\'") && val.endsWith("\'") || val.startsWith("\"") && val.endsWith("\""))) { + throw new IllegalArgumentException("Bad predicate spec: " + input + "\nAttribute value not in quotes"); + } else { + val = val.substring(1, val.length() - 1); // strip the quotes + } + + return new SimpleAttrEqValuePredicate(name, val); + } + + public int length() { + return qnPath.length(); + } + + public QName getLastComponent() { + return qnPath.getLastComponent(); + } + + public boolean hasWildcard() { + return qnPath.hasWildcard(); + } + + + public final boolean match(final QName qname) { + return qnPath.match(qname); + } + + public final boolean match(final NodePath other) { + return match(other, 0); + } + + private final boolean match(final NodePath o, final int from_pos) { + // TODO cast NodePath to NodePath2 and do 'extended' matching + final NodePath2 other = (NodePath2) o; + final int other_len = other.length(); + final int len = qnPath.length(); + boolean skip = false; + int i = 0; + QName components_i = null; + for (int j = from_pos; j < other_len; j++) { + if (i == len) { + return true; + } + if (components_i == null) + components_i = qnPath.getComponent(i); + + if (components_i == NodePath.SKIP) { + components_i = qnPath.getComponent(++i); + skip = true; + } + if((components_i == NodePath.WILDCARD || other.getComponent(j).compareTo(components_i) == 0) + && predicates.get(i).evaluate(other, j) + && (!skip || j + 1 == other_len || other.getComponent(j + 1).compareTo(components_i) != 0 + || !predicates.get(i).evaluate(other, j + 1))) { + ++i; + components_i = null; + skip = false; + } else if (skip) { + continue; + } else { + return false; + } + } + + return (i == len); + } + + @Override + public boolean equals(final Object obj) { + return qnPath.equals(obj); + } + + @Override + public int hashCode() { + return 
qnPath.hashCode(); + } +} diff --git a/extensions/indexes/lucene/test/src/org/exist/indexing/lucene/LuceneIndexTest.java b/extensions/indexes/lucene/test/src/org/exist/indexing/lucene/LuceneIndexTest.java index 04fa1437681..6b0c275e1fb 100644 --- a/extensions/indexes/lucene/test/src/org/exist/indexing/lucene/LuceneIndexTest.java +++ b/extensions/indexes/lucene/test/src/org/exist/indexing/lucene/LuceneIndexTest.java @@ -126,15 +126,15 @@ public class LuceneIndexTest { ""; private static final String XML8 = - "" + - " AAA on b1" + - " AAA on b2" + - " AAA on b3" + - " AAA on c1" + - " AAA on c2" + - ""; - - private static final String XML9 = + "" + + " AAA on b1" + + " AAA on b2" + + " AAA on b3" + + " AAA on c1" + + " AAA on c2" + + ""; + + private static final String XML9 = "" + " " + "

erste aus haus maus zaus yaus raus qaus leisten

" + @@ -293,6 +293,88 @@ public void simpleQueries() throws EXistException, CollectionConfigurationExcept } } + @Test + public void moreElaborateQueries() throws EXistException, CollectionConfigurationException, PermissionDeniedException, SAXException, LockException, IOException, XPathException, QName.IllegalQNameException { + final String XML10 = + "\n" + // xmlns=\"http://www.tei-c.org/ns/1.0\">\n" + + " \n" + + " Buick \n" + // this should get indexed + " Cadillac \n" + // this should not get indexed -- attribute name ns does not match + " Dodge \n" + // this should not get indexed -- attribute value does not match + " Ford \n" + // this should not get indexed -- attribute is entirely missing + " <tag> ABuick </tag> \n" + // this should get indexed + " <tag> ACadillac </tag> \n" + // this should not get indexed -- attribute name ns does not match + " <tag> ADodge </tag> \n" + // this should not get indexed -- attribute value does not match + " <tag> AFord </tag> \n" + // this should not get indexed -- attribute is entirely missing + " \n" + + ""; + + final String COLLECTION_CONFIG10 = + "\n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + ""; + + + final DocumentSet docs = configureAndStore(COLLECTION_CONFIG10, XML10, "test.xml"); + final BrokerPool pool = existEmbeddedServer.getBrokerPool(); + try(final DBBroker broker = pool.get(Optional.of(pool.getSecurityManager().getSystemSubject()))) { + + + // unbeknownst to me, the test on the next line fails if the literal "buick" is replaced with "Buick": + final Occurrences[] o1 = checkIndex(docs, broker, new QName[]{new QName("title")}, "buick", 1); + final Occurrences[] o2 = checkIndex(docs, broker, new QName[]{new QName("title")}, "cadillac", 0); + final Occurrences[] o3 = checkIndex(docs, broker, new QName[]{new QName("title")}, "dodge", 0); + final Occurrences[] o4 = checkIndex(docs, broker, new QName[]{new QName("title")}, "ford", 0); + + final Occurrences[] p1 = checkIndex(docs, 
broker, new QName[]{new QName("title")}, "abuick", 1); + final Occurrences[] p2 = checkIndex(docs, broker, new QName[]{new QName("title")}, "acadillac", 0); + final Occurrences[] p3 = checkIndex(docs, broker, new QName[]{new QName("title")}, "adodge", 0); + final Occurrences[] p4 = checkIndex(docs, broker, new QName[]{new QName("title")}, "aford", 0); + + final XQuery xquery = pool.getXQueryService(); + assertNotNull(xquery); + Sequence seq; + + seq = xquery.execute(broker, "//.[ft:query(title, 'Buick')]", null); + assertNotNull(seq); + assertEquals(1, seq.getItemCount()); + + seq = xquery.execute(broker, "//.[ft:query(title, 'Cadillac')]", null); + assertNotNull(seq); + assertEquals(0, seq.getItemCount()); + + seq = xquery.execute(broker, "//.[ft:query(title, 'Dodge')]", null); + assertNotNull(seq); + assertEquals(0, seq.getItemCount()); + + seq = xquery.execute(broker, "//.[ft:query(title, 'Ford')]", null); + assertNotNull(seq); + assertEquals(0, seq.getItemCount()); + + seq = xquery.execute(broker, "//.[ft:query(title, 'ABuick')]", null); + assertNotNull(seq); + assertEquals(1, seq.getItemCount()); + + seq = xquery.execute(broker, "//.[ft:query(title, 'ACadillac')]", null); + assertNotNull(seq); + assertEquals(0, seq.getItemCount()); + + seq = xquery.execute(broker, "//.[ft:query(title, 'ADodge')]", null); + assertNotNull(seq); + assertEquals(0, seq.getItemCount()); + + seq = xquery.execute(broker, "//.[ft:query(title, 'AFord')]", null); + assertNotNull(seq); + assertEquals(0, seq.getItemCount()); + } + } + @Test public void configuration() throws EXistException, CollectionConfigurationException, PermissionDeniedException, SAXException, LockException, IOException, XPathException, QName.IllegalQNameException { final DocumentSet docs = configureAndStore(COLLECTION_CONFIG4, XML4, "test.xml"); @@ -1217,6 +1299,10 @@ private DocumentSet configureAndStore(String configuration, Path directory) thro return docs; } + /** It really depends on the Analyzer used with the 
index, + * but probably you would like to have the 'term' argument all lower cased. + * @see Help needed with a test case + */ private Occurrences[] checkIndex(final DocumentSet docs, final DBBroker broker, final QName[] qn, final String term, final int expected) { final LuceneIndexWorker index = (LuceneIndexWorker)broker.getIndexController().getWorkerByIndexId(LuceneIndex.ID); final Map hints = new HashMap<>(); diff --git a/extensions/indexes/range/src/org/exist/indexing/range/RangeIndexWorker.java b/extensions/indexes/range/src/org/exist/indexing/range/RangeIndexWorker.java index a8a1acb4c70..7af21e41ffe 100644 --- a/extensions/indexes/range/src/org/exist/indexing/range/RangeIndexWorker.java +++ b/extensions/indexes/range/src/org/exist/indexing/range/RangeIndexWorker.java @@ -56,6 +56,7 @@ import org.exist.storage.ElementValue; import org.exist.storage.IndexSpec; import org.exist.storage.NodePath; +import org.exist.storage.NodePath2; import org.exist.storage.btree.DBException; import org.exist.storage.txn.Txn; import org.exist.util.ByteConversion; @@ -291,19 +292,20 @@ public IStoredNode getReindexRoot(IStoredNode node, N // return null; if (config == null) return null; - NodePath p = new NodePath(path); + NodePath2 p = new NodePath2((NodePath2)path); boolean reindexRequired = false; - if (node.getNodeType() == Node.ELEMENT_NODE && !includeSelf) - p.removeLastComponent(); + if (node.getNodeType() == Node.ELEMENT_NODE && !includeSelf) { + p.removeLastNode(); + } while (p.length() > 0) { if (config.matches(p)) { reindexRequired = true; break; } - p.removeLastComponent(); + p.removeLastNode(); } if (reindexRequired) { - p = new NodePath(path); + p = new NodePath2((NodePath2)path); IStoredNode topMost = null; IStoredNode currentNode = node; if (currentNode.getNodeType() != Node.ELEMENT_NODE) @@ -312,7 +314,7 @@ public IStoredNode getReindexRoot(IStoredNode node, N if (config.matches(p)) topMost = currentNode; currentNode = currentNode.getParentStoredNode(); - 
p.removeLastComponent(); + p.removeLastNode(); } return topMost; } diff --git a/src/org/exist/Indexer.java b/src/org/exist/Indexer.java index 66fd8cf20da..64b5de33e29 100644 --- a/src/org/exist/Indexer.java +++ b/src/org/exist/Indexer.java @@ -39,7 +39,7 @@ import org.exist.indexing.StreamListener.ReindexMode; import org.exist.storage.DBBroker; import org.exist.storage.IndexSpec; -import org.exist.storage.NodePath; +import org.exist.storage.NodePath2; import org.exist.storage.RangeIndexSpec; import org.exist.storage.txn.Txn; import org.exist.util.Configuration; @@ -93,7 +93,7 @@ public class Indexer extends Observable implements ContentHandler, LexicalHandle private XMLString charBuf = new XMLString(); private boolean inCDATASection = false; private int currentLine = 0; - private final NodePath currentPath = new NodePath(); + private final NodePath2 currentPath = new NodePath2(); private DocumentImpl document = null; private IndexSpec indexSpec = null; @@ -420,7 +420,7 @@ public void endElement(final String namespace, final String name, final String q indexListener.endElement(transaction, last, currentPath); } } - currentPath.removeLastComponent(); + currentPath.removeLastNode(); setPrevious(last); level--; } @@ -587,7 +587,7 @@ public void startElement(final String namespace, final String name, final String nsMappings.clear(); } stack.push(node); - currentPath.addComponent(qn); + currentPath.addNode(node, attributes); node.setPosition(elementCnt++); if (!validate) { if (childCnt != null) { @@ -611,7 +611,7 @@ public void startElement(final String namespace, final String name, final String nsMappings.clear(); } stack.push(node); - currentPath.addComponent(qn); + currentPath.addNode(node, attributes); node.setPosition(elementCnt++); if (!validate) { if (childCnt != null) { diff --git a/src/org/exist/dom/persistent/StoredNode.java b/src/org/exist/dom/persistent/StoredNode.java index 79327a5466e..6682a0b5144 100644 --- a/src/org/exist/dom/persistent/StoredNode.java 
+++ b/src/org/exist/dom/persistent/StoredNode.java @@ -28,6 +28,7 @@ import org.exist.stax.IEmbeddedXMLStreamReader; import org.exist.storage.DBBroker; import org.exist.storage.NodePath; +import org.exist.storage.NodePath2; import org.exist.storage.Signatures; import org.exist.storage.dom.INodeIterator; import org.exist.util.pool.NodePool; @@ -425,22 +426,25 @@ protected IStoredNode getLastNode(final IStoredNode node) { @Override public NodePath getPath() { - final NodePath path = new NodePath(); + final NodePath2 path = new NodePath2(); if(getNodeType() == Node.ELEMENT_NODE) { - path.addComponent(getQName()); + path.addNode(this); } - NodeImpl parent; + Node parent; if(getNodeType() == Node.ATTRIBUTE_NODE) { - parent = (NodeImpl) ((Attr)this).getOwnerElement(); + parent = ((Attr)this).getOwnerElement(); } else { - parent = (NodeImpl) getParentNode(); + parent = getParentNode(); } while(parent != null && parent.getNodeType() != Node.DOCUMENT_NODE) { - path.addComponentAtStart(parent.getQName()); - parent = (NodeImpl) parent.getParentNode(); + path.addNode(parent); + parent = parent.getParentNode(); } + + path.reverseNodes(); + return path; } diff --git a/src/org/exist/storage/NativeBroker.java b/src/org/exist/storage/NativeBroker.java index e8923f46ae7..06d5ef716bc 100644 --- a/src/org/exist/storage/NativeBroker.java +++ b/src/org/exist/storage/NativeBroker.java @@ -2837,7 +2837,7 @@ private void dropIndex(final Txn transaction, final DocumentImpl document) throw final IStoredNode node = (IStoredNode) nodes.item(i); try(final INodeIterator iterator = getNodeIterator(node)) { iterator.next(); - scanNodes(transaction, iterator, node, new NodePath(), IndexMode.REMOVE, listener); + scanNodes(transaction, iterator, node, new NodePath2(), IndexMode.REMOVE, listener); } catch(final IOException ioe) { LOG.error("Unable to close node iterator", ioe); } @@ -2982,7 +2982,7 @@ public void reindexXMLResource(final Txn transaction, final DocumentImpl doc, fi final IStoredNode 
node = (IStoredNode) nodes.item(i); try (final INodeIterator iterator = getNodeIterator(node)) { iterator.next(); - scanNodes(transaction, iterator, node, new NodePath(), mode, listener); + scanNodes(transaction, iterator, node, new NodePath2(), mode, listener); } catch (final IOException ioe) { LOG.error("Unable to close node iterator", ioe); } @@ -3532,9 +3532,9 @@ private boolean checkNodeTree(final INodeIterator iterator, final IStoredNode no * @param currentPath */ private void scanNodes(final Txn transaction, final INodeIterator iterator, final IStoredNode node, - final NodePath currentPath, final IndexMode mode, final StreamListener listener) { + final NodePath2 currentPath, final IndexMode mode, final StreamListener listener) { if(node.getNodeType() == Node.ELEMENT_NODE) { - currentPath.addComponent(node.getQName()); + currentPath.addNode(node); } indexNode(transaction, node, currentPath, mode); if(listener != null) { @@ -3573,7 +3573,7 @@ private void scanNodes(final Txn transaction, final INodeIterator iterator, fina if(listener != null) { listener.endElement(transaction, (ElementImpl) node, currentPath); } - currentPath.removeLastComponent(); + currentPath.removeLastNode(); } } diff --git a/src/org/exist/storage/NodePath.java b/src/org/exist/storage/NodePath.java index 41a2342b376..8bc84b7b750 100644 --- a/src/org/exist/storage/NodePath.java +++ b/src/org/exist/storage/NodePath.java @@ -101,17 +101,12 @@ public void addComponent(final QName component) { components[pos++] = component; } - public void addComponentAtStart(final QName component) { - if (pos == components.length) { - final QName[] t = new QName[pos + 1]; - System.arraycopy(components, 0, t, 1, pos); - components = t; - components[0] = component; - } else { - System.arraycopy(components, 0, components, 1, pos); - components[0] = component; + protected void reverseComponents() { + for (int i = 0; i < pos / 2; ++i) { + QName tmp = components[i]; + components[i] = components[pos - 1 - i]; + 
components[pos - 1 - i] = tmp; } - pos++; } public void removeLastComponent() { @@ -213,7 +208,8 @@ public String toString() { return buf.toString(); } - private void addComponent(final Map namespaces, String component) { + public void addComponent(final Map namespaces, final String origComponent) { + String component = origComponent; boolean isAttribute = false; if (component.startsWith("@")) { isAttribute = true; diff --git a/src/org/exist/storage/NodePath2.java b/src/org/exist/storage/NodePath2.java new file mode 100644 index 00000000000..b6f5e3b2272 --- /dev/null +++ b/src/org/exist/storage/NodePath2.java @@ -0,0 +1,145 @@ +/* + * eXist Open Source Native XML Database + * Copyright (C) 2001-2018 The eXist Project + * http://exist-db.org + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +package org.exist.storage; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.w3c.dom.Element; +import org.w3c.dom.NamedNodeMap; +import org.w3c.dom.Node; +import org.exist.dom.INode; +import org.xml.sax.Attributes; + +import java.util.HashMap; + +/** + * @author Stanislav Jordanov + * @version 1.0 + *

+ * This is an extension of class NodePath, that keeps track of a *real* node/element path, not just a QName path + * as its base class -- NodePath does. + * This is required in order to implement the feature requested/discussed here: + * @see + * [Exist-open] Are more elaborate xpath expressions allowed in Lucene's index config + * + */ +public class NodePath2 extends NodePath { + + private final static Logger LOG = LogManager.getLogger(NodePath2.class); + + private HashMap attribs[] = new HashMap[4]; + + private int n_pos = 0; + + public NodePath2() { + super(); + } + + public NodePath2(NodePath2 o) { + super(o); + n_pos = o.n_pos; + attribs = new HashMap[n_pos]; + for (int i = 0; i < n_pos; i++) { + attribs[i] = o.attribs(i); + } + } + + + public void addNode(final Node node) { + addNode(node, null); + } + + public void addNode(final Node node, Attributes saxAttribs) { + assert node instanceof Element; + + super.addComponent(((INode) node).getQName()); + + if (n_pos == attribs.length) { + //final HashMap[] t = new HashMap[n_pos + 4]; + final HashMap[] t = new HashMap[n_pos + 4]; + System.arraycopy(attribs, 0, t, 0, n_pos); + attribs = t; + } + + HashMap amap = new HashMap<>(); + + if (saxAttribs != null) { + int alen = saxAttribs.getLength(); + + for (int i = 0; i < alen; ++i) { + amap.put(saxAttribs.getQName(i), saxAttribs.getValue(i)); + } + } else { + NamedNodeMap nnm = node.getAttributes(); + int alen = node.getAttributes().getLength(); + + for (int i = 0; i < alen; ++i) { + Node child = nnm.item(i); + if (child.getNodeType() == Node.ATTRIBUTE_NODE) + amap.put(child.getNodeName(), child.getNodeValue()); + } + } + + attribs[n_pos++] = amap; + } + + + public void reverseNodes() { + super.reverseComponents(); + for (int i = 0; i < n_pos / 2; ++i) { + HashMap tmp = attribs[i]; + attribs[i] = attribs[attribs.length - 1 - i]; + attribs[attribs.length - 1 - i] = tmp; + } + } + + + public void removeLastNode() { + super.removeLastComponent(); + + if (n_pos > 0) { + 
attribs[--n_pos] = null; + } + } + + + public void removeLastComponent() { + if (this.length() <= n_pos) { + LOG.error("Whoa!!! addNode() possibly paired with removeLastComponent() instead of removeLastNode()"); + } + super.removeLastComponent(); + } + + + public void reset() { + super.reset(); + + for (int i = 0; i < n_pos; i++) { + attribs[i] = null; + } + } + + + //public HashMap attribs(int elementIdx) { + public HashMap attribs(int elementIdx) { + return attribs[elementIdx]; + } +}