001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *     http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.commons.configuration2.tree.xpath;
018
019import java.util.Collections;
020import java.util.LinkedList;
021import java.util.List;
022import java.util.StringTokenizer;
023import java.util.stream.Collectors;
024
025import org.apache.commons.configuration2.tree.ExpressionEngine;
026import org.apache.commons.configuration2.tree.NodeAddData;
027import org.apache.commons.configuration2.tree.NodeHandler;
028import org.apache.commons.configuration2.tree.QueryResult;
029import org.apache.commons.jxpath.JXPathContext;
030import org.apache.commons.jxpath.ri.JXPathContextReferenceImpl;
031import org.apache.commons.lang3.StringUtils;
032
033/**
034 * <p>
035 * A specialized implementation of the {@code ExpressionEngine} interface that is able to evaluate XPATH expressions.
036 * </p>
037 * <p>
038 * This class makes use of <a href="https://commons.apache.org/jxpath/"> Commons JXPath</a> for handling XPath
039 * expressions and mapping them to the nodes of a hierarchical configuration. This makes the rich and powerful XPATH
040 * syntax available for accessing properties from a configuration object.
041 * </p>
042 * <p>
043 * For selecting properties arbitrary XPATH expressions can be used, which select single or multiple configuration
044 * nodes. The associated {@code Configuration} instance will directly pass the specified property keys into this engine.
045 * If a key is not syntactically correct, an exception will be thrown.
046 * </p>
047 * <p>
048 * For adding new properties, this expression engine uses a specific syntax: the &quot;key&quot; of a new property must
049 * consist of two parts that are separated by whitespace:
050 * </p>
051 * <ol>
052 * <li>An XPATH expression selecting a single node, to which the new element(s) are to be added. This can be an
053 * arbitrarily complex expression, but it must select exactly one node, otherwise an exception will be thrown.</li>
054 * <li>The name of the new element(s) to be added below this parent node. Here either a single node name or a complete
055 * path of nodes (separated by the &quot;/&quot; character or &quot;@&quot; for an attribute) can be specified.</li>
056 * </ol>
057 * <p>
058 * Some examples for valid keys that can be passed into the configuration's {@code addProperty()} method follow:
059 * </p>
060 *
061 * <pre>
062 * &quot;/tables/table[1] type&quot;
063 * </pre>
064 *
065 * <p>
066 * This will add a new {@code type} node as a child of the first {@code table} element.
067 * </p>
068 *
069 * <pre>
070 * &quot;/tables/table[1] @type&quot;
071 * </pre>
072 *
073 * <p>
074 * Similar to the example above, but this time a new attribute named {@code type} will be added to the first
075 * {@code table} element.
076 * </p>
077 *
078 * <pre>
079 * &quot;/tables table/fields/field/name&quot;
080 * </pre>
081 *
082 * <p>
083 * This example shows how a complex path can be added. The parent node is the {@code tables} element. Here a new branch
084 * consisting of the nodes {@code table}, {@code fields}, {@code field}, and {@code name} will be added.
085 * </p>
086 *
087 * <pre>
088 * &quot;/tables table/fields/field@type&quot;
089 * </pre>
090 *
091 * <p>
092 * This is similar to the last example, but in this case a complex path ending with an attribute is defined.
093 * </p>
094 * <p>
095 * <strong>Note:</strong> This extended syntax for adding properties only works with the {@code addProperty()} method.
096 * {@code setProperty()} does not support creating new nodes this way.
097 * </p>
098 * <p>
099 * From version 1.7 on, it is possible to use regular keys in calls to {@code addProperty()} (i.e. keys that do not have
100 * to contain a whitespace as delimiter). In this case the key is evaluated, and the biggest part pointing to an
101 * existing node is determined. The remaining part is then added as new path. As an example consider the key
102 * </p>
103 *
104 * <pre>
105 * &quot;tables/table[last()]/fields/field/name&quot;
106 * </pre>
107 *
108 * <p>
109 * If the key does not point to an existing node, the engine will check the paths
110 * {@code "tables/table[last()]/fields/field"}, {@code "tables/table[last()]/fields"}, {@code "tables/table[last()]"},
111 * and so on, until a key is found which points to a node. Let's assume that the last key listed above can be resolved
112 * in this way. Then from this key the following key is derived: {@code "tables/table[last()] fields/field/name"} by
113 * appending the remaining part after a whitespace. This key can now be processed using the original algorithm. Keys of
114 * this form can also be used with the {@code setProperty()} method. However, it is still recommended to use the old
115 * format because it makes explicit at which position new nodes should be added. For keys without a whitespace delimiter
116 * there may be ambiguities.
117 * </p>
118 *
119 * @since 1.3
120 */
121public class XPathExpressionEngine implements ExpressionEngine {
    /** Constant for the path delimiter that separates node names in generated keys and in paths of new nodes. */
    static final String PATH_DELIMITER = "/";

    /** Constant for the attribute delimiter that marks an attribute name in generated keys and in paths of new nodes. */
    static final String ATTR_DELIMITER = "@";

    /**
     * Constant for the delimiters for splitting node paths. This is the concatenation of {@link #PATH_DELIMITER} and
     * {@link #ATTR_DELIMITER}; it is passed as delimiter set to the tokenizer in {@code createNodeAddData()}.
     */
    private static final String NODE_PATH_DELIMITERS = PATH_DELIMITER + ATTR_DELIMITER;

    /**
     * Constant for a space which is used as delimiter in keys for adding properties. {@code generateKeyForAdd()} inserts
     * it between the part of a key that selects an existing node and the path of the nodes to be added.
     */
    private static final String SPACE = " ";

    /** Constant for a default size of a key buffer. Used as initial capacity of the buffer in {@code canonicalKey()}. */
    private static final int BUF_SIZE = 128;

    /** Constant for the start of an index expression as appended by {@code canonicalKey()}. */
    private static final char START_INDEX = '[';

    /** Constant for the end of an index expression as appended by {@code canonicalKey()}. */
    private static final char END_INDEX = ']';

    /** The internally used context factory. It is set once in the constructor and returned by {@code getContextFactory()}. */
    private final XPathContextFactory contextFactory;
147
148    /**
149     * Creates a new instance of {@code XPathExpressionEngine} with default settings.
150     */
151    public XPathExpressionEngine() {
152        this(new XPathContextFactory());
153    }
154
155    /**
156     * Creates a new instance of {@code XPathExpressionEngine} and sets the context factory. This constructor is mainly used
157     * for testing purposes.
158     *
159     * @param factory the {@code XPathContextFactory}
160     */
161    XPathExpressionEngine(final XPathContextFactory factory) {
162        contextFactory = factory;
163    }
164
165    /**
166     * {@inheritDoc} This implementation interprets the passed in key as an XPATH expression.
167     */
168    @Override
169    public <T> List<QueryResult<T>> query(final T root, final String key, final NodeHandler<T> handler) {
170        if (StringUtils.isEmpty(key)) {
171            final QueryResult<T> result = createResult(root);
172            return Collections.singletonList(result);
173        }
174        final JXPathContext context = createContext(root, handler);
175        List<?> results = context.selectNodes(key);
176        if (results == null) {
177            results = Collections.emptyList();
178        }
179        return convertResults(results);
180    }
181
182    /**
183     * {@inheritDoc} This implementation creates an XPATH expression that selects the given node (under the assumption that
184     * the passed in parent key is valid). As the {@code nodeKey()} implementation of
185     * {@link org.apache.commons.configuration2.tree.DefaultExpressionEngine DefaultExpressionEngine} this method does not
186     * return indices for nodes. So all child nodes of a given parent with the same name have the same key.
187     */
188    @Override
189    public <T> String nodeKey(final T node, final String parentKey, final NodeHandler<T> handler) {
190        if (parentKey == null) {
191            // name of the root node
192            return StringUtils.EMPTY;
193        }
194        if (handler.nodeName(node) == null) {
195            // paranoia check for undefined node names
196            return parentKey;
197        }
198        final StringBuilder buf = new StringBuilder(parentKey.length() + handler.nodeName(node).length() + PATH_DELIMITER.length());
199        if (!parentKey.isEmpty()) {
200            buf.append(parentKey);
201            buf.append(PATH_DELIMITER);
202        }
203        buf.append(handler.nodeName(node));
204        return buf.toString();
205    }
206
207    @Override
208    public String attributeKey(final String parentKey, final String attributeName) {
209        final StringBuilder buf = new StringBuilder(
210            StringUtils.length(parentKey) + StringUtils.length(attributeName) + PATH_DELIMITER.length() + ATTR_DELIMITER.length());
211        if (StringUtils.isNotEmpty(parentKey)) {
212            buf.append(parentKey).append(PATH_DELIMITER);
213        }
214        buf.append(ATTR_DELIMITER).append(attributeName);
215        return buf.toString();
216    }
217
218    /**
219     * {@inheritDoc} This implementation works similar to {@code nodeKey()}, but always adds an index expression to the
220     * resulting key.
221     */
222    @Override
223    public <T> String canonicalKey(final T node, final String parentKey, final NodeHandler<T> handler) {
224        final T parent = handler.getParent(node);
225        if (parent == null) {
226            // this is the root node
227            return StringUtils.defaultString(parentKey);
228        }
229
230        final StringBuilder buf = new StringBuilder(BUF_SIZE);
231        if (StringUtils.isNotEmpty(parentKey)) {
232            buf.append(parentKey).append(PATH_DELIMITER);
233        }
234        buf.append(handler.nodeName(node));
235        buf.append(START_INDEX);
236        buf.append(determineIndex(parent, node, handler));
237        buf.append(END_INDEX);
238        return buf.toString();
239    }
240
241    /**
242     * {@inheritDoc} The expected format of the passed in key is explained in the class comment.
243     */
244    @Override
245    public <T> NodeAddData<T> prepareAdd(final T root, final String key, final NodeHandler<T> handler) {
246        if (key == null) {
247            throw new IllegalArgumentException("prepareAdd: key must not be null!");
248        }
249
250        String addKey = key;
251        int index = findKeySeparator(addKey);
252        if (index < 0) {
253            addKey = generateKeyForAdd(root, addKey, handler);
254            index = findKeySeparator(addKey);
255        } else if (index >= addKey.length() - 1) {
256            invalidPath(addKey, " new node path must not be empty.");
257        }
258
259        final List<QueryResult<T>> nodes = query(root, addKey.substring(0, index).trim(), handler);
260        if (nodes.size() != 1) {
261            throw new IllegalArgumentException("prepareAdd: key '" + key + "' must select exactly one target node!");
262        }
263
264        return createNodeAddData(addKey.substring(index).trim(), nodes.get(0));
265    }
266
267    /**
268     * Creates the {@code JXPathContext} to be used for executing a query. This method delegates to the context factory.
269     *
270     * @param root the configuration root node
271     * @param handler the node handler
272     * @return the new context
273     */
274    private <T> JXPathContext createContext(final T root, final NodeHandler<T> handler) {
275        return getContextFactory().createContext(root, handler);
276    }
277
278    /**
279     * Creates a {@code NodeAddData} object as a result of a {@code prepareAdd()} operation. This method interprets the
280     * passed in path of the new node.
281     *
282     * @param path the path of the new node
283     * @param parentNodeResult the parent node
284     * @param <T> the type of the nodes involved
285     */
286    <T> NodeAddData<T> createNodeAddData(final String path, final QueryResult<T> parentNodeResult) {
287        if (parentNodeResult.isAttributeResult()) {
288            invalidPath(path, " cannot add properties to an attribute.");
289        }
290        final List<String> pathNodes = new LinkedList<>();
291        String lastComponent = null;
292        boolean attr = false;
293        boolean first = true;
294
295        final StringTokenizer tok = new StringTokenizer(path, NODE_PATH_DELIMITERS, true);
296        while (tok.hasMoreTokens()) {
297            final String token = tok.nextToken();
298            if (PATH_DELIMITER.equals(token)) {
299                if (attr) {
300                    invalidPath(path, " contains an attribute" + " delimiter at a disallowed position.");
301                }
302                if (lastComponent == null) {
303                    invalidPath(path, " contains a '/' at a disallowed position.");
304                }
305                pathNodes.add(lastComponent);
306                lastComponent = null;
307            } else if (ATTR_DELIMITER.equals(token)) {
308                if (attr) {
309                    invalidPath(path, " contains multiple attribute delimiters.");
310                }
311                if (lastComponent == null && !first) {
312                    invalidPath(path, " contains an attribute delimiter at a disallowed position.");
313                }
314                if (lastComponent != null) {
315                    pathNodes.add(lastComponent);
316                }
317                attr = true;
318                lastComponent = null;
319            } else {
320                lastComponent = token;
321            }
322            first = false;
323        }
324
325        if (lastComponent == null) {
326            invalidPath(path, "contains no components.");
327        }
328
329        return new NodeAddData<>(parentNodeResult.getNode(), lastComponent, attr, pathNodes);
330    }
331
332    /**
333     * Gets the {@code XPathContextFactory} used by this instance.
334     *
335     * @return the {@code XPathContextFactory}
336     */
337    XPathContextFactory getContextFactory() {
338        return contextFactory;
339    }
340
341    /**
342     * Tries to generate a key for adding a property. This method is called if a key was used for adding properties which
343     * does not contain a space character. It splits the key at its single components and searches for the last existing
344     * component. Then a key compatible key for adding properties is generated.
345     *
346     * @param root the root node of the configuration
347     * @param key the key in question
348     * @param handler the node handler
349     * @return the key to be used for adding the property
350     */
351    private <T> String generateKeyForAdd(final T root, final String key, final NodeHandler<T> handler) {
352        int pos = key.lastIndexOf(PATH_DELIMITER, key.length());
353
354        while (pos >= 0) {
355            final String keyExisting = key.substring(0, pos);
356            if (!query(root, keyExisting, handler).isEmpty()) {
357                final StringBuilder buf = new StringBuilder(key.length() + 1);
358                buf.append(keyExisting).append(SPACE);
359                buf.append(key.substring(pos + 1));
360                return buf.toString();
361            }
362            pos = key.lastIndexOf(PATH_DELIMITER, pos - 1);
363        }
364
365        return SPACE + key;
366    }
367
368    /**
369     * Determines the index of the given child node in the node list of its parent.
370     *
371     * @param parent the parent node
372     * @param child the child node
373     * @param handler the node handler
374     * @param <T> the type of the nodes involved
375     * @return the index of this child node
376     */
377    private static <T> int determineIndex(final T parent, final T child, final NodeHandler<T> handler) {
378        return handler.getChildren(parent, handler.nodeName(child)).indexOf(child) + 1;
379    }
380
381    /**
382     * Helper method for throwing an exception about an invalid path.
383     *
384     * @param path the invalid path
385     * @param msg the exception message
386     */
387    private static void invalidPath(final String path, final String msg) {
388        throw new IllegalArgumentException("Invalid node path: \"" + path + "\" " + msg);
389    }
390
391    /**
392     * Determines the position of the separator in a key for adding new properties. If no delimiter is found, result is -1.
393     *
394     * @param key the key
395     * @return the position of the delimiter
396     */
397    private static int findKeySeparator(final String key) {
398        int index = key.length() - 1;
399        while (index >= 0 && !Character.isWhitespace(key.charAt(index))) {
400            index--;
401        }
402        return index;
403    }
404
405    /**
406     * Converts the objects returned as query result from the JXPathContext to query result objects.
407     *
408     * @param results the list with results from the context
409     * @param <T> the type of results to be produced
410     * @return the result list
411     */
412    private static <T> List<QueryResult<T>> convertResults(final List<?> results) {
413        return results.stream().map(res -> (QueryResult<T>) createResult(res)).collect(Collectors.toList());
414    }
415
416    /**
417     * Creates a {@code QueryResult} object from the given result object of a query. Because of the node pointers involved
418     * result objects can only be of two types:
419     * <ul>
420     * <li>nodes of type T</li>
421     * <li>attribute results already wrapped in {@code QueryResult} objects</li>
422     * </ul>
423     * This method performs a corresponding cast. Warnings can be suppressed because of the implementation of the query
424     * functionality.
425     *
426     * @param resObj the query result object
427     * @param <T> the type of the result to be produced
428     * @return the {@code QueryResult}
429     */
430    @SuppressWarnings("unchecked")
431    private static <T> QueryResult<T> createResult(final Object resObj) {
432        if (resObj instanceof QueryResult) {
433            return (QueryResult<T>) resObj;
434        }
435        return QueryResult.createNodeResult((T) resObj);
436    }
437
438    // static initializer: registers the configuration node pointer factory
439    static {
440        JXPathContextReferenceImpl.addNodePointerFactory(new ConfigurationNodePointerFactory());
441    }
442}