001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *     https://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.commons.configuration2.convert;
018
019import java.util.Collection;
020import java.util.LinkedList;
021import java.util.List;
022
023import org.apache.commons.lang3.StringUtils;
024
025/**
026 * <p>
027 * The default implementation of the {@code ListDelimiterHandler} interface.
028 * </p>
029 * <p>
030 * This class supports list splitting and delimiter escaping using a delimiter character that can be specified when
031 * constructing an instance. Splitting of strings works by scanning the input for the list delimiter character. The list
032 * delimiter character can be escaped by a backslash. So, provided that a comma is configured as list delimiter, in the
033 * example {@code val1,val2,val3} three values are recognized. In {@code 3\,1415} the list delimiter is escaped so that
034 * only a single element is detected. (Note that when writing these examples in Java code, each backslash has to be
035 * doubled. This is also true for all other examples in this documentation.)
036 * </p>
037 * <p>
038 * Because the backslash has a special meaning as escaping character it is always treated in a special way. If it occurs
039 * as a normal character in a property value, it has to be escaped using another backslash (similar to the rules of the
040 * Java programming language). The following example shows the correct way to define windows network shares:
041 * {@code \\\\Server\\path}. Note that each backslash is doubled. When combining the list delimiter with backslashes the
042 * same escaping rules apply. For instance, in {@code C:\\Temp\\,D:\\data\\} the list delimiter is recognized; it is not
043 * escaped by the preceding backslash because this backslash is itself escaped. In contrast,
044 * {@code C:\\Temp\\\,D:\\data\\} defines a single element with a comma being part of the value; two backslashes after
045 * {@code Temp} result in a single one, the third backslash escapes the list delimiter.
046 * </p>
047 * <p>
048 * As can be seen, there are some constellations which are a bit tricky and cause a larger number of backslashes in
049 * sequence. Nevertheless, the escaping rules are consistent and do not cause ambiguous results.
050 * </p>
051 * <p>
 * Implementation note: An instance of this class can safely be shared between multiple {@code Configuration} instances.
053 * </p>
054 *
055 * @since 2.0
056 */
057public class DefaultListDelimiterHandler extends AbstractListDelimiterHandler {
058
059    /** Constant for the escape character. */
060    private static final char ESCAPE = '\\';
061
062    /**
063     * Constant for a buffer size for escaping strings. When a character is escaped the string becomes longer. Therefore,
064     * the output buffer is longer than the original string length. But we assume, that there are not too many characters
065     * that need to be escaped.
066     */
067    private static final int BUF_SIZE = 16;
068
069    /** Stores the list delimiter character. */
070    private final char delimiter;
071
072    /**
073     * Creates a new instance of {@code DefaultListDelimiterHandler} and sets the list delimiter character.
074     *
075     * @param listDelimiter the list delimiter character
076     */
077    public DefaultListDelimiterHandler(final char listDelimiter) {
078        delimiter = listDelimiter;
079    }
080
081    @Override
082    public Object escapeList(final List<?> values, final ValueTransformer transformer) {
083        final Object[] escapedValues = new Object[values.size()];
084        int idx = 0;
085        for (final Object v : values) {
086            escapedValues[idx++] = escape(v, transformer);
087        }
088        return StringUtils.join(escapedValues, getDelimiter());
089    }
090
091    @Override
092    protected String escapeString(final String s) {
093        final StringBuilder buf = new StringBuilder(s.length() + BUF_SIZE);
094        for (int i = 0; i < s.length(); i++) {
095            final char c = s.charAt(i);
096            if (c == getDelimiter() || c == ESCAPE) {
097                buf.append(ESCAPE);
098            }
099            buf.append(c);
100        }
101        return buf.toString();
102    }
103
104    /**
105     * Gets the list delimiter character used by this instance.
106     *
107     * @return the list delimiter character
108     */
109    public char getDelimiter() {
110        return delimiter;
111    }
112
113    /**
114     * {@inheritDoc} This implementation reverses the escaping done by the {@code escape()} methods of this class. However,
115     * it tries to be tolerant with unexpected escaping sequences: If after the escape character "\" no allowed character
116     * follows, both the backslash and the following character are output.
117     */
118    @Override
119    protected Collection<String> splitString(final String s, final boolean trim) {
120        final List<String> list = new LinkedList<>();
121        StringBuilder token = new StringBuilder();
122        boolean inEscape = false;
123
124        for (int i = 0; i < s.length(); i++) {
125            final char c = s.charAt(i);
126            if (inEscape) {
127                // last character was the escape marker
128                // can current character be escaped?
129                if (c != getDelimiter() && c != ESCAPE) {
130                    // no, also add escape character
131                    token.append(ESCAPE);
132                }
133                token.append(c);
134                inEscape = false;
135            } else if (c == getDelimiter()) {
136                // found a list delimiter -> add token and
137                // reset buffer
138                String t = token.toString();
139                if (trim) {
140                    t = t.trim();
141                }
142                list.add(t);
143                token = new StringBuilder();
144            } else if (c == ESCAPE) {
145                // potentially escape next character
146                inEscape = true;
147            } else {
148                token.append(c);
149            }
150        }
151
152        // Trailing delimiter?
153        if (inEscape) {
154            token.append(ESCAPE);
155        }
156        // Add last token
157        String t = token.toString();
158        if (trim) {
159            t = t.trim();
160        }
161        list.add(t);
162
163        return list;
164    }
165}