View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *     https://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.commons.configuration2.convert;
18  
19  import java.util.Collection;
20  import java.util.LinkedList;
21  import java.util.List;
22  
23  import org.apache.commons.lang3.StringUtils;
24  
25  /**
26   * <p>
27   * The default implementation of the {@code ListDelimiterHandler} interface.
28   * </p>
29   * <p>
30   * This class supports list splitting and delimiter escaping using a delimiter character that can be specified when
31   * constructing an instance. Splitting of strings works by scanning the input for the list delimiter character. The list
32   * delimiter character can be escaped by a backslash. So, provided that a comma is configured as list delimiter, in the
33   * example {@code val1,val2,val3} three values are recognized. In {@code 3\,1415} the list delimiter is escaped so that
34   * only a single element is detected. (Note that when writing these examples in Java code, each backslash has to be
35   * doubled. This is also true for all other examples in this documentation.)
36   * </p>
37   * <p>
38   * Because the backslash has a special meaning as escaping character it is always treated in a special way. If it occurs
39   * as a normal character in a property value, it has to be escaped using another backslash (similar to the rules of the
40   * Java programming language). The following example shows the correct way to define Windows network shares:
41   * {@code \\\\Server\\path}. Note that each backslash is doubled. When combining the list delimiter with backslashes the
42   * same escaping rules apply. For instance, in {@code C:\\Temp\\,D:\\data\\} the list delimiter is recognized; it is not
43   * escaped by the preceding backslash because this backslash is itself escaped. In contrast,
44   * {@code C:\\Temp\\\,D:\\data\\} defines a single element with a comma being part of the value; two backslashes after
45   * {@code Temp} result in a single one, the third backslash escapes the list delimiter.
46   * </p>
47   * <p>
48   * As can be seen, there are some constellations which are a bit tricky and cause a larger number of backslashes in
49   * sequence. Nevertheless, the escaping rules are consistent and do not cause ambiguous results.
50   * </p>
51   * <p>
52   * Implementation note: An instance of this class can safely be shared between multiple {@code Configuration} instances.
53   * </p>
54   *
55   * @since 2.0
56   */
57  public class DefaultListDelimiterHandler extends AbstractListDelimiterHandler {
58  
59      /** Constant for the escape character. */
60      private static final char ESCAPE = '\\';
61  
62      /**
63       * Constant for a buffer size for escaping strings. When a character is escaped the string becomes longer. Therefore,
64       * the output buffer is longer than the original string length. But we assume, that there are not too many characters
65       * that need to be escaped.
66       */
67      private static final int BUF_SIZE = 16;
68  
69      /** Stores the list delimiter character. */
70      private final char delimiter;
71  
72      /**
73       * Creates a new instance of {@code DefaultListDelimiterHandler} and sets the list delimiter character.
74       *
75       * @param listDelimiter the list delimiter character
76       */
77      public DefaultListDelimiterHandler(final char listDelimiter) {
78          delimiter = listDelimiter;
79      }
80  
81      @Override
82      public Object escapeList(final List<?> values, final ValueTransformer transformer) {
83          final Object[] escapedValues = new Object[values.size()];
84          int idx = 0;
85          for (final Object v : values) {
86              escapedValues[idx++] = escape(v, transformer);
87          }
88          return StringUtils.join(escapedValues, getDelimiter());
89      }
90  
91      @Override
92      protected String escapeString(final String s) {
93          final StringBuilder buf = new StringBuilder(s.length() + BUF_SIZE);
94          for (int i = 0; i < s.length(); i++) {
95              final char c = s.charAt(i);
96              if (c == getDelimiter() || c == ESCAPE) {
97                  buf.append(ESCAPE);
98              }
99              buf.append(c);
100         }
101         return buf.toString();
102     }
103 
104     /**
105      * Gets the list delimiter character used by this instance.
106      *
107      * @return the list delimiter character
108      */
109     public char getDelimiter() {
110         return delimiter;
111     }
112 
113     /**
114      * {@inheritDoc} This implementation reverses the escaping done by the {@code escape()} methods of this class. However,
115      * it tries to be tolerant with unexpected escaping sequences: If after the escape character "\" no allowed character
116      * follows, both the backslash and the following character are output.
117      */
118     @Override
119     protected Collection<String> splitString(final String s, final boolean trim) {
120         final List<String> list = new LinkedList<>();
121         StringBuilder token = new StringBuilder();
122         boolean inEscape = false;
123 
124         for (int i = 0; i < s.length(); i++) {
125             final char c = s.charAt(i);
126             if (inEscape) {
127                 // last character was the escape marker
128                 // can current character be escaped?
129                 if (c != getDelimiter() && c != ESCAPE) {
130                     // no, also add escape character
131                     token.append(ESCAPE);
132                 }
133                 token.append(c);
134                 inEscape = false;
135             } else if (c == getDelimiter()) {
136                 // found a list delimiter -> add token and
137                 // reset buffer
138                 String t = token.toString();
139                 if (trim) {
140                     t = t.trim();
141                 }
142                 list.add(t);
143                 token = new StringBuilder();
144             } else if (c == ESCAPE) {
145                 // potentially escape next character
146                 inEscape = true;
147             } else {
148                 token.append(c);
149             }
150         }
151 
152         // Trailing delimiter?
153         if (inEscape) {
154             token.append(ESCAPE);
155         }
156         // Add last token
157         String t = token.toString();
158         if (trim) {
159             t = t.trim();
160         }
161         list.add(t);
162 
163         return list;
164     }
165 }