DefaultListDelimiterHandler.java

  1. /*
  2.  * Licensed to the Apache Software Foundation (ASF) under one or more
  3.  * contributor license agreements.  See the NOTICE file distributed with
  4.  * this work for additional information regarding copyright ownership.
  5.  * The ASF licenses this file to You under the Apache License, Version 2.0
  6.  * (the "License"); you may not use this file except in compliance with
  7.  * the License.  You may obtain a copy of the License at
  8.  *
  9.  *     http://www.apache.org/licenses/LICENSE-2.0
  10.  *
  11.  * Unless required by applicable law or agreed to in writing, software
  12.  * distributed under the License is distributed on an "AS IS" BASIS,
  13.  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14.  * See the License for the specific language governing permissions and
  15.  * limitations under the License.
  16.  */
  17. package org.apache.commons.configuration2.convert;

  18. import java.util.Collection;
  19. import java.util.LinkedList;
  20. import java.util.List;

  21. import org.apache.commons.lang3.StringUtils;

  22. /**
  23.  * <p>
  24.  * The default implementation of the {@code ListDelimiterHandler} interface.
  25.  * </p>
  26.  * <p>
  27.  * This class supports list splitting and delimiter escaping using a delimiter character that can be specified when
  28.  * constructing an instance. Splitting of strings works by scanning the input for the list delimiter character. The list
  29.  * delimiter character can be escaped by a backslash. So, provided that a comma is configured as list delimiter, in the
  30.  * example {@code val1,val2,val3} three values are recognized. In {@code 3\,1415} the list delimiter is escaped so that
  31.  * only a single element is detected. (Note that when writing these examples in Java code, each backslash has to be
  32.  * doubled. This is also true for all other examples in this documentation.)
  33.  * </p>
  34.  * <p>
  35.  * Because the backslash has a special meaning as escaping character it is always treated in a special way. If it occurs
  36.  * as a normal character in a property value, it has to be escaped using another backslash (similar to the rules of the
  37.  * Java programming language). The following example shows the correct way to define windows network shares:
  38.  * {@code \\\\Server\\path}. Note that each backslash is doubled. When combining the list delimiter with backslashes the
  39.  * same escaping rules apply. For instance, in {@code C:\\Temp\\,D:\\data\\} the list delimiter is recognized; it is not
  40.  * escaped by the preceding backslash because this backslash is itself escaped. In contrast,
  41.  * {@code C:\\Temp\\\,D:\\data\\} defines a single element with a comma being part of the value; two backslashes after
  42.  * {@code Temp} result in a single one, the third backslash escapes the list delimiter.
  43.  * </p>
  44.  * <p>
  45.  * As can be seen, there are some constellations which are a bit tricky and cause a larger number of backslashes in
  46.  * sequence. Nevertheless, the escaping rules are consistent and do not cause ambiguous results.
  47.  * </p>
  48.  * <p>
  49.  * Implementation node: An instance of this class can safely be shared between multiple {@code Configuration} instances.
  50.  * </p>
  51.  *
  52.  * @since 2.0
  53.  */
  54. public class DefaultListDelimiterHandler extends AbstractListDelimiterHandler {
  55.     /** Constant for the escape character. */
  56.     private static final char ESCAPE = '\\';

  57.     /**
  58.      * Constant for a buffer size for escaping strings. When a character is escaped the string becomes longer. Therefore,
  59.      * the output buffer is longer than the original string length. But we assume, that there are not too many characters
  60.      * that need to be escaped.
  61.      */
  62.     private static final int BUF_SIZE = 16;

  63.     /** Stores the list delimiter character. */
  64.     private final char delimiter;

  65.     /**
  66.      * Creates a new instance of {@code DefaultListDelimiterHandler} and sets the list delimiter character.
  67.      *
  68.      * @param listDelimiter the list delimiter character
  69.      */
  70.     public DefaultListDelimiterHandler(final char listDelimiter) {
  71.         delimiter = listDelimiter;
  72.     }

  73.     @Override
  74.     public Object escapeList(final List<?> values, final ValueTransformer transformer) {
  75.         final Object[] escapedValues = new Object[values.size()];
  76.         int idx = 0;
  77.         for (final Object v : values) {
  78.             escapedValues[idx++] = escape(v, transformer);
  79.         }
  80.         return StringUtils.join(escapedValues, getDelimiter());
  81.     }

  82.     @Override
  83.     protected String escapeString(final String s) {
  84.         final StringBuilder buf = new StringBuilder(s.length() + BUF_SIZE);
  85.         for (int i = 0; i < s.length(); i++) {
  86.             final char c = s.charAt(i);
  87.             if (c == getDelimiter() || c == ESCAPE) {
  88.                 buf.append(ESCAPE);
  89.             }
  90.             buf.append(c);
  91.         }
  92.         return buf.toString();
  93.     }

  94.     /**
  95.      * Gets the list delimiter character used by this instance.
  96.      *
  97.      * @return the list delimiter character
  98.      */
  99.     public char getDelimiter() {
  100.         return delimiter;
  101.     }

  102.     /**
  103.      * {@inheritDoc} This implementation reverses the escaping done by the {@code escape()} methods of this class. However,
  104.      * it tries to be tolerant with unexpected escaping sequences: If after the escape character "\" no allowed character
  105.      * follows, both the backslash and the following character are output.
  106.      */
  107.     @Override
  108.     protected Collection<String> splitString(final String s, final boolean trim) {
  109.         final List<String> list = new LinkedList<>();
  110.         StringBuilder token = new StringBuilder();
  111.         boolean inEscape = false;

  112.         for (int i = 0; i < s.length(); i++) {
  113.             final char c = s.charAt(i);
  114.             if (inEscape) {
  115.                 // last character was the escape marker
  116.                 // can current character be escaped?
  117.                 if (c != getDelimiter() && c != ESCAPE) {
  118.                     // no, also add escape character
  119.                     token.append(ESCAPE);
  120.                 }
  121.                 token.append(c);
  122.                 inEscape = false;
  123.             } else if (c == getDelimiter()) {
  124.                 // found a list delimiter -> add token and
  125.                 // reset buffer
  126.                 String t = token.toString();
  127.                 if (trim) {
  128.                     t = t.trim();
  129.                 }
  130.                 list.add(t);
  131.                 token = new StringBuilder();
  132.             } else if (c == ESCAPE) {
  133.                 // potentially escape next character
  134.                 inEscape = true;
  135.             } else {
  136.                 token.append(c);
  137.             }
  138.         }

  139.         // Trailing delimiter?
  140.         if (inEscape) {
  141.             token.append(ESCAPE);
  142.         }
  143.         // Add last token
  144.         String t = token.toString();
  145.         if (trim) {
  146.             t = t.trim();
  147.         }
  148.         list.add(t);

  149.         return list;
  150.     }
  151. }