001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * https://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017package org.apache.commons.configuration2.convert; 018 019import java.util.Collection; 020import java.util.LinkedList; 021import java.util.List; 022 023import org.apache.commons.lang3.StringUtils; 024 025/** 026 * <p> 027 * The default implementation of the {@code ListDelimiterHandler} interface. 028 * </p> 029 * <p> 030 * This class supports list splitting and delimiter escaping using a delimiter character that can be specified when 031 * constructing an instance. Splitting of strings works by scanning the input for the list delimiter character. The list 032 * delimiter character can be escaped by a backslash. So, provided that a comma is configured as list delimiter, in the 033 * example {@code val1,val2,val3} three values are recognized. In {@code 3\,1415} the list delimiter is escaped so that 034 * only a single element is detected. (Note that when writing these examples in Java code, each backslash has to be 035 * doubled. This is also true for all other examples in this documentation.) 036 * </p> 037 * <p> 038 * Because the backslash has a special meaning as escaping character it is always treated in a special way. If it occurs 039 * as a normal character in a property value, it has to be escaped using another backslash (similar to the rules of the 040 * Java programming language). The following example shows the correct way to define windows network shares: 041 * {@code \\\\Server\\path}. Note that each backslash is doubled. When combining the list delimiter with backslashes the 042 * same escaping rules apply. For instance, in {@code C:\\Temp\\,D:\\data\\} the list delimiter is recognized; it is not 043 * escaped by the preceding backslash because this backslash is itself escaped. In contrast, 044 * {@code C:\\Temp\\\,D:\\data\\} defines a single element with a comma being part of the value; two backslashes after 045 * {@code Temp} result in a single one, the third backslash escapes the list delimiter. 046 * </p> 047 * <p> 048 * As can be seen, there are some constellations which are a bit tricky and cause a larger number of backslashes in 049 * sequence. Nevertheless, the escaping rules are consistent and do not cause ambiguous results. 050 * </p> 051 * <p> 052 * Implementation node: An instance of this class can safely be shared between multiple {@code Configuration} instances. 053 * </p> 054 * 055 * @since 2.0 056 */ 057public class DefaultListDelimiterHandler extends AbstractListDelimiterHandler { 058 059 /** Constant for the escape character. */ 060 private static final char ESCAPE = '\\'; 061 062 /** 063 * Constant for a buffer size for escaping strings. When a character is escaped the string becomes longer. Therefore, 064 * the output buffer is longer than the original string length. But we assume, that there are not too many characters 065 * that need to be escaped. 066 */ 067 private static final int BUF_SIZE = 16; 068 069 /** Stores the list delimiter character. */ 070 private final char delimiter; 071 072 /** 073 * Creates a new instance of {@code DefaultListDelimiterHandler} and sets the list delimiter character. 074 * 075 * @param listDelimiter the list delimiter character 076 */ 077 public DefaultListDelimiterHandler(final char listDelimiter) { 078 delimiter = listDelimiter; 079 } 080 081 @Override 082 public Object escapeList(final List<?> values, final ValueTransformer transformer) { 083 final Object[] escapedValues = new Object[values.size()]; 084 int idx = 0; 085 for (final Object v : values) { 086 escapedValues[idx++] = escape(v, transformer); 087 } 088 return StringUtils.join(escapedValues, getDelimiter()); 089 } 090 091 @Override 092 protected String escapeString(final String s) { 093 final StringBuilder buf = new StringBuilder(s.length() + BUF_SIZE); 094 for (int i = 0; i < s.length(); i++) { 095 final char c = s.charAt(i); 096 if (c == getDelimiter() || c == ESCAPE) { 097 buf.append(ESCAPE); 098 } 099 buf.append(c); 100 } 101 return buf.toString(); 102 } 103 104 /** 105 * Gets the list delimiter character used by this instance. 106 * 107 * @return the list delimiter character 108 */ 109 public char getDelimiter() { 110 return delimiter; 111 } 112 113 /** 114 * {@inheritDoc} This implementation reverses the escaping done by the {@code escape()} methods of this class. However, 115 * it tries to be tolerant with unexpected escaping sequences: If after the escape character "\" no allowed character 116 * follows, both the backslash and the following character are output. 117 */ 118 @Override 119 protected Collection<String> splitString(final String s, final boolean trim) { 120 final List<String> list = new LinkedList<>(); 121 StringBuilder token = new StringBuilder(); 122 boolean inEscape = false; 123 124 for (int i = 0; i < s.length(); i++) { 125 final char c = s.charAt(i); 126 if (inEscape) { 127 // last character was the escape marker 128 // can current character be escaped? 129 if (c != getDelimiter() && c != ESCAPE) { 130 // no, also add escape character 131 token.append(ESCAPE); 132 } 133 token.append(c); 134 inEscape = false; 135 } else if (c == getDelimiter()) { 136 // found a list delimiter -> add token and 137 // reset buffer 138 String t = token.toString(); 139 if (trim) { 140 t = t.trim(); 141 } 142 list.add(t); 143 token = new StringBuilder(); 144 } else if (c == ESCAPE) { 145 // potentially escape next character 146 inEscape = true; 147 } else { 148 token.append(c); 149 } 150 } 151 152 // Trailing delimiter? 153 if (inEscape) { 154 token.append(ESCAPE); 155 } 156 // Add last token 157 String t = token.toString(); 158 if (trim) { 159 t = t.trim(); 160 } 161 list.add(t); 162 163 return list; 164 } 165}