View Javadoc

1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.commons.jexl2.parser;
18  
/**
 * Common constant strings utilities.
 * <p>
 * The methods of this class read JEXL string literals and handle escaping through the
 * 'backslash' (ie: \) character. Escaping is used to neutralize string delimiters (the single
 * and double quotes) and to read Unicode hexadecimal encoded characters.
 * </p>
 * <p>
 * The only escapable characters are the single and double quotes - ''' and '"' -,
 * a Unicode sequence starting with 'u' followed by 4 hexadecimal digits and
 * the backslash character - '\' - itself.
 * </p>
 * <p>
 * A sequence where '\' occurs before any non-escapable character or sequence has no effect, the
 * sequence output being the same as the input.
 * </p>
 */
public class StringParser {
    /** The length of an escaped unicode sequence. */
    private static final int UCHAR_LEN = 4;
    /** The number of bits encoded by one hexadecimal digit. */
    private static final int NIBBLE_BITS = 4;
    /** Initial shift value for composing a Unicode char from 4 nibbles (16 - 4). */
    private static final int SHIFT = 12;
    /** The base 10 offset used to convert hexa characters to decimal. */
    private static final int BASE10 = 10;
    /** The last 7bits ascii character. */
    private static final char LAST_ASCII = 127;
    /** The first printable 7bits ascii character. */
    private static final char FIRST_ASCII = 32;

    /** Default constructor.  */
    public StringParser() {
    }

    /**
     * Builds a string, handles escaping through '\' syntax.
     * <p>
     * When <code>eatsep</code> is true, the first character is taken as the separator and
     * both the first and the last characters are left out of the result; the sequence
     * must then hold at least one character.
     * </p>
     * @param str the string to build from
     * @param eatsep whether the separator, the first character, should be considered
     * @return the built string
     */
    public static String buildString(CharSequence str, boolean eatsep) {
        StringBuilder strb = new StringBuilder(str.length());
        char sep = eatsep ? str.charAt(0) : 0;
        int end = str.length() - (eatsep ? 1 : 0);
        int begin = (eatsep ? 1 : 0);
        read(strb, str, begin, end, sep);
        return strb.toString();
    }

    /**
     * Read the remainder of a string till a given separator,
     * handles escaping through '\' syntax.
     * <p>
     * When an unescaped separator is met, it is copied into the buffer and its
     * offset is returned; otherwise the length of the origin is returned.
     * </p>
     * @param strb the destination buffer to copy characters into
     * @param str the origin
     * @param index the offset into the origin
     * @param sep the separator, single or double quote, marking end of string
     * @return the offset in origin
     */
    public static int readString(StringBuilder strb, CharSequence str, int index, char sep) {
        return read(strb, str, index, str.length(), sep);
    }

    /**
     * Read the remainder of a string till a given separator,
     * handles escaping through '\' syntax.
     * @param strb the destination buffer to copy characters into
     * @param str the origin
     * @param begin the relative offset in str to begin reading
     * @param end the relative offset in str to end reading
     * @param sep the separator, single or double quote, marking end of string;
     *            0 means both quote characters are treated as escapable
     * @return the last character offset handled in origin
     */
    private static int read(StringBuilder strb, CharSequence str, int begin, int end, char sep) {
        boolean escape = false;
        int index = begin;
        for (; index < end; ++index) {
            char c = str.charAt(index);
            if (escape) {
                // a Unicode escape needs its 4 digits to fit before 'end'
                if (c == 'u' && (index + UCHAR_LEN) < end && readUnicodeChar(strb, str, index + 1) > 0) {
                    // skip the 4 digits; the loop increment skips the 'u'
                    index += UCHAR_LEN;
                } else {
                    // if c is not an escapable character, re-emit the backslash before it
                    boolean notSeparator = sep == 0 ? c != '\'' && c != '"' : c != sep;
                    if (notSeparator && c != '\\') {
                        strb.append('\\');
                    }
                    strb.append(c);
                }
                escape = false;
                continue;
            }
            if (c == '\\') {
                escape = true;
                continue;
            }
            strb.append(c);
            // the unescaped separator is copied, then reading stops
            if (c == sep) {
                break;
            }
        }
        return index;
    }

    /**
     * Converts one hexadecimal digit to its numeric value.
     * @param c the character to convert
     * @return the value between 0 and 15, or -1 if c is not one of 0-9, a-f, A-F
     */
    private static int hexValue(char c) {
        if (c >= '0' && c <= '9') {
            return c - '0';
        }
        if (c >= 'a' && c <= 'f') {
            return c - 'a' + BASE10;
        }
        if (c >= 'A' && c <= 'F') {
            return c - 'A' + BASE10;
        }
        return -1;
    }

    /**
     * Reads a Unicode escape character.
     * <p>
     * Nothing is written to the builder unless all 4 characters are hexadecimal digits.
     * </p>
     * @param strb the builder to write the character to
     * @param str the sequence
     * @param begin the begin offset in sequence (after the backslash and the 'u')
     * @return 0 if char could not be read, 4 otherwise
     */
    private static int readUnicodeChar(StringBuilder strb, CharSequence str, int begin) {
        char xc = 0;
        int bits = SHIFT;
        for (int offset = 0; offset < UCHAR_LEN; ++offset) {
            final int value = hexValue(str.charAt(begin + offset));
            if (value < 0) {
                // not a hexadecimal digit: let the caller re-emit the sequence untouched
                return 0;
            }
            // most significant nibble first
            xc |= value << bits;
            bits -= NIBBLE_BITS;
        }
        strb.append(xc);
        return UCHAR_LEN;
    }

    /**
     * Escapes a String representation, expand non-ASCII characters as Unicode escape sequence.
     * <p>
     * The result is surrounded by the delimiter. NUL characters are dropped; backspace, tab,
     * newline, form-feed, carriage-return, both quotes and the backslash are written as
     * backslash sequences; any other character outside the 32 to 127 range is written as a
     * backslash, a 'u' and 4 hexadecimal digits.
     * </p>
     * @param str the string to escape
     * @param delim the delimiter character written before and after the escaped content
     * @return the escaped representation, null if str is null
     */
    public static String escapeString(String str, char delim) {
        if (str == null) {
            return null;
        }
        final int length = str.length();
        StringBuilder strb = new StringBuilder(length + 2);
        strb.append(delim);
        for (int i = 0; i < length; ++i) {
            char c = str.charAt(i);
            switch (c) {
                case 0:
                    // NUL characters are not part of the output
                    continue;
                case '\b':
                    strb.append("\\b");
                    break;
                case '\t':
                    strb.append("\\t");
                    break;
                case '\n':
                    strb.append("\\n");
                    break;
                case '\f':
                    strb.append("\\f");
                    break;
                case '\r':
                    strb.append("\\r");
                    break;
                case '\"':
                    strb.append("\\\"");
                    break;
                case '\'':
                    strb.append("\\\'");
                    break;
                case '\\':
                    strb.append("\\\\");
                    break;
                default:
                    if (c >= FIRST_ASCII && c <= LAST_ASCII) {
                        strb.append(c);
                    } else {
                        // convert to Unicode escape sequence
                        strb.append('\\');
                        strb.append('u');
                        String hex = Integer.toHexString(c);
                        // left-pad with zeros up to 4 digits
                        for (int h = hex.length(); h < UCHAR_LEN; ++h) {
                            strb.append('0');
                        }
                        strb.append(hex);
                    }
            }
        }
        strb.append(delim);
        return strb.toString();
    }
}