1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * https://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18 package org.apache.commons.codec.net;
19
20 import java.io.ByteArrayOutputStream;
21 import java.io.UnsupportedEncodingException;
22 import java.util.BitSet;
23
24 import org.apache.commons.codec.BinaryDecoder;
25 import org.apache.commons.codec.BinaryEncoder;
26 import org.apache.commons.codec.CharEncoding;
27 import org.apache.commons.codec.DecoderException;
28 import org.apache.commons.codec.EncoderException;
29 import org.apache.commons.codec.StringDecoder;
30 import org.apache.commons.codec.StringEncoder;
31 import org.apache.commons.codec.binary.StringUtils;
32
33 /**
34 * Implements the 'www-form-urlencoded' encoding scheme, also misleadingly known as URL encoding.
35 * <p>
36 * This codec is meant to be a replacement for standard Java classes {@link java.net.URLEncoder} and
37 * {@link java.net.URLDecoder} on older Java platforms, as these classes in Java versions below
38 * 1.4 rely on the platform's default charset encoding.
39 * </p>
40 * <p>
41 * This class is thread-safe as of 1.11
42 * </p>
43 *
44 * @see <a href="http://www.w3.org/TR/html4/interact/forms.html#h-17.13.4.1">Chapter 17.13.4 Form content types</a>
45 * of the <a href="http://www.w3.org/TR/html4/">HTML 4.01 Specification</a>
46 *
47 * @since 1.2
48 */
49 public class URLCodec implements BinaryEncoder, BinaryDecoder, StringEncoder, StringDecoder {
50
51 /**
52 * Release 1.5 made this field final.
53 */
54 protected static final byte ESCAPE_CHAR = '%';
55
56 /**
57 * BitSet of www-form-url safe characters.
58 * This is a copy of the internal BitSet which is now used for the conversion.
59 * Changes to this field are ignored.
60 * @deprecated 1.11 Will be removed in 2.0 (CODEC-230)
61 */
62 @Deprecated
63 protected static final BitSet WWW_FORM_URL;
64
65 private static final BitSet WWW_FORM_URL_SAFE = new BitSet(256);
66
67 // Static initializer for www_form_url
68 static {
69 // alpha characters
70 for (int i = 'a'; i <= 'z'; i++) {
71 WWW_FORM_URL_SAFE.set(i);
72 }
73 for (int i = 'A'; i <= 'Z'; i++) {
74 WWW_FORM_URL_SAFE.set(i);
75 }
76 // numeric characters
77 for (int i = '0'; i <= '9'; i++) {
78 WWW_FORM_URL_SAFE.set(i);
79 }
80 // special chars
81 WWW_FORM_URL_SAFE.set('-');
82 WWW_FORM_URL_SAFE.set('_');
83 WWW_FORM_URL_SAFE.set('.');
84 WWW_FORM_URL_SAFE.set('*');
85 // blank to be replaced with +
86 WWW_FORM_URL_SAFE.set(' ');
87
88 // Create a copy in case anyone (ab)uses it
89 WWW_FORM_URL = (BitSet) WWW_FORM_URL_SAFE.clone();
90 }
91
92 /**
93 * Decodes an array of URL safe 7-bit characters into an array of original bytes. Escaped characters are converted
94 * back to their original representation.
95 *
96 * @param bytes
97 * array of URL safe characters
98 * @return array of original bytes
99 * @throws DecoderException
100 * Thrown if URL decoding is unsuccessful
101 */
102 public static final byte[] decodeUrl(final byte[] bytes) throws DecoderException {
103 if (bytes == null) {
104 return null;
105 }
106 final ByteArrayOutputStream buffer = new ByteArrayOutputStream();
107 for (int i = 0; i < bytes.length; i++) {
108 final int b = bytes[i];
109 if (b == '+') {
110 buffer.write(' ');
111 } else if (b == ESCAPE_CHAR) {
112 try {
113 final int u = Utils.digit16(bytes[++i]);
114 final int l = Utils.digit16(bytes[++i]);
115 buffer.write((char) ((u << 4) + l));
116 } catch (final ArrayIndexOutOfBoundsException e) {
117 throw new DecoderException("Invalid URL encoding: ", e);
118 }
119 } else {
120 buffer.write(b);
121 }
122 }
123 return buffer.toByteArray();
124 }
125
126 /**
127 * Encodes an array of bytes into an array of URL safe 7-bit characters. Unsafe characters are escaped.
128 *
129 * @param urlsafe
130 * bitset of characters deemed URL safe
131 * @param bytes
132 * array of bytes to convert to URL safe characters
133 * @return array of bytes containing URL safe characters
134 */
135 public static final byte[] encodeUrl(BitSet urlsafe, final byte[] bytes) {
136 if (bytes == null) {
137 return null;
138 }
139 if (urlsafe == null) {
140 urlsafe = WWW_FORM_URL_SAFE;
141 }
142
143 final ByteArrayOutputStream buffer = new ByteArrayOutputStream();
144 for (final byte c : bytes) {
145 int b = c;
146 if (b < 0) {
147 b = 256 + b;
148 }
149 if (urlsafe.get(b)) {
150 if (b == ' ') {
151 b = '+';
152 }
153 buffer.write(b);
154 } else {
155 buffer.write(ESCAPE_CHAR);
156 final char hex1 = Utils.hexChar(b >> 4);
157 final char hex2 = Utils.hexChar(b);
158 buffer.write(hex1);
159 buffer.write(hex2);
160 }
161 }
162 return buffer.toByteArray();
163 }
164
165 /**
166 * The default charset used for string decoding and encoding.
167 *
168 * @deprecated TODO: This field will be changed to a private final Charset in 2.0. (CODEC-126)
169 */
170 @Deprecated
171 protected volatile String charset; // added volatile: see CODEC-232
172
173 /**
174 * Default constructor.
175 */
176 public URLCodec() {
177 this(CharEncoding.UTF_8);
178 }
179
180 /**
181 * Constructor which allows for the selection of a default charset.
182 *
183 * @param charset the default string charset to use.
184 */
185 public URLCodec(final String charset) {
186 this.charset = charset;
187 }
188
189 /**
190 * Decodes an array of URL safe 7-bit characters into an array of original bytes. Escaped characters are converted
191 * back to their original representation.
192 *
193 * @param bytes
194 * array of URL safe characters
195 * @return array of original bytes
196 * @throws DecoderException
197 * Thrown if URL decoding is unsuccessful
198 */
199 @Override
200 public byte[] decode(final byte[] bytes) throws DecoderException {
201 return decodeUrl(bytes);
202 }
203
204 /**
205 * Decodes a URL safe object into its original form. Escaped characters are converted back to their original
206 * representation.
207 *
208 * @param obj
209 * URL safe object to convert into its original form
210 * @return original object
211 * @throws DecoderException
212 * Thrown if the argument is not a {@code String} or {@code byte[]}. Thrown if a failure
213 * condition is encountered during the decode process.
214 */
215 @Override
216 public Object decode(final Object obj) throws DecoderException {
217 if (obj == null) {
218 return null;
219 }
220 if (obj instanceof byte[]) {
221 return decode((byte[]) obj);
222 }
223 if (obj instanceof String) {
224 return decode((String) obj);
225 }
226 throw new DecoderException("Objects of type " + obj.getClass().getName() + " cannot be URL decoded");
227 }
228
229 /**
230 * Decodes a URL safe string into its original form using the default string charset. Escaped characters are
231 * converted back to their original representation.
232 *
233 * @param str
234 * URL safe string to convert into its original form
235 * @return original string
236 * @throws DecoderException
237 * Thrown if URL decoding is unsuccessful
238 * @see #getDefaultCharset()
239 */
240 @Override
241 public String decode(final String str) throws DecoderException {
242 if (str == null) {
243 return null;
244 }
245 try {
246 return decode(str, getDefaultCharset());
247 } catch (final UnsupportedEncodingException e) {
248 throw new DecoderException(e.getMessage(), e);
249 }
250 }
251
252 /**
253 * Decodes a URL safe string into its original form using the specified encoding. Escaped characters are converted
254 * back to their original representation.
255 *
256 * @param str
257 * URL safe string to convert into its original form
258 * @param charsetName
259 * the original string charset
260 * @return original string
261 * @throws DecoderException
262 * Thrown if URL decoding is unsuccessful
263 * @throws UnsupportedEncodingException
264 * Thrown if charset is not supported
265 */
266 public String decode(final String str, final String charsetName)
267 throws DecoderException, UnsupportedEncodingException {
268 if (str == null) {
269 return null;
270 }
271 return new String(decode(StringUtils.getBytesUsAscii(str)), charsetName);
272 }
273
274 /**
275 * Encodes an array of bytes into an array of URL safe 7-bit characters. Unsafe characters are escaped.
276 *
277 * @param bytes
278 * array of bytes to convert to URL safe characters
279 * @return array of bytes containing URL safe characters
280 */
281 @Override
282 public byte[] encode(final byte[] bytes) {
283 return encodeUrl(WWW_FORM_URL_SAFE, bytes);
284 }
285
286 /**
287 * Encodes an object into its URL safe form. Unsafe characters are escaped.
288 *
289 * @param obj
290 * string to convert to a URL safe form
291 * @return URL safe object
292 * @throws EncoderException
293 * Thrown if URL encoding is not applicable to objects of this type or if encoding is unsuccessful
294 */
295 @Override
296 public Object encode(final Object obj) throws EncoderException {
297 if (obj == null) {
298 return null;
299 }
300 if (obj instanceof byte[]) {
301 return encode((byte[]) obj);
302 }
303 if (obj instanceof String) {
304 return encode((String) obj);
305 }
306 throw new EncoderException("Objects of type " + obj.getClass().getName() + " cannot be URL encoded");
307 }
308
309 /**
310 * Encodes a string into its URL safe form using the default string charset. Unsafe characters are escaped.
311 *
312 * @param str
313 * string to convert to a URL safe form
314 * @return URL safe string
315 * @throws EncoderException
316 * Thrown if URL encoding is unsuccessful
317 *
318 * @see #getDefaultCharset()
319 */
320 @Override
321 public String encode(final String str) throws EncoderException {
322 if (str == null) {
323 return null;
324 }
325 try {
326 return encode(str, getDefaultCharset());
327 } catch (final UnsupportedEncodingException e) {
328 throw new EncoderException(e.getMessage(), e);
329 }
330 }
331
332 /**
333 * Encodes a string into its URL safe form using the specified string charset. Unsafe characters are escaped.
334 *
335 * @param str
336 * string to convert to a URL safe form
337 * @param charsetName
338 * the charset for str
339 * @return URL safe string
340 * @throws UnsupportedEncodingException
341 * Thrown if charset is not supported
342 */
343 public String encode(final String str, final String charsetName) throws UnsupportedEncodingException {
344 if (str == null) {
345 return null;
346 }
347 return StringUtils.newStringUsAscii(encode(str.getBytes(charsetName)));
348 }
349
350 /**
351 * The default charset used for string decoding and encoding.
352 *
353 * @return the default string charset.
354 */
355 public String getDefaultCharset() {
356 return this.charset;
357 }
358
359 /**
360 * The {@code String} encoding used for decoding and encoding.
361 *
362 * @return the encoding.
363 * @deprecated Use {@link #getDefaultCharset()}, will be removed in 2.0.
364 */
365 @Deprecated
366 public String getEncoding() {
367 return this.charset;
368 }
369
370 }