1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18 package org.apache.commons.codec.net;
19
20 import java.io.ByteArrayOutputStream;
21 import java.io.UnsupportedEncodingException;
22 import java.util.BitSet;
23
24 import org.apache.commons.codec.BinaryDecoder;
25 import org.apache.commons.codec.BinaryEncoder;
26 import org.apache.commons.codec.CharEncoding;
27 import org.apache.commons.codec.DecoderException;
28 import org.apache.commons.codec.EncoderException;
29 import org.apache.commons.codec.StringDecoder;
30 import org.apache.commons.codec.StringEncoder;
31 import org.apache.commons.codec.binary.StringUtils;
32
33 /**
34 * Implements the 'www-form-urlencoded' encoding scheme, also misleadingly known as URL encoding.
35 * <p>
36 * This codec is meant to be a replacement for standard Java classes {@link java.net.URLEncoder} and
37 * {@link java.net.URLDecoder} on older Java platforms, as these classes in Java versions below
38 * 1.4 rely on the platform's default charset encoding.
39 * <p>
40 * This class is immutable and thread-safe.
41 *
42 * @see <a href="http://www.w3.org/TR/html4/interact/forms.html#h-17.13.4.1">Chapter 17.13.4 Form content types</a>
43 * of the <a href="http://www.w3.org/TR/html4/">HTML 4.01 Specification</a>
44 *
45 * @since 1.2
46 * @version $Id: URLCodec.html 889935 2013-12-11 05:05:13Z ggregory $
47 */
48 public class URLCodec implements BinaryEncoder, BinaryDecoder, StringEncoder, StringDecoder {
49
50 /**
51 * Radix used in encoding and decoding.
52 */
53 static final int RADIX = 16;
54
55 /**
56 * The default charset used for string decoding and encoding.
57 *
58 * @deprecated TODO: This field will be changed to a private final Charset in 2.0.
59 */
60 @Deprecated
61 protected String charset;
62
63 /**
64 * Release 1.5 made this field final.
65 */
66 protected static final byte ESCAPE_CHAR = '%';
67 /**
68 * BitSet of www-form-url safe characters.
69 */
70 protected static final BitSet WWW_FORM_URL = new BitSet(256);
71
72 // Static initializer for www_form_url
73 static {
74 // alpha characters
75 for (int i = 'a'; i <= 'z'; i++) {
76 WWW_FORM_URL.set(i);
77 }
78 for (int i = 'A'; i <= 'Z'; i++) {
79 WWW_FORM_URL.set(i);
80 }
81 // numeric characters
82 for (int i = '0'; i <= '9'; i++) {
83 WWW_FORM_URL.set(i);
84 }
85 // special chars
86 WWW_FORM_URL.set('-');
87 WWW_FORM_URL.set('_');
88 WWW_FORM_URL.set('.');
89 WWW_FORM_URL.set('*');
90 // blank to be replaced with +
91 WWW_FORM_URL.set(' ');
92 }
93
94
95 /**
96 * Default constructor.
97 */
98 public URLCodec() {
99 this(CharEncoding.UTF_8);
100 }
101
102 /**
103 * Constructor which allows for the selection of a default charset.
104 *
105 * @param charset the default string charset to use.
106 */
107 public URLCodec(final String charset) {
108 super();
109 this.charset = charset;
110 }
111
112 /**
113 * Encodes an array of bytes into an array of URL safe 7-bit characters. Unsafe characters are escaped.
114 *
115 * @param urlsafe
116 * bitset of characters deemed URL safe
117 * @param bytes
118 * array of bytes to convert to URL safe characters
119 * @return array of bytes containing URL safe characters
120 */
121 public static final byte[] encodeUrl(BitSet urlsafe, final byte[] bytes) {
122 if (bytes == null) {
123 return null;
124 }
125 if (urlsafe == null) {
126 urlsafe = WWW_FORM_URL;
127 }
128
129 final ByteArrayOutputStream buffer = new ByteArrayOutputStream();
130 for (final byte c : bytes) {
131 int b = c;
132 if (b < 0) {
133 b = 256 + b;
134 }
135 if (urlsafe.get(b)) {
136 if (b == ' ') {
137 b = '+';
138 }
139 buffer.write(b);
140 } else {
141 buffer.write(ESCAPE_CHAR);
142 final char hex1 = Character.toUpperCase(Character.forDigit((b >> 4) & 0xF, RADIX));
143 final char hex2 = Character.toUpperCase(Character.forDigit(b & 0xF, RADIX));
144 buffer.write(hex1);
145 buffer.write(hex2);
146 }
147 }
148 return buffer.toByteArray();
149 }
150
151 /**
152 * Decodes an array of URL safe 7-bit characters into an array of original bytes. Escaped characters are converted
153 * back to their original representation.
154 *
155 * @param bytes
156 * array of URL safe characters
157 * @return array of original bytes
158 * @throws DecoderException
159 * Thrown if URL decoding is unsuccessful
160 */
161 public static final byte[] decodeUrl(final byte[] bytes) throws DecoderException {
162 if (bytes == null) {
163 return null;
164 }
165 final ByteArrayOutputStream buffer = new ByteArrayOutputStream();
166 for (int i = 0; i < bytes.length; i++) {
167 final int b = bytes[i];
168 if (b == '+') {
169 buffer.write(' ');
170 } else if (b == ESCAPE_CHAR) {
171 try {
172 final int u = Utils.digit16(bytes[++i]);
173 final int l = Utils.digit16(bytes[++i]);
174 buffer.write((char) ((u << 4) + l));
175 } catch (final ArrayIndexOutOfBoundsException e) {
176 throw new DecoderException("Invalid URL encoding: ", e);
177 }
178 } else {
179 buffer.write(b);
180 }
181 }
182 return buffer.toByteArray();
183 }
184
185 /**
186 * Encodes an array of bytes into an array of URL safe 7-bit characters. Unsafe characters are escaped.
187 *
188 * @param bytes
189 * array of bytes to convert to URL safe characters
190 * @return array of bytes containing URL safe characters
191 */
192 @Override
193 public byte[] encode(final byte[] bytes) {
194 return encodeUrl(WWW_FORM_URL, bytes);
195 }
196
197
198 /**
199 * Decodes an array of URL safe 7-bit characters into an array of original bytes. Escaped characters are converted
200 * back to their original representation.
201 *
202 * @param bytes
203 * array of URL safe characters
204 * @return array of original bytes
205 * @throws DecoderException
206 * Thrown if URL decoding is unsuccessful
207 */
208 @Override
209 public byte[] decode(final byte[] bytes) throws DecoderException {
210 return decodeUrl(bytes);
211 }
212
213 /**
214 * Encodes a string into its URL safe form using the specified string charset. Unsafe characters are escaped.
215 *
216 * @param str
217 * string to convert to a URL safe form
218 * @param charset
219 * the charset for str
220 * @return URL safe string
221 * @throws UnsupportedEncodingException
222 * Thrown if charset is not supported
223 */
224 public String encode(final String str, final String charset) throws UnsupportedEncodingException {
225 if (str == null) {
226 return null;
227 }
228 return StringUtils.newStringUsAscii(encode(str.getBytes(charset)));
229 }
230
231 /**
232 * Encodes a string into its URL safe form using the default string charset. Unsafe characters are escaped.
233 *
234 * @param str
235 * string to convert to a URL safe form
236 * @return URL safe string
237 * @throws EncoderException
238 * Thrown if URL encoding is unsuccessful
239 *
240 * @see #getDefaultCharset()
241 */
242 @Override
243 public String encode(final String str) throws EncoderException {
244 if (str == null) {
245 return null;
246 }
247 try {
248 return encode(str, getDefaultCharset());
249 } catch (final UnsupportedEncodingException e) {
250 throw new EncoderException(e.getMessage(), e);
251 }
252 }
253
254
255 /**
256 * Decodes a URL safe string into its original form using the specified encoding. Escaped characters are converted
257 * back to their original representation.
258 *
259 * @param str
260 * URL safe string to convert into its original form
261 * @param charset
262 * the original string charset
263 * @return original string
264 * @throws DecoderException
265 * Thrown if URL decoding is unsuccessful
266 * @throws UnsupportedEncodingException
267 * Thrown if charset is not supported
268 */
269 public String decode(final String str, final String charset) throws DecoderException, UnsupportedEncodingException {
270 if (str == null) {
271 return null;
272 }
273 return new String(decode(StringUtils.getBytesUsAscii(str)), charset);
274 }
275
276 /**
277 * Decodes a URL safe string into its original form using the default string charset. Escaped characters are
278 * converted back to their original representation.
279 *
280 * @param str
281 * URL safe string to convert into its original form
282 * @return original string
283 * @throws DecoderException
284 * Thrown if URL decoding is unsuccessful
285 * @see #getDefaultCharset()
286 */
287 @Override
288 public String decode(final String str) throws DecoderException {
289 if (str == null) {
290 return null;
291 }
292 try {
293 return decode(str, getDefaultCharset());
294 } catch (final UnsupportedEncodingException e) {
295 throw new DecoderException(e.getMessage(), e);
296 }
297 }
298
299 /**
300 * Encodes an object into its URL safe form. Unsafe characters are escaped.
301 *
302 * @param obj
303 * string to convert to a URL safe form
304 * @return URL safe object
305 * @throws EncoderException
306 * Thrown if URL encoding is not applicable to objects of this type or if encoding is unsuccessful
307 */
308 @Override
309 public Object encode(final Object obj) throws EncoderException {
310 if (obj == null) {
311 return null;
312 } else if (obj instanceof byte[]) {
313 return encode((byte[])obj);
314 } else if (obj instanceof String) {
315 return encode((String)obj);
316 } else {
317 throw new EncoderException("Objects of type " + obj.getClass().getName() + " cannot be URL encoded");
318
319 }
320 }
321
322 /**
323 * Decodes a URL safe object into its original form. Escaped characters are converted back to their original
324 * representation.
325 *
326 * @param obj
327 * URL safe object to convert into its original form
328 * @return original object
329 * @throws DecoderException
330 * Thrown if the argument is not a <code>String</code> or <code>byte[]</code>. Thrown if a failure
331 * condition is encountered during the decode process.
332 */
333 @Override
334 public Object decode(final Object obj) throws DecoderException {
335 if (obj == null) {
336 return null;
337 } else if (obj instanceof byte[]) {
338 return decode((byte[]) obj);
339 } else if (obj instanceof String) {
340 return decode((String) obj);
341 } else {
342 throw new DecoderException("Objects of type " + obj.getClass().getName() + " cannot be URL decoded");
343
344 }
345 }
346
347 /**
348 * The default charset used for string decoding and encoding.
349 *
350 * @return the default string charset.
351 */
352 public String getDefaultCharset() {
353 return this.charset;
354 }
355
356 /**
357 * The <code>String</code> encoding used for decoding and encoding.
358 *
359 * @return Returns the encoding.
360 *
361 * @deprecated Use {@link #getDefaultCharset()}, will be removed in 2.0.
362 */
363 @Deprecated
364 public String getEncoding() {
365 return this.charset;
366 }
367
368 }