1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18 package org.apache.commons.codec.net;
19
20 import java.io.ByteArrayOutputStream;
21 import java.io.UnsupportedEncodingException;
22 import java.util.BitSet;
23
24 import org.apache.commons.codec.BinaryDecoder;
25 import org.apache.commons.codec.BinaryEncoder;
26 import org.apache.commons.codec.CharEncoding;
27 import org.apache.commons.codec.DecoderException;
28 import org.apache.commons.codec.EncoderException;
29 import org.apache.commons.codec.StringDecoder;
30 import org.apache.commons.codec.StringEncoder;
31 import org.apache.commons.codec.binary.StringUtils;
32
33 /**
34 * <p>Implements the 'www-form-urlencoded' encoding scheme,
35 * also misleadingly known as URL encoding.</p>
36 *
37 * <p>For more detailed information please refer to
38 * <a href="http://www.w3.org/TR/html4/interact/forms.html#h-17.13.4.1">
39 * Chapter 17.13.4 'Form content types'</a> of the
40 * <a href="http://www.w3.org/TR/html4/">HTML 4.01 Specification<a></p>
41 *
42 * <p>
43 * This codec is meant to be a replacement for standard Java classes
44 * {@link java.net.URLEncoder} and {@link java.net.URLDecoder}
45 * on older Java platforms, as these classes in Java versions below
46 * 1.4 rely on the platform's default charset encoding.
47 * </p>
48 *
49 * @author Apache Software Foundation
50 * @since 1.2
51 * @version $Id: URLCodec.java 1201529 2011-11-13 21:57:16Z ggregory $
52 */
53 public class URLCodec implements BinaryEncoder, BinaryDecoder, StringEncoder, StringDecoder {
54
55 /**
56 * Radix used in encoding and decoding.
57 */
58 static final int RADIX = 16;
59
60 /**
61 * The default charset used for string decoding and encoding.
62 *
63 * TODO: This field will be final in 2.0.
64 */
65 protected String charset;
66
67 /**
68 * Release 1.5 made this field final.
69 */
70 protected static final byte ESCAPE_CHAR = '%';
71 /**
72 * BitSet of www-form-url safe characters.
73 */
74 protected static final BitSet WWW_FORM_URL = new BitSet(256);
75
76 // Static initializer for www_form_url
77 static {
78 // alpha characters
79 for (int i = 'a'; i <= 'z'; i++) {
80 WWW_FORM_URL.set(i);
81 }
82 for (int i = 'A'; i <= 'Z'; i++) {
83 WWW_FORM_URL.set(i);
84 }
85 // numeric characters
86 for (int i = '0'; i <= '9'; i++) {
87 WWW_FORM_URL.set(i);
88 }
89 // special chars
90 WWW_FORM_URL.set('-');
91 WWW_FORM_URL.set('_');
92 WWW_FORM_URL.set('.');
93 WWW_FORM_URL.set('*');
94 // blank to be replaced with +
95 WWW_FORM_URL.set(' ');
96 }
97
98
99 /**
100 * Default constructor.
101 */
102 public URLCodec() {
103 this(CharEncoding.UTF_8);
104 }
105
106 /**
107 * Constructor which allows for the selection of a default charset
108 *
109 * @param charset the default string charset to use.
110 */
111 public URLCodec(String charset) {
112 super();
113 this.charset = charset;
114 }
115
116 /**
117 * Encodes an array of bytes into an array of URL safe 7-bit characters. Unsafe characters are escaped.
118 *
119 * @param urlsafe
120 * bitset of characters deemed URL safe
121 * @param bytes
122 * array of bytes to convert to URL safe characters
123 * @return array of bytes containing URL safe characters
124 */
125 public static final byte[] encodeUrl(BitSet urlsafe, byte[] bytes) {
126 if (bytes == null) {
127 return null;
128 }
129 if (urlsafe == null) {
130 urlsafe = WWW_FORM_URL;
131 }
132
133 ByteArrayOutputStream buffer = new ByteArrayOutputStream();
134 for (byte c : bytes) {
135 int b = c;
136 if (b < 0) {
137 b = 256 + b;
138 }
139 if (urlsafe.get(b)) {
140 if (b == ' ') {
141 b = '+';
142 }
143 buffer.write(b);
144 } else {
145 buffer.write(ESCAPE_CHAR);
146 char hex1 = Character.toUpperCase(Character.forDigit((b >> 4) & 0xF, RADIX));
147 char hex2 = Character.toUpperCase(Character.forDigit(b & 0xF, RADIX));
148 buffer.write(hex1);
149 buffer.write(hex2);
150 }
151 }
152 return buffer.toByteArray();
153 }
154
155 /**
156 * Decodes an array of URL safe 7-bit characters into an array of
157 * original bytes. Escaped characters are converted back to their
158 * original representation.
159 *
160 * @param bytes array of URL safe characters
161 * @return array of original bytes
162 * @throws DecoderException Thrown if URL decoding is unsuccessful
163 */
164 public static final byte[] decodeUrl(byte[] bytes) throws DecoderException {
165 if (bytes == null) {
166 return null;
167 }
168 ByteArrayOutputStream buffer = new ByteArrayOutputStream();
169 for (int i = 0; i < bytes.length; i++) {
170 int b = bytes[i];
171 if (b == '+') {
172 buffer.write(' ');
173 } else if (b == ESCAPE_CHAR) {
174 try {
175 int u = Utils.digit16(bytes[++i]);
176 int l = Utils.digit16(bytes[++i]);
177 buffer.write((char) ((u << 4) + l));
178 } catch (ArrayIndexOutOfBoundsException e) {
179 throw new DecoderException("Invalid URL encoding: ", e);
180 }
181 } else {
182 buffer.write(b);
183 }
184 }
185 return buffer.toByteArray();
186 }
187
188 /**
189 * Encodes an array of bytes into an array of URL safe 7-bit
190 * characters. Unsafe characters are escaped.
191 *
192 * @param bytes array of bytes to convert to URL safe characters
193 * @return array of bytes containing URL safe characters
194 */
195 public byte[] encode(byte[] bytes) {
196 return encodeUrl(WWW_FORM_URL, bytes);
197 }
198
199
200 /**
201 * Decodes an array of URL safe 7-bit characters into an array of
202 * original bytes. Escaped characters are converted back to their
203 * original representation.
204 *
205 * @param bytes array of URL safe characters
206 * @return array of original bytes
207 * @throws DecoderException Thrown if URL decoding is unsuccessful
208 */
209 public byte[] decode(byte[] bytes) throws DecoderException {
210 return decodeUrl(bytes);
211 }
212
213 /**
214 * Encodes a string into its URL safe form using the specified string charset. Unsafe characters are escaped.
215 *
216 * @param pString
217 * string to convert to a URL safe form
218 * @param charset
219 * the charset for pString
220 * @return URL safe string
221 * @throws UnsupportedEncodingException
222 * Thrown if charset is not supported
223 */
224 public String encode(String pString, String charset) throws UnsupportedEncodingException {
225 if (pString == null) {
226 return null;
227 }
228 return StringUtils.newStringUsAscii(encode(pString.getBytes(charset)));
229 }
230
231 /**
232 * Encodes a string into its URL safe form using the default string
233 * charset. Unsafe characters are escaped.
234 *
235 * @param pString string to convert to a URL safe form
236 * @return URL safe string
237 * @throws EncoderException Thrown if URL encoding is unsuccessful
238 *
239 * @see #getDefaultCharset()
240 */
241 public String encode(String pString) throws EncoderException {
242 if (pString == null) {
243 return null;
244 }
245 try {
246 return encode(pString, getDefaultCharset());
247 } catch (UnsupportedEncodingException e) {
248 throw new EncoderException(e.getMessage(), e);
249 }
250 }
251
252
253 /**
254 * Decodes a URL safe string into its original form using the
255 * specified encoding. Escaped characters are converted back
256 * to their original representation.
257 *
258 * @param pString URL safe string to convert into its original form
259 * @param charset the original string charset
260 * @return original string
261 * @throws DecoderException Thrown if URL decoding is unsuccessful
262 * @throws UnsupportedEncodingException Thrown if charset is not
263 * supported
264 */
265 public String decode(String pString, String charset) throws DecoderException, UnsupportedEncodingException {
266 if (pString == null) {
267 return null;
268 }
269 return new String(decode(StringUtils.getBytesUsAscii(pString)), charset);
270 }
271
272 /**
273 * Decodes a URL safe string into its original form using the default
274 * string charset. Escaped characters are converted back to their
275 * original representation.
276 *
277 * @param pString URL safe string to convert into its original form
278 * @return original string
279 * @throws DecoderException Thrown if URL decoding is unsuccessful
280 *
281 * @see #getDefaultCharset()
282 */
283 public String decode(String pString) throws DecoderException {
284 if (pString == null) {
285 return null;
286 }
287 try {
288 return decode(pString, getDefaultCharset());
289 } catch (UnsupportedEncodingException e) {
290 throw new DecoderException(e.getMessage(), e);
291 }
292 }
293
294 /**
295 * Encodes an object into its URL safe form. Unsafe characters are
296 * escaped.
297 *
298 * @param pObject string to convert to a URL safe form
299 * @return URL safe object
300 * @throws EncoderException Thrown if URL encoding is not
301 * applicable to objects of this type or
302 * if encoding is unsuccessful
303 */
304 public Object encode(Object pObject) throws EncoderException {
305 if (pObject == null) {
306 return null;
307 } else if (pObject instanceof byte[]) {
308 return encode((byte[])pObject);
309 } else if (pObject instanceof String) {
310 return encode((String)pObject);
311 } else {
312 throw new EncoderException("Objects of type " +
313 pObject.getClass().getName() + " cannot be URL encoded");
314
315 }
316 }
317
318 /**
319 * Decodes a URL safe object into its original form. Escaped characters are converted back to their original
320 * representation.
321 *
322 * @param pObject
323 * URL safe object to convert into its original form
324 * @return original object
325 * @throws DecoderException
326 * Thrown if the argument is not a <code>String</code> or <code>byte[]</code>. Thrown if a failure condition is
327 * encountered during the decode process.
328 */
329 public Object decode(Object pObject) throws DecoderException {
330 if (pObject == null) {
331 return null;
332 } else if (pObject instanceof byte[]) {
333 return decode((byte[]) pObject);
334 } else if (pObject instanceof String) {
335 return decode((String) pObject);
336 } else {
337 throw new DecoderException("Objects of type " + pObject.getClass().getName() + " cannot be URL decoded");
338
339 }
340 }
341
342 /**
343 * The default charset used for string decoding and encoding.
344 *
345 * @return the default string charset.
346 */
347 public String getDefaultCharset() {
348 return this.charset;
349 }
350
351 /**
352 * The <code>String</code> encoding used for decoding and encoding.
353 *
354 * @return Returns the encoding.
355 *
356 * @deprecated Use {@link #getDefaultCharset()}, will be removed in 2.0.
357 */
358 public String getEncoding() {
359 return this.charset;
360 }
361
362 }