1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18 package org.apache.commons.codec.net;
19
20 import java.io.ByteArrayOutputStream;
21 import java.io.UnsupportedEncodingException;
22 import java.util.BitSet;
23
24 import org.apache.commons.codec.BinaryDecoder;
25 import org.apache.commons.codec.BinaryEncoder;
26 import org.apache.commons.codec.CharEncoding;
27 import org.apache.commons.codec.DecoderException;
28 import org.apache.commons.codec.EncoderException;
29 import org.apache.commons.codec.StringDecoder;
30 import org.apache.commons.codec.StringEncoder;
31 import org.apache.commons.codec.binary.StringUtils;
32
33 /**
34 * Implements the 'www-form-urlencoded' encoding scheme, also misleadingly known as URL encoding.
35 * <p>
36 * This codec is meant to be a replacement for standard Java classes {@link java.net.URLEncoder} and
37 * {@link java.net.URLDecoder} on older Java platforms, as these classes in Java versions below
38 * 1.4 rely on the platform's default charset encoding.
39 * <p>
40 * This class is thread-safe since 1.11
41 *
42 * @see <a href="http://www.w3.org/TR/html4/interact/forms.html#h-17.13.4.1">Chapter 17.13.4 Form content types</a>
43 * of the <a href="http://www.w3.org/TR/html4/">HTML 4.01 Specification</a>
44 *
45 * @since 1.2
46 * @version $Id: URLCodec.java 1789142 2017-03-28 13:58:58Z sebb $
47 */
48 public class URLCodec implements BinaryEncoder, BinaryDecoder, StringEncoder, StringDecoder {
49
50 /**
51 * The default charset used for string decoding and encoding.
52 *
53 * @deprecated TODO: This field will be changed to a private final Charset in 2.0. (CODEC-126)
54 */
55 @Deprecated
56 protected volatile String charset; // added volatile: see CODEC-232
57
58 /**
59 * Release 1.5 made this field final.
60 */
61 protected static final byte ESCAPE_CHAR = '%';
62
63 /**
64 * BitSet of www-form-url safe characters.
65 * This is a copy of the internal BitSet which is now used for the conversion.
66 * Changes to this field are ignored.
67 * @deprecated 1.11 Will be removed in 2.0 (CODEC-230)
68 */
69 @Deprecated
70 protected static final BitSet WWW_FORM_URL;
71
72 private static final BitSet WWW_FORM_URL_SAFE = new BitSet(256);
73
74 // Static initializer for www_form_url
75 static {
76 // alpha characters
77 for (int i = 'a'; i <= 'z'; i++) {
78 WWW_FORM_URL_SAFE.set(i);
79 }
80 for (int i = 'A'; i <= 'Z'; i++) {
81 WWW_FORM_URL_SAFE.set(i);
82 }
83 // numeric characters
84 for (int i = '0'; i <= '9'; i++) {
85 WWW_FORM_URL_SAFE.set(i);
86 }
87 // special chars
88 WWW_FORM_URL_SAFE.set('-');
89 WWW_FORM_URL_SAFE.set('_');
90 WWW_FORM_URL_SAFE.set('.');
91 WWW_FORM_URL_SAFE.set('*');
92 // blank to be replaced with +
93 WWW_FORM_URL_SAFE.set(' ');
94
95 // Create a copy in case anyone (ab)uses it
96 WWW_FORM_URL = (BitSet) WWW_FORM_URL_SAFE.clone();
97 }
98
99
100 /**
101 * Default constructor.
102 */
103 public URLCodec() {
104 this(CharEncoding.UTF_8);
105 }
106
107 /**
108 * Constructor which allows for the selection of a default charset.
109 *
110 * @param charset the default string charset to use.
111 */
112 public URLCodec(final String charset) {
113 super();
114 this.charset = charset;
115 }
116
117 /**
118 * Encodes an array of bytes into an array of URL safe 7-bit characters. Unsafe characters are escaped.
119 *
120 * @param urlsafe
121 * bitset of characters deemed URL safe
122 * @param bytes
123 * array of bytes to convert to URL safe characters
124 * @return array of bytes containing URL safe characters
125 */
126 public static final byte[] encodeUrl(BitSet urlsafe, final byte[] bytes) {
127 if (bytes == null) {
128 return null;
129 }
130 if (urlsafe == null) {
131 urlsafe = WWW_FORM_URL_SAFE;
132 }
133
134 final ByteArrayOutputStream buffer = new ByteArrayOutputStream();
135 for (final byte c : bytes) {
136 int b = c;
137 if (b < 0) {
138 b = 256 + b;
139 }
140 if (urlsafe.get(b)) {
141 if (b == ' ') {
142 b = '+';
143 }
144 buffer.write(b);
145 } else {
146 buffer.write(ESCAPE_CHAR);
147 final char hex1 = Utils.hexDigit(b >> 4);
148 final char hex2 = Utils.hexDigit(b);
149 buffer.write(hex1);
150 buffer.write(hex2);
151 }
152 }
153 return buffer.toByteArray();
154 }
155
156 /**
157 * Decodes an array of URL safe 7-bit characters into an array of original bytes. Escaped characters are converted
158 * back to their original representation.
159 *
160 * @param bytes
161 * array of URL safe characters
162 * @return array of original bytes
163 * @throws DecoderException
164 * Thrown if URL decoding is unsuccessful
165 */
166 public static final byte[] decodeUrl(final byte[] bytes) throws DecoderException {
167 if (bytes == null) {
168 return null;
169 }
170 final ByteArrayOutputStream buffer = new ByteArrayOutputStream();
171 for (int i = 0; i < bytes.length; i++) {
172 final int b = bytes[i];
173 if (b == '+') {
174 buffer.write(' ');
175 } else if (b == ESCAPE_CHAR) {
176 try {
177 final int u = Utils.digit16(bytes[++i]);
178 final int l = Utils.digit16(bytes[++i]);
179 buffer.write((char) ((u << 4) + l));
180 } catch (final ArrayIndexOutOfBoundsException e) {
181 throw new DecoderException("Invalid URL encoding: ", e);
182 }
183 } else {
184 buffer.write(b);
185 }
186 }
187 return buffer.toByteArray();
188 }
189
190 /**
191 * Encodes an array of bytes into an array of URL safe 7-bit characters. Unsafe characters are escaped.
192 *
193 * @param bytes
194 * array of bytes to convert to URL safe characters
195 * @return array of bytes containing URL safe characters
196 */
197 @Override
198 public byte[] encode(final byte[] bytes) {
199 return encodeUrl(WWW_FORM_URL_SAFE, bytes);
200 }
201
202
203 /**
204 * Decodes an array of URL safe 7-bit characters into an array of original bytes. Escaped characters are converted
205 * back to their original representation.
206 *
207 * @param bytes
208 * array of URL safe characters
209 * @return array of original bytes
210 * @throws DecoderException
211 * Thrown if URL decoding is unsuccessful
212 */
213 @Override
214 public byte[] decode(final byte[] bytes) throws DecoderException {
215 return decodeUrl(bytes);
216 }
217
218 /**
219 * Encodes a string into its URL safe form using the specified string charset. Unsafe characters are escaped.
220 *
221 * @param str
222 * string to convert to a URL safe form
223 * @param charset
224 * the charset for str
225 * @return URL safe string
226 * @throws UnsupportedEncodingException
227 * Thrown if charset is not supported
228 */
229 public String encode(final String str, final String charset) throws UnsupportedEncodingException {
230 if (str == null) {
231 return null;
232 }
233 return StringUtils.newStringUsAscii(encode(str.getBytes(charset)));
234 }
235
236 /**
237 * Encodes a string into its URL safe form using the default string charset. Unsafe characters are escaped.
238 *
239 * @param str
240 * string to convert to a URL safe form
241 * @return URL safe string
242 * @throws EncoderException
243 * Thrown if URL encoding is unsuccessful
244 *
245 * @see #getDefaultCharset()
246 */
247 @Override
248 public String encode(final String str) throws EncoderException {
249 if (str == null) {
250 return null;
251 }
252 try {
253 return encode(str, getDefaultCharset());
254 } catch (final UnsupportedEncodingException e) {
255 throw new EncoderException(e.getMessage(), e);
256 }
257 }
258
259
260 /**
261 * Decodes a URL safe string into its original form using the specified encoding. Escaped characters are converted
262 * back to their original representation.
263 *
264 * @param str
265 * URL safe string to convert into its original form
266 * @param charset
267 * the original string charset
268 * @return original string
269 * @throws DecoderException
270 * Thrown if URL decoding is unsuccessful
271 * @throws UnsupportedEncodingException
272 * Thrown if charset is not supported
273 */
274 public String decode(final String str, final String charset) throws DecoderException, UnsupportedEncodingException {
275 if (str == null) {
276 return null;
277 }
278 return new String(decode(StringUtils.getBytesUsAscii(str)), charset);
279 }
280
281 /**
282 * Decodes a URL safe string into its original form using the default string charset. Escaped characters are
283 * converted back to their original representation.
284 *
285 * @param str
286 * URL safe string to convert into its original form
287 * @return original string
288 * @throws DecoderException
289 * Thrown if URL decoding is unsuccessful
290 * @see #getDefaultCharset()
291 */
292 @Override
293 public String decode(final String str) throws DecoderException {
294 if (str == null) {
295 return null;
296 }
297 try {
298 return decode(str, getDefaultCharset());
299 } catch (final UnsupportedEncodingException e) {
300 throw new DecoderException(e.getMessage(), e);
301 }
302 }
303
304 /**
305 * Encodes an object into its URL safe form. Unsafe characters are escaped.
306 *
307 * @param obj
308 * string to convert to a URL safe form
309 * @return URL safe object
310 * @throws EncoderException
311 * Thrown if URL encoding is not applicable to objects of this type or if encoding is unsuccessful
312 */
313 @Override
314 public Object encode(final Object obj) throws EncoderException {
315 if (obj == null) {
316 return null;
317 } else if (obj instanceof byte[]) {
318 return encode((byte[])obj);
319 } else if (obj instanceof String) {
320 return encode((String)obj);
321 } else {
322 throw new EncoderException("Objects of type " + obj.getClass().getName() + " cannot be URL encoded");
323
324 }
325 }
326
327 /**
328 * Decodes a URL safe object into its original form. Escaped characters are converted back to their original
329 * representation.
330 *
331 * @param obj
332 * URL safe object to convert into its original form
333 * @return original object
334 * @throws DecoderException
335 * Thrown if the argument is not a <code>String</code> or <code>byte[]</code>. Thrown if a failure
336 * condition is encountered during the decode process.
337 */
338 @Override
339 public Object decode(final Object obj) throws DecoderException {
340 if (obj == null) {
341 return null;
342 } else if (obj instanceof byte[]) {
343 return decode((byte[]) obj);
344 } else if (obj instanceof String) {
345 return decode((String) obj);
346 } else {
347 throw new DecoderException("Objects of type " + obj.getClass().getName() + " cannot be URL decoded");
348
349 }
350 }
351
352 /**
353 * The default charset used for string decoding and encoding.
354 *
355 * @return the default string charset.
356 */
357 public String getDefaultCharset() {
358 return this.charset;
359 }
360
361 /**
362 * The <code>String</code> encoding used for decoding and encoding.
363 *
364 * @return Returns the encoding.
365 *
366 * @deprecated Use {@link #getDefaultCharset()}, will be removed in 2.0.
367 */
368 @Deprecated
369 public String getEncoding() {
370 return this.charset;
371 }
372
373 }