001 /* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017 018 package org.apache.commons.codec.net; 019 020 import java.io.ByteArrayOutputStream; 021 import java.io.UnsupportedEncodingException; 022 import java.util.BitSet; 023 024 import org.apache.commons.codec.BinaryDecoder; 025 import org.apache.commons.codec.BinaryEncoder; 026 import org.apache.commons.codec.CharEncoding; 027 import org.apache.commons.codec.DecoderException; 028 import org.apache.commons.codec.EncoderException; 029 import org.apache.commons.codec.StringDecoder; 030 import org.apache.commons.codec.StringEncoder; 031 import org.apache.commons.codec.binary.StringUtils; 032 033 /** 034 * Implements the 'www-form-urlencoded' encoding scheme, also misleadingly known as URL encoding. 035 * <p> 036 * This codec is meant to be a replacement for standard Java classes {@link java.net.URLEncoder} and 037 * {@link java.net.URLDecoder} on older Java platforms, as these classes in Java versions below 038 * 1.4 rely on the platform's default charset encoding. 039 * <p> 040 * This class is immutable and thread-safe. 041 * 042 * @see <a href="http://www.w3.org/TR/html4/interact/forms.html#h-17.13.4.1">Chapter 17.13.4 Form content types</a> 043 * of the <a href="http://www.w3.org/TR/html4/">HTML 4.01 Specification<a> 044 * 045 * @since 1.2 046 * @version $Id: URLCodec.html 889935 2013-12-11 05:05:13Z ggregory $ 047 */ 048 public class URLCodec implements BinaryEncoder, BinaryDecoder, StringEncoder, StringDecoder { 049 050 /** 051 * Radix used in encoding and decoding. 052 */ 053 static final int RADIX = 16; 054 055 /** 056 * The default charset used for string decoding and encoding. 057 * 058 * @deprecated TODO: This field will be changed to a private final Charset in 2.0. 059 */ 060 @Deprecated 061 protected String charset; 062 063 /** 064 * Release 1.5 made this field final. 065 */ 066 protected static final byte ESCAPE_CHAR = '%'; 067 /** 068 * BitSet of www-form-url safe characters. 069 */ 070 protected static final BitSet WWW_FORM_URL = new BitSet(256); 071 072 // Static initializer for www_form_url 073 static { 074 // alpha characters 075 for (int i = 'a'; i <= 'z'; i++) { 076 WWW_FORM_URL.set(i); 077 } 078 for (int i = 'A'; i <= 'Z'; i++) { 079 WWW_FORM_URL.set(i); 080 } 081 // numeric characters 082 for (int i = '0'; i <= '9'; i++) { 083 WWW_FORM_URL.set(i); 084 } 085 // special chars 086 WWW_FORM_URL.set('-'); 087 WWW_FORM_URL.set('_'); 088 WWW_FORM_URL.set('.'); 089 WWW_FORM_URL.set('*'); 090 // blank to be replaced with + 091 WWW_FORM_URL.set(' '); 092 } 093 094 095 /** 096 * Default constructor. 097 */ 098 public URLCodec() { 099 this(CharEncoding.UTF_8); 100 } 101 102 /** 103 * Constructor which allows for the selection of a default charset. 104 * 105 * @param charset the default string charset to use. 106 */ 107 public URLCodec(final String charset) { 108 super(); 109 this.charset = charset; 110 } 111 112 /** 113 * Encodes an array of bytes into an array of URL safe 7-bit characters. Unsafe characters are escaped. 114 * 115 * @param urlsafe 116 * bitset of characters deemed URL safe 117 * @param bytes 118 * array of bytes to convert to URL safe characters 119 * @return array of bytes containing URL safe characters 120 */ 121 public static final byte[] encodeUrl(BitSet urlsafe, final byte[] bytes) { 122 if (bytes == null) { 123 return null; 124 } 125 if (urlsafe == null) { 126 urlsafe = WWW_FORM_URL; 127 } 128 129 final ByteArrayOutputStream buffer = new ByteArrayOutputStream(); 130 for (final byte c : bytes) { 131 int b = c; 132 if (b < 0) { 133 b = 256 + b; 134 } 135 if (urlsafe.get(b)) { 136 if (b == ' ') { 137 b = '+'; 138 } 139 buffer.write(b); 140 } else { 141 buffer.write(ESCAPE_CHAR); 142 final char hex1 = Character.toUpperCase(Character.forDigit((b >> 4) & 0xF, RADIX)); 143 final char hex2 = Character.toUpperCase(Character.forDigit(b & 0xF, RADIX)); 144 buffer.write(hex1); 145 buffer.write(hex2); 146 } 147 } 148 return buffer.toByteArray(); 149 } 150 151 /** 152 * Decodes an array of URL safe 7-bit characters into an array of original bytes. Escaped characters are converted 153 * back to their original representation. 154 * 155 * @param bytes 156 * array of URL safe characters 157 * @return array of original bytes 158 * @throws DecoderException 159 * Thrown if URL decoding is unsuccessful 160 */ 161 public static final byte[] decodeUrl(final byte[] bytes) throws DecoderException { 162 if (bytes == null) { 163 return null; 164 } 165 final ByteArrayOutputStream buffer = new ByteArrayOutputStream(); 166 for (int i = 0; i < bytes.length; i++) { 167 final int b = bytes[i]; 168 if (b == '+') { 169 buffer.write(' '); 170 } else if (b == ESCAPE_CHAR) { 171 try { 172 final int u = Utils.digit16(bytes[++i]); 173 final int l = Utils.digit16(bytes[++i]); 174 buffer.write((char) ((u << 4) + l)); 175 } catch (final ArrayIndexOutOfBoundsException e) { 176 throw new DecoderException("Invalid URL encoding: ", e); 177 } 178 } else { 179 buffer.write(b); 180 } 181 } 182 return buffer.toByteArray(); 183 } 184 185 /** 186 * Encodes an array of bytes into an array of URL safe 7-bit characters. Unsafe characters are escaped. 187 * 188 * @param bytes 189 * array of bytes to convert to URL safe characters 190 * @return array of bytes containing URL safe characters 191 */ 192 @Override 193 public byte[] encode(final byte[] bytes) { 194 return encodeUrl(WWW_FORM_URL, bytes); 195 } 196 197 198 /** 199 * Decodes an array of URL safe 7-bit characters into an array of original bytes. Escaped characters are converted 200 * back to their original representation. 201 * 202 * @param bytes 203 * array of URL safe characters 204 * @return array of original bytes 205 * @throws DecoderException 206 * Thrown if URL decoding is unsuccessful 207 */ 208 @Override 209 public byte[] decode(final byte[] bytes) throws DecoderException { 210 return decodeUrl(bytes); 211 } 212 213 /** 214 * Encodes a string into its URL safe form using the specified string charset. Unsafe characters are escaped. 215 * 216 * @param str 217 * string to convert to a URL safe form 218 * @param charset 219 * the charset for str 220 * @return URL safe string 221 * @throws UnsupportedEncodingException 222 * Thrown if charset is not supported 223 */ 224 public String encode(final String str, final String charset) throws UnsupportedEncodingException { 225 if (str == null) { 226 return null; 227 } 228 return StringUtils.newStringUsAscii(encode(str.getBytes(charset))); 229 } 230 231 /** 232 * Encodes a string into its URL safe form using the default string charset. Unsafe characters are escaped. 233 * 234 * @param str 235 * string to convert to a URL safe form 236 * @return URL safe string 237 * @throws EncoderException 238 * Thrown if URL encoding is unsuccessful 239 * 240 * @see #getDefaultCharset() 241 */ 242 @Override 243 public String encode(final String str) throws EncoderException { 244 if (str == null) { 245 return null; 246 } 247 try { 248 return encode(str, getDefaultCharset()); 249 } catch (final UnsupportedEncodingException e) { 250 throw new EncoderException(e.getMessage(), e); 251 } 252 } 253 254 255 /** 256 * Decodes a URL safe string into its original form using the specified encoding. Escaped characters are converted 257 * back to their original representation. 258 * 259 * @param str 260 * URL safe string to convert into its original form 261 * @param charset 262 * the original string charset 263 * @return original string 264 * @throws DecoderException 265 * Thrown if URL decoding is unsuccessful 266 * @throws UnsupportedEncodingException 267 * Thrown if charset is not supported 268 */ 269 public String decode(final String str, final String charset) throws DecoderException, UnsupportedEncodingException { 270 if (str == null) { 271 return null; 272 } 273 return new String(decode(StringUtils.getBytesUsAscii(str)), charset); 274 } 275 276 /** 277 * Decodes a URL safe string into its original form using the default string charset. Escaped characters are 278 * converted back to their original representation. 279 * 280 * @param str 281 * URL safe string to convert into its original form 282 * @return original string 283 * @throws DecoderException 284 * Thrown if URL decoding is unsuccessful 285 * @see #getDefaultCharset() 286 */ 287 @Override 288 public String decode(final String str) throws DecoderException { 289 if (str == null) { 290 return null; 291 } 292 try { 293 return decode(str, getDefaultCharset()); 294 } catch (final UnsupportedEncodingException e) { 295 throw new DecoderException(e.getMessage(), e); 296 } 297 } 298 299 /** 300 * Encodes an object into its URL safe form. Unsafe characters are escaped. 301 * 302 * @param obj 303 * string to convert to a URL safe form 304 * @return URL safe object 305 * @throws EncoderException 306 * Thrown if URL encoding is not applicable to objects of this type or if encoding is unsuccessful 307 */ 308 @Override 309 public Object encode(final Object obj) throws EncoderException { 310 if (obj == null) { 311 return null; 312 } else if (obj instanceof byte[]) { 313 return encode((byte[])obj); 314 } else if (obj instanceof String) { 315 return encode((String)obj); 316 } else { 317 throw new EncoderException("Objects of type " + obj.getClass().getName() + " cannot be URL encoded"); 318 319 } 320 } 321 322 /** 323 * Decodes a URL safe object into its original form. Escaped characters are converted back to their original 324 * representation. 325 * 326 * @param obj 327 * URL safe object to convert into its original form 328 * @return original object 329 * @throws DecoderException 330 * Thrown if the argument is not a <code>String</code> or <code>byte[]</code>. Thrown if a failure 331 * condition is encountered during the decode process. 332 */ 333 @Override 334 public Object decode(final Object obj) throws DecoderException { 335 if (obj == null) { 336 return null; 337 } else if (obj instanceof byte[]) { 338 return decode((byte[]) obj); 339 } else if (obj instanceof String) { 340 return decode((String) obj); 341 } else { 342 throw new DecoderException("Objects of type " + obj.getClass().getName() + " cannot be URL decoded"); 343 344 } 345 } 346 347 /** 348 * The default charset used for string decoding and encoding. 349 * 350 * @return the default string charset. 351 */ 352 public String getDefaultCharset() { 353 return this.charset; 354 } 355 356 /** 357 * The <code>String</code> encoding used for decoding and encoding. 358 * 359 * @return Returns the encoding. 360 * 361 * @deprecated Use {@link #getDefaultCharset()}, will be removed in 2.0. 362 */ 363 @Deprecated 364 public String getEncoding() { 365 return this.charset; 366 } 367 368 }