/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.commons.codec.net;

import java.io.ByteArrayOutputStream;
import java.io.UnsupportedEncodingException;
import java.util.BitSet;

import org.apache.commons.codec.BinaryDecoder;
import org.apache.commons.codec.BinaryEncoder;
import org.apache.commons.codec.CharEncoding;
import org.apache.commons.codec.DecoderException;
import org.apache.commons.codec.EncoderException;
import org.apache.commons.codec.StringDecoder;
import org.apache.commons.codec.StringEncoder;
import org.apache.commons.codec.binary.StringUtils;

/**
 * Implements the 'www-form-urlencoded' encoding scheme, also misleadingly known as URL encoding.
 * <p>
 * This codec is meant to be a replacement for standard Java classes {@link java.net.URLEncoder} and
 * {@link java.net.URLDecoder} on older Java platforms, as these classes in Java versions below
 * 1.4 rely on the platform's default charset encoding.
 * <p>
 * This class is thread-safe since 1.11
 *
 * @see <a href="http://www.w3.org/TR/html4/interact/forms.html#h-17.13.4.1">Chapter 17.13.4 Form content types</a>
 *           of the <a href="http://www.w3.org/TR/html4/">HTML 4.01 Specification</a>
 *
 * @since 1.2
 */
public class URLCodec implements BinaryEncoder, BinaryDecoder, StringEncoder, StringDecoder {

    /**
     * The default charset used for string decoding and encoding.
     *
     * @deprecated TODO: This field will be changed to a private final Charset in 2.0. (CODEC-126)
     */
    @Deprecated
    protected volatile String charset; // added volatile: see CODEC-232

    /**
     * The byte that introduces an escape sequence ({@code '%'}); it is followed by two hexadecimal digits.
     * <p>
     * Release 1.5 made this field final.
     */
    protected static final byte ESCAPE_CHAR = '%';

    /**
     * BitSet of www-form-url safe characters.
     * This is a copy of the internal BitSet which is now used for the conversion.
     * Changes to this field are ignored.
     *
     * @deprecated 1.11 Will be removed in 2.0 (CODEC-230)
     */
    @Deprecated
    protected static final BitSet WWW_FORM_URL;

    /**
     * The set of byte values that {@link #encode(byte[])} writes without escaping, and the fallback used by
     * {@link #encodeUrl(BitSet, byte[])} when no set is supplied.
     */
    private static final BitSet WWW_FORM_URL_SAFE = new BitSet(256);

    // Static initializer for www_form_url
    static {
        // alpha characters
        for (int i = 'a'; i <= 'z'; i++) {
            WWW_FORM_URL_SAFE.set(i);
        }
        for (int i = 'A'; i <= 'Z'; i++) {
            WWW_FORM_URL_SAFE.set(i);
        }
        // numeric characters
        for (int i = '0'; i <= '9'; i++) {
            WWW_FORM_URL_SAFE.set(i);
        }
        // special chars
        WWW_FORM_URL_SAFE.set('-');
        WWW_FORM_URL_SAFE.set('_');
        WWW_FORM_URL_SAFE.set('.');
        WWW_FORM_URL_SAFE.set('*');
        // blank is marked "safe" so that encodeUrl reaches the branch that replaces it with '+'
        WWW_FORM_URL_SAFE.set(' ');

        // Create a copy in case anyone (ab)uses it
        WWW_FORM_URL = (BitSet) WWW_FORM_URL_SAFE.clone();
    }

    /**
     * Default constructor; uses {@link CharEncoding#UTF_8} as the default string charset.
     */
    public URLCodec() {
        this(CharEncoding.UTF_8);
    }

    /**
     * Constructor which allows for the selection of a default charset.
     *
     * @param charset
     *            the default string charset to use. The name is stored as given and is not validated here.
     */
    public URLCodec(final String charset) {
        super();
        this.charset = charset;
    }

    /**
     * Encodes an array of bytes into an array of URL safe 7-bit characters. Unsafe characters are escaped.
     * <p>
     * Each input byte is treated as an unsigned value (0-255). A byte whose bit is set in {@code urlsafe} is
     * written unchanged, except for a space, which is written as {@code '+'}. Every other byte is written as
     * {@link #ESCAPE_CHAR} followed by two hexadecimal digits.
     *
     * @param urlsafe
     *            bitset of characters deemed URL safe; if {@code null}, the default www-form-url safe set is used
     * @param bytes
     *            array of bytes to convert to URL safe characters
     * @return array of bytes containing URL safe characters, or {@code null} if {@code bytes} is {@code null}
     */
    public static final byte[] encodeUrl(BitSet urlsafe, final byte[] bytes) {
        if (bytes == null) {
            return null;
        }
        if (urlsafe == null) {
            urlsafe = WWW_FORM_URL_SAFE;
        }

        final ByteArrayOutputStream buffer = new ByteArrayOutputStream();
        for (final byte c : bytes) {
            // Java bytes are signed; map negative values onto 128..255 so they can index the BitSet.
            int b = c;
            if (b < 0) {
                b = 256 + b;
            }
            if (urlsafe.get(b)) {
                if (b == ' ') {
                    b = '+';
                }
                buffer.write(b);
            } else {
                buffer.write(ESCAPE_CHAR);
                // NOTE(review): the low digit is produced from the whole value, so this presumably relies on
                // Utils.hexDigit using only the low four bits of its argument - confirm against Utils.
                final char hex1 = Utils.hexDigit(b >> 4);
                final char hex2 = Utils.hexDigit(b);
                buffer.write(hex1);
                buffer.write(hex2);
            }
        }
        return buffer.toByteArray();
    }

    /**
     * Decodes an array of URL safe 7-bit characters into an array of original bytes. Escaped characters are converted
     * back to their original representation.
     * <p>
     * A {@code '+'} is decoded to a space, {@link #ESCAPE_CHAR} followed by two hexadecimal digits is decoded to
     * the byte those digits denote, and every other byte is copied unchanged.
     *
     * @param bytes
     *            array of URL safe characters
     * @return array of original bytes, or {@code null} if {@code bytes} is {@code null}
     * @throws DecoderException
     *             Thrown if URL decoding is unsuccessful, for example when an escape sequence is cut short by the
     *             end of the input
     */
    public static final byte[] decodeUrl(final byte[] bytes) throws DecoderException {
        if (bytes == null) {
            return null;
        }
        final ByteArrayOutputStream buffer = new ByteArrayOutputStream();
        for (int i = 0; i < bytes.length; i++) {
            final int b = bytes[i];
            if (b == '+') {
                buffer.write(' ');
            } else if (b == ESCAPE_CHAR) {
                try {
                    // Consume the two digits after the escape character; the loop index is advanced past them.
                    // NOTE(review): Utils.digit16 presumably rejects non-hex input itself - confirm against Utils.
                    final int u = Utils.digit16(bytes[++i]);
                    final int l = Utils.digit16(bytes[++i]);
                    buffer.write((char) ((u << 4) + l));
                } catch (final ArrayIndexOutOfBoundsException e) {
                    // Fewer than two bytes follow the escape character.
                    throw new DecoderException("Invalid URL encoding: ", e);
                }
            } else {
                buffer.write(b);
            }
        }
        return buffer.toByteArray();
    }

    /**
     * Encodes an array of bytes into an array of URL safe 7-bit characters. Unsafe characters are escaped.
     * <p>
     * Delegates to {@link #encodeUrl(BitSet, byte[])} with the default www-form-url safe set.
     *
     * @param bytes
     *            array of bytes to convert to URL safe characters
     * @return array of bytes containing URL safe characters, or {@code null} if {@code bytes} is {@code null}
     */
    @Override
    public byte[] encode(final byte[] bytes) {
        return encodeUrl(WWW_FORM_URL_SAFE, bytes);
    }

    /**
     * Decodes an array of URL safe 7-bit characters into an array of original bytes. Escaped characters are converted
     * back to their original representation.
     * <p>
     * Delegates to {@link #decodeUrl(byte[])}.
     *
     * @param bytes
     *            array of URL safe characters
     * @return array of original bytes, or {@code null} if {@code bytes} is {@code null}
     * @throws DecoderException
     *             Thrown if URL decoding is unsuccessful
     */
    @Override
    public byte[] decode(final byte[] bytes) throws DecoderException {
        return decodeUrl(bytes);
    }

    /**
     * Encodes a string into its URL safe form using the specified string charset. Unsafe characters are escaped.
     * <p>
     * The string is converted to bytes with {@code charsetName}, the bytes are URL encoded, and the result is
     * turned back into a string as US-ASCII.
     *
     * @param str
     *            string to convert to a URL safe form
     * @param charsetName
     *            the charset for str
     * @return URL safe string, or {@code null} if {@code str} is {@code null}
     * @throws UnsupportedEncodingException
     *             Thrown if charset is not supported
     */
    public String encode(final String str, final String charsetName) throws UnsupportedEncodingException {
        if (str == null) {
            return null;
        }
        return StringUtils.newStringUsAscii(encode(str.getBytes(charsetName)));
    }

    /**
     * Encodes a string into its URL safe form using the default string charset. Unsafe characters are escaped.
     *
     * @param str
     *            string to convert to a URL safe form
     * @return URL safe string, or {@code null} if {@code str} is {@code null}
     * @throws EncoderException
     *             Thrown if URL encoding is unsuccessful, including when the default charset is not supported
     *
     * @see #getDefaultCharset()
     */
    @Override
    public String encode(final String str) throws EncoderException {
        if (str == null) {
            return null;
        }
        try {
            return encode(str, getDefaultCharset());
        } catch (final UnsupportedEncodingException e) {
            throw new EncoderException(e.getMessage(), e);
        }
    }

    /**
     * Decodes a URL safe string into its original form using the specified encoding. Escaped characters are converted
     * back to their original representation.
     * <p>
     * The string is converted to bytes as US-ASCII, the bytes are URL decoded, and the result is turned into a
     * string with {@code charsetName}.
     *
     * @param str
     *            URL safe string to convert into its original form
     * @param charsetName
     *            the original string charset
     * @return original string, or {@code null} if {@code str} is {@code null}
     * @throws DecoderException
     *             Thrown if URL decoding is unsuccessful
     * @throws UnsupportedEncodingException
     *             Thrown if charset is not supported
     */
    public String decode(final String str, final String charsetName)
            throws DecoderException, UnsupportedEncodingException {
        if (str == null) {
            return null;
        }
        return new String(decode(StringUtils.getBytesUsAscii(str)), charsetName);
    }

    /**
     * Decodes a URL safe string into its original form using the default string charset. Escaped characters are
     * converted back to their original representation.
     *
     * @param str
     *            URL safe string to convert into its original form
     * @return original string, or {@code null} if {@code str} is {@code null}
     * @throws DecoderException
     *             Thrown if URL decoding is unsuccessful, including when the default charset is not supported
     * @see #getDefaultCharset()
     */
    @Override
    public String decode(final String str) throws DecoderException {
        if (str == null) {
            return null;
        }
        try {
            return decode(str, getDefaultCharset());
        } catch (final UnsupportedEncodingException e) {
            throw new DecoderException(e.getMessage(), e);
        }
    }

    /**
     * Encodes an object into its URL safe form. Unsafe characters are escaped.
     * <p>
     * Only {@code byte[]} and {@code String} arguments are supported; each is passed to the matching
     * {@code encode} overload.
     *
     * @param obj
     *            {@code String} or {@code byte[]} to convert to a URL safe form
     * @return URL safe object of the same kind as the argument, or {@code null} if {@code obj} is {@code null}
     * @throws EncoderException
     *             Thrown if URL encoding is not applicable to objects of this type or if encoding is unsuccessful
     */
    @Override
    public Object encode(final Object obj) throws EncoderException {
        if (obj == null) {
            return null;
        } else if (obj instanceof byte[]) {
            return encode((byte[]) obj);
        } else if (obj instanceof String) {
            return encode((String) obj);
        } else {
            throw new EncoderException("Objects of type " + obj.getClass().getName() + " cannot be URL encoded");
        }
    }

    /**
     * Decodes a URL safe object into its original form. Escaped characters are converted back to their original
     * representation.
     * <p>
     * Only {@code byte[]} and {@code String} arguments are supported; each is passed to the matching
     * {@code decode} overload.
     *
     * @param obj
     *            URL safe {@code String} or {@code byte[]} to convert into its original form
     * @return original object of the same kind as the argument, or {@code null} if {@code obj} is {@code null}
     * @throws DecoderException
     *             Thrown if the argument is not a {@code String} or {@code byte[]}. Thrown if a failure
     *             condition is encountered during the decode process.
     */
    @Override
    public Object decode(final Object obj) throws DecoderException {
        if (obj == null) {
            return null;
        } else if (obj instanceof byte[]) {
            return decode((byte[]) obj);
        } else if (obj instanceof String) {
            return decode((String) obj);
        } else {
            throw new DecoderException("Objects of type " + obj.getClass().getName() + " cannot be URL decoded");
        }
    }

    /**
     * The default charset used for string decoding and encoding.
     *
     * @return the default string charset.
     */
    public String getDefaultCharset() {
        return this.charset;
    }

    /**
     * The {@code String} encoding used for decoding and encoding.
     *
     * @return Returns the encoding.
     *
     * @deprecated Use {@link #getDefaultCharset()}, will be removed in 2.0.
     */
    @Deprecated
    public String getEncoding() {
        return this.charset;
    }

}