001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017 018package org.apache.commons.codec.net; 019 020import java.io.ByteArrayOutputStream; 021import java.io.UnsupportedEncodingException; 022import java.util.BitSet; 023 024import org.apache.commons.codec.BinaryDecoder; 025import org.apache.commons.codec.BinaryEncoder; 026import org.apache.commons.codec.CharEncoding; 027import org.apache.commons.codec.DecoderException; 028import org.apache.commons.codec.EncoderException; 029import org.apache.commons.codec.StringDecoder; 030import org.apache.commons.codec.StringEncoder; 031import org.apache.commons.codec.binary.StringUtils; 032 033/** 034 * Implements the 'www-form-urlencoded' encoding scheme, also misleadingly known as URL encoding. 035 * <p> 036 * This codec is meant to be a replacement for standard Java classes {@link java.net.URLEncoder} and 037 * {@link java.net.URLDecoder} on older Java platforms, as these classes in Java versions below 038 * 1.4 rely on the platform's default charset encoding. 039 * <p> 040 * This class is thread-safe since 1.11 041 * 042 * @see <a href="http://www.w3.org/TR/html4/interact/forms.html#h-17.13.4.1">Chapter 17.13.4 Form content types</a> 043 * of the <a href="http://www.w3.org/TR/html4/">HTML 4.01 Specification</a> 044 * 045 * @since 1.2 046 * @version $Id$ 047 */ 048public class URLCodec implements BinaryEncoder, BinaryDecoder, StringEncoder, StringDecoder { 049 050 /** 051 * The default charset used for string decoding and encoding. 052 * 053 * @deprecated TODO: This field will be changed to a private final Charset in 2.0. (CODEC-126) 054 */ 055 @Deprecated 056 protected volatile String charset; // added volatile: see CODEC-232 057 058 /** 059 * Release 1.5 made this field final. 060 */ 061 protected static final byte ESCAPE_CHAR = '%'; 062 063 /** 064 * BitSet of www-form-url safe characters. 065 * This is a copy of the internal BitSet which is now used for the conversion. 066 * Changes to this field are ignored. 067 * @deprecated 1.11 Will be removed in 2.0 (CODEC-230) 068 */ 069 @Deprecated 070 protected static final BitSet WWW_FORM_URL; 071 072 private static final BitSet WWW_FORM_URL_SAFE = new BitSet(256); 073 074 // Static initializer for www_form_url 075 static { 076 // alpha characters 077 for (int i = 'a'; i <= 'z'; i++) { 078 WWW_FORM_URL_SAFE.set(i); 079 } 080 for (int i = 'A'; i <= 'Z'; i++) { 081 WWW_FORM_URL_SAFE.set(i); 082 } 083 // numeric characters 084 for (int i = '0'; i <= '9'; i++) { 085 WWW_FORM_URL_SAFE.set(i); 086 } 087 // special chars 088 WWW_FORM_URL_SAFE.set('-'); 089 WWW_FORM_URL_SAFE.set('_'); 090 WWW_FORM_URL_SAFE.set('.'); 091 WWW_FORM_URL_SAFE.set('*'); 092 // blank to be replaced with + 093 WWW_FORM_URL_SAFE.set(' '); 094 095 // Create a copy in case anyone (ab)uses it 096 WWW_FORM_URL = (BitSet) WWW_FORM_URL_SAFE.clone(); 097 } 098 099 100 /** 101 * Default constructor. 102 */ 103 public URLCodec() { 104 this(CharEncoding.UTF_8); 105 } 106 107 /** 108 * Constructor which allows for the selection of a default charset. 109 * 110 * @param charset the default string charset to use. 111 */ 112 public URLCodec(final String charset) { 113 super(); 114 this.charset = charset; 115 } 116 117 /** 118 * Encodes an array of bytes into an array of URL safe 7-bit characters. Unsafe characters are escaped. 119 * 120 * @param urlsafe 121 * bitset of characters deemed URL safe 122 * @param bytes 123 * array of bytes to convert to URL safe characters 124 * @return array of bytes containing URL safe characters 125 */ 126 public static final byte[] encodeUrl(BitSet urlsafe, final byte[] bytes) { 127 if (bytes == null) { 128 return null; 129 } 130 if (urlsafe == null) { 131 urlsafe = WWW_FORM_URL_SAFE; 132 } 133 134 final ByteArrayOutputStream buffer = new ByteArrayOutputStream(); 135 for (final byte c : bytes) { 136 int b = c; 137 if (b < 0) { 138 b = 256 + b; 139 } 140 if (urlsafe.get(b)) { 141 if (b == ' ') { 142 b = '+'; 143 } 144 buffer.write(b); 145 } else { 146 buffer.write(ESCAPE_CHAR); 147 final char hex1 = Utils.hexDigit(b >> 4); 148 final char hex2 = Utils.hexDigit(b); 149 buffer.write(hex1); 150 buffer.write(hex2); 151 } 152 } 153 return buffer.toByteArray(); 154 } 155 156 /** 157 * Decodes an array of URL safe 7-bit characters into an array of original bytes. Escaped characters are converted 158 * back to their original representation. 159 * 160 * @param bytes 161 * array of URL safe characters 162 * @return array of original bytes 163 * @throws DecoderException 164 * Thrown if URL decoding is unsuccessful 165 */ 166 public static final byte[] decodeUrl(final byte[] bytes) throws DecoderException { 167 if (bytes == null) { 168 return null; 169 } 170 final ByteArrayOutputStream buffer = new ByteArrayOutputStream(); 171 for (int i = 0; i < bytes.length; i++) { 172 final int b = bytes[i]; 173 if (b == '+') { 174 buffer.write(' '); 175 } else if (b == ESCAPE_CHAR) { 176 try { 177 final int u = Utils.digit16(bytes[++i]); 178 final int l = Utils.digit16(bytes[++i]); 179 buffer.write((char) ((u << 4) + l)); 180 } catch (final ArrayIndexOutOfBoundsException e) { 181 throw new DecoderException("Invalid URL encoding: ", e); 182 } 183 } else { 184 buffer.write(b); 185 } 186 } 187 return buffer.toByteArray(); 188 } 189 190 /** 191 * Encodes an array of bytes into an array of URL safe 7-bit characters. Unsafe characters are escaped. 192 * 193 * @param bytes 194 * array of bytes to convert to URL safe characters 195 * @return array of bytes containing URL safe characters 196 */ 197 @Override 198 public byte[] encode(final byte[] bytes) { 199 return encodeUrl(WWW_FORM_URL_SAFE, bytes); 200 } 201 202 203 /** 204 * Decodes an array of URL safe 7-bit characters into an array of original bytes. Escaped characters are converted 205 * back to their original representation. 206 * 207 * @param bytes 208 * array of URL safe characters 209 * @return array of original bytes 210 * @throws DecoderException 211 * Thrown if URL decoding is unsuccessful 212 */ 213 @Override 214 public byte[] decode(final byte[] bytes) throws DecoderException { 215 return decodeUrl(bytes); 216 } 217 218 /** 219 * Encodes a string into its URL safe form using the specified string charset. Unsafe characters are escaped. 220 * 221 * @param str 222 * string to convert to a URL safe form 223 * @param charsetName 224 * the charset for str 225 * @return URL safe string 226 * @throws UnsupportedEncodingException 227 * Thrown if charset is not supported 228 */ 229 public String encode(final String str, final String charsetName) throws UnsupportedEncodingException { 230 if (str == null) { 231 return null; 232 } 233 return StringUtils.newStringUsAscii(encode(str.getBytes(charsetName))); 234 } 235 236 /** 237 * Encodes a string into its URL safe form using the default string charset. Unsafe characters are escaped. 238 * 239 * @param str 240 * string to convert to a URL safe form 241 * @return URL safe string 242 * @throws EncoderException 243 * Thrown if URL encoding is unsuccessful 244 * 245 * @see #getDefaultCharset() 246 */ 247 @Override 248 public String encode(final String str) throws EncoderException { 249 if (str == null) { 250 return null; 251 } 252 try { 253 return encode(str, getDefaultCharset()); 254 } catch (final UnsupportedEncodingException e) { 255 throw new EncoderException(e.getMessage(), e); 256 } 257 } 258 259 260 /** 261 * Decodes a URL safe string into its original form using the specified encoding. Escaped characters are converted 262 * back to their original representation. 263 * 264 * @param str 265 * URL safe string to convert into its original form 266 * @param charsetName 267 * the original string charset 268 * @return original string 269 * @throws DecoderException 270 * Thrown if URL decoding is unsuccessful 271 * @throws UnsupportedEncodingException 272 * Thrown if charset is not supported 273 */ 274 public String decode(final String str, final String charsetName) 275 throws DecoderException, UnsupportedEncodingException { 276 if (str == null) { 277 return null; 278 } 279 return new String(decode(StringUtils.getBytesUsAscii(str)), charsetName); 280 } 281 282 /** 283 * Decodes a URL safe string into its original form using the default string charset. Escaped characters are 284 * converted back to their original representation. 285 * 286 * @param str 287 * URL safe string to convert into its original form 288 * @return original string 289 * @throws DecoderException 290 * Thrown if URL decoding is unsuccessful 291 * @see #getDefaultCharset() 292 */ 293 @Override 294 public String decode(final String str) throws DecoderException { 295 if (str == null) { 296 return null; 297 } 298 try { 299 return decode(str, getDefaultCharset()); 300 } catch (final UnsupportedEncodingException e) { 301 throw new DecoderException(e.getMessage(), e); 302 } 303 } 304 305 /** 306 * Encodes an object into its URL safe form. Unsafe characters are escaped. 307 * 308 * @param obj 309 * string to convert to a URL safe form 310 * @return URL safe object 311 * @throws EncoderException 312 * Thrown if URL encoding is not applicable to objects of this type or if encoding is unsuccessful 313 */ 314 @Override 315 public Object encode(final Object obj) throws EncoderException { 316 if (obj == null) { 317 return null; 318 } else if (obj instanceof byte[]) { 319 return encode((byte[])obj); 320 } else if (obj instanceof String) { 321 return encode((String)obj); 322 } else { 323 throw new EncoderException("Objects of type " + obj.getClass().getName() + " cannot be URL encoded"); 324 325 } 326 } 327 328 /** 329 * Decodes a URL safe object into its original form. Escaped characters are converted back to their original 330 * representation. 331 * 332 * @param obj 333 * URL safe object to convert into its original form 334 * @return original object 335 * @throws DecoderException 336 * Thrown if the argument is not a <code>String</code> or <code>byte[]</code>. Thrown if a failure 337 * condition is encountered during the decode process. 338 */ 339 @Override 340 public Object decode(final Object obj) throws DecoderException { 341 if (obj == null) { 342 return null; 343 } else if (obj instanceof byte[]) { 344 return decode((byte[]) obj); 345 } else if (obj instanceof String) { 346 return decode((String) obj); 347 } else { 348 throw new DecoderException("Objects of type " + obj.getClass().getName() + " cannot be URL decoded"); 349 350 } 351 } 352 353 /** 354 * The default charset used for string decoding and encoding. 355 * 356 * @return the default string charset. 357 */ 358 public String getDefaultCharset() { 359 return this.charset; 360 } 361 362 /** 363 * The <code>String</code> encoding used for decoding and encoding. 364 * 365 * @return Returns the encoding. 366 * 367 * @deprecated Use {@link #getDefaultCharset()}, will be removed in 2.0. 368 */ 369 @Deprecated 370 public String getEncoding() { 371 return this.charset; 372 } 373 374}