/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.commons.codec.net;

import java.nio.ByteBuffer;
import java.util.BitSet;

import org.apache.commons.codec.BinaryDecoder;
import org.apache.commons.codec.BinaryEncoder;
import org.apache.commons.codec.DecoderException;
import org.apache.commons.codec.EncoderException;

/**
 * Implements the Percent-Encoding scheme, as described in HTTP 1.1 specification. For extensibility, an array of
 * special US-ASCII characters can be specified in order to perform proper URI encoding for the different parts
 * of the URI.
 * <p>
 * This class is immutable: all of its fields are final and are fully initialized by the constructor. Although it
 * uses a {@link BitSet} internally, which is not thread-safe by itself, the set is only written during construction
 * and only read afterwards, so instances can be shared between threads.
 * </p>
 *
 * @see <a href="https://tools.ietf.org/html/rfc3986#section-2.1">Percent-Encoding</a>
 * @since 1.12
 */
public class PercentCodec implements BinaryEncoder, BinaryDecoder {

    /**
     * The escape character used by the Percent-Encoding in order to introduce an encoded character.
     */
    private static final byte ESCAPE_CHAR = '%';

    /**
     * The bit set used to store the characters that should always be encoded. It is populated by the constructor
     * and never modified afterwards.
     */
    private final BitSet alwaysEncodeChars = new BitSet();

    /**
     * The flag defining if the space character should be encoded as '+'.
     */
    private final boolean plusForSpace;

    /**
     * The minimum code of the bytes stored in {@link #alwaysEncodeChars}, used to skip bit set look-ups for bytes
     * that cannot be in the set.
     */
    private final int alwaysEncodeCharsMin;

    /**
     * The maximum code of the bytes stored in {@link #alwaysEncodeChars}, used to skip bit set look-ups for bytes
     * that cannot be in the set.
     */
    private final int alwaysEncodeCharsMax;

    /**
     * Constructs a Percent codec that will encode all the non US-ASCII characters using the Percent-Encoding
     * while it will not encode any of the US-ASCII characters, except for character '%' that is used as escape
     * character for Percent-Encoding. The space character is not replaced by '+'.
     */
    public PercentCodec() {
        this(null, false);
    }

    /**
     * Constructs a Percent codec by specifying the characters that belong to US-ASCII that should
     * always be encoded. The rest of the US-ASCII characters will not be encoded, except for character '%' that
     * is used as escape character for Percent-Encoding and is therefore always encoded.
     * <p>
     * Note that when {@code plusForSpace} is {@code true}, decoding turns every '+' into a space. A literal '+'
     * in the input only survives an encode/decode round trip if '+' is listed in {@code alwaysEncodeChars}.
     * </p>
     *
     * @param alwaysEncodeChars the unsafe characters that should always be encoded, may be {@code null} in which
     *                          case only '%' is always encoded
     * @param plusForSpace      the flag defining if the space character should be encoded as '+'
     * @throws IllegalArgumentException if {@code alwaysEncodeChars} contains a negative (non US-ASCII) byte
     */
    public PercentCodec(final byte[] alwaysEncodeChars, final boolean plusForSpace) {
        this.plusForSpace = plusForSpace;

        // The escape character is always part of the set, so it seeds the range.
        int min = ESCAPE_CHAR;
        int max = ESCAPE_CHAR;
        this.alwaysEncodeChars.set(ESCAPE_CHAR);

        if (alwaysEncodeChars != null) {
            for (final byte b : alwaysEncodeChars) {
                if (b < 0) {
                    throw new IllegalArgumentException("byte must be >= 0");
                }
                this.alwaysEncodeChars.set(b);
                min = Math.min(min, b);
                max = Math.max(max, b);
            }
        }

        // Assigned once here so that both bounds can be final.
        this.alwaysEncodeCharsMin = min;
        this.alwaysEncodeCharsMax = max;
    }

    /**
     * Tells whether a byte has to be percent-encoded: either it is not a US-ASCII character, or it is one of the
     * configured always-encode characters.
     *
     * @param c the byte to check
     * @return {@code true} if the byte must be written as an escape sequence
     */
    private boolean canEncode(final byte c) {
        if (!isAsciiChar(c)) {
            return true;
        }
        // The range check comes first so the bit set is only consulted for candidate bytes.
        return inAlwaysEncodeCharsRange(c) && alwaysEncodeChars.get(c);
    }

    /**
     * Tells whether the given bytes contain at least one space character.
     *
     * @param bytes the bytes to scan, must not be {@code null}
     * @return {@code true} if a space is present
     */
    private boolean containsSpace(final byte[] bytes) {
        for (final byte b : bytes) {
            if (b == ' ') {
                return true;
            }
        }
        return false;
    }

    /**
     * Decodes bytes encoded with Percent-Encoding based on RFC 3986. The reverse process of {@link #encode(byte[])}
     * is performed: every escape sequence ('%' followed by two hexadecimal digits) is replaced by the byte it
     * denotes and, if this codec was built with {@code plusForSpace}, every '+' is replaced by a space. All other
     * bytes are copied unchanged.
     *
     * @param bytes the percent-encoded bytes, may be {@code null}
     * @return the decoded bytes, or {@code null} if the input is {@code null}
     * @throws DecoderException if an escape sequence is truncated (fewer than two bytes follow the '%'), or if
     *                          decoding of a hexadecimal digit fails
     */
    @Override
    public byte[] decode(final byte[] bytes) throws DecoderException {
        if (bytes == null) {
            return null;
        }

        final ByteBuffer buffer = ByteBuffer.allocate(expectedDecodingBytes(bytes));
        for (int i = 0; i < bytes.length; i++) {
            final byte b = bytes[i];
            if (b == ESCAPE_CHAR) {
                try {
                    // Consume the two bytes following the escape character as high and low hex digits.
                    final int u = Utils.digit16(bytes[++i]);
                    final int l = Utils.digit16(bytes[++i]);
                    buffer.put((byte) ((u << 4) + l));
                } catch (final ArrayIndexOutOfBoundsException e) {
                    // The input ended in the middle of an escape sequence; the cause is kept for callers.
                    throw new DecoderException("Invalid percent decoding: ", e);
                }
            } else if (plusForSpace && b == '+') {
                buffer.put((byte) ' ');
            } else {
                buffer.put(b);
            }
        }
        return buffer.array();
    }

    /**
     * Decodes a byte[] Object, whose bytes are encoded with Percent-Encoding.
     *
     * @param obj the object to decode
     * @return the decoding result byte[] as Object, or {@code null} if the input is {@code null}
     * @throws DecoderException if the object is not a byte array, or if the byte array cannot be decoded
     */
    @Override
    public Object decode(final Object obj) throws DecoderException {
        if (obj == null) {
            return null;
        }
        if (obj instanceof byte[]) {
            return decode((byte[]) obj);
        }
        throw new DecoderException("Objects of type " + obj.getClass().getName() + " cannot be Percent decoded");
    }

    /**
     * Writes the encoded form of the given bytes into a new array of the given length.
     *
     * @param bytes          the bytes to encode, must not be {@code null}
     * @param expectedLength the exact length of the encoded result, as computed by
     *                       {@link #expectedEncodingBytes(byte[])}
     * @param willEncode     {@code false} if it is already known that no byte needs an escape sequence, which
     *                       skips the per-byte check
     * @return the encoded bytes
     */
    private byte[] doEncode(final byte[] bytes, final int expectedLength, final boolean willEncode) {
        final ByteBuffer buffer = ByteBuffer.allocate(expectedLength);
        for (final byte b : bytes) {
            if (willEncode && canEncode(b)) {
                // Work on the unsigned value so that the nibbles of bytes >= 0x80 are extracted explicitly.
                final int unsigned = b & 0xFF;
                buffer.put(ESCAPE_CHAR);
                buffer.put((byte) Utils.hexDigit(unsigned >> 4));
                buffer.put((byte) Utils.hexDigit(unsigned & 0x0F));
            } else if (plusForSpace && b == ' ') {
                buffer.put((byte) '+');
            } else {
                buffer.put(b);
            }
        }
        return buffer.array();
    }

    /**
     * Percent-Encoding based on RFC 3986. The non US-ASCII characters are encoded, as well as the
     * US-ASCII characters that are configured to be always encoded. If this codec was built with
     * {@code plusForSpace}, spaces that are not configured to be always encoded are written as '+'.
     * <p>
     * If no byte needs to be changed, the input array itself is returned, not a copy.
     * </p>
     *
     * @param bytes the bytes to encode, may be {@code null}
     * @return the encoded bytes, or {@code null} if the input is {@code null}
     * @throws EncoderException declared by the implemented interface
     */
    @Override
    public byte[] encode(final byte[] bytes) throws EncoderException {
        if (bytes == null) {
            return null;
        }

        final int expectedEncodingBytes = expectedEncodingBytes(bytes);
        // A length difference means that at least one byte expands to a three byte escape sequence.
        final boolean willEncode = expectedEncodingBytes != bytes.length;
        if (willEncode || plusForSpace && containsSpace(bytes)) {
            return doEncode(bytes, expectedEncodingBytes, willEncode);
        }
        return bytes;
    }

    /**
     * Encodes an object using the Percent-Encoding. Only byte[] objects are accepted.
     *
     * @param obj the object to encode
     * @return the encoding result byte[] as Object, or {@code null} if the input is {@code null}
     * @throws EncoderException if the object is not a byte array
     */
    @Override
    public Object encode(final Object obj) throws EncoderException {
        if (obj == null) {
            return null;
        }
        if (obj instanceof byte[]) {
            return encode((byte[]) obj);
        }
        throw new EncoderException("Objects of type " + obj.getClass().getName() + " cannot be Percent encoded");
    }

    /**
     * Computes the number of bytes the decoded form of the given bytes occupies: an escape character together
     * with the two positions after it counts as one byte, every other byte counts as one.
     *
     * @param bytes the percent-encoded bytes, must not be {@code null}
     * @return the size of the decoding result
     */
    private int expectedDecodingBytes(final byte[] bytes) {
        int byteCount = 0;
        for (int i = 0; i < bytes.length; ) {
            final byte b = bytes[i];
            i += b == ESCAPE_CHAR ? 3 : 1;
            byteCount++;
        }
        return byteCount;
    }

    /**
     * Computes the number of bytes the encoded form of the given bytes occupies: three for every byte that must
     * be escaped, one for every other byte.
     *
     * @param bytes the bytes to encode, must not be {@code null}
     * @return the size of the encoding result
     */
    private int expectedEncodingBytes(final byte[] bytes) {
        int byteCount = 0;
        for (final byte b : bytes) {
            byteCount += canEncode(b) ? 3 : 1;
        }
        return byteCount;
    }

    /**
     * Tells whether a byte lies between the smallest and the largest always-encode character, inclusive.
     *
     * @param c the byte to check
     * @return {@code true} if the byte may be contained in the always-encode set
     */
    private boolean inAlwaysEncodeCharsRange(final byte c) {
        return c >= alwaysEncodeCharsMin && c <= alwaysEncodeCharsMax;
    }

    /**
     * Tells whether a byte is a US-ASCII character, that is, whether it is non-negative.
     *
     * @param c the byte to check
     * @return {@code true} if the byte is in the US-ASCII range
     */
    private boolean isAsciiChar(final byte c) {
        return c >= 0;
    }
}