/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.commons.codec.net;

import java.nio.ByteBuffer;
import java.util.BitSet;
import org.apache.commons.codec.BinaryDecoder;
import org.apache.commons.codec.BinaryEncoder;
import org.apache.commons.codec.DecoderException;
import org.apache.commons.codec.EncoderException;

/**
 * Implements the Percent-Encoding scheme, as described in HTTP 1.1 specification. For extensibility, an array of
 * special US-ASCII characters can be specified in order to perform proper URI encoding for the different parts
 * of the URI.
 * <p>
 * This class is immutable and thread-safe: all of its state is held in {@code final} fields assigned by the
 * constructor, and the internal {@link BitSet} (which is not thread-safe by itself) is never modified after
 * construction; it is only read afterwards.
 * </p>
 *
 * @see <a href="https://tools.ietf.org/html/rfc3986#section-2.1">Percent-Encoding</a>
 * @since 1.12
 */
public class PercentCodec implements BinaryEncoder, BinaryDecoder {

    /**
     * The escape character used by the Percent-Encoding in order to introduce an encoded character.
     */
    private static final byte ESCAPE_CHAR = '%';

    /**
     * The bit set used to store the US-ASCII characters that should always be encoded. Populated once by the
     * constructor and only read afterwards.
     */
    private final BitSet alwaysEncodeChars;

    /**
     * The flag defining if the space character should be encoded as '+'.
     */
    private final boolean plusForSpace;

    /**
     * The minimum code of the bytes stored in {@link #alwaysEncodeChars}, used to skip bit set look-ups for
     * bytes below this value.
     */
    private final int alwaysEncodeCharsMin;

    /**
     * The maximum code of the bytes stored in {@link #alwaysEncodeChars}, used to skip bit set look-ups for
     * bytes above this value.
     */
    private final int alwaysEncodeCharsMax;

    /**
     * Constructs a Percent codec that will encode all the non US-ASCII characters using the Percent-Encoding
     * while it will not encode any of the US-ASCII characters, except for character '%' that is used as escape
     * character for Percent-Encoding. The space character is not replaced by '+'.
     */
    public PercentCodec() {
        this(null, false);
    }

    /**
     * Constructs a Percent codec by specifying the characters that belong to US-ASCII that should
     * always be encoded. The rest US-ASCII characters will not be encoded, except for character '%' that
     * is used as escape character for Percent-Encoding.
     *
     * @param alwaysEncodeChars the unsafe US-ASCII characters that should always be encoded; may be
     *                          {@code null}, in which case only '%' is always encoded
     * @param plusForSpace      the flag defining if the space character should be encoded as '+'
     * @throws IllegalArgumentException if {@code alwaysEncodeChars} contains a negative (non US-ASCII) byte
     */
    public PercentCodec(final byte[] alwaysEncodeChars, final boolean plusForSpace) {
        this.plusForSpace = plusForSpace;
        this.alwaysEncodeChars = toBitSet(alwaysEncodeChars);
        // The escape character is always present, so the bit set is never empty here:
        // nextSetBit(0) is the lowest set bit and length() - 1 the highest one.
        this.alwaysEncodeCharsMin = this.alwaysEncodeChars.nextSetBit(0);
        this.alwaysEncodeCharsMax = this.alwaysEncodeChars.length() - 1;
    }

    /**
     * Builds the look-up bit set from the given byte array; the escape character '%' is always added.
     *
     * @param alwaysEncodeCharsArray the US-ASCII bytes to store, may be {@code null}
     * @return a new bit set with one bit set per given byte, plus the bit of the escape character
     * @throws IllegalArgumentException if the array contains a negative byte
     */
    private static BitSet toBitSet(final byte[] alwaysEncodeCharsArray) {
        final BitSet chars = new BitSet();
        if (alwaysEncodeCharsArray != null) {
            for (final byte b : alwaysEncodeCharsArray) {
                // BitSet.set(int) rejects negative indexes with an IndexOutOfBoundsException;
                // fail early with a message that names the actual problem instead.
                if (b < 0) {
                    throw new IllegalArgumentException("byte must be >= 0");
                }
                chars.set(b);
            }
        }
        chars.set(ESCAPE_CHAR);
        return chars;
    }

    /**
     * Percent-Encoding based on RFC 3986. The non US-ASCII characters are encoded, as well as the
     * US-ASCII characters that are configured to be always encoded.
     * <p>
     * When nothing has to be changed, the given array itself is returned (not a copy).
     * </p>
     *
     * @param bytes the bytes to encode, may be {@code null}
     * @return the encoded bytes, the input array itself if no byte needed encoding, or {@code null} if the
     *         input was {@code null}
     * @throws EncoderException declared by the {@link BinaryEncoder} contract; not thrown by the code of this
     *                          method itself
     */
    @Override
    public byte[] encode(final byte[] bytes) throws EncoderException {
        if (bytes == null) {
            return null;
        }

        final int expectedEncodingBytes = expectedEncodingBytes(bytes);
        // The lengths differ exactly when at least one byte expands to its three-byte "%XX" form.
        final boolean willEncode = expectedEncodingBytes != bytes.length;
        if (willEncode || (plusForSpace && containsSpace(bytes))) {
            return doEncode(bytes, expectedEncodingBytes, willEncode);
        }
        return bytes;
    }

    /**
     * Writes the encoded form of the given bytes into a new array of the pre-computed length.
     *
     * @param bytes          the bytes to encode
     * @param expectedLength the exact length of the result, as computed by {@link #expectedEncodingBytes(byte[])}
     * @param willEncode     {@code true} if at least one byte has to be percent-encoded; when {@code false} the
     *                       per-byte check is skipped and only the space substitution can apply
     * @return the encoded bytes
     */
    private byte[] doEncode(final byte[] bytes, final int expectedLength, final boolean willEncode) {
        final ByteBuffer buffer = ByteBuffer.allocate(expectedLength);
        for (final byte b : bytes) {
            if (willEncode && canEncode(b)) {
                // The byte is handed over as-is: the former "(byte) (256 + b)" adjustment was a no-op, because
                // the narrowing cast yields the very same byte value. Utils.hexDigit receives the same
                // arguments as before.
                buffer.put(ESCAPE_CHAR);
                buffer.put((byte) Utils.hexDigit(b >> 4));
                buffer.put((byte) Utils.hexDigit(b));
            } else if (plusForSpace && b == ' ') {
                buffer.put((byte) '+');
            } else {
                buffer.put(b);
            }
        }
        return buffer.array();
    }

    /**
     * Computes the length of the encoded form: three bytes for each byte that gets percent-encoded, one byte
     * for every other byte.
     *
     * @param bytes the bytes to be encoded
     * @return the number of bytes of the encoded result
     */
    private int expectedEncodingBytes(final byte[] bytes) {
        int byteCount = 0;
        for (final byte b : bytes) {
            byteCount += canEncode(b) ? 3 : 1;
        }
        return byteCount;
    }

    /**
     * Checks if the given bytes contain at least one space character.
     *
     * @param bytes the bytes to scan
     * @return {@code true} if a space is found, {@code false} otherwise
     */
    private boolean containsSpace(final byte[] bytes) {
        for (final byte b : bytes) {
            if (b == ' ') {
                return true;
            }
        }
        return false;
    }

    /**
     * Checks if the given byte has to be percent-encoded: either it is not a US-ASCII character, or it is one
     * of the configured always-encode characters.
     *
     * @param c the byte to check
     * @return {@code true} if the byte has to be percent-encoded
     */
    private boolean canEncode(final byte c) {
        return !isAsciiChar(c) || (inAlwaysEncodeCharsRange(c) && alwaysEncodeChars.get(c));
    }

    /**
     * Checks if the given byte lies between the lowest and the highest always-encode character, so that bytes
     * outside of that range do not need a bit set look-up.
     *
     * @param c the byte to check
     * @return {@code true} if the byte is within the range, bounds included
     */
    private boolean inAlwaysEncodeCharsRange(final byte c) {
        return c >= alwaysEncodeCharsMin && c <= alwaysEncodeCharsMax;
    }

    /**
     * Checks if the given byte is a US-ASCII character, i.e. it is not negative.
     *
     * @param c the byte to check
     * @return {@code true} if the byte is greater than or equal to zero
     */
    private boolean isAsciiChar(final byte c) {
        return c >= 0;
    }

    /**
     * Decodes bytes encoded with Percent-Encoding based on RFC 3986. The reverse process of
     * {@link #encode(byte[])} is performed: each '%' followed by two hexadecimal digits is replaced by the byte
     * those digits denote and, if the codec was built with {@code plusForSpace}, each '+' is replaced by a space.
     * All other bytes are copied unchanged.
     *
     * @param bytes the bytes to decode, may be {@code null}
     * @return a new array with the decoded bytes, or {@code null} if the input was {@code null}
     * @throws DecoderException if a '%' is not followed by two more bytes, or if the hexadecimal digit
     *                          conversion fails
     */
    @Override
    public byte[] decode(final byte[] bytes) throws DecoderException {
        if (bytes == null) {
            return null;
        }

        final ByteBuffer buffer = ByteBuffer.allocate(expectedDecodingBytes(bytes));
        for (int i = 0; i < bytes.length; i++) {
            final byte b = bytes[i];
            if (b == ESCAPE_CHAR) {
                try {
                    final int u = Utils.digit16(bytes[++i]);
                    final int l = Utils.digit16(bytes[++i]);
                    buffer.put((byte) ((u << 4) + l));
                } catch (final ArrayIndexOutOfBoundsException e) {
                    // A truncated escape sequence at the end of the input runs past the array.
                    throw new DecoderException("Invalid percent decoding: ", e);
                }
            } else if (plusForSpace && b == '+') {
                buffer.put((byte) ' ');
            } else {
                buffer.put(b);
            }
        }
        return buffer.array();
    }

    /**
     * Computes the length of the decoded form: every escape sequence (the '%' and the two bytes after it) and
     * every other byte counts as one decoded byte.
     *
     * @param bytes the encoded bytes
     * @return the number of bytes of the decoded result
     */
    private int expectedDecodingBytes(final byte[] bytes) {
        int byteCount = 0;
        for (int i = 0; i < bytes.length; ) {
            final byte b = bytes[i];
            i += b == ESCAPE_CHAR ? 3 : 1;
            byteCount++;
        }
        return byteCount;
    }

    /**
     * Encodes an object using the Percent-Encoding. Only byte[] objects are accepted.
     *
     * @param obj the object to encode
     * @return the encoding result byte[] as Object, or {@code null} if the input was {@code null}
     * @throws EncoderException if the object is not a byte array
     */
    @Override
    public Object encode(final Object obj) throws EncoderException {
        if (obj == null) {
            return null;
        } else if (obj instanceof byte[]) {
            return encode((byte[]) obj);
        } else {
            throw new EncoderException("Objects of type " + obj.getClass().getName() + " cannot be Percent encoded");
        }
    }

    /**
     * Decodes a byte[] Object, whose bytes are encoded with Percent-Encoding.
     *
     * @param obj the object to decode
     * @return the decoding result byte[] as Object, or {@code null} if the input was {@code null}
     * @throws DecoderException if the object is not a byte array
     */
    @Override
    public Object decode(final Object obj) throws DecoderException {
        if (obj == null) {
            return null;
        } else if (obj instanceof byte[]) {
            return decode((byte[]) obj);
        } else {
            throw new DecoderException("Objects of type " + obj.getClass().getName() + " cannot be Percent decoded");
        }
    }
}