/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
017
018 package org.apache.commons.codec.binary;
019
020 /**
021 * Provides Base32 encoding and decoding as defined by <a href="http://www.ietf.org/rfc/rfc4648.txt">RFC 4648</a>.
022 *
023 * <p>
024 * The class can be parameterized in the following manner with various constructors:
025 * <ul>
026 * <li>Whether to use the "base32hex" variant instead of the default "base32"</li>
027 * <li>Line length: Default 76. Line length that aren't multiples of 8 will still essentially end up being multiples of
028 * 8 in the encoded data.
029 * <li>Line separator: Default is CRLF ("\r\n")</li>
030 * </ul>
031 * </p>
032 * <p>
033 * This class operates directly on byte streams, and not character streams.
034 * </p>
035 * <p>
036 * This class is thread-safe.
037 * </p>
038 *
039 * @see <a href="http://www.ietf.org/rfc/rfc4648.txt">RFC 4648</a>
040 *
041 * @since 1.5
042 * @version $Id: Base32.html 889935 2013-12-11 05:05:13Z ggregory $
043 */
044 public class Base32 extends BaseNCodec {
045
046 /**
047 * BASE32 characters are 5 bits in length.
048 * They are formed by taking a block of five octets to form a 40-bit string,
049 * which is converted into eight BASE32 characters.
050 */
051 private static final int BITS_PER_ENCODED_BYTE = 5;
052 private static final int BYTES_PER_ENCODED_BLOCK = 8;
053 private static final int BYTES_PER_UNENCODED_BLOCK = 5;
054
055 /**
056 * Chunk separator per RFC 2045 section 2.1.
057 *
058 * @see <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045 section 2.1</a>
059 */
060 private static final byte[] CHUNK_SEPARATOR = {'\r', '\n'};
061
062 /**
063 * This array is a lookup table that translates Unicode characters drawn from the "Base32 Alphabet" (as specified
064 * in Table 3 of RFC 2045) into their 5-bit positive integer equivalents. Characters that are not in the Base32
065 * alphabet but fall within the bounds of the array are translated to -1.
066 */
067 private static final byte[] DECODE_TABLE = {
068 // 0 1 2 3 4 5 6 7 8 9 A B C D E F
069 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 00-0f
070 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 10-1f
071 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 63, // 20-2f
072 -1, -1, 26, 27, 28, 29, 30, 31, -1, -1, -1, -1, -1, -1, -1, -1, // 30-3f 2-7
073 -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, // 40-4f A-N
074 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, // 50-5a O-Z
075 };
076
077 /**
078 * This array is a lookup table that translates 5-bit positive integer index values into their "Base32 Alphabet"
079 * equivalents as specified in Table 3 of RFC 2045.
080 */
081 private static final byte[] ENCODE_TABLE = {
082 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
083 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
084 '2', '3', '4', '5', '6', '7',
085 };
086
087 /**
088 * This array is a lookup table that translates Unicode characters drawn from the "Base32 |Hex Alphabet" (as
089 * specified in Table 3 of RFC 2045) into their 5-bit positive integer equivalents. Characters that are not in the
090 * Base32 Hex alphabet but fall within the bounds of the array are translated to -1.
091 */
092 private static final byte[] HEX_DECODE_TABLE = {
093 // 0 1 2 3 4 5 6 7 8 9 A B C D E F
094 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 00-0f
095 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 10-1f
096 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 63, // 20-2f
097 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1, // 30-3f 2-7
098 -1, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, // 40-4f A-N
099 25, 26, 27, 28, 29, 30, 31, 32, // 50-57 O-V
100 };
101
102 /**
103 * This array is a lookup table that translates 5-bit positive integer index values into their
104 * "Base32 Hex Alphabet" equivalents as specified in Table 3 of RFC 2045.
105 */
106 private static final byte[] HEX_ENCODE_TABLE = {
107 '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
108 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
109 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V',
110 };
111
112 /** Mask used to extract 5 bits, used when encoding Base32 bytes */
113 private static final int MASK_5BITS = 0x1f;
114
115 // The static final fields above are used for the original static byte[] methods on Base32.
116 // The private member fields below are used with the new streaming approach, which requires
117 // some state be preserved between calls of encode() and decode().
118
119 /**
120 * Place holder for the bytes we're dealing with for our based logic.
121 * Bitwise operations store and extract the encoding or decoding from this variable.
122 */
123
124 /**
125 * Convenience variable to help us determine when our buffer is going to run out of room and needs resizing.
126 * <code>decodeSize = {@link #BYTES_PER_ENCODED_BLOCK} - 1 + lineSeparator.length;</code>
127 */
128 private final int decodeSize;
129
130 /**
131 * Decode table to use.
132 */
133 private final byte[] decodeTable;
134
135 /**
136 * Convenience variable to help us determine when our buffer is going to run out of room and needs resizing.
137 * <code>encodeSize = {@link #BYTES_PER_ENCODED_BLOCK} + lineSeparator.length;</code>
138 */
139 private final int encodeSize;
140
141 /**
142 * Encode table to use.
143 */
144 private final byte[] encodeTable;
145
146 /**
147 * Line separator for encoding. Not used when decoding. Only used if lineLength > 0.
148 */
149 private final byte[] lineSeparator;
150
151 /**
152 * Creates a Base32 codec used for decoding and encoding.
153 * <p>
154 * When encoding the line length is 0 (no chunking).
155 * </p>
156 *
157 */
158 public Base32() {
159 this(false);
160 }
161
162 /**
163 * Creates a Base32 codec used for decoding and encoding.
164 * <p>
165 * When encoding the line length is 0 (no chunking).
166 * </p>
167 * @param useHex if {@code true} then use Base32 Hex alphabet
168 */
169 public Base32(final boolean useHex) {
170 this(0, null, useHex);
171 }
172
173 /**
174 * Creates a Base32 codec used for decoding and encoding.
175 * <p>
176 * When encoding the line length is given in the constructor, the line separator is CRLF.
177 * </p>
178 *
179 * @param lineLength
180 * Each line of encoded data will be at most of the given length (rounded down to nearest multiple of
181 * 8). If lineLength <= 0, then the output will not be divided into lines (chunks). Ignored when
182 * decoding.
183 */
184 public Base32(final int lineLength) {
185 this(lineLength, CHUNK_SEPARATOR);
186 }
187
188 /**
189 * Creates a Base32 codec used for decoding and encoding.
190 * <p>
191 * When encoding the line length and line separator are given in the constructor.
192 * </p>
193 * <p>
194 * Line lengths that aren't multiples of 8 will still essentially end up being multiples of 8 in the encoded data.
195 * </p>
196 *
197 * @param lineLength
198 * Each line of encoded data will be at most of the given length (rounded down to nearest multiple of
199 * 8). If lineLength <= 0, then the output will not be divided into lines (chunks). Ignored when
200 * decoding.
201 * @param lineSeparator
202 * Each line of encoded data will end with this sequence of bytes.
203 * @throws IllegalArgumentException
204 * The provided lineSeparator included some Base32 characters. That's not going to work!
205 */
206 public Base32(final int lineLength, final byte[] lineSeparator) {
207 this(lineLength, lineSeparator, false);
208 }
209
210 /**
211 * Creates a Base32 / Base32 Hex codec used for decoding and encoding.
212 * <p>
213 * When encoding the line length and line separator are given in the constructor.
214 * </p>
215 * <p>
216 * Line lengths that aren't multiples of 8 will still essentially end up being multiples of 8 in the encoded data.
217 * </p>
218 *
219 * @param lineLength
220 * Each line of encoded data will be at most of the given length (rounded down to nearest multiple of
221 * 8). If lineLength <= 0, then the output will not be divided into lines (chunks). Ignored when
222 * decoding.
223 * @param lineSeparator
224 * Each line of encoded data will end with this sequence of bytes.
225 * @param useHex
226 * if {@code true}, then use Base32 Hex alphabet, otherwise use Base32 alphabet
227 * @throws IllegalArgumentException
228 * The provided lineSeparator included some Base32 characters. That's not going to work! Or the
229 * lineLength > 0 and lineSeparator is null.
230 */
231 public Base32(final int lineLength, final byte[] lineSeparator, final boolean useHex) {
232 super(BYTES_PER_UNENCODED_BLOCK, BYTES_PER_ENCODED_BLOCK,
233 lineLength,
234 lineSeparator == null ? 0 : lineSeparator.length);
235 if (useHex){
236 this.encodeTable = HEX_ENCODE_TABLE;
237 this.decodeTable = HEX_DECODE_TABLE;
238 } else {
239 this.encodeTable = ENCODE_TABLE;
240 this.decodeTable = DECODE_TABLE;
241 }
242 if (lineLength > 0) {
243 if (lineSeparator == null) {
244 throw new IllegalArgumentException("lineLength "+lineLength+" > 0, but lineSeparator is null");
245 }
246 // Must be done after initializing the tables
247 if (containsAlphabetOrPad(lineSeparator)) {
248 final String sep = StringUtils.newStringUtf8(lineSeparator);
249 throw new IllegalArgumentException("lineSeparator must not contain Base32 characters: [" + sep + "]");
250 }
251 this.encodeSize = BYTES_PER_ENCODED_BLOCK + lineSeparator.length;
252 this.lineSeparator = new byte[lineSeparator.length];
253 System.arraycopy(lineSeparator, 0, this.lineSeparator, 0, lineSeparator.length);
254 } else {
255 this.encodeSize = BYTES_PER_ENCODED_BLOCK;
256 this.lineSeparator = null;
257 }
258 this.decodeSize = this.encodeSize - 1;
259 }
260
261 /**
262 * <p>
263 * Decodes all of the provided data, starting at inPos, for inAvail bytes. Should be called at least twice: once
264 * with the data to decode, and once with inAvail set to "-1" to alert decoder that EOF has been reached. The "-1"
265 * call is not necessary when decoding, but it doesn't hurt, either.
266 * </p>
267 * <p>
268 * Ignores all non-Base32 characters. This is how chunked (e.g. 76 character) data is handled, since CR and LF are
269 * silently ignored, but has implications for other bytes, too. This method subscribes to the garbage-in,
270 * garbage-out philosophy: it will not check the provided data for validity.
271 * </p>
272 *
273 * @param in
274 * byte[] array of ascii data to Base32 decode.
275 * @param inPos
276 * Position to start reading data from.
277 * @param inAvail
278 * Amount of bytes available from input for encoding.
279 * @param context the context to be used
280 *
281 * Output is written to {@link Context#buffer} as 8-bit octets, using {@link Context#pos} as the buffer position
282 */
283 @Override
284 void decode(final byte[] in, int inPos, final int inAvail, final Context context) {
285 // package protected for access from I/O streams
286
287 if (context.eof) {
288 return;
289 }
290 if (inAvail < 0) {
291 context.eof = true;
292 }
293 for (int i = 0; i < inAvail; i++) {
294 final byte b = in[inPos++];
295 if (b == PAD) {
296 // We're done.
297 context.eof = true;
298 break;
299 } else {
300 final byte[] buffer = ensureBufferSize(decodeSize, context);
301 if (b >= 0 && b < this.decodeTable.length) {
302 final int result = this.decodeTable[b];
303 if (result >= 0) {
304 context.modulus = (context.modulus+1) % BYTES_PER_ENCODED_BLOCK;
305 // collect decoded bytes
306 context.lbitWorkArea = (context.lbitWorkArea << BITS_PER_ENCODED_BYTE) + result;
307 if (context.modulus == 0) { // we can output the 5 bytes
308 buffer[context.pos++] = (byte) ((context.lbitWorkArea >> 32) & MASK_8BITS);
309 buffer[context.pos++] = (byte) ((context.lbitWorkArea >> 24) & MASK_8BITS);
310 buffer[context.pos++] = (byte) ((context.lbitWorkArea >> 16) & MASK_8BITS);
311 buffer[context.pos++] = (byte) ((context.lbitWorkArea >> 8) & MASK_8BITS);
312 buffer[context.pos++] = (byte) (context.lbitWorkArea & MASK_8BITS);
313 }
314 }
315 }
316 }
317 }
318
319 // Two forms of EOF as far as Base32 decoder is concerned: actual
320 // EOF (-1) and first time '=' character is encountered in stream.
321 // This approach makes the '=' padding characters completely optional.
322 if (context.eof && context.modulus >= 2) { // if modulus < 2, nothing to do
323 final byte[] buffer = ensureBufferSize(decodeSize, context);
324
325 // we ignore partial bytes, i.e. only multiples of 8 count
326 switch (context.modulus) {
327 case 2 : // 10 bits, drop 2 and output one byte
328 buffer[context.pos++] = (byte) ((context.lbitWorkArea >> 2) & MASK_8BITS);
329 break;
330 case 3 : // 15 bits, drop 7 and output 1 byte
331 buffer[context.pos++] = (byte) ((context.lbitWorkArea >> 7) & MASK_8BITS);
332 break;
333 case 4 : // 20 bits = 2*8 + 4
334 context.lbitWorkArea = context.lbitWorkArea >> 4; // drop 4 bits
335 buffer[context.pos++] = (byte) ((context.lbitWorkArea >> 8) & MASK_8BITS);
336 buffer[context.pos++] = (byte) ((context.lbitWorkArea) & MASK_8BITS);
337 break;
338 case 5 : // 25bits = 3*8 + 1
339 context.lbitWorkArea = context.lbitWorkArea >> 1;
340 buffer[context.pos++] = (byte) ((context.lbitWorkArea >> 16) & MASK_8BITS);
341 buffer[context.pos++] = (byte) ((context.lbitWorkArea >> 8) & MASK_8BITS);
342 buffer[context.pos++] = (byte) ((context.lbitWorkArea) & MASK_8BITS);
343 break;
344 case 6 : // 30bits = 3*8 + 6
345 context.lbitWorkArea = context.lbitWorkArea >> 6;
346 buffer[context.pos++] = (byte) ((context.lbitWorkArea >> 16) & MASK_8BITS);
347 buffer[context.pos++] = (byte) ((context.lbitWorkArea >> 8) & MASK_8BITS);
348 buffer[context.pos++] = (byte) ((context.lbitWorkArea) & MASK_8BITS);
349 break;
350 case 7 : // 35 = 4*8 +3
351 context.lbitWorkArea = context.lbitWorkArea >> 3;
352 buffer[context.pos++] = (byte) ((context.lbitWorkArea >> 24) & MASK_8BITS);
353 buffer[context.pos++] = (byte) ((context.lbitWorkArea >> 16) & MASK_8BITS);
354 buffer[context.pos++] = (byte) ((context.lbitWorkArea >> 8) & MASK_8BITS);
355 buffer[context.pos++] = (byte) ((context.lbitWorkArea) & MASK_8BITS);
356 break;
357 default:
358 // modulus can be 0-7, and we excluded 0,1 already
359 throw new IllegalStateException("Impossible modulus "+context.modulus);
360 }
361 }
362 }
363
364 /**
365 * <p>
366 * Encodes all of the provided data, starting at inPos, for inAvail bytes. Must be called at least twice: once with
367 * the data to encode, and once with inAvail set to "-1" to alert encoder that EOF has been reached, so flush last
368 * remaining bytes (if not multiple of 5).
369 * </p>
370 *
371 * @param in
372 * byte[] array of binary data to Base32 encode.
373 * @param inPos
374 * Position to start reading data from.
375 * @param inAvail
376 * Amount of bytes available from input for encoding.
377 * @param context the context to be used
378 */
379 @Override
380 void encode(final byte[] in, int inPos, final int inAvail, final Context context) {
381 // package protected for access from I/O streams
382
383 if (context.eof) {
384 return;
385 }
386 // inAvail < 0 is how we're informed of EOF in the underlying data we're
387 // encoding.
388 if (inAvail < 0) {
389 context.eof = true;
390 if (0 == context.modulus && lineLength == 0) {
391 return; // no leftovers to process and not using chunking
392 }
393 final byte[] buffer = ensureBufferSize(encodeSize, context);
394 final int savedPos = context.pos;
395 switch (context.modulus) { // % 5
396 case 0 :
397 break;
398 case 1 : // Only 1 octet; take top 5 bits then remainder
399 buffer[context.pos++] = encodeTable[(int)(context.lbitWorkArea >> 3) & MASK_5BITS]; // 8-1*5 = 3
400 buffer[context.pos++] = encodeTable[(int)(context.lbitWorkArea << 2) & MASK_5BITS]; // 5-3=2
401 buffer[context.pos++] = PAD;
402 buffer[context.pos++] = PAD;
403 buffer[context.pos++] = PAD;
404 buffer[context.pos++] = PAD;
405 buffer[context.pos++] = PAD;
406 buffer[context.pos++] = PAD;
407 break;
408 case 2 : // 2 octets = 16 bits to use
409 buffer[context.pos++] = encodeTable[(int)(context.lbitWorkArea >> 11) & MASK_5BITS]; // 16-1*5 = 11
410 buffer[context.pos++] = encodeTable[(int)(context.lbitWorkArea >> 6) & MASK_5BITS]; // 16-2*5 = 6
411 buffer[context.pos++] = encodeTable[(int)(context.lbitWorkArea >> 1) & MASK_5BITS]; // 16-3*5 = 1
412 buffer[context.pos++] = encodeTable[(int)(context.lbitWorkArea << 4) & MASK_5BITS]; // 5-1 = 4
413 buffer[context.pos++] = PAD;
414 buffer[context.pos++] = PAD;
415 buffer[context.pos++] = PAD;
416 buffer[context.pos++] = PAD;
417 break;
418 case 3 : // 3 octets = 24 bits to use
419 buffer[context.pos++] = encodeTable[(int)(context.lbitWorkArea >> 19) & MASK_5BITS]; // 24-1*5 = 19
420 buffer[context.pos++] = encodeTable[(int)(context.lbitWorkArea >> 14) & MASK_5BITS]; // 24-2*5 = 14
421 buffer[context.pos++] = encodeTable[(int)(context.lbitWorkArea >> 9) & MASK_5BITS]; // 24-3*5 = 9
422 buffer[context.pos++] = encodeTable[(int)(context.lbitWorkArea >> 4) & MASK_5BITS]; // 24-4*5 = 4
423 buffer[context.pos++] = encodeTable[(int)(context.lbitWorkArea << 1) & MASK_5BITS]; // 5-4 = 1
424 buffer[context.pos++] = PAD;
425 buffer[context.pos++] = PAD;
426 buffer[context.pos++] = PAD;
427 break;
428 case 4 : // 4 octets = 32 bits to use
429 buffer[context.pos++] = encodeTable[(int)(context.lbitWorkArea >> 27) & MASK_5BITS]; // 32-1*5 = 27
430 buffer[context.pos++] = encodeTable[(int)(context.lbitWorkArea >> 22) & MASK_5BITS]; // 32-2*5 = 22
431 buffer[context.pos++] = encodeTable[(int)(context.lbitWorkArea >> 17) & MASK_5BITS]; // 32-3*5 = 17
432 buffer[context.pos++] = encodeTable[(int)(context.lbitWorkArea >> 12) & MASK_5BITS]; // 32-4*5 = 12
433 buffer[context.pos++] = encodeTable[(int)(context.lbitWorkArea >> 7) & MASK_5BITS]; // 32-5*5 = 7
434 buffer[context.pos++] = encodeTable[(int)(context.lbitWorkArea >> 2) & MASK_5BITS]; // 32-6*5 = 2
435 buffer[context.pos++] = encodeTable[(int)(context.lbitWorkArea << 3) & MASK_5BITS]; // 5-2 = 3
436 buffer[context.pos++] = PAD;
437 break;
438 default:
439 throw new IllegalStateException("Impossible modulus "+context.modulus);
440 }
441 context.currentLinePos += context.pos - savedPos; // keep track of current line position
442 // if currentPos == 0 we are at the start of a line, so don't add CRLF
443 if (lineLength > 0 && context.currentLinePos > 0){ // add chunk separator if required
444 System.arraycopy(lineSeparator, 0, buffer, context.pos, lineSeparator.length);
445 context.pos += lineSeparator.length;
446 }
447 } else {
448 for (int i = 0; i < inAvail; i++) {
449 final byte[] buffer = ensureBufferSize(encodeSize, context);
450 context.modulus = (context.modulus+1) % BYTES_PER_UNENCODED_BLOCK;
451 int b = in[inPos++];
452 if (b < 0) {
453 b += 256;
454 }
455 context.lbitWorkArea = (context.lbitWorkArea << 8) + b; // BITS_PER_BYTE
456 if (0 == context.modulus) { // we have enough bytes to create our output
457 buffer[context.pos++] = encodeTable[(int)(context.lbitWorkArea >> 35) & MASK_5BITS];
458 buffer[context.pos++] = encodeTable[(int)(context.lbitWorkArea >> 30) & MASK_5BITS];
459 buffer[context.pos++] = encodeTable[(int)(context.lbitWorkArea >> 25) & MASK_5BITS];
460 buffer[context.pos++] = encodeTable[(int)(context.lbitWorkArea >> 20) & MASK_5BITS];
461 buffer[context.pos++] = encodeTable[(int)(context.lbitWorkArea >> 15) & MASK_5BITS];
462 buffer[context.pos++] = encodeTable[(int)(context.lbitWorkArea >> 10) & MASK_5BITS];
463 buffer[context.pos++] = encodeTable[(int)(context.lbitWorkArea >> 5) & MASK_5BITS];
464 buffer[context.pos++] = encodeTable[(int)context.lbitWorkArea & MASK_5BITS];
465 context.currentLinePos += BYTES_PER_ENCODED_BLOCK;
466 if (lineLength > 0 && lineLength <= context.currentLinePos) {
467 System.arraycopy(lineSeparator, 0, buffer, context.pos, lineSeparator.length);
468 context.pos += lineSeparator.length;
469 context.currentLinePos = 0;
470 }
471 }
472 }
473 }
474 }
475
476 /**
477 * Returns whether or not the <code>octet</code> is in the Base32 alphabet.
478 *
479 * @param octet
480 * The value to test
481 * @return {@code true} if the value is defined in the the Base32 alphabet {@code false} otherwise.
482 */
483 @Override
484 public boolean isInAlphabet(final byte octet) {
485 return octet >= 0 && octet < decodeTable.length && decodeTable[octet] != -1;
486 }
487 }