001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing,
013 * software distributed under the License is distributed on an
014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015 * KIND, either express or implied.  See the License for the
016 * specific language governing permissions and limitations
017 * under the License.
018 */
019package org.apache.commons.compress.archivers.tar;
020
021import static org.apache.commons.compress.archivers.tar.TarConstants.CHKSUMLEN;
022import static org.apache.commons.compress.archivers.tar.TarConstants.CHKSUM_OFFSET;
023
024import java.io.IOException;
025import java.math.BigInteger;
026import java.nio.ByteBuffer;
027import org.apache.commons.compress.archivers.zip.ZipEncoding;
028import org.apache.commons.compress.archivers.zip.ZipEncodingHelper;
029
030/**
031 * This class provides static utility methods to work with byte streams.
032 *
033 * @Immutable
034 */
035// CheckStyle:HideUtilityClassConstructorCheck OFF (bc)
036public class TarUtils {
037
    /** Mask selecting the low eight bits of an int, i.e. the unsigned value of a byte. */
    private static final int BYTE_MASK = 255;

    /**
     * Encoding tried first by the overloads that take no explicit encoding.
     * NOTE(review): presumably a {@code null} name selects the platform
     * default encoding - confirm against ZipEncodingHelper.
     */
    static final ZipEncoding DEFAULT_ENCODING =
        ZipEncodingHelper.getZipEncoding(null);
042
043    /**
044     * Encapsulates the algorithms used up to Commons Compress 1.3 as
045     * ZipEncoding.
046     */
047    static final ZipEncoding FALLBACK_ENCODING = new ZipEncoding() {
048            @Override
049            public boolean canEncode(final String name) { return true; }
050
051            @Override
052            public ByteBuffer encode(final String name) {
053                final int length = name.length();
054                final byte[] buf = new byte[length];
055
056                // copy until end of input or output is reached.
057                for (int i = 0; i < length; ++i) {
058                    buf[i] = (byte) name.charAt(i);
059                }
060                return ByteBuffer.wrap(buf);
061            }
062
063            @Override
064            public String decode(final byte[] buffer) {
065                final int length = buffer.length;
066                final StringBuilder result = new StringBuilder(length);
067
068                for (final byte b : buffer) {
069                    if (b == 0) { // Trailing null
070                        break;
071                    }
072                    result.append((char) (b & 0xFF)); // Allow for sign-extension
073                }
074
075                return result.toString();
076            }
077        };
078
    /**
     * Private constructor to prevent instantiation of this utility class;
     * all of its methods are static.
     */
    private TarUtils(){
    }
082
083    /**
084     * Parse an octal string from a buffer.
085     *
086     * <p>Leading spaces are ignored.
087     * The buffer must contain a trailing space or NUL,
088     * and may contain an additional trailing space or NUL.</p>
089     *
090     * <p>The input buffer is allowed to contain all NULs,
091     * in which case the method returns 0L
092     * (this allows for missing fields).</p>
093     *
094     * <p>To work-around some tar implementations that insert a
095     * leading NUL this method returns 0 if it detects a leading NUL
096     * since Commons Compress 1.4.</p>
097     *
098     * @param buffer The buffer from which to parse.
099     * @param offset The offset into the buffer from which to parse.
100     * @param length The maximum number of bytes to parse - must be at least 2 bytes.
101     * @return The long value of the octal string.
102     * @throws IllegalArgumentException if the trailing space/NUL is missing or if a invalid byte is detected.
103     */
104    public static long parseOctal(final byte[] buffer, final int offset, final int length) {
105        long    result = 0;
106        int     end = offset + length;
107        int     start = offset;
108
109        if (length < 2){
110            throw new IllegalArgumentException("Length "+length+" must be at least 2");
111        }
112
113        if (buffer[start] == 0) {
114            return 0L;
115        }
116
117        // Skip leading spaces
118        while (start < end){
119            if (buffer[start] == ' '){
120                start++;
121            } else {
122                break;
123            }
124        }
125
126        // Trim all trailing NULs and spaces.
127        // The ustar and POSIX tar specs require a trailing NUL or
128        // space but some implementations use the extra digit for big
129        // sizes/uids/gids ...
130        byte trailer = buffer[end - 1];
131        while (start < end && (trailer == 0 || trailer == ' ')) {
132            end--;
133            trailer = buffer[end - 1];
134        }
135
136        for ( ;start < end; start++) {
137            final byte currentByte = buffer[start];
138            // CheckStyle:MagicNumber OFF
139            if (currentByte < '0' || currentByte > '7'){
140                throw new IllegalArgumentException(
141                        exceptionMessage(buffer, offset, length, start, currentByte));
142            }
143            result = (result << 3) + (currentByte - '0'); // convert from ASCII
144            // CheckStyle:MagicNumber ON
145        }
146
147        return result;
148    }
149
150    /**
151     * Compute the value contained in a byte buffer.  If the most
152     * significant bit of the first byte in the buffer is set, this
153     * bit is ignored and the rest of the buffer is interpreted as a
154     * binary number.  Otherwise, the buffer is interpreted as an
155     * octal number as per the parseOctal function above.
156     *
157     * @param buffer The buffer from which to parse.
158     * @param offset The offset into the buffer from which to parse.
159     * @param length The maximum number of bytes to parse.
160     * @return The long value of the octal or binary string.
161     * @throws IllegalArgumentException if the trailing space/NUL is
162     * missing or an invalid byte is detected in an octal number, or
163     * if a binary number would exceed the size of a signed long
164     * 64-bit integer.
165     * @since 1.4
166     */
167    public static long parseOctalOrBinary(final byte[] buffer, final int offset,
168                                          final int length) {
169
170        if ((buffer[offset] & 0x80) == 0) {
171            return parseOctal(buffer, offset, length);
172        }
173        final boolean negative = buffer[offset] == (byte) 0xff;
174        if (length < 9) {
175            return parseBinaryLong(buffer, offset, length, negative);
176        }
177        return parseBinaryBigInteger(buffer, offset, length, negative);
178    }
179
180    private static long parseBinaryLong(final byte[] buffer, final int offset,
181                                        final int length,
182                                        final boolean negative) {
183        if (length >= 9) {
184            throw new IllegalArgumentException("At offset " + offset + ", "
185                                               + length + " byte binary number"
186                                               + " exceeds maximum signed long"
187                                               + " value");
188        }
189        long val = 0;
190        for (int i = 1; i < length; i++) {
191            val = (val << 8) + (buffer[offset + i] & 0xff);
192        }
193        if (negative) {
194            // 2's complement
195            val--;
196            val ^= (long) Math.pow(2.0, (length - 1) * 8.0) - 1;
197        }
198        return negative ? -val : val;
199    }
200
201    private static long parseBinaryBigInteger(final byte[] buffer,
202                                              final int offset,
203                                              final int length,
204                                              final boolean negative) {
205        final byte[] remainder = new byte[length - 1];
206        System.arraycopy(buffer, offset + 1, remainder, 0, length - 1);
207        BigInteger val = new BigInteger(remainder);
208        if (negative) {
209            // 2's complement
210            val = val.add(BigInteger.valueOf(-1)).not();
211        }
212        if (val.bitLength() > 63) {
213            throw new IllegalArgumentException("At offset " + offset + ", "
214                                               + length + " byte binary number"
215                                               + " exceeds maximum signed long"
216                                               + " value");
217        }
218        return negative ? -val.longValue() : val.longValue();
219    }
220
221    /**
222     * Parse a boolean byte from a buffer.
223     * Leading spaces and NUL are ignored.
224     * The buffer may contain trailing spaces or NULs.
225     *
226     * @param buffer The buffer from which to parse.
227     * @param offset The offset into the buffer from which to parse.
228     * @return The boolean value of the bytes.
229     * @throws IllegalArgumentException if an invalid byte is detected.
230     */
231    public static boolean parseBoolean(final byte[] buffer, final int offset) {
232        return buffer[offset] == 1;
233    }
234
235    // Helper method to generate the exception message
236    private static String exceptionMessage(final byte[] buffer, final int offset,
237            final int length, final int current, final byte currentByte) {
238        // default charset is good enough for an exception message,
239        //
240        // the alternative was to modify parseOctal and
241        // parseOctalOrBinary to receive the ZipEncoding of the
242        // archive (deprecating the existing public methods, of
243        // course) and dealing with the fact that ZipEncoding#decode
244        // can throw an IOException which parseOctal* doesn't declare
245        String string = new String(buffer, offset, length);
246
247        string=string.replaceAll("\0", "{NUL}"); // Replace NULs to allow string to be printed
248        return "Invalid byte "+currentByte+" at offset "+(current-offset)+" in '"+string+"' len="+length;
249    }
250
251    /**
252     * Parse an entry name from a buffer.
253     * Parsing stops when a NUL is found
254     * or the buffer length is reached.
255     *
256     * @param buffer The buffer from which to parse.
257     * @param offset The offset into the buffer from which to parse.
258     * @param length The maximum number of bytes to parse.
259     * @return The entry name.
260     */
261    public static String parseName(final byte[] buffer, final int offset, final int length) {
262        try {
263            return parseName(buffer, offset, length, DEFAULT_ENCODING);
264        } catch (final IOException ex) { // NOSONAR
265            try {
266                return parseName(buffer, offset, length, FALLBACK_ENCODING);
267            } catch (final IOException ex2) {
268                // impossible
269                throw new RuntimeException(ex2); //NOSONAR
270            }
271        }
272    }
273
274    /**
275     * Parse an entry name from a buffer.
276     * Parsing stops when a NUL is found
277     * or the buffer length is reached.
278     *
279     * @param buffer The buffer from which to parse.
280     * @param offset The offset into the buffer from which to parse.
281     * @param length The maximum number of bytes to parse.
282     * @param encoding name of the encoding to use for file names
283     * @since 1.4
284     * @return The entry name.
285     * @throws IOException on error
286     */
287    public static String parseName(final byte[] buffer, final int offset,
288                                   final int length,
289                                   final ZipEncoding encoding)
290        throws IOException {
291
292        int len = 0;
293        for (int i = offset; len < length && buffer[i] != 0; i++) {
294            len++;
295        }
296        if (len > 0) {
297            final byte[] b = new byte[len];
298            System.arraycopy(buffer, offset, b, 0, len);
299            return encoding.decode(b);
300        }
301        return "";
302    }
303
304    /**
305     * Copy a name into a buffer.
306     * Copies characters from the name into the buffer
307     * starting at the specified offset.
308     * If the buffer is longer than the name, the buffer
309     * is filled with trailing NULs.
310     * If the name is longer than the buffer,
311     * the output is truncated.
312     *
313     * @param name The header name from which to copy the characters.
314     * @param buf The buffer where the name is to be stored.
315     * @param offset The starting offset into the buffer
316     * @param length The maximum number of header bytes to copy.
317     * @return The updated offset, i.e. offset + length
318     */
319    public static int formatNameBytes(final String name, final byte[] buf, final int offset, final int length) {
320        try {
321            return formatNameBytes(name, buf, offset, length, DEFAULT_ENCODING);
322        } catch (final IOException ex) { // NOSONAR
323            try {
324                return formatNameBytes(name, buf, offset, length,
325                                       FALLBACK_ENCODING);
326            } catch (final IOException ex2) {
327                // impossible
328                throw new RuntimeException(ex2); //NOSONAR
329            }
330        }
331    }
332
333    /**
334     * Copy a name into a buffer.
335     * Copies characters from the name into the buffer
336     * starting at the specified offset.
337     * If the buffer is longer than the name, the buffer
338     * is filled with trailing NULs.
339     * If the name is longer than the buffer,
340     * the output is truncated.
341     *
342     * @param name The header name from which to copy the characters.
343     * @param buf The buffer where the name is to be stored.
344     * @param offset The starting offset into the buffer
345     * @param length The maximum number of header bytes to copy.
346     * @param encoding name of the encoding to use for file names
347     * @since 1.4
348     * @return The updated offset, i.e. offset + length
349     * @throws IOException on error
350     */
351    public static int formatNameBytes(final String name, final byte[] buf, final int offset,
352                                      final int length,
353                                      final ZipEncoding encoding)
354        throws IOException {
355        int len = name.length();
356        ByteBuffer b = encoding.encode(name);
357        while (b.limit() > length && len > 0) {
358            b = encoding.encode(name.substring(0, --len));
359        }
360        final int limit = b.limit() - b.position();
361        System.arraycopy(b.array(), b.arrayOffset(), buf, offset, limit);
362
363        // Pad any remaining output bytes with NUL
364        for (int i = limit; i < length; ++i) {
365            buf[offset + i] = 0;
366        }
367
368        return offset + length;
369    }
370
371    /**
372     * Fill buffer with unsigned octal number, padded with leading zeroes.
373     *
374     * @param value number to convert to octal - treated as unsigned
375     * @param buffer destination buffer
376     * @param offset starting offset in buffer
377     * @param length length of buffer to fill
378     * @throws IllegalArgumentException if the value will not fit in the buffer
379     */
380    public static void formatUnsignedOctalString(final long value, final byte[] buffer,
381            final int offset, final int length) {
382        int remaining = length;
383        remaining--;
384        if (value == 0) {
385            buffer[offset + remaining--] = (byte) '0';
386        } else {
387            long val = value;
388            for (; remaining >= 0 && val != 0; --remaining) {
389                // CheckStyle:MagicNumber OFF
390                buffer[offset + remaining] = (byte) ((byte) '0' + (byte) (val & 7));
391                val = val >>> 3;
392                // CheckStyle:MagicNumber ON
393            }
394            if (val != 0){
395                throw new IllegalArgumentException
396                (value+"="+Long.toOctalString(value)+ " will not fit in octal number buffer of length "+length);
397            }
398        }
399
400        for (; remaining >= 0; --remaining) { // leading zeros
401            buffer[offset + remaining] = (byte) '0';
402        }
403    }
404
405    /**
406     * Write an octal integer into a buffer.
407     *
408     * Uses {@link #formatUnsignedOctalString} to format
409     * the value as an octal string with leading zeros.
410     * The converted number is followed by space and NUL
411     *
412     * @param value The value to write
413     * @param buf The buffer to receive the output
414     * @param offset The starting offset into the buffer
415     * @param length The size of the output buffer
416     * @return The updated offset, i.e offset+length
417     * @throws IllegalArgumentException if the value (and trailer) will not fit in the buffer
418     */
419    public static int formatOctalBytes(final long value, final byte[] buf, final int offset, final int length) {
420
421        int idx=length-2; // For space and trailing null
422        formatUnsignedOctalString(value, buf, offset, idx);
423
424        buf[offset + idx++] = (byte) ' '; // Trailing space
425        buf[offset + idx]   = 0; // Trailing null
426
427        return offset + length;
428    }
429
430    /**
431     * Write an octal long integer into a buffer.
432     *
433     * Uses {@link #formatUnsignedOctalString} to format
434     * the value as an octal string with leading zeros.
435     * The converted number is followed by a space.
436     *
437     * @param value The value to write as octal
438     * @param buf The destinationbuffer.
439     * @param offset The starting offset into the buffer.
440     * @param length The length of the buffer
441     * @return The updated offset
442     * @throws IllegalArgumentException if the value (and trailer) will not fit in the buffer
443     */
444    public static int formatLongOctalBytes(final long value, final byte[] buf, final int offset, final int length) {
445
446        final int idx=length-1; // For space
447
448        formatUnsignedOctalString(value, buf, offset, idx);
449        buf[offset + idx] = (byte) ' '; // Trailing space
450
451        return offset + length;
452    }
453
454    /**
455     * Write an long integer into a buffer as an octal string if this
456     * will fit, or as a binary number otherwise.
457     *
458     * Uses {@link #formatUnsignedOctalString} to format
459     * the value as an octal string with leading zeros.
460     * The converted number is followed by a space.
461     *
462     * @param value The value to write into the buffer.
463     * @param buf The destination buffer.
464     * @param offset The starting offset into the buffer.
465     * @param length The length of the buffer.
466     * @return The updated offset.
467     * @throws IllegalArgumentException if the value (and trailer)
468     * will not fit in the buffer.
469     * @since 1.4
470     */
471    public static int formatLongOctalOrBinaryBytes(
472        final long value, final byte[] buf, final int offset, final int length) {
473
474        // Check whether we are dealing with UID/GID or SIZE field
475        final long maxAsOctalChar = length == TarConstants.UIDLEN ? TarConstants.MAXID : TarConstants.MAXSIZE;
476
477        final boolean negative = value < 0;
478        if (!negative && value <= maxAsOctalChar) { // OK to store as octal chars
479            return formatLongOctalBytes(value, buf, offset, length);
480        }
481
482        if (length < 9) {
483            formatLongBinary(value, buf, offset, length, negative);
484        } else {
485            formatBigIntegerBinary(value, buf, offset, length, negative);
486        }
487
488        buf[offset] = (byte) (negative ? 0xff : 0x80);
489        return offset + length;
490    }
491
492    private static void formatLongBinary(final long value, final byte[] buf,
493                                         final int offset, final int length,
494                                         final boolean negative) {
495        final int bits = (length - 1) * 8;
496        final long max = 1L << bits;
497        long val = Math.abs(value); // Long.MIN_VALUE stays Long.MIN_VALUE
498        if (val < 0 || val >= max) {
499            throw new IllegalArgumentException("Value " + value +
500                " is too large for " + length + " byte field.");
501        }
502        if (negative) {
503            val ^= max - 1;
504            val++;
505            val |= 0xffL << bits;
506        }
507        for (int i = offset + length - 1; i >= offset; i--) {
508            buf[i] = (byte) val;
509            val >>= 8;
510        }
511    }
512
513    private static void formatBigIntegerBinary(final long value, final byte[] buf,
514                                               final int offset,
515                                               final int length,
516                                               final boolean negative) {
517        final BigInteger val = BigInteger.valueOf(value);
518        final byte[] b = val.toByteArray();
519        final int len = b.length;
520        if (len > length - 1) {
521            throw new IllegalArgumentException("Value " + value +
522                " is too large for " + length + " byte field.");
523        }
524        final int off = offset + length - len;
525        System.arraycopy(b, 0, buf, off, len);
526        final byte fill = (byte) (negative ? 0xff : 0);
527        for (int i = offset + 1; i < off; i++) {
528            buf[i] = fill;
529        }
530    }
531
532    /**
533     * Writes an octal value into a buffer.
534     *
535     * Uses {@link #formatUnsignedOctalString} to format
536     * the value as an octal string with leading zeros.
537     * The converted number is followed by NUL and then space.
538     *
539     * @param value The value to convert
540     * @param buf The destination buffer
541     * @param offset The starting offset into the buffer.
542     * @param length The size of the buffer.
543     * @return The updated value of offset, i.e. offset+length
544     * @throws IllegalArgumentException if the value (and trailer) will not fit in the buffer
545     */
546    public static int formatCheckSumOctalBytes(final long value, final byte[] buf, final int offset, final int length) {
547
548        int idx=length-2; // for NUL and space
549        formatUnsignedOctalString(value, buf, offset, idx);
550
551        buf[offset + idx++]   = 0; // Trailing null
552        buf[offset + idx]     = (byte) ' '; // Trailing space
553
554        return offset + length;
555    }
556
557    /**
558     * Compute the checksum of a tar entry header.
559     *
560     * @param buf The tar entry's header buffer.
561     * @return The computed checksum.
562     */
563    public static long computeCheckSum(final byte[] buf) {
564        long sum = 0;
565
566        for (final byte element : buf) {
567            sum += BYTE_MASK & element;
568        }
569
570        return sum;
571    }
572
573    /**
574     * Wikipedia <a href="https://en.wikipedia.org/wiki/Tar_(file_format)#File_header">says</a>:
575     * <blockquote>
576     * The checksum is calculated by taking the sum of the unsigned byte values
577     * of the header block with the eight checksum bytes taken to be ascii
578     * spaces (decimal value 32). It is stored as a six digit octal number with
579     * leading zeroes followed by a NUL and then a space. Various
580     * implementations do not adhere to this format. For better compatibility,
581     * ignore leading and trailing whitespace, and get the first six digits. In
582     * addition, some historic tar implementations treated bytes as signed.
583     * Implementations typically calculate the checksum both ways, and treat it
584     * as good if either the signed or unsigned sum matches the included
585     * checksum.
586     * </blockquote>
587     * <p>
588     * The return value of this method should be treated as a best-effort
589     * heuristic rather than an absolute and final truth. The checksum
590     * verification logic may well evolve over time as more special cases
591     * are encountered.
592     *
593     * @param header tar header
594     * @return whether the checksum is reasonably good
595     * @see <a href="https://issues.apache.org/jira/browse/COMPRESS-191">COMPRESS-191</a>
596     * @since 1.5
597     */
598    public static boolean verifyCheckSum(final byte[] header) {
599        final long storedSum = parseOctal(header, CHKSUM_OFFSET, CHKSUMLEN);
600        long unsignedSum = 0;
601        long signedSum = 0;
602
603        for (int i = 0; i < header.length; i++) {
604            byte b = header[i];
605            if (CHKSUM_OFFSET  <= i && i < CHKSUM_OFFSET + CHKSUMLEN) {
606                b = ' ';
607            }
608            unsignedSum += 0xff & b;
609            signedSum += b;
610        }
611        return storedSum == unsignedSum || storedSum == signedSum;
612    }
613
614}