001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing,
013 * software distributed under the License is distributed on an
014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015 * KIND, either express or implied.  See the License for the
016 * specific language governing permissions and limitations
017 * under the License.
018 */
019package org.apache.commons.compress.compressors.bzip2;
020
021import java.io.IOException;
022import java.io.OutputStream;
023import java.util.Arrays;
024
025import org.apache.commons.compress.compressors.CompressorOutputStream;
026import org.apache.commons.io.IOUtils;
027
028/**
029 * An output stream that compresses into the BZip2 format into another stream.
030 *
031 * <p>
032 * The compression requires large amounts of memory. Thus you should call the {@link #close() close()} method as soon as possible, to force
033 * {@code BZip2CompressorOutputStream} to release the allocated memory.
034 * </p>
035 *
036 * <p>
037 * You can shrink the amount of allocated memory and maybe raise the compression speed by choosing a lower blocksize, which in turn may cause a lower
038 * compression ratio. You can avoid unnecessary memory allocation by avoiding using a blocksize which is bigger than the size of the input.
039 * </p>
040 *
041 * <p>
042 * You can compute the memory usage for compressing by the following formula:
043 * </p>
044 *
045 * <pre>
046 * &lt;code&gt;400k + (9 * blocksize)&lt;/code&gt;.
047 * </pre>
048 *
049 * <p>
050 * To get the memory required for decompression by {@link BZip2CompressorInputStream} use
051 * </p>
052 *
053 * <pre>
054 * &lt;code&gt;65k + (5 * blocksize)&lt;/code&gt;.
055 * </pre>
056 *
057 * <table style="width:100%" border="1">
058 * <caption>Memory usage by blocksize</caption>
059 * <tr>
060 * <th colspan="3">Memory usage by blocksize</th>
061 * </tr>
062 * <tr>
063 * <th style="text-align: right">Blocksize</th>
064 * <th style="text-align: right">Compression<br>
065 * memory usage</th>
066 * <th style="text-align: right">Decompression<br>
067 * memory usage</th>
068 * </tr>
069 * <tr>
070 * <td style="text-align: right">100k</td>
071 * <td style="text-align: right">1300k</td>
072 * <td style="text-align: right">565k</td>
073 * </tr>
074 * <tr>
075 * <td style="text-align: right">200k</td>
076 * <td style="text-align: right">2200k</td>
077 * <td style="text-align: right">1065k</td>
078 * </tr>
079 * <tr>
080 * <td style="text-align: right">300k</td>
081 * <td style="text-align: right">3100k</td>
082 * <td style="text-align: right">1565k</td>
083 * </tr>
084 * <tr>
085 * <td style="text-align: right">400k</td>
086 * <td style="text-align: right">4000k</td>
087 * <td style="text-align: right">2065k</td>
088 * </tr>
089 * <tr>
090 * <td style="text-align: right">500k</td>
091 * <td style="text-align: right">4900k</td>
092 * <td style="text-align: right">2565k</td>
093 * </tr>
094 * <tr>
095 * <td style="text-align: right">600k</td>
096 * <td style="text-align: right">5800k</td>
097 * <td style="text-align: right">3065k</td>
098 * </tr>
099 * <tr>
100 * <td style="text-align: right">700k</td>
101 * <td style="text-align: right">6700k</td>
102 * <td style="text-align: right">3565k</td>
103 * </tr>
104 * <tr>
105 * <td style="text-align: right">800k</td>
106 * <td style="text-align: right">7600k</td>
107 * <td style="text-align: right">4065k</td>
108 * </tr>
109 * <tr>
110 * <td style="text-align: right">900k</td>
111 * <td style="text-align: right">8500k</td>
112 * <td style="text-align: right">4565k</td>
113 * </tr>
114 * </table>
115 *
116 * <p>
117 * For decompression {@code BZip2CompressorInputStream} allocates less memory if the bzipped input is smaller than one block.
118 * </p>
119 *
120 * <p>
121 * Instances of this class are not threadsafe.
122 * </p>
123 *
124 * <p>
125 * TODO: Update to BZip2 1.0.1
126 * </p>
127 *
128 * @NotThreadSafe
129 */
130public class BZip2CompressorOutputStream extends CompressorOutputStream<OutputStream> implements BZip2Constants {
131
132    static final class Data {
133
134        // with blockSize 900k
135        /* maps unsigned byte => "does it occur in block" */
136        final boolean[] inUse = new boolean[256]; // 256 byte
137        final byte[] unseqToSeq = new byte[256]; // 256 byte
138        final int[] mtfFreq = new int[MAX_ALPHA_SIZE]; // 1032 byte
139        final byte[] selector = new byte[MAX_SELECTORS]; // 18002 byte
140        final byte[] selectorMtf = new byte[MAX_SELECTORS]; // 18002 byte
141
142        final byte[] generateMTFValues_yy = new byte[256]; // 256 byte
143        final byte[][] sendMTFValues_len = new byte[N_GROUPS][MAX_ALPHA_SIZE]; // 1548
144        // byte
145        final int[][] sendMTFValues_rfreq = new int[N_GROUPS][MAX_ALPHA_SIZE]; // 6192
146        // byte
147        final int[] sendMTFValues_fave = new int[N_GROUPS]; // 24 byte
148        final short[] sendMTFValues_cost = new short[N_GROUPS]; // 12 byte
149        final int[][] sendMTFValues_code = new int[N_GROUPS][MAX_ALPHA_SIZE]; // 6192
150        // byte
151        final byte[] sendMTFValues2_pos = new byte[N_GROUPS]; // 6 byte
152        final boolean[] sentMTFValues4_inUse16 = new boolean[16]; // 16 byte
153
154        final int[] heap = new int[MAX_ALPHA_SIZE + 2]; // 1040 byte
155        final int[] weight = new int[MAX_ALPHA_SIZE * 2]; // 2064 byte
156        final int[] parent = new int[MAX_ALPHA_SIZE * 2]; // 2064 byte
157
158        // ------------
159        // 333408 byte
160
161        /*
162         * holds the RLEd block of original data starting at index 1. After sorting the last byte added to the buffer is at index 0.
163         */
164        final byte[] block; // 900021 byte
165        /*
166         * maps index in Burrows-Wheeler transformed block => index of byte in original block
167         */
168        final int[] fmap; // 3600000 byte
169        final char[] sfmap; // 3600000 byte
170        // ------------
171        // 8433529 byte
172        // ============
173
174        /**
175         * Index of original line in Burrows-Wheeler table.
176         *
177         * <p>
178         * This is the index in fmap that points to the last byte of the original data.
179         * </p>
180         */
181        int origPtr;
182
183        Data(final int blockSize100k) {
184            final int n = blockSize100k * BASEBLOCKSIZE;
185            this.block = new byte[n + 1 + NUM_OVERSHOOT_BYTES];
186            this.fmap = new int[n];
187            this.sfmap = new char[2 * n];
188        }
189
190    }
191
192    /**
193     * The minimum supported blocksize {@code  == 1}.
194     */
195    public static final int MIN_BLOCKSIZE = 1;
196
197    /**
198     * The maximum supported blocksize {@code  == 9}.
199     */
200    public static final int MAX_BLOCKSIZE = 9;
201    private static final int GREATER_ICOST = 15;
202
203    private static final int LESSER_ICOST = 0;
204
205    /**
206     * Chooses a blocksize based on the given length of the data to compress.
207     *
208     * @return The blocksize, between {@link #MIN_BLOCKSIZE} and {@link #MAX_BLOCKSIZE} both inclusive. For a negative {@code inputLength} this method returns
209     *         {@code MAX_BLOCKSIZE} always.
210     *
211     * @param inputLength The length of the data which will be compressed by {@code BZip2CompressorOutputStream}.
212     */
213    public static int chooseBlockSize(final long inputLength) {
214        return inputLength > 0 ? (int) Math.min(inputLength / 132000 + 1, 9) : MAX_BLOCKSIZE;
215    }
216
217    private static void hbAssignCodes(final int[] code, final byte[] length, final int minLen, final int maxLen, final int alphaSize) {
218        int vec = 0;
219        for (int n = minLen; n <= maxLen; n++) {
220            for (int i = 0; i < alphaSize; i++) {
221                if ((length[i] & 0xff) == n) {
222                    code[i] = vec;
223                    vec++;
224                }
225            }
226            vec <<= 1;
227        }
228    }
229
230    private static void hbMakeCodeLengths(final byte[] len, final int[] freq, final Data dat, final int alphaSize, final int maxLen) {
231        /*
232         * Nodes and heap entries run from 1. Entry 0 for both the heap and nodes is a sentinel.
233         */
234        final int[] heap = dat.heap;
235        final int[] weight = dat.weight;
236        final int[] parent = dat.parent;
237
238        for (int i = alphaSize; --i >= 0;) {
239            weight[i + 1] = (freq[i] == 0 ? 1 : freq[i]) << 8;
240        }
241
242        for (boolean tooLong = true; tooLong;) {
243            tooLong = false;
244
245            int nNodes = alphaSize;
246            int nHeap = 0;
247            heap[0] = 0;
248            weight[0] = 0;
249            parent[0] = -2;
250
251            for (int i = 1; i <= alphaSize; i++) {
252                parent[i] = -1;
253                nHeap++;
254                heap[nHeap] = i;
255
256                int zz = nHeap;
257                final int tmp = heap[zz];
258                while (weight[tmp] < weight[heap[zz >> 1]]) {
259                    heap[zz] = heap[zz >> 1];
260                    zz >>= 1;
261                }
262                heap[zz] = tmp;
263            }
264
265            while (nHeap > 1) {
266                final int n1 = heap[1];
267                heap[1] = heap[nHeap];
268                nHeap--;
269
270                int yy = 0;
271                int zz = 1;
272                int tmp = heap[1];
273
274                while (true) {
275                    yy = zz << 1;
276
277                    if (yy > nHeap) {
278                        break;
279                    }
280
281                    if (yy < nHeap && weight[heap[yy + 1]] < weight[heap[yy]]) {
282                        yy++;
283                    }
284
285                    if (weight[tmp] < weight[heap[yy]]) {
286                        break;
287                    }
288
289                    heap[zz] = heap[yy];
290                    zz = yy;
291                }
292
293                heap[zz] = tmp;
294
295                final int n2 = heap[1];
296                heap[1] = heap[nHeap];
297                nHeap--;
298
299                yy = 0;
300                zz = 1;
301                tmp = heap[1];
302
303                while (true) {
304                    yy = zz << 1;
305
306                    if (yy > nHeap) {
307                        break;
308                    }
309
310                    if (yy < nHeap && weight[heap[yy + 1]] < weight[heap[yy]]) {
311                        yy++;
312                    }
313
314                    if (weight[tmp] < weight[heap[yy]]) {
315                        break;
316                    }
317
318                    heap[zz] = heap[yy];
319                    zz = yy;
320                }
321
322                heap[zz] = tmp;
323                nNodes++;
324                parent[n1] = parent[n2] = nNodes;
325
326                final int weight_n1 = weight[n1];
327                final int weight_n2 = weight[n2];
328                weight[nNodes] = (weight_n1 & 0xffffff00) + (weight_n2 & 0xffffff00) | 1 + Math.max(weight_n1 & 0x000000ff, weight_n2 & 0x000000ff);
329
330                parent[nNodes] = -1;
331                nHeap++;
332                heap[nHeap] = nNodes;
333
334                tmp = 0;
335                zz = nHeap;
336                tmp = heap[zz];
337                final int weight_tmp = weight[tmp];
338                while (weight_tmp < weight[heap[zz >> 1]]) {
339                    heap[zz] = heap[zz >> 1];
340                    zz >>= 1;
341                }
342                heap[zz] = tmp;
343
344            }
345
346            for (int i = 1; i <= alphaSize; i++) {
347                int j = 0;
348                int k = i;
349
350                for (int parent_k; (parent_k = parent[k]) >= 0;) {
351                    k = parent_k;
352                    j++;
353                }
354
355                len[i - 1] = (byte) j;
356                if (j > maxLen) {
357                    tooLong = true;
358                }
359            }
360
361            if (tooLong) {
362                for (int i = 1; i < alphaSize; i++) {
363                    int j = weight[i] >> 8;
364                    j = 1 + (j >> 1);
365                    weight[i] = j << 8;
366                }
367            }
368        }
369    }
370
371    /**
372     * Index of the last char in the block, so the block size == last + 1.
373     */
374    private int last;
375    /**
376     * Always: in the range 0 .. 9. The current block size is 100000 * this number.
377     */
378    private final int blockSize100k;
379
380    private int bsBuff;
381
382    private int bsLive;
383
384    private final CRC crc = new CRC();
385    private int nInUse;
386
387    private int nMTF;
388    private int currentChar = -1;
389    private int runLength;
390
391    private int combinedCRC;
392
393    private final int allowableBlockSize;
394    /**
395     * All memory intensive stuff.
396     */
397    private Data data;
398
399    private BlockSort blockSorter;
400
401    private volatile boolean closed;
402
403    /**
404     * Constructs a new {@code BZip2CompressorOutputStream} with a blocksize of 900k.
405     *
406     * @param out the destination stream.
407     *
408     * @throws IOException          if an I/O error occurs in the specified stream.
409     * @throws NullPointerException if {@code out == null}.
410     */
411    public BZip2CompressorOutputStream(final OutputStream out) throws IOException {
412        this(out, MAX_BLOCKSIZE);
413    }
414
415    /**
416     * Constructs a new {@code BZip2CompressorOutputStream} with specified blocksize.
417     *
418     * @param out       the destination stream.
419     * @param blockSize the blockSize as 100k units.
420     *
421     * @throws IOException              if an I/O error occurs in the specified stream.
422     * @throws IllegalArgumentException if {@code (blockSize &lt; 1) || (blockSize &gt; 9)}.
423     * @throws NullPointerException     if {@code out == null}.
424     *
425     * @see #MIN_BLOCKSIZE
426     * @see #MAX_BLOCKSIZE
427     */
428    public BZip2CompressorOutputStream(final OutputStream out, final int blockSize) throws IOException {
429        super(out);
430        if (blockSize < 1) {
431            throw new IllegalArgumentException("blockSize(" + blockSize + ") < 1");
432        }
433        if (blockSize > 9) {
434            throw new IllegalArgumentException("blockSize(" + blockSize + ") > 9");
435        }
436        this.blockSize100k = blockSize;
437        /* 20 is just a paranoia constant */
438        this.allowableBlockSize = this.blockSize100k * BASEBLOCKSIZE - 20;
439        init();
440    }
441
442    private void blockSort() {
443        blockSorter.blockSort(data, last);
444    }
445
446    private void bsFinishedWithStream() throws IOException {
447        while (this.bsLive > 0) {
448            final int ch = this.bsBuff >> 24;
449            this.out.write(ch); // write 8-bit
450            this.bsBuff <<= 8;
451            this.bsLive -= 8;
452        }
453    }
454
455    private void bsPutInt(final int u) throws IOException {
456        bsW(8, u >> 24 & 0xff);
457        bsW(8, u >> 16 & 0xff);
458        bsW(8, u >> 8 & 0xff);
459        bsW(8, u & 0xff);
460    }
461
462    private void bsPutUByte(final int c) throws IOException {
463        bsW(8, c);
464    }
465
466    private void bsW(final int n, final int v) throws IOException {
467        final OutputStream outShadow = this.out;
468        int bsLiveShadow = this.bsLive;
469        int bsBuffShadow = this.bsBuff;
470
471        while (bsLiveShadow >= 8) {
472            outShadow.write(bsBuffShadow >> 24); // write 8-bit
473            bsBuffShadow <<= 8;
474            bsLiveShadow -= 8;
475        }
476
477        this.bsBuff = bsBuffShadow | v << 32 - bsLiveShadow - n;
478        this.bsLive = bsLiveShadow + n;
479    }
480
481    private void checkClosed() throws IOException {
482        if (closed) {
483            throw new IOException("Stream closed");
484        }
485    }
486
487    @Override
488    public void close() throws IOException {
489        if (!closed) {
490            try {
491                finish();
492            } finally {
493                IOUtils.close(out);
494            }
495        }
496    }
497
498    private void endBlock() throws IOException {
499        final int blockCRC = this.crc.getValue();
500        this.combinedCRC = this.combinedCRC << 1 | this.combinedCRC >>> 31;
501        this.combinedCRC ^= blockCRC;
502
503        // empty block at end of file
504        if (this.last == -1) {
505            return;
506        }
507
508        /* sort the block and establish posn of original string */
509        blockSort();
510
511        /*
512         * A 6-byte block header, the value chosen arbitrarily as 0x314159265359 :-). A 32 bit value does not really give a strong enough guarantee that the
513         * value will not appear by chance in the compressed data stream. Worst-case probability of this event, for a 900k block, is about 2.0e-3 for 32 bits,
514         * 1.0e-5 for 40 bits and 4.0e-8 for 48 bits. For a compressed file of size 100Gb -- about 100000 blocks -- only a 48-bit marker will do. NB: normal
515         * compression/ decompression doesn't rely on these statistical properties. They are only important when trying to recover blocks from damaged files.
516         */
517        bsPutUByte(0x31);
518        bsPutUByte(0x41);
519        bsPutUByte(0x59);
520        bsPutUByte(0x26);
521        bsPutUByte(0x53);
522        bsPutUByte(0x59);
523
524        /* Now the block's CRC, so it is in a known place. */
525        bsPutInt(blockCRC);
526
527        /* Now a single bit indicating no randomization. */
528        bsW(1, 0);
529
530        /* Finally, block's contents proper. */
531        moveToFrontCodeAndSend();
532    }
533
534    private void endCompression() throws IOException {
535        /*
536         * Now another magic 48-bit number, 0x177245385090, to indicate the end of the last block. (sqrt(pi), if you want to know. I did want to use e, but it
537         * contains too much repetition -- 27 18 28 18 28 46 -- for me to feel statistically comfortable. Call me paranoid.)
538         */
539        bsPutUByte(0x17);
540        bsPutUByte(0x72);
541        bsPutUByte(0x45);
542        bsPutUByte(0x38);
543        bsPutUByte(0x50);
544        bsPutUByte(0x90);
545
546        bsPutInt(this.combinedCRC);
547        bsFinishedWithStream();
548    }
549
550    public void finish() throws IOException {
551        if (!closed) {
552            closed = true;
553            try {
554                if (this.runLength > 0) {
555                    writeRun();
556                }
557                this.currentChar = -1;
558                endBlock();
559                endCompression();
560            } finally {
561                this.blockSorter = null;
562                this.data = null;
563            }
564        }
565    }
566
567    @Override
568    public void flush() throws IOException {
569        if (out != null) {
570            super.flush();
571        }
572    }
573
574    /*
575     * Performs Move-To-Front on the Burrows-Wheeler transformed buffer, storing the MTFed data in data.sfmap in RUNA/RUNB run-length-encoded form.
576     *
577     * <p>Keeps track of byte frequencies in data.mtfFreq at the same time.</p>
578     */
579    private void generateMTFValues() {
580        final int lastShadow = this.last;
581        final Data dataShadow = this.data;
582        final boolean[] inUse = dataShadow.inUse;
583        final byte[] block = dataShadow.block;
584        final int[] fmap = dataShadow.fmap;
585        final char[] sfmap = dataShadow.sfmap;
586        final int[] mtfFreq = dataShadow.mtfFreq;
587        final byte[] unseqToSeq = dataShadow.unseqToSeq;
588        final byte[] yy = dataShadow.generateMTFValues_yy;
589
590        // make maps
591        int nInUseShadow = 0;
592        for (int i = 0; i < 256; i++) {
593            if (inUse[i]) {
594                unseqToSeq[i] = (byte) nInUseShadow;
595                nInUseShadow++;
596            }
597        }
598        this.nInUse = nInUseShadow;
599
600        final int eob = nInUseShadow + 1;
601
602        Arrays.fill(mtfFreq, 0, eob + 1, 0);
603
604        for (int i = nInUseShadow; --i >= 0;) {
605            yy[i] = (byte) i;
606        }
607
608        int wr = 0;
609        int zPend = 0;
610
611        for (int i = 0; i <= lastShadow; i++) {
612            final byte ll_i = unseqToSeq[block[fmap[i]] & 0xff];
613            byte tmp = yy[0];
614            int j = 0;
615
616            while (ll_i != tmp) {
617                j++;
618                final byte tmp2 = tmp;
619                tmp = yy[j];
620                yy[j] = tmp2;
621            }
622            yy[0] = tmp;
623
624            if (j == 0) {
625                zPend++;
626            } else {
627                if (zPend > 0) {
628                    zPend--;
629                    while (true) {
630                        if ((zPend & 1) == 0) {
631                            sfmap[wr] = RUNA;
632                            wr++;
633                            mtfFreq[RUNA]++;
634                        } else {
635                            sfmap[wr] = RUNB;
636                            wr++;
637                            mtfFreq[RUNB]++;
638                        }
639
640                        if (zPend < 2) {
641                            break;
642                        }
643                        zPend = zPend - 2 >> 1;
644                    }
645                    zPend = 0;
646                }
647                sfmap[wr] = (char) (j + 1);
648                wr++;
649                mtfFreq[j + 1]++;
650            }
651        }
652
653        if (zPend > 0) {
654            zPend--;
655            while (true) {
656                if ((zPend & 1) == 0) {
657                    sfmap[wr] = RUNA;
658                    wr++;
659                    mtfFreq[RUNA]++;
660                } else {
661                    sfmap[wr] = RUNB;
662                    wr++;
663                    mtfFreq[RUNB]++;
664                }
665
666                if (zPend < 2) {
667                    break;
668                }
669                zPend = zPend - 2 >> 1;
670            }
671        }
672
673        sfmap[wr] = (char) eob;
674        mtfFreq[eob]++;
675        this.nMTF = wr + 1;
676    }
677
678    /**
679     * Returns the blocksize parameter specified at construction time.
680     *
681     * @return the blocksize parameter specified at construction time
682     */
683    public final int getBlockSize() {
684        return this.blockSize100k;
685    }
686
687    /**
688     * Writes magic bytes like BZ on the first position of the stream and bytes indicating the file-format, which is huffmanized, followed by a digit indicating
689     * blockSize100k.
690     *
691     * @throws IOException if the magic bytes could not been written
692     */
693    private void init() throws IOException {
694        bsPutUByte('B');
695        bsPutUByte('Z');
696
697        this.data = new Data(this.blockSize100k);
698        this.blockSorter = new BlockSort(this.data);
699
700        // huffmanized magic bytes
701        bsPutUByte('h');
702        bsPutUByte('0' + this.blockSize100k);
703
704        this.combinedCRC = 0;
705        initBlock();
706    }
707
708    private void initBlock() {
709        // blockNo++;
710        this.crc.reset();
711        this.last = -1;
712        // ch = 0;
713
714        final boolean[] inUse = this.data.inUse;
715        for (int i = 256; --i >= 0;) {
716            inUse[i] = false;
717        }
718
719    }
720
721    private void moveToFrontCodeAndSend() throws IOException {
722        bsW(24, this.data.origPtr);
723        generateMTFValues();
724        sendMTFValues();
725    }
726
727    private void sendMTFValues() throws IOException {
728        final byte[][] len = this.data.sendMTFValues_len;
729        final int alphaSize = this.nInUse + 2;
730
731        for (int t = N_GROUPS; --t >= 0;) {
732            final byte[] len_t = len[t];
733            for (int v = alphaSize; --v >= 0;) {
734                len_t[v] = GREATER_ICOST;
735            }
736        }
737
738        /* Decide how many coding tables to use */
739        // assert (this.nMTF > 0) : this.nMTF;
740        final int nGroups = this.nMTF < 200 ? 2 : this.nMTF < 600 ? 3 : this.nMTF < 1200 ? 4 : this.nMTF < 2400 ? 5 : 6;
741
742        /* Generate an initial set of coding tables */
743        sendMTFValues0(nGroups, alphaSize);
744
745        /*
746         * Iterate up to N_ITERS times to improve the tables.
747         */
748        final int nSelectors = sendMTFValues1(nGroups, alphaSize);
749
750        /* Compute MTF values for the selectors. */
751        sendMTFValues2(nGroups, nSelectors);
752
753        /* Assign actual codes for the tables. */
754        sendMTFValues3(nGroups, alphaSize);
755
756        /* Transmit the mapping table. */
757        sendMTFValues4();
758
759        /* Now the selectors. */
760        sendMTFValues5(nGroups, nSelectors);
761
762        /* Now the coding tables. */
763        sendMTFValues6(nGroups, alphaSize);
764
765        /* And finally, the block data proper */
766        sendMTFValues7();
767    }
768
769    private void sendMTFValues0(final int nGroups, final int alphaSize) {
770        final byte[][] len = this.data.sendMTFValues_len;
771        final int[] mtfFreq = this.data.mtfFreq;
772
773        int remF = this.nMTF;
774        int gs = 0;
775
776        for (int nPart = nGroups; nPart > 0; nPart--) {
777            final int tFreq = remF / nPart;
778            int ge = gs - 1;
779            int aFreq = 0;
780
781            for (final int a = alphaSize - 1; aFreq < tFreq && ge < a;) {
782                aFreq += mtfFreq[++ge];
783            }
784
785            if (ge > gs && nPart != nGroups && nPart != 1 && (nGroups - nPart & 1) != 0) {
786                aFreq -= mtfFreq[ge--];
787            }
788
789            final byte[] len_np = len[nPart - 1];
790            for (int v = alphaSize; --v >= 0;) {
791                if (v >= gs && v <= ge) {
792                    len_np[v] = LESSER_ICOST;
793                } else {
794                    len_np[v] = GREATER_ICOST;
795                }
796            }
797
798            gs = ge + 1;
799            remF -= aFreq;
800        }
801    }
802
803    private int sendMTFValues1(final int nGroups, final int alphaSize) {
804        final Data dataShadow = this.data;
805        final int[][] rfreq = dataShadow.sendMTFValues_rfreq;
806        final int[] fave = dataShadow.sendMTFValues_fave;
807        final short[] cost = dataShadow.sendMTFValues_cost;
808        final char[] sfmap = dataShadow.sfmap;
809        final byte[] selector = dataShadow.selector;
810        final byte[][] len = dataShadow.sendMTFValues_len;
811        final byte[] len_0 = len[0];
812        final byte[] len_1 = len[1];
813        final byte[] len_2 = len[2];
814        final byte[] len_3 = len[3];
815        final byte[] len_4 = len[4];
816        final byte[] len_5 = len[5];
817        final int nMTFShadow = this.nMTF;
818
819        int nSelectors = 0;
820
821        for (int iter = 0; iter < N_ITERS; iter++) {
822            for (int t = nGroups; --t >= 0;) {
823                fave[t] = 0;
824                final int[] rfreqt = rfreq[t];
825                for (int i = alphaSize; --i >= 0;) {
826                    rfreqt[i] = 0;
827                }
828            }
829
830            nSelectors = 0;
831
832            for (int gs = 0; gs < this.nMTF;) {
833                // Set group start & end marks.
834
835                // Calculate the cost of this group as coded by each of the
836                // coding tables.
837
838                final int ge = Math.min(gs + G_SIZE - 1, nMTFShadow - 1);
839
840                final byte mask = (byte) 0xff;
841                if (nGroups == N_GROUPS) {
842                    // unrolled version of the else-block
843
844                    short cost0 = 0;
845                    short cost1 = 0;
846                    short cost2 = 0;
847                    short cost3 = 0;
848                    short cost4 = 0;
849                    short cost5 = 0;
850
851                    for (int i = gs; i <= ge; i++) {
852                        final int icv = sfmap[i];
853                        cost0 += (short) (len_0[icv] & mask);
854                        cost1 += (short) (len_1[icv] & mask);
855                        cost2 += (short) (len_2[icv] & mask);
856                        cost3 += (short) (len_3[icv] & mask);
857                        cost4 += (short) (len_4[icv] & mask);
858                        cost5 += (short) (len_5[icv] & mask);
859                    }
860
861                    cost[0] = cost0;
862                    cost[1] = cost1;
863                    cost[2] = cost2;
864                    cost[3] = cost3;
865                    cost[4] = cost4;
866                    cost[5] = cost5;
867
868                } else {
869                    for (int t = nGroups; --t >= 0;) {
870                        cost[t] = 0;
871                    }
872
873                    for (int i = gs; i <= ge; i++) {
874                        final int icv = sfmap[i];
875                        for (int t = nGroups; --t >= 0;) {
876                            cost[t] += (short) (len[t][icv] & mask);
877                        }
878                    }
879                }
880
881                /*
882                 * Find the coding table which is best for this group, and record its identity in the selector table.
883                 */
884                int bt = -1;
885                for (int t = nGroups, bc = 999999999; --t >= 0;) {
886                    final int cost_t = cost[t];
887                    if (cost_t < bc) {
888                        bc = cost_t;
889                        bt = t;
890                    }
891                }
892
893                fave[bt]++;
894                selector[nSelectors] = (byte) bt;
895                nSelectors++;
896
897                /*
898                 * Increment the symbol frequencies for the selected table.
899                 */
900                final int[] rfreq_bt = rfreq[bt];
901                for (int i = gs; i <= ge; i++) {
902                    rfreq_bt[sfmap[i]]++;
903                }
904
905                gs = ge + 1;
906            }
907
908            /*
909             * Recompute the tables based on the accumulated frequencies.
910             */
911            for (int t = 0; t < nGroups; t++) {
912                hbMakeCodeLengths(len[t], rfreq[t], this.data, alphaSize, 20);
913            }
914        }
915
916        return nSelectors;
917    }
918
919    private void sendMTFValues2(final int nGroups, final int nSelectors) {
920        // assert (nGroups < 8) : nGroups;
921
922        final Data dataShadow = this.data;
923        final byte[] pos = dataShadow.sendMTFValues2_pos;
924
925        for (int i = nGroups; --i >= 0;) {
926            pos[i] = (byte) i;
927        }
928
929        for (int i = 0; i < nSelectors; i++) {
930            final byte ll_i = dataShadow.selector[i];
931            byte tmp = pos[0];
932            int j = 0;
933
934            while (ll_i != tmp) {
935                j++;
936                final byte tmp2 = tmp;
937                tmp = pos[j];
938                pos[j] = tmp2;
939            }
940
941            pos[0] = tmp;
942            dataShadow.selectorMtf[i] = (byte) j;
943        }
944    }
945
946    private void sendMTFValues3(final int nGroups, final int alphaSize) {
947        final int[][] code = this.data.sendMTFValues_code;
948        final byte[][] len = this.data.sendMTFValues_len;
949
950        for (int t = 0; t < nGroups; t++) {
951            int minLen = 32;
952            int maxLen = 0;
953            final byte[] len_t = len[t];
954            for (int i = alphaSize; --i >= 0;) {
955                final int l = len_t[i] & 0xff;
956                if (l > maxLen) {
957                    maxLen = l;
958                }
959                if (l < minLen) {
960                    minLen = l;
961                }
962            }
963
964            // assert (maxLen <= 20) : maxLen;
965            // assert (minLen >= 1) : minLen;
966
967            hbAssignCodes(code[t], len[t], minLen, maxLen, alphaSize);
968        }
969    }
970
971    private void sendMTFValues4() throws IOException {
972        final boolean[] inUse = this.data.inUse;
973        final boolean[] inUse16 = this.data.sentMTFValues4_inUse16;
974
975        for (int i = 16; --i >= 0;) {
976            inUse16[i] = false;
977            final int i16 = i * 16;
978            for (int j = 16; --j >= 0;) {
979                if (inUse[i16 + j]) {
980                    inUse16[i] = true;
981                    break;
982                }
983            }
984        }
985
986        for (int i = 0; i < 16; i++) {
987            bsW(1, inUse16[i] ? 1 : 0);
988        }
989
990        final OutputStream outShadow = this.out;
991        int bsLiveShadow = this.bsLive;
992        int bsBuffShadow = this.bsBuff;
993
994        for (int i = 0; i < 16; i++) {
995            if (inUse16[i]) {
996                final int i16 = i * 16;
997                for (int j = 0; j < 16; j++) {
998                    // inlined: bsW(1, inUse[i16 + j] ? 1 : 0);
999                    while (bsLiveShadow >= 8) {
1000                        outShadow.write(bsBuffShadow >> 24); // write 8-bit
1001                        bsBuffShadow <<= 8;
1002                        bsLiveShadow -= 8;
1003                    }
1004                    if (inUse[i16 + j]) {
1005                        bsBuffShadow |= 1 << 32 - bsLiveShadow - 1;
1006                    }
1007                    bsLiveShadow++;
1008                }
1009            }
1010        }
1011
1012        this.bsBuff = bsBuffShadow;
1013        this.bsLive = bsLiveShadow;
1014    }
1015
1016    private void sendMTFValues5(final int nGroups, final int nSelectors) throws IOException {
1017        bsW(3, nGroups);
1018        bsW(15, nSelectors);
1019
1020        final OutputStream outShadow = this.out;
1021        final byte[] selectorMtf = this.data.selectorMtf;
1022
1023        int bsLiveShadow = this.bsLive;
1024        int bsBuffShadow = this.bsBuff;
1025
1026        for (int i = 0; i < nSelectors; i++) {
1027            for (int j = 0, hj = selectorMtf[i] & 0xff; j < hj; j++) {
1028                // inlined: bsW(1, 1);
1029                while (bsLiveShadow >= 8) {
1030                    outShadow.write(bsBuffShadow >> 24);
1031                    bsBuffShadow <<= 8;
1032                    bsLiveShadow -= 8;
1033                }
1034                bsBuffShadow |= 1 << 32 - bsLiveShadow - 1;
1035                bsLiveShadow++;
1036            }
1037
1038            // inlined: bsW(1, 0);
1039            while (bsLiveShadow >= 8) {
1040                outShadow.write(bsBuffShadow >> 24);
1041                bsBuffShadow <<= 8;
1042                bsLiveShadow -= 8;
1043            }
1044            // bsBuffShadow |= 0 << (32 - bsLiveShadow - 1);
1045            bsLiveShadow++;
1046        }
1047
1048        this.bsBuff = bsBuffShadow;
1049        this.bsLive = bsLiveShadow;
1050    }
1051
1052    private void sendMTFValues6(final int nGroups, final int alphaSize) throws IOException {
1053        final byte[][] len = this.data.sendMTFValues_len;
1054        final OutputStream outShadow = this.out;
1055
1056        int bsLiveShadow = this.bsLive;
1057        int bsBuffShadow = this.bsBuff;
1058
1059        for (int t = 0; t < nGroups; t++) {
1060            final byte[] len_t = len[t];
1061            int curr = len_t[0] & 0xff;
1062
1063            // inlined: bsW(5, curr);
1064            while (bsLiveShadow >= 8) {
1065                outShadow.write(bsBuffShadow >> 24); // write 8-bit
1066                bsBuffShadow <<= 8;
1067                bsLiveShadow -= 8;
1068            }
1069            bsBuffShadow |= curr << 32 - bsLiveShadow - 5;
1070            bsLiveShadow += 5;
1071
1072            for (int i = 0; i < alphaSize; i++) {
1073                final int lti = len_t[i] & 0xff;
1074                while (curr < lti) {
1075                    // inlined: bsW(2, 2);
1076                    while (bsLiveShadow >= 8) {
1077                        outShadow.write(bsBuffShadow >> 24); // write 8-bit
1078                        bsBuffShadow <<= 8;
1079                        bsLiveShadow -= 8;
1080                    }
1081                    bsBuffShadow |= 2 << 32 - bsLiveShadow - 2;
1082                    bsLiveShadow += 2;
1083
1084                    curr++; /* 10 */
1085                }
1086
1087                while (curr > lti) {
1088                    // inlined: bsW(2, 3);
1089                    while (bsLiveShadow >= 8) {
1090                        outShadow.write(bsBuffShadow >> 24); // write 8-bit
1091                        bsBuffShadow <<= 8;
1092                        bsLiveShadow -= 8;
1093                    }
1094                    bsBuffShadow |= 3 << 32 - bsLiveShadow - 2;
1095                    bsLiveShadow += 2;
1096
1097                    curr--; /* 11 */
1098                }
1099
1100                // inlined: bsW(1, 0);
1101                while (bsLiveShadow >= 8) {
1102                    outShadow.write(bsBuffShadow >> 24); // write 8-bit
1103                    bsBuffShadow <<= 8;
1104                    bsLiveShadow -= 8;
1105                }
1106                // bsBuffShadow |= 0 << (32 - bsLiveShadow - 1);
1107                bsLiveShadow++;
1108            }
1109        }
1110
1111        this.bsBuff = bsBuffShadow;
1112        this.bsLive = bsLiveShadow;
1113    }
1114
1115    private void sendMTFValues7() throws IOException {
1116        final Data dataShadow = this.data;
1117        final byte[][] len = dataShadow.sendMTFValues_len;
1118        final int[][] code = dataShadow.sendMTFValues_code;
1119        final OutputStream outShadow = this.out;
1120        final byte[] selector = dataShadow.selector;
1121        final char[] sfmap = dataShadow.sfmap;
1122        final int nMTFShadow = this.nMTF;
1123
1124        int selCtr = 0;
1125
1126        int bsLiveShadow = this.bsLive;
1127        int bsBuffShadow = this.bsBuff;
1128
1129        for (int gs = 0; gs < nMTFShadow;) {
1130            final int ge = Math.min(gs + G_SIZE - 1, nMTFShadow - 1);
1131            final int selector_selCtr = selector[selCtr] & 0xff;
1132            final int[] code_selCtr = code[selector_selCtr];
1133            final byte[] len_selCtr = len[selector_selCtr];
1134
1135            while (gs <= ge) {
1136                final int sfmap_i = sfmap[gs];
1137
1138                //
1139                // inlined: bsW(len_selCtr[sfmap_i] & 0xff,
1140                // code_selCtr[sfmap_i]);
1141                //
1142                while (bsLiveShadow >= 8) {
1143                    outShadow.write(bsBuffShadow >> 24);
1144                    bsBuffShadow <<= 8;
1145                    bsLiveShadow -= 8;
1146                }
1147                final int n = len_selCtr[sfmap_i] & 0xFF;
1148                bsBuffShadow |= code_selCtr[sfmap_i] << 32 - bsLiveShadow - n;
1149                bsLiveShadow += n;
1150
1151                gs++;
1152            }
1153
1154            gs = ge + 1;
1155            selCtr++;
1156        }
1157
1158        this.bsBuff = bsBuffShadow;
1159        this.bsLive = bsLiveShadow;
1160    }
1161
1162    @Override
1163    public void write(final byte[] buf, int offs, final int len) throws IOException {
1164        if (offs < 0) {
1165            throw new IndexOutOfBoundsException("offs(" + offs + ") < 0.");
1166        }
1167        if (len < 0) {
1168            throw new IndexOutOfBoundsException("len(" + len + ") < 0.");
1169        }
1170        if (offs + len > buf.length) {
1171            throw new IndexOutOfBoundsException("offs(" + offs + ") + len(" + len + ") > buf.length(" + buf.length + ").");
1172        }
1173        checkClosed();
1174        for (final int hi = offs + len; offs < hi;) {
1175            write0(buf[offs++]);
1176        }
1177    }
1178
1179    @Override
1180    public void write(final int b) throws IOException {
1181        checkClosed();
1182        write0(b);
1183    }
1184
1185    /**
1186     * Keeps track of the last bytes written and implicitly performs run-length encoding as the first step of the bzip2 algorithm.
1187     */
1188    private void write0(int b) throws IOException {
1189        if (this.currentChar != -1) {
1190            b &= 0xff;
1191            if (this.currentChar == b) {
1192                if (++this.runLength > 254) {
1193                    writeRun();
1194                    this.currentChar = -1;
1195                    this.runLength = 0;
1196                }
1197                // else nothing to do
1198            } else {
1199                writeRun();
1200                this.runLength = 1;
1201                this.currentChar = b;
1202            }
1203        } else {
1204            this.currentChar = b & 0xff;
1205            this.runLength++;
1206        }
1207    }
1208
1209    /**
1210     * Writes the current byte to the buffer, run-length encoding it if it has been repeated at least four times (the first step RLEs sequences of four
1211     * identical bytes).
1212     *
1213     * <p>
1214     * Flushes the current block before writing data if it is full.
1215     * </p>
1216     *
1217     * <p>
1218     * "write to the buffer" means adding to data.buffer starting two steps "after" this.last - initially starting at index 1 (not 0) - and updating this.last
1219     * to point to the last index written minus 1.
1220     * </p>
1221     */
1222    private void writeRun() throws IOException {
1223        final int lastShadow = this.last;
1224
1225        if (lastShadow < this.allowableBlockSize) {
1226            final int currentCharShadow = this.currentChar;
1227            final Data dataShadow = this.data;
1228            dataShadow.inUse[currentCharShadow] = true;
1229            final byte ch = (byte) currentCharShadow;
1230
1231            int runLengthShadow = this.runLength;
1232            this.crc.update(currentCharShadow, runLengthShadow);
1233
1234            switch (runLengthShadow) {
1235            case 1:
1236                dataShadow.block[lastShadow + 2] = ch;
1237                this.last = lastShadow + 1;
1238                break;
1239
1240            case 2:
1241                dataShadow.block[lastShadow + 2] = ch;
1242                dataShadow.block[lastShadow + 3] = ch;
1243                this.last = lastShadow + 2;
1244                break;
1245
1246            case 3: {
1247                final byte[] block = dataShadow.block;
1248                block[lastShadow + 2] = ch;
1249                block[lastShadow + 3] = ch;
1250                block[lastShadow + 4] = ch;
1251                this.last = lastShadow + 3;
1252            }
1253                break;
1254
1255            default: {
1256                runLengthShadow -= 4;
1257                dataShadow.inUse[runLengthShadow] = true;
1258                final byte[] block = dataShadow.block;
1259                block[lastShadow + 2] = ch;
1260                block[lastShadow + 3] = ch;
1261                block[lastShadow + 4] = ch;
1262                block[lastShadow + 5] = ch;
1263                block[lastShadow + 6] = (byte) runLengthShadow;
1264                this.last = lastShadow + 5;
1265            }
1266                break;
1267
1268            }
1269        } else {
1270            endBlock();
1271            initBlock();
1272            writeRun();
1273        }
1274    }
1275
1276}