001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing,
013 * software distributed under the License is distributed on an
014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015 * KIND, either express or implied.  See the License for the
016 * specific language governing permissions and limitations
017 * under the License.
018 */
019package org.apache.commons.compress.compressors.bzip2;
020
021import java.io.IOException;
022import java.io.OutputStream;
023import java.util.Arrays;
024
025import org.apache.commons.compress.compressors.CompressorOutputStream;
026
027/**
028 * An output stream that compresses into the BZip2 format into another stream.
029 *
030 * <p>
031 * The compression requires large amounts of memory. Thus you should call the {@link #close() close()} method as soon as possible, to force
032 * {@code BZip2CompressorOutputStream} to release the allocated memory.
033 * </p>
034 *
035 * <p>
036 * You can shrink the amount of allocated memory and maybe raise the compression speed by choosing a lower blocksize, which in turn may cause a lower
037 * compression ratio. You can avoid unnecessary memory allocation by avoiding using a blocksize which is bigger than the size of the input.
038 * </p>
039 *
040 * <p>
041 * You can compute the memory usage for compressing by the following formula:
042 * </p>
043 *
044 * <pre>
045 * &lt;code&gt;400k + (9 * blocksize)&lt;/code&gt;.
046 * </pre>
047 *
048 * <p>
049 * To get the memory required for decompression by {@link BZip2CompressorInputStream} use
050 * </p>
051 *
052 * <pre>
053 * &lt;code&gt;65k + (5 * blocksize)&lt;/code&gt;.
054 * </pre>
055 *
056 * <table style="width:100%" border="1">
057 * <caption>Memory usage by blocksize</caption>
058 * <tr>
059 * <th colspan="3">Memory usage by blocksize</th>
060 * </tr>
061 * <tr>
062 * <th style="text-align: right">Blocksize</th>
063 * <th style="text-align: right">Compression<br>
064 * memory usage</th>
065 * <th style="text-align: right">Decompression<br>
066 * memory usage</th>
067 * </tr>
068 * <tr>
069 * <td style="text-align: right">100k</td>
070 * <td style="text-align: right">1300k</td>
071 * <td style="text-align: right">565k</td>
072 * </tr>
073 * <tr>
074 * <td style="text-align: right">200k</td>
075 * <td style="text-align: right">2200k</td>
076 * <td style="text-align: right">1065k</td>
077 * </tr>
078 * <tr>
079 * <td style="text-align: right">300k</td>
080 * <td style="text-align: right">3100k</td>
081 * <td style="text-align: right">1565k</td>
082 * </tr>
083 * <tr>
084 * <td style="text-align: right">400k</td>
085 * <td style="text-align: right">4000k</td>
086 * <td style="text-align: right">2065k</td>
087 * </tr>
088 * <tr>
089 * <td style="text-align: right">500k</td>
090 * <td style="text-align: right">4900k</td>
091 * <td style="text-align: right">2565k</td>
092 * </tr>
093 * <tr>
094 * <td style="text-align: right">600k</td>
095 * <td style="text-align: right">5800k</td>
096 * <td style="text-align: right">3065k</td>
097 * </tr>
098 * <tr>
099 * <td style="text-align: right">700k</td>
100 * <td style="text-align: right">6700k</td>
101 * <td style="text-align: right">3565k</td>
102 * </tr>
103 * <tr>
104 * <td style="text-align: right">800k</td>
105 * <td style="text-align: right">7600k</td>
106 * <td style="text-align: right">4065k</td>
107 * </tr>
108 * <tr>
109 * <td style="text-align: right">900k</td>
110 * <td style="text-align: right">8500k</td>
111 * <td style="text-align: right">4565k</td>
112 * </tr>
113 * </table>
114 *
115 * <p>
116 * For decompression {@code BZip2CompressorInputStream} allocates less memory if the bzipped input is smaller than one block.
117 * </p>
118 *
119 * <p>
120 * Instances of this class are not threadsafe.
121 * </p>
122 *
123 * <p>
124 * TODO: Update to BZip2 1.0.1
125 * </p>
126 *
127 * @NotThreadSafe
128 */
129public class BZip2CompressorOutputStream extends CompressorOutputStream implements BZip2Constants {
130
131    static final class Data {
132
133        // with blockSize 900k
134        /* maps unsigned byte => "does it occur in block" */
135        final boolean[] inUse = new boolean[256]; // 256 byte
136        final byte[] unseqToSeq = new byte[256]; // 256 byte
137        final int[] mtfFreq = new int[MAX_ALPHA_SIZE]; // 1032 byte
138        final byte[] selector = new byte[MAX_SELECTORS]; // 18002 byte
139        final byte[] selectorMtf = new byte[MAX_SELECTORS]; // 18002 byte
140
141        final byte[] generateMTFValues_yy = new byte[256]; // 256 byte
142        final byte[][] sendMTFValues_len = new byte[N_GROUPS][MAX_ALPHA_SIZE]; // 1548
143        // byte
144        final int[][] sendMTFValues_rfreq = new int[N_GROUPS][MAX_ALPHA_SIZE]; // 6192
145        // byte
146        final int[] sendMTFValues_fave = new int[N_GROUPS]; // 24 byte
147        final short[] sendMTFValues_cost = new short[N_GROUPS]; // 12 byte
148        final int[][] sendMTFValues_code = new int[N_GROUPS][MAX_ALPHA_SIZE]; // 6192
149        // byte
150        final byte[] sendMTFValues2_pos = new byte[N_GROUPS]; // 6 byte
151        final boolean[] sentMTFValues4_inUse16 = new boolean[16]; // 16 byte
152
153        final int[] heap = new int[MAX_ALPHA_SIZE + 2]; // 1040 byte
154        final int[] weight = new int[MAX_ALPHA_SIZE * 2]; // 2064 byte
155        final int[] parent = new int[MAX_ALPHA_SIZE * 2]; // 2064 byte
156
157        // ------------
158        // 333408 byte
159
160        /*
161         * holds the RLEd block of original data starting at index 1. After sorting the last byte added to the buffer is at index 0.
162         */
163        final byte[] block; // 900021 byte
164        /*
165         * maps index in Burrows-Wheeler transformed block => index of byte in original block
166         */
167        final int[] fmap; // 3600000 byte
168        final char[] sfmap; // 3600000 byte
169        // ------------
170        // 8433529 byte
171        // ============
172
173        /**
174         * Index of original line in Burrows-Wheeler table.
175         *
176         * <p>
177         * This is the index in fmap that points to the last byte of the original data.
178         * </p>
179         */
180        int origPtr;
181
182        Data(final int blockSize100k) {
183            final int n = blockSize100k * BZip2Constants.BASEBLOCKSIZE;
184            this.block = new byte[n + 1 + NUM_OVERSHOOT_BYTES];
185            this.fmap = new int[n];
186            this.sfmap = new char[2 * n];
187        }
188
189    }
190
191    /**
192     * The minimum supported blocksize {@code  == 1}.
193     */
194    public static final int MIN_BLOCKSIZE = 1;
195
196    /**
197     * The maximum supported blocksize {@code  == 9}.
198     */
199    public static final int MAX_BLOCKSIZE = 9;
200    private static final int GREATER_ICOST = 15;
201
202    private static final int LESSER_ICOST = 0;
203
204    /**
205     * Chooses a blocksize based on the given length of the data to compress.
206     *
207     * @return The blocksize, between {@link #MIN_BLOCKSIZE} and {@link #MAX_BLOCKSIZE} both inclusive. For a negative {@code inputLength} this method returns
208     *         {@code MAX_BLOCKSIZE} always.
209     *
210     * @param inputLength The length of the data which will be compressed by {@code BZip2CompressorOutputStream}.
211     */
212    public static int chooseBlockSize(final long inputLength) {
213        return inputLength > 0 ? (int) Math.min(inputLength / 132000 + 1, 9) : MAX_BLOCKSIZE;
214    }
215
216    private static void hbAssignCodes(final int[] code, final byte[] length, final int minLen, final int maxLen, final int alphaSize) {
217        int vec = 0;
218        for (int n = minLen; n <= maxLen; n++) {
219            for (int i = 0; i < alphaSize; i++) {
220                if ((length[i] & 0xff) == n) {
221                    code[i] = vec;
222                    vec++;
223                }
224            }
225            vec <<= 1;
226        }
227    }
228
229    private static void hbMakeCodeLengths(final byte[] len, final int[] freq, final Data dat, final int alphaSize, final int maxLen) {
230        /*
231         * Nodes and heap entries run from 1. Entry 0 for both the heap and nodes is a sentinel.
232         */
233        final int[] heap = dat.heap;
234        final int[] weight = dat.weight;
235        final int[] parent = dat.parent;
236
237        for (int i = alphaSize; --i >= 0;) {
238            weight[i + 1] = (freq[i] == 0 ? 1 : freq[i]) << 8;
239        }
240
241        for (boolean tooLong = true; tooLong;) {
242            tooLong = false;
243
244            int nNodes = alphaSize;
245            int nHeap = 0;
246            heap[0] = 0;
247            weight[0] = 0;
248            parent[0] = -2;
249
250            for (int i = 1; i <= alphaSize; i++) {
251                parent[i] = -1;
252                nHeap++;
253                heap[nHeap] = i;
254
255                int zz = nHeap;
256                final int tmp = heap[zz];
257                while (weight[tmp] < weight[heap[zz >> 1]]) {
258                    heap[zz] = heap[zz >> 1];
259                    zz >>= 1;
260                }
261                heap[zz] = tmp;
262            }
263
264            while (nHeap > 1) {
265                final int n1 = heap[1];
266                heap[1] = heap[nHeap];
267                nHeap--;
268
269                int yy = 0;
270                int zz = 1;
271                int tmp = heap[1];
272
273                while (true) {
274                    yy = zz << 1;
275
276                    if (yy > nHeap) {
277                        break;
278                    }
279
280                    if (yy < nHeap && weight[heap[yy + 1]] < weight[heap[yy]]) {
281                        yy++;
282                    }
283
284                    if (weight[tmp] < weight[heap[yy]]) {
285                        break;
286                    }
287
288                    heap[zz] = heap[yy];
289                    zz = yy;
290                }
291
292                heap[zz] = tmp;
293
294                final int n2 = heap[1];
295                heap[1] = heap[nHeap];
296                nHeap--;
297
298                yy = 0;
299                zz = 1;
300                tmp = heap[1];
301
302                while (true) {
303                    yy = zz << 1;
304
305                    if (yy > nHeap) {
306                        break;
307                    }
308
309                    if (yy < nHeap && weight[heap[yy + 1]] < weight[heap[yy]]) {
310                        yy++;
311                    }
312
313                    if (weight[tmp] < weight[heap[yy]]) {
314                        break;
315                    }
316
317                    heap[zz] = heap[yy];
318                    zz = yy;
319                }
320
321                heap[zz] = tmp;
322                nNodes++;
323                parent[n1] = parent[n2] = nNodes;
324
325                final int weight_n1 = weight[n1];
326                final int weight_n2 = weight[n2];
327                weight[nNodes] = (weight_n1 & 0xffffff00) + (weight_n2 & 0xffffff00) | 1 + Math.max(weight_n1 & 0x000000ff, weight_n2 & 0x000000ff);
328
329                parent[nNodes] = -1;
330                nHeap++;
331                heap[nHeap] = nNodes;
332
333                tmp = 0;
334                zz = nHeap;
335                tmp = heap[zz];
336                final int weight_tmp = weight[tmp];
337                while (weight_tmp < weight[heap[zz >> 1]]) {
338                    heap[zz] = heap[zz >> 1];
339                    zz >>= 1;
340                }
341                heap[zz] = tmp;
342
343            }
344
345            for (int i = 1; i <= alphaSize; i++) {
346                int j = 0;
347                int k = i;
348
349                for (int parent_k; (parent_k = parent[k]) >= 0;) {
350                    k = parent_k;
351                    j++;
352                }
353
354                len[i - 1] = (byte) j;
355                if (j > maxLen) {
356                    tooLong = true;
357                }
358            }
359
360            if (tooLong) {
361                for (int i = 1; i < alphaSize; i++) {
362                    int j = weight[i] >> 8;
363                    j = 1 + (j >> 1);
364                    weight[i] = j << 8;
365                }
366            }
367        }
368    }
369
370    /**
371     * Index of the last char in the block, so the block size == last + 1.
372     */
373    private int last;
374    /**
375     * Always: in the range 0 .. 9. The current block size is 100000 * this number.
376     */
377    private final int blockSize100k;
378
379    private int bsBuff;
380
381    private int bsLive;
382
383    private final CRC crc = new CRC();
384    private int nInUse;
385
386    private int nMTF;
387    private int currentChar = -1;
388    private int runLength;
389
390    private int combinedCRC;
391
392    private final int allowableBlockSize;
393    /**
394     * All memory intensive stuff.
395     */
396    private Data data;
397
398    private BlockSort blockSorter;
399
400    private OutputStream out;
401
402    private volatile boolean closed;
403
404    /**
405     * Constructs a new {@code BZip2CompressorOutputStream} with a blocksize of 900k.
406     *
407     * @param out the destination stream.
408     *
409     * @throws IOException          if an I/O error occurs in the specified stream.
410     * @throws NullPointerException if {@code out == null}.
411     */
412    public BZip2CompressorOutputStream(final OutputStream out) throws IOException {
413        this(out, MAX_BLOCKSIZE);
414    }
415
416    /**
417     * Constructs a new {@code BZip2CompressorOutputStream} with specified blocksize.
418     *
419     * @param out       the destination stream.
420     * @param blockSize the blockSize as 100k units.
421     *
422     * @throws IOException              if an I/O error occurs in the specified stream.
423     * @throws IllegalArgumentException if {@code (blockSize &lt; 1) || (blockSize &gt; 9)}.
424     * @throws NullPointerException     if {@code out == null}.
425     *
426     * @see #MIN_BLOCKSIZE
427     * @see #MAX_BLOCKSIZE
428     */
429    public BZip2CompressorOutputStream(final OutputStream out, final int blockSize) throws IOException {
430        if (blockSize < 1) {
431            throw new IllegalArgumentException("blockSize(" + blockSize + ") < 1");
432        }
433        if (blockSize > 9) {
434            throw new IllegalArgumentException("blockSize(" + blockSize + ") > 9");
435        }
436
437        this.blockSize100k = blockSize;
438        this.out = out;
439
440        /* 20 is just a paranoia constant */
441        this.allowableBlockSize = this.blockSize100k * BZip2Constants.BASEBLOCKSIZE - 20;
442        init();
443    }
444
445    private void blockSort() {
446        blockSorter.blockSort(data, last);
447    }
448
449    private void bsFinishedWithStream() throws IOException {
450        while (this.bsLive > 0) {
451            final int ch = this.bsBuff >> 24;
452            this.out.write(ch); // write 8-bit
453            this.bsBuff <<= 8;
454            this.bsLive -= 8;
455        }
456    }
457
458    private void bsPutInt(final int u) throws IOException {
459        bsW(8, u >> 24 & 0xff);
460        bsW(8, u >> 16 & 0xff);
461        bsW(8, u >> 8 & 0xff);
462        bsW(8, u & 0xff);
463    }
464
465    private void bsPutUByte(final int c) throws IOException {
466        bsW(8, c);
467    }
468
469    private void bsW(final int n, final int v) throws IOException {
470        final OutputStream outShadow = this.out;
471        int bsLiveShadow = this.bsLive;
472        int bsBuffShadow = this.bsBuff;
473
474        while (bsLiveShadow >= 8) {
475            outShadow.write(bsBuffShadow >> 24); // write 8-bit
476            bsBuffShadow <<= 8;
477            bsLiveShadow -= 8;
478        }
479
480        this.bsBuff = bsBuffShadow | v << 32 - bsLiveShadow - n;
481        this.bsLive = bsLiveShadow + n;
482    }
483
484    @Override
485    public void close() throws IOException {
486        if (!closed) {
487            try (OutputStream outShadow = this.out) {
488                finish();
489            }
490        }
491    }
492
493    private void endBlock() throws IOException {
494        final int blockCRC = this.crc.getValue();
495        this.combinedCRC = this.combinedCRC << 1 | this.combinedCRC >>> 31;
496        this.combinedCRC ^= blockCRC;
497
498        // empty block at end of file
499        if (this.last == -1) {
500            return;
501        }
502
503        /* sort the block and establish posn of original string */
504        blockSort();
505
506        /*
507         * A 6-byte block header, the value chosen arbitrarily as 0x314159265359 :-). A 32 bit value does not really give a strong enough guarantee that the
508         * value will not appear by chance in the compressed data stream. Worst-case probability of this event, for a 900k block, is about 2.0e-3 for 32 bits,
509         * 1.0e-5 for 40 bits and 4.0e-8 for 48 bits. For a compressed file of size 100Gb -- about 100000 blocks -- only a 48-bit marker will do. NB: normal
510         * compression/ decompression doesn't rely on these statistical properties. They are only important when trying to recover blocks from damaged files.
511         */
512        bsPutUByte(0x31);
513        bsPutUByte(0x41);
514        bsPutUByte(0x59);
515        bsPutUByte(0x26);
516        bsPutUByte(0x53);
517        bsPutUByte(0x59);
518
519        /* Now the block's CRC, so it is in a known place. */
520        bsPutInt(blockCRC);
521
522        /* Now a single bit indicating no randomisation. */
523        bsW(1, 0);
524
525        /* Finally, block's contents proper. */
526        moveToFrontCodeAndSend();
527    }
528
529    private void endCompression() throws IOException {
530        /*
531         * Now another magic 48-bit number, 0x177245385090, to indicate the end of the last block. (sqrt(pi), if you want to know. I did want to use e, but it
532         * contains too much repetition -- 27 18 28 18 28 46 -- for me to feel statistically comfortable. Call me paranoid.)
533         */
534        bsPutUByte(0x17);
535        bsPutUByte(0x72);
536        bsPutUByte(0x45);
537        bsPutUByte(0x38);
538        bsPutUByte(0x50);
539        bsPutUByte(0x90);
540
541        bsPutInt(this.combinedCRC);
542        bsFinishedWithStream();
543    }
544
545    public void finish() throws IOException {
546        if (!closed) {
547            closed = true;
548            try {
549                if (this.runLength > 0) {
550                    writeRun();
551                }
552                this.currentChar = -1;
553                endBlock();
554                endCompression();
555            } finally {
556                this.out = null;
557                this.blockSorter = null;
558                this.data = null;
559            }
560        }
561    }
562
563    @Override
564    public void flush() throws IOException {
565        final OutputStream outShadow = this.out;
566        if (outShadow != null) {
567            outShadow.flush();
568        }
569    }
570
571    /*
572     * Performs Move-To-Front on the Burrows-Wheeler transformed buffer, storing the MTFed data in data.sfmap in RUNA/RUNB run-length-encoded form.
573     *
574     * <p>Keeps track of byte frequencies in data.mtfFreq at the same time.</p>
575     */
576    private void generateMTFValues() {
577        final int lastShadow = this.last;
578        final Data dataShadow = this.data;
579        final boolean[] inUse = dataShadow.inUse;
580        final byte[] block = dataShadow.block;
581        final int[] fmap = dataShadow.fmap;
582        final char[] sfmap = dataShadow.sfmap;
583        final int[] mtfFreq = dataShadow.mtfFreq;
584        final byte[] unseqToSeq = dataShadow.unseqToSeq;
585        final byte[] yy = dataShadow.generateMTFValues_yy;
586
587        // make maps
588        int nInUseShadow = 0;
589        for (int i = 0; i < 256; i++) {
590            if (inUse[i]) {
591                unseqToSeq[i] = (byte) nInUseShadow;
592                nInUseShadow++;
593            }
594        }
595        this.nInUse = nInUseShadow;
596
597        final int eob = nInUseShadow + 1;
598
599        Arrays.fill(mtfFreq, 0, eob + 1, 0);
600
601        for (int i = nInUseShadow; --i >= 0;) {
602            yy[i] = (byte) i;
603        }
604
605        int wr = 0;
606        int zPend = 0;
607
608        for (int i = 0; i <= lastShadow; i++) {
609            final byte ll_i = unseqToSeq[block[fmap[i]] & 0xff];
610            byte tmp = yy[0];
611            int j = 0;
612
613            while (ll_i != tmp) {
614                j++;
615                final byte tmp2 = tmp;
616                tmp = yy[j];
617                yy[j] = tmp2;
618            }
619            yy[0] = tmp;
620
621            if (j == 0) {
622                zPend++;
623            } else {
624                if (zPend > 0) {
625                    zPend--;
626                    while (true) {
627                        if ((zPend & 1) == 0) {
628                            sfmap[wr] = RUNA;
629                            wr++;
630                            mtfFreq[RUNA]++;
631                        } else {
632                            sfmap[wr] = RUNB;
633                            wr++;
634                            mtfFreq[RUNB]++;
635                        }
636
637                        if (zPend < 2) {
638                            break;
639                        }
640                        zPend = zPend - 2 >> 1;
641                    }
642                    zPend = 0;
643                }
644                sfmap[wr] = (char) (j + 1);
645                wr++;
646                mtfFreq[j + 1]++;
647            }
648        }
649
650        if (zPend > 0) {
651            zPend--;
652            while (true) {
653                if ((zPend & 1) == 0) {
654                    sfmap[wr] = RUNA;
655                    wr++;
656                    mtfFreq[RUNA]++;
657                } else {
658                    sfmap[wr] = RUNB;
659                    wr++;
660                    mtfFreq[RUNB]++;
661                }
662
663                if (zPend < 2) {
664                    break;
665                }
666                zPend = zPend - 2 >> 1;
667            }
668        }
669
670        sfmap[wr] = (char) eob;
671        mtfFreq[eob]++;
672        this.nMTF = wr + 1;
673    }
674
675    /**
676     * Returns the blocksize parameter specified at construction time.
677     *
678     * @return the blocksize parameter specified at construction time
679     */
680    public final int getBlockSize() {
681        return this.blockSize100k;
682    }
683
684    /**
685     * Writes magic bytes like BZ on the first position of the stream and bytes indicating the file-format, which is huffmanized, followed by a digit indicating
686     * blockSize100k.
687     *
688     * @throws IOException if the magic bytes could not been written
689     */
690    private void init() throws IOException {
691        bsPutUByte('B');
692        bsPutUByte('Z');
693
694        this.data = new Data(this.blockSize100k);
695        this.blockSorter = new BlockSort(this.data);
696
697        // huffmanized magic bytes
698        bsPutUByte('h');
699        bsPutUByte('0' + this.blockSize100k);
700
701        this.combinedCRC = 0;
702        initBlock();
703    }
704
705    private void initBlock() {
706        // blockNo++;
707        this.crc.reset();
708        this.last = -1;
709        // ch = 0;
710
711        final boolean[] inUse = this.data.inUse;
712        for (int i = 256; --i >= 0;) {
713            inUse[i] = false;
714        }
715
716    }
717
718    private void moveToFrontCodeAndSend() throws IOException {
719        bsW(24, this.data.origPtr);
720        generateMTFValues();
721        sendMTFValues();
722    }
723
724    private void sendMTFValues() throws IOException {
725        final byte[][] len = this.data.sendMTFValues_len;
726        final int alphaSize = this.nInUse + 2;
727
728        for (int t = N_GROUPS; --t >= 0;) {
729            final byte[] len_t = len[t];
730            for (int v = alphaSize; --v >= 0;) {
731                len_t[v] = GREATER_ICOST;
732            }
733        }
734
735        /* Decide how many coding tables to use */
736        // assert (this.nMTF > 0) : this.nMTF;
737        final int nGroups = this.nMTF < 200 ? 2 : this.nMTF < 600 ? 3 : this.nMTF < 1200 ? 4 : this.nMTF < 2400 ? 5 : 6;
738
739        /* Generate an initial set of coding tables */
740        sendMTFValues0(nGroups, alphaSize);
741
742        /*
743         * Iterate up to N_ITERS times to improve the tables.
744         */
745        final int nSelectors = sendMTFValues1(nGroups, alphaSize);
746
747        /* Compute MTF values for the selectors. */
748        sendMTFValues2(nGroups, nSelectors);
749
750        /* Assign actual codes for the tables. */
751        sendMTFValues3(nGroups, alphaSize);
752
753        /* Transmit the mapping table. */
754        sendMTFValues4();
755
756        /* Now the selectors. */
757        sendMTFValues5(nGroups, nSelectors);
758
759        /* Now the coding tables. */
760        sendMTFValues6(nGroups, alphaSize);
761
762        /* And finally, the block data proper */
763        sendMTFValues7();
764    }
765
766    private void sendMTFValues0(final int nGroups, final int alphaSize) {
767        final byte[][] len = this.data.sendMTFValues_len;
768        final int[] mtfFreq = this.data.mtfFreq;
769
770        int remF = this.nMTF;
771        int gs = 0;
772
773        for (int nPart = nGroups; nPart > 0; nPart--) {
774            final int tFreq = remF / nPart;
775            int ge = gs - 1;
776            int aFreq = 0;
777
778            for (final int a = alphaSize - 1; aFreq < tFreq && ge < a;) {
779                aFreq += mtfFreq[++ge];
780            }
781
782            if (ge > gs && nPart != nGroups && nPart != 1 && (nGroups - nPart & 1) != 0) {
783                aFreq -= mtfFreq[ge--];
784            }
785
786            final byte[] len_np = len[nPart - 1];
787            for (int v = alphaSize; --v >= 0;) {
788                if (v >= gs && v <= ge) {
789                    len_np[v] = LESSER_ICOST;
790                } else {
791                    len_np[v] = GREATER_ICOST;
792                }
793            }
794
795            gs = ge + 1;
796            remF -= aFreq;
797        }
798    }
799
800    private int sendMTFValues1(final int nGroups, final int alphaSize) {
801        final Data dataShadow = this.data;
802        final int[][] rfreq = dataShadow.sendMTFValues_rfreq;
803        final int[] fave = dataShadow.sendMTFValues_fave;
804        final short[] cost = dataShadow.sendMTFValues_cost;
805        final char[] sfmap = dataShadow.sfmap;
806        final byte[] selector = dataShadow.selector;
807        final byte[][] len = dataShadow.sendMTFValues_len;
808        final byte[] len_0 = len[0];
809        final byte[] len_1 = len[1];
810        final byte[] len_2 = len[2];
811        final byte[] len_3 = len[3];
812        final byte[] len_4 = len[4];
813        final byte[] len_5 = len[5];
814        final int nMTFShadow = this.nMTF;
815
816        int nSelectors = 0;
817
818        for (int iter = 0; iter < N_ITERS; iter++) {
819            for (int t = nGroups; --t >= 0;) {
820                fave[t] = 0;
821                final int[] rfreqt = rfreq[t];
822                for (int i = alphaSize; --i >= 0;) {
823                    rfreqt[i] = 0;
824                }
825            }
826
827            nSelectors = 0;
828
829            for (int gs = 0; gs < this.nMTF;) {
830                // Set group start & end marks.
831
832                // Calculate the cost of this group as coded by each of the
833                // coding tables.
834
835                final int ge = Math.min(gs + G_SIZE - 1, nMTFShadow - 1);
836
837                final byte mask = (byte) 0xff;
838                if (nGroups == N_GROUPS) {
839                    // unrolled version of the else-block
840
841                    short cost0 = 0;
842                    short cost1 = 0;
843                    short cost2 = 0;
844                    short cost3 = 0;
845                    short cost4 = 0;
846                    short cost5 = 0;
847
848                    for (int i = gs; i <= ge; i++) {
849                        final int icv = sfmap[i];
850                        cost0 += (short) (len_0[icv] & mask);
851                        cost1 += (short) (len_1[icv] & mask);
852                        cost2 += (short) (len_2[icv] & mask);
853                        cost3 += (short) (len_3[icv] & mask);
854                        cost4 += (short) (len_4[icv] & mask);
855                        cost5 += (short) (len_5[icv] & mask);
856                    }
857
858                    cost[0] = cost0;
859                    cost[1] = cost1;
860                    cost[2] = cost2;
861                    cost[3] = cost3;
862                    cost[4] = cost4;
863                    cost[5] = cost5;
864
865                } else {
866                    for (int t = nGroups; --t >= 0;) {
867                        cost[t] = 0;
868                    }
869
870                    for (int i = gs; i <= ge; i++) {
871                        final int icv = sfmap[i];
872                        for (int t = nGroups; --t >= 0;) {
873                            cost[t] += (short) (len[t][icv] & mask);
874                        }
875                    }
876                }
877
878                /*
879                 * Find the coding table which is best for this group, and record its identity in the selector table.
880                 */
881                int bt = -1;
882                for (int t = nGroups, bc = 999999999; --t >= 0;) {
883                    final int cost_t = cost[t];
884                    if (cost_t < bc) {
885                        bc = cost_t;
886                        bt = t;
887                    }
888                }
889
890                fave[bt]++;
891                selector[nSelectors] = (byte) bt;
892                nSelectors++;
893
894                /*
895                 * Increment the symbol frequencies for the selected table.
896                 */
897                final int[] rfreq_bt = rfreq[bt];
898                for (int i = gs; i <= ge; i++) {
899                    rfreq_bt[sfmap[i]]++;
900                }
901
902                gs = ge + 1;
903            }
904
905            /*
906             * Recompute the tables based on the accumulated frequencies.
907             */
908            for (int t = 0; t < nGroups; t++) {
909                hbMakeCodeLengths(len[t], rfreq[t], this.data, alphaSize, 20);
910            }
911        }
912
913        return nSelectors;
914    }
915
916    private void sendMTFValues2(final int nGroups, final int nSelectors) {
917        // assert (nGroups < 8) : nGroups;
918
919        final Data dataShadow = this.data;
920        final byte[] pos = dataShadow.sendMTFValues2_pos;
921
922        for (int i = nGroups; --i >= 0;) {
923            pos[i] = (byte) i;
924        }
925
926        for (int i = 0; i < nSelectors; i++) {
927            final byte ll_i = dataShadow.selector[i];
928            byte tmp = pos[0];
929            int j = 0;
930
931            while (ll_i != tmp) {
932                j++;
933                final byte tmp2 = tmp;
934                tmp = pos[j];
935                pos[j] = tmp2;
936            }
937
938            pos[0] = tmp;
939            dataShadow.selectorMtf[i] = (byte) j;
940        }
941    }
942
943    private void sendMTFValues3(final int nGroups, final int alphaSize) {
944        final int[][] code = this.data.sendMTFValues_code;
945        final byte[][] len = this.data.sendMTFValues_len;
946
947        for (int t = 0; t < nGroups; t++) {
948            int minLen = 32;
949            int maxLen = 0;
950            final byte[] len_t = len[t];
951            for (int i = alphaSize; --i >= 0;) {
952                final int l = len_t[i] & 0xff;
953                if (l > maxLen) {
954                    maxLen = l;
955                }
956                if (l < minLen) {
957                    minLen = l;
958                }
959            }
960
961            // assert (maxLen <= 20) : maxLen;
962            // assert (minLen >= 1) : minLen;
963
964            hbAssignCodes(code[t], len[t], minLen, maxLen, alphaSize);
965        }
966    }
967
968    private void sendMTFValues4() throws IOException {
969        final boolean[] inUse = this.data.inUse;
970        final boolean[] inUse16 = this.data.sentMTFValues4_inUse16;
971
972        for (int i = 16; --i >= 0;) {
973            inUse16[i] = false;
974            final int i16 = i * 16;
975            for (int j = 16; --j >= 0;) {
976                if (inUse[i16 + j]) {
977                    inUse16[i] = true;
978                    break;
979                }
980            }
981        }
982
983        for (int i = 0; i < 16; i++) {
984            bsW(1, inUse16[i] ? 1 : 0);
985        }
986
987        final OutputStream outShadow = this.out;
988        int bsLiveShadow = this.bsLive;
989        int bsBuffShadow = this.bsBuff;
990
991        for (int i = 0; i < 16; i++) {
992            if (inUse16[i]) {
993                final int i16 = i * 16;
994                for (int j = 0; j < 16; j++) {
995                    // inlined: bsW(1, inUse[i16 + j] ? 1 : 0);
996                    while (bsLiveShadow >= 8) {
997                        outShadow.write(bsBuffShadow >> 24); // write 8-bit
998                        bsBuffShadow <<= 8;
999                        bsLiveShadow -= 8;
1000                    }
1001                    if (inUse[i16 + j]) {
1002                        bsBuffShadow |= 1 << 32 - bsLiveShadow - 1;
1003                    }
1004                    bsLiveShadow++;
1005                }
1006            }
1007        }
1008
1009        this.bsBuff = bsBuffShadow;
1010        this.bsLive = bsLiveShadow;
1011    }
1012
1013    private void sendMTFValues5(final int nGroups, final int nSelectors) throws IOException {
1014        bsW(3, nGroups);
1015        bsW(15, nSelectors);
1016
1017        final OutputStream outShadow = this.out;
1018        final byte[] selectorMtf = this.data.selectorMtf;
1019
1020        int bsLiveShadow = this.bsLive;
1021        int bsBuffShadow = this.bsBuff;
1022
1023        for (int i = 0; i < nSelectors; i++) {
1024            for (int j = 0, hj = selectorMtf[i] & 0xff; j < hj; j++) {
1025                // inlined: bsW(1, 1);
1026                while (bsLiveShadow >= 8) {
1027                    outShadow.write(bsBuffShadow >> 24);
1028                    bsBuffShadow <<= 8;
1029                    bsLiveShadow -= 8;
1030                }
1031                bsBuffShadow |= 1 << 32 - bsLiveShadow - 1;
1032                bsLiveShadow++;
1033            }
1034
1035            // inlined: bsW(1, 0);
1036            while (bsLiveShadow >= 8) {
1037                outShadow.write(bsBuffShadow >> 24);
1038                bsBuffShadow <<= 8;
1039                bsLiveShadow -= 8;
1040            }
1041            // bsBuffShadow |= 0 << (32 - bsLiveShadow - 1);
1042            bsLiveShadow++;
1043        }
1044
1045        this.bsBuff = bsBuffShadow;
1046        this.bsLive = bsLiveShadow;
1047    }
1048
1049    private void sendMTFValues6(final int nGroups, final int alphaSize) throws IOException {
1050        final byte[][] len = this.data.sendMTFValues_len;
1051        final OutputStream outShadow = this.out;
1052
1053        int bsLiveShadow = this.bsLive;
1054        int bsBuffShadow = this.bsBuff;
1055
1056        for (int t = 0; t < nGroups; t++) {
1057            final byte[] len_t = len[t];
1058            int curr = len_t[0] & 0xff;
1059
1060            // inlined: bsW(5, curr);
1061            while (bsLiveShadow >= 8) {
1062                outShadow.write(bsBuffShadow >> 24); // write 8-bit
1063                bsBuffShadow <<= 8;
1064                bsLiveShadow -= 8;
1065            }
1066            bsBuffShadow |= curr << 32 - bsLiveShadow - 5;
1067            bsLiveShadow += 5;
1068
1069            for (int i = 0; i < alphaSize; i++) {
1070                final int lti = len_t[i] & 0xff;
1071                while (curr < lti) {
1072                    // inlined: bsW(2, 2);
1073                    while (bsLiveShadow >= 8) {
1074                        outShadow.write(bsBuffShadow >> 24); // write 8-bit
1075                        bsBuffShadow <<= 8;
1076                        bsLiveShadow -= 8;
1077                    }
1078                    bsBuffShadow |= 2 << 32 - bsLiveShadow - 2;
1079                    bsLiveShadow += 2;
1080
1081                    curr++; /* 10 */
1082                }
1083
1084                while (curr > lti) {
1085                    // inlined: bsW(2, 3);
1086                    while (bsLiveShadow >= 8) {
1087                        outShadow.write(bsBuffShadow >> 24); // write 8-bit
1088                        bsBuffShadow <<= 8;
1089                        bsLiveShadow -= 8;
1090                    }
1091                    bsBuffShadow |= 3 << 32 - bsLiveShadow - 2;
1092                    bsLiveShadow += 2;
1093
1094                    curr--; /* 11 */
1095                }
1096
1097                // inlined: bsW(1, 0);
1098                while (bsLiveShadow >= 8) {
1099                    outShadow.write(bsBuffShadow >> 24); // write 8-bit
1100                    bsBuffShadow <<= 8;
1101                    bsLiveShadow -= 8;
1102                }
1103                // bsBuffShadow |= 0 << (32 - bsLiveShadow - 1);
1104                bsLiveShadow++;
1105            }
1106        }
1107
1108        this.bsBuff = bsBuffShadow;
1109        this.bsLive = bsLiveShadow;
1110    }
1111
1112    private void sendMTFValues7() throws IOException {
1113        final Data dataShadow = this.data;
1114        final byte[][] len = dataShadow.sendMTFValues_len;
1115        final int[][] code = dataShadow.sendMTFValues_code;
1116        final OutputStream outShadow = this.out;
1117        final byte[] selector = dataShadow.selector;
1118        final char[] sfmap = dataShadow.sfmap;
1119        final int nMTFShadow = this.nMTF;
1120
1121        int selCtr = 0;
1122
1123        int bsLiveShadow = this.bsLive;
1124        int bsBuffShadow = this.bsBuff;
1125
1126        for (int gs = 0; gs < nMTFShadow;) {
1127            final int ge = Math.min(gs + G_SIZE - 1, nMTFShadow - 1);
1128            final int selector_selCtr = selector[selCtr] & 0xff;
1129            final int[] code_selCtr = code[selector_selCtr];
1130            final byte[] len_selCtr = len[selector_selCtr];
1131
1132            while (gs <= ge) {
1133                final int sfmap_i = sfmap[gs];
1134
1135                //
1136                // inlined: bsW(len_selCtr[sfmap_i] & 0xff,
1137                // code_selCtr[sfmap_i]);
1138                //
1139                while (bsLiveShadow >= 8) {
1140                    outShadow.write(bsBuffShadow >> 24);
1141                    bsBuffShadow <<= 8;
1142                    bsLiveShadow -= 8;
1143                }
1144                final int n = len_selCtr[sfmap_i] & 0xFF;
1145                bsBuffShadow |= code_selCtr[sfmap_i] << 32 - bsLiveShadow - n;
1146                bsLiveShadow += n;
1147
1148                gs++;
1149            }
1150
1151            gs = ge + 1;
1152            selCtr++;
1153        }
1154
1155        this.bsBuff = bsBuffShadow;
1156        this.bsLive = bsLiveShadow;
1157    }
1158
1159    @Override
1160    public void write(final byte[] buf, int offs, final int len) throws IOException {
1161        if (offs < 0) {
1162            throw new IndexOutOfBoundsException("offs(" + offs + ") < 0.");
1163        }
1164        if (len < 0) {
1165            throw new IndexOutOfBoundsException("len(" + len + ") < 0.");
1166        }
1167        if (offs + len > buf.length) {
1168            throw new IndexOutOfBoundsException("offs(" + offs + ") + len(" + len + ") > buf.length(" + buf.length + ").");
1169        }
1170        if (closed) {
1171            throw new IOException("Stream closed");
1172        }
1173
1174        for (final int hi = offs + len; offs < hi;) {
1175            write0(buf[offs++]);
1176        }
1177    }
1178
1179    @Override
1180    public void write(final int b) throws IOException {
1181        if (closed) {
1182            throw new IOException("Closed");
1183        }
1184        write0(b);
1185    }
1186
1187    /**
1188     * Keeps track of the last bytes written and implicitly performs run-length encoding as the first step of the bzip2 algorithm.
1189     */
1190    private void write0(int b) throws IOException {
1191        if (this.currentChar != -1) {
1192            b &= 0xff;
1193            if (this.currentChar == b) {
1194                if (++this.runLength > 254) {
1195                    writeRun();
1196                    this.currentChar = -1;
1197                    this.runLength = 0;
1198                }
1199                // else nothing to do
1200            } else {
1201                writeRun();
1202                this.runLength = 1;
1203                this.currentChar = b;
1204            }
1205        } else {
1206            this.currentChar = b & 0xff;
1207            this.runLength++;
1208        }
1209    }
1210
1211    /**
1212     * Writes the current byte to the buffer, run-length encoding it if it has been repeated at least four times (the first step RLEs sequences of four
1213     * identical bytes).
1214     *
1215     * <p>
1216     * Flushes the current block before writing data if it is full.
1217     * </p>
1218     *
1219     * <p>
1220     * "write to the buffer" means adding to data.buffer starting two steps "after" this.last - initially starting at index 1 (not 0) - and updating this.last
1221     * to point to the last index written minus 1.
1222     * </p>
1223     */
1224    private void writeRun() throws IOException {
1225        final int lastShadow = this.last;
1226
1227        if (lastShadow < this.allowableBlockSize) {
1228            final int currentCharShadow = this.currentChar;
1229            final Data dataShadow = this.data;
1230            dataShadow.inUse[currentCharShadow] = true;
1231            final byte ch = (byte) currentCharShadow;
1232
1233            int runLengthShadow = this.runLength;
1234            this.crc.update(currentCharShadow, runLengthShadow);
1235
1236            switch (runLengthShadow) {
1237            case 1:
1238                dataShadow.block[lastShadow + 2] = ch;
1239                this.last = lastShadow + 1;
1240                break;
1241
1242            case 2:
1243                dataShadow.block[lastShadow + 2] = ch;
1244                dataShadow.block[lastShadow + 3] = ch;
1245                this.last = lastShadow + 2;
1246                break;
1247
1248            case 3: {
1249                final byte[] block = dataShadow.block;
1250                block[lastShadow + 2] = ch;
1251                block[lastShadow + 3] = ch;
1252                block[lastShadow + 4] = ch;
1253                this.last = lastShadow + 3;
1254            }
1255                break;
1256
1257            default: {
1258                runLengthShadow -= 4;
1259                dataShadow.inUse[runLengthShadow] = true;
1260                final byte[] block = dataShadow.block;
1261                block[lastShadow + 2] = ch;
1262                block[lastShadow + 3] = ch;
1263                block[lastShadow + 4] = ch;
1264                block[lastShadow + 5] = ch;
1265                block[lastShadow + 6] = (byte) runLengthShadow;
1266                this.last = lastShadow + 5;
1267            }
1268                break;
1269
1270            }
1271        } else {
1272            endBlock();
1273            initBlock();
1274            writeRun();
1275        }
1276    }
1277
1278}