View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   * http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing,
13   * software distributed under the License is distributed on an
14   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15   * KIND, either express or implied.  See the License for the
16   * specific language governing permissions and limitations
17   * under the License.
18   */
19  package org.apache.commons.compress.compressors.bzip2;
20  
21  import java.io.IOException;
22  import java.io.OutputStream;
23  
24  import org.apache.commons.compress.compressors.CompressorOutputStream;
25  
26  /**
27   * An output stream that compresses into the BZip2 format into another stream.
28   *
29   * <p>
30   * The compression requires large amounts of memory. Thus you should call the
31   * {@link #close() close()} method as soon as possible, to force
32   * {@code BZip2CompressorOutputStream} to release the allocated memory.
33   * </p>
34   *
35   * <p> You can shrink the amount of allocated memory and maybe raise
36   * the compression speed by choosing a lower blocksize, which in turn
37   * may cause a lower compression ratio. You can avoid unnecessary
38   * memory allocation by avoiding using a blocksize which is bigger
39   * than the size of the input.  </p>
40   *
41   * <p> You can compute the memory usage for compressing by the
42   * following formula: </p>
43   *
44   * <pre>
45   * &lt;code&gt;400k + (9 * blocksize)&lt;/code&gt;.
46   * </pre>
47   *
48   * <p> To get the memory required for decompression by {@link
49   * BZip2CompressorInputStream} use </p>
50   *
51   * <pre>
52   * &lt;code&gt;65k + (5 * blocksize)&lt;/code&gt;.
53   * </pre>
54   *
55   * <table width="100%" border="1" summary="Memory usage by blocksize">
56   * <tr>
57   * <th colspan="3">Memory usage by blocksize</th>
58   * </tr>
59   * <tr>
60   * <th align="right">Blocksize</th> <th align="right">Compression<br>
61   * memory usage</th> <th align="right">Decompression<br>
62   * memory usage</th>
63   * </tr>
64   * <tr>
65   * <td align="right">100k</td>
66   * <td align="right">1300k</td>
67   * <td align="right">565k</td>
68   * </tr>
69   * <tr>
70   * <td align="right">200k</td>
71   * <td align="right">2200k</td>
72   * <td align="right">1065k</td>
73   * </tr>
74   * <tr>
75   * <td align="right">300k</td>
76   * <td align="right">3100k</td>
77   * <td align="right">1565k</td>
78   * </tr>
79   * <tr>
80   * <td align="right">400k</td>
81   * <td align="right">4000k</td>
82   * <td align="right">2065k</td>
83   * </tr>
84   * <tr>
85   * <td align="right">500k</td>
86   * <td align="right">4900k</td>
87   * <td align="right">2565k</td>
88   * </tr>
89   * <tr>
90   * <td align="right">600k</td>
91   * <td align="right">5800k</td>
92   * <td align="right">3065k</td>
93   * </tr>
94   * <tr>
95   * <td align="right">700k</td>
96   * <td align="right">6700k</td>
97   * <td align="right">3565k</td>
98   * </tr>
99   * <tr>
100  * <td align="right">800k</td>
101  * <td align="right">7600k</td>
102  * <td align="right">4065k</td>
103  * </tr>
104  * <tr>
105  * <td align="right">900k</td>
106  * <td align="right">8500k</td>
107  * <td align="right">4565k</td>
108  * </tr>
109  * </table>
110  *
111  * <p>
112  * For decompression {@code BZip2CompressorInputStream} allocates less memory if the
113  * bzipped input is smaller than one block.
114  * </p>
115  *
116  * <p>
117  * Instances of this class are not threadsafe.
118  * </p>
119  *
120  * <p>
121  * TODO: Update to BZip2 1.0.1
122  * </p>
123  * @NotThreadSafe
124  */
125 public class BZip2CompressorOutputStream extends CompressorOutputStream
126     implements BZip2Constants {
127 
128     /**
129      * The minimum supported blocksize {@code  == 1}.
130      */
131     public static final int MIN_BLOCKSIZE = 1;
132 
133     /**
134      * The maximum supported blocksize {@code  == 9}.
135      */
136     public static final int MAX_BLOCKSIZE = 9;
137 
138     private static final int GREATER_ICOST = 15;
139     private static final int LESSER_ICOST = 0;
140 
141     private static void hbMakeCodeLengths(final byte[] len, final int[] freq,
142                                           final Data dat, final int alphaSize,
143                                           final int maxLen) {
144         /*
145          * Nodes and heap entries run from 1. Entry 0 for both the heap and
146          * nodes is a sentinel.
147          */
148         final int[] heap = dat.heap;
149         final int[] weight = dat.weight;
150         final int[] parent = dat.parent;
151 
152         for (int i = alphaSize; --i >= 0;) {
153             weight[i + 1] = (freq[i] == 0 ? 1 : freq[i]) << 8;
154         }
155 
156         for (boolean tooLong = true; tooLong;) {
157             tooLong = false;
158 
159             int nNodes = alphaSize;
160             int nHeap = 0;
161             heap[0] = 0;
162             weight[0] = 0;
163             parent[0] = -2;
164 
165             for (int i = 1; i <= alphaSize; i++) {
166                 parent[i] = -1;
167                 nHeap++;
168                 heap[nHeap] = i;
169 
170                 int zz = nHeap;
171                 final int tmp = heap[zz];
172                 while (weight[tmp] < weight[heap[zz >> 1]]) {
173                     heap[zz] = heap[zz >> 1];
174                     zz >>= 1;
175                 }
176                 heap[zz] = tmp;
177             }
178 
179             while (nHeap > 1) {
180                 final int n1 = heap[1];
181                 heap[1] = heap[nHeap];
182                 nHeap--;
183 
184                 int yy = 0;
185                 int zz = 1;
186                 int tmp = heap[1];
187 
188                 while (true) {
189                     yy = zz << 1;
190 
191                     if (yy > nHeap) {
192                         break;
193                     }
194 
195                     if ((yy < nHeap)
196                         && (weight[heap[yy + 1]] < weight[heap[yy]])) {
197                         yy++;
198                     }
199 
200                     if (weight[tmp] < weight[heap[yy]]) {
201                         break;
202                     }
203 
204                     heap[zz] = heap[yy];
205                     zz = yy;
206                 }
207 
208                 heap[zz] = tmp;
209 
210                 final int n2 = heap[1];
211                 heap[1] = heap[nHeap];
212                 nHeap--;
213 
214                 yy = 0;
215                 zz = 1;
216                 tmp = heap[1];
217 
218                 while (true) {
219                     yy = zz << 1;
220 
221                     if (yy > nHeap) {
222                         break;
223                     }
224 
225                     if ((yy < nHeap)
226                         && (weight[heap[yy + 1]] < weight[heap[yy]])) {
227                         yy++;
228                     }
229 
230                     if (weight[tmp] < weight[heap[yy]]) {
231                         break;
232                     }
233 
234                     heap[zz] = heap[yy];
235                     zz = yy;
236                 }
237 
238                 heap[zz] = tmp;
239                 nNodes++;
240                 parent[n1] = parent[n2] = nNodes;
241 
242                 final int weight_n1 = weight[n1];
243                 final int weight_n2 = weight[n2];
244                 weight[nNodes] = ((weight_n1 & 0xffffff00)
245                                   + (weight_n2 & 0xffffff00))
246                     | (1 + (((weight_n1 & 0x000000ff)
247                              > (weight_n2 & 0x000000ff))
248                             ? (weight_n1 & 0x000000ff)
249                             : (weight_n2 & 0x000000ff)));
250 
251                 parent[nNodes] = -1;
252                 nHeap++;
253                 heap[nHeap] = nNodes;
254 
255                 tmp = 0;
256                 zz = nHeap;
257                 tmp = heap[zz];
258                 final int weight_tmp = weight[tmp];
259                 while (weight_tmp < weight[heap[zz >> 1]]) {
260                     heap[zz] = heap[zz >> 1];
261                     zz >>= 1;
262                 }
263                 heap[zz] = tmp;
264 
265             }
266 
267             for (int i = 1; i <= alphaSize; i++) {
268                 int j = 0;
269                 int k = i;
270 
271                 for (int parent_k; (parent_k = parent[k]) >= 0;) {
272                     k = parent_k;
273                     j++;
274                 }
275 
276                 len[i - 1] = (byte) j;
277                 if (j > maxLen) {
278                     tooLong = true;
279                 }
280             }
281 
282             if (tooLong) {
283                 for (int i = 1; i < alphaSize; i++) {
284                     int j = weight[i] >> 8;
285                     j = 1 + (j >> 1);
286                     weight[i] = j << 8;
287                 }
288             }
289         }
290     }
291 
292     /**
293      * Index of the last char in the block, so the block size == last + 1.
294      */
295     private int last;
296 
297     /**
298      * Always: in the range 0 .. 9. The current block size is 100000 * this
299      * number.
300      */
301     private final int blockSize100k;
302 
303     private int bsBuff;
304     private int bsLive;
305     private final CRC crc = new CRC();
306 
307     private int nInUse;
308 
309     private int nMTF;
310 
311     private int currentChar = -1;
312     private int runLength = 0;
313 
314     private int blockCRC;
315     private int combinedCRC;
316     private final int allowableBlockSize;
317 
318     /**
319      * All memory intensive stuff.
320      */
321     private Data data;
322     private BlockSort blockSorter;
323 
324     private OutputStream out;
325     private volatile boolean closed;
326 
327     /**
328      * Chooses a blocksize based on the given length of the data to compress.
329      *
330      * @return The blocksize, between {@link #MIN_BLOCKSIZE} and
331      *         {@link #MAX_BLOCKSIZE} both inclusive. For a negative
332      *         {@code inputLength} this method returns {@code MAX_BLOCKSIZE}
333      *         always.
334      *
335      * @param inputLength
336      *            The length of the data which will be compressed by
337      *            {@code BZip2CompressorOutputStream}.
338      */
339     public static int chooseBlockSize(final long inputLength) {
340         return (inputLength > 0) ? (int) Math
341             .min((inputLength / 132000) + 1, 9) : MAX_BLOCKSIZE;
342     }
343 
344     /**
345      * Constructs a new {@code BZip2CompressorOutputStream} with a blocksize of 900k.
346      *
347      * @param out 
348      *            the destination stream.
349      *
350      * @throws IOException
351      *             if an I/O error occurs in the specified stream.
352      * @throws NullPointerException
353      *             if <code>out == null</code>.
354      */
355     public BZip2CompressorOutputStream(final OutputStream out)
356         throws IOException {
357         this(out, MAX_BLOCKSIZE);
358     }
359 
360     /**
361      * Constructs a new {@code BZip2CompressorOutputStream} with specified blocksize.
362      *
363      * @param out
364      *            the destination stream.
365      * @param blockSize
366      *            the blockSize as 100k units.
367      *
368      * @throws IOException
369      *             if an I/O error occurs in the specified stream.
370      * @throws IllegalArgumentException
371      *             if <code>(blockSize &lt; 1) || (blockSize &gt; 9)</code>.
372      * @throws NullPointerException
373      *             if <code>out == null</code>.
374      *
375      * @see #MIN_BLOCKSIZE
376      * @see #MAX_BLOCKSIZE
377      */
378     public BZip2CompressorOutputStream(final OutputStream out, final int blockSize) throws IOException {
379         if (blockSize < 1) {
380             throw new IllegalArgumentException("blockSize(" + blockSize + ") < 1");
381         }
382         if (blockSize > 9) {
383             throw new IllegalArgumentException("blockSize(" + blockSize + ") > 9");
384         }
385 
386         this.blockSize100k = blockSize;
387         this.out = out;
388 
389         /* 20 is just a paranoia constant */
390         this.allowableBlockSize = (this.blockSize100k * BZip2Constants.BASEBLOCKSIZE) - 20;
391         init();
392     }
393 
394     @Override
395     public void write(final int b) throws IOException {
396         if (!closed) {
397             write0(b);
398         } else {
399             throw new IOException("closed");
400         }
401     }
402 
403     /**
404      * Writes the current byte to the buffer, run-length encoding it
405      * if it has been repeated at least four times (the first step
406      * RLEs sequences of four identical bytes).
407      *
408      * <p>Flushes the current block before writing data if it is
409      * full.</p>
410      *
411      * <p>"write to the buffer" means adding to data.buffer starting
412      * two steps "after" this.last - initially starting at index 1
413      * (not 0) - and updating this.last to point to the last index
414      * written minus 1.</p>
415      */
416     private void writeRun() throws IOException {
417         final int lastShadow = this.last;
418 
419         if (lastShadow < this.allowableBlockSize) {
420             final int currentCharShadow = this.currentChar;
421             final Data dataShadow = this.data;
422             dataShadow.inUse[currentCharShadow] = true;
423             final byte ch = (byte) currentCharShadow;
424 
425             int runLengthShadow = this.runLength;
426             this.crc.updateCRC(currentCharShadow, runLengthShadow);
427 
428             switch (runLengthShadow) {
429             case 1:
430                 dataShadow.block[lastShadow + 2] = ch;
431                 this.last = lastShadow + 1;
432                 break;
433 
434             case 2:
435                 dataShadow.block[lastShadow + 2] = ch;
436                 dataShadow.block[lastShadow + 3] = ch;
437                 this.last = lastShadow + 2;
438                 break;
439 
440             case 3: {
441                 final byte[] block = dataShadow.block;
442                 block[lastShadow + 2] = ch;
443                 block[lastShadow + 3] = ch;
444                 block[lastShadow + 4] = ch;
445                 this.last = lastShadow + 3;
446             }
447                 break;
448 
449             default: {
450                 runLengthShadow -= 4;
451                 dataShadow.inUse[runLengthShadow] = true;
452                 final byte[] block = dataShadow.block;
453                 block[lastShadow + 2] = ch;
454                 block[lastShadow + 3] = ch;
455                 block[lastShadow + 4] = ch;
456                 block[lastShadow + 5] = ch;
457                 block[lastShadow + 6] = (byte) runLengthShadow;
458                 this.last = lastShadow + 5;
459             }
460                 break;
461 
462             }
463         } else {
464             endBlock();
465             initBlock();
466             writeRun();
467         }
468     }
469 
470     /**
471      * Overriden to warn about an unclosed stream.
472      */
473     @Override
474     protected void finalize() throws Throwable {
475         if (!closed) {
476             System.err.println("Unclosed BZip2CompressorOutputStream detected, will *not* close it");
477         }
478         super.finalize();
479     }
480 
481 
482     public void finish() throws IOException {
483         if (!closed) {
484             closed = true;
485             try {
486                 if (this.runLength > 0) {
487                     writeRun();
488                 }
489                 this.currentChar = -1;
490                 endBlock();
491                 endCompression();
492             } finally {
493                 this.out = null;
494                 this.blockSorter = null;
495                 this.data = null;
496             }
497         }
498     }
499 
500     @Override
501     public void close() throws IOException {
502         if (!closed) {
503             final OutputStream outShadow = this.out;
504             finish();
505             outShadow.close();
506         }
507     }
508 
509     @Override
510     public void flush() throws IOException {
511         final OutputStream outShadow = this.out;
512         if (outShadow != null) {
513             outShadow.flush();
514         }
515     }
516 
517     /**
518      * Writes magic bytes like BZ on the first position of the stream
519      * and bytes indiciating the file-format, which is 
520      * huffmanised, followed by a digit indicating blockSize100k.
521      * @throws IOException if the magic bytes could not been written
522      */
523     private void init() throws IOException {
524         bsPutUByte('B');
525         bsPutUByte('Z');
526 
527         this.data = new Data(this.blockSize100k);
528         this.blockSorter = new BlockSort(this.data);
529 
530         // huffmanised magic bytes
531         bsPutUByte('h');
532         bsPutUByte('0' + this.blockSize100k);
533 
534         this.combinedCRC = 0;
535         initBlock();
536     }
537 
538     private void initBlock() {
539         // blockNo++;
540         this.crc.initialiseCRC();
541         this.last = -1;
542         // ch = 0;
543 
544         final boolean[] inUse = this.data.inUse;
545         for (int i = 256; --i >= 0;) {
546             inUse[i] = false;
547         }
548 
549     }
550 
551     private void endBlock() throws IOException {
552         this.blockCRC = this.crc.getFinalCRC();
553         this.combinedCRC = (this.combinedCRC << 1) | (this.combinedCRC >>> 31);
554         this.combinedCRC ^= this.blockCRC;
555 
556         // empty block at end of file
557         if (this.last == -1) {
558             return;
559         }
560 
561         /* sort the block and establish posn of original string */
562         blockSort();
563 
564         /*
565          * A 6-byte block header, the value chosen arbitrarily as 0x314159265359
566          * :-). A 32 bit value does not really give a strong enough guarantee
567          * that the value will not appear by chance in the compressed
568          * datastream. Worst-case probability of this event, for a 900k block,
569          * is about 2.0e-3 for 32 bits, 1.0e-5 for 40 bits and 4.0e-8 for 48
570          * bits. For a compressed file of size 100Gb -- about 100000 blocks --
571          * only a 48-bit marker will do. NB: normal compression/ decompression
572          * donot rely on these statistical properties. They are only important
573          * when trying to recover blocks from damaged files.
574          */
575         bsPutUByte(0x31);
576         bsPutUByte(0x41);
577         bsPutUByte(0x59);
578         bsPutUByte(0x26);
579         bsPutUByte(0x53);
580         bsPutUByte(0x59);
581 
582         /* Now the block's CRC, so it is in a known place. */
583         bsPutInt(this.blockCRC);
584 
585         /* Now a single bit indicating no randomisation. */
586         bsW(1, 0);
587 
588         /* Finally, block's contents proper. */
589         moveToFrontCodeAndSend();
590     }
591 
592     private void endCompression() throws IOException {
593         /*
594          * Now another magic 48-bit number, 0x177245385090, to indicate the end
595          * of the last block. (sqrt(pi), if you want to know. I did want to use
596          * e, but it contains too much repetition -- 27 18 28 18 28 46 -- for me
597          * to feel statistically comfortable. Call me paranoid.)
598          */
599         bsPutUByte(0x17);
600         bsPutUByte(0x72);
601         bsPutUByte(0x45);
602         bsPutUByte(0x38);
603         bsPutUByte(0x50);
604         bsPutUByte(0x90);
605 
606         bsPutInt(this.combinedCRC);
607         bsFinishedWithStream();
608     }
609 
610     /**
611      * Returns the blocksize parameter specified at construction time.
612      * @return the blocksize parameter specified at construction time
613      */
614     public final int getBlockSize() {
615         return this.blockSize100k;
616     }
617 
618     @Override
619     public void write(final byte[] buf, int offs, final int len)
620         throws IOException {
621         if (offs < 0) {
622             throw new IndexOutOfBoundsException("offs(" + offs + ") < 0.");
623         }
624         if (len < 0) {
625             throw new IndexOutOfBoundsException("len(" + len + ") < 0.");
626         }
627         if (offs + len > buf.length) {
628             throw new IndexOutOfBoundsException("offs(" + offs + ") + len("
629                                                 + len + ") > buf.length("
630                                                 + buf.length + ").");
631         }
632         if (closed) {
633             throw new IOException("stream closed");
634         }
635 
636         for (final int hi = offs + len; offs < hi;) {
637             write0(buf[offs++]);
638         }
639     }
640 
641     /**
642      * Keeps track of the last bytes written and implicitly performs
643      * run-length encoding as the first step of the bzip2 algorithm.
644      */
645     private void write0(int b) throws IOException {
646         if (this.currentChar != -1) {
647             b &= 0xff;
648             if (this.currentChar == b) {
649                 if (++this.runLength > 254) {
650                     writeRun();
651                     this.currentChar = -1;
652                     this.runLength = 0;
653                 }
654                 // else nothing to do
655             } else {
656                 writeRun();
657                 this.runLength = 1;
658                 this.currentChar = b;
659             }
660         } else {
661             this.currentChar = b & 0xff;
662             this.runLength++;
663         }
664     }
665 
666     private static void hbAssignCodes(final int[] code, final byte[] length,
667                                       final int minLen, final int maxLen,
668                                       final int alphaSize) {
669         int vec = 0;
670         for (int n = minLen; n <= maxLen; n++) {
671             for (int i = 0; i < alphaSize; i++) {
672                 if ((length[i] & 0xff) == n) {
673                     code[i] = vec;
674                     vec++;
675                 }
676             }
677             vec <<= 1;
678         }
679     }
680 
681     private void bsFinishedWithStream() throws IOException {
682         while (this.bsLive > 0) {
683             final int ch = this.bsBuff >> 24;
684             this.out.write(ch); // write 8-bit
685             this.bsBuff <<= 8;
686             this.bsLive -= 8;
687         }
688     }
689 
690     private void bsW(final int n, final int v) throws IOException {
691         final OutputStream outShadow = this.out;
692         int bsLiveShadow = this.bsLive;
693         int bsBuffShadow = this.bsBuff;
694 
695         while (bsLiveShadow >= 8) {
696             outShadow.write(bsBuffShadow >> 24); // write 8-bit
697             bsBuffShadow <<= 8;
698             bsLiveShadow -= 8;
699         }
700 
701         this.bsBuff = bsBuffShadow | (v << (32 - bsLiveShadow - n));
702         this.bsLive = bsLiveShadow + n;
703     }
704 
705     private void bsPutUByte(final int c) throws IOException {
706         bsW(8, c);
707     }
708 
709     private void bsPutInt(final int u) throws IOException {
710         bsW(8, (u >> 24) & 0xff);
711         bsW(8, (u >> 16) & 0xff);
712         bsW(8, (u >> 8) & 0xff);
713         bsW(8, u & 0xff);
714     }
715 
716     private void sendMTFValues() throws IOException {
717         final byte[][] len = this.data.sendMTFValues_len;
718         final int alphaSize = this.nInUse + 2;
719 
720         for (int t = N_GROUPS; --t >= 0;) {
721             final byte[] len_t = len[t];
722             for (int v = alphaSize; --v >= 0;) {
723                 len_t[v] = GREATER_ICOST;
724             }
725         }
726 
727         /* Decide how many coding tables to use */
728         // assert (this.nMTF > 0) : this.nMTF;
729         final int nGroups = (this.nMTF < 200) ? 2 : (this.nMTF < 600) ? 3
730             : (this.nMTF < 1200) ? 4 : (this.nMTF < 2400) ? 5 : 6;
731 
732         /* Generate an initial set of coding tables */
733         sendMTFValues0(nGroups, alphaSize);
734 
735         /*
736          * Iterate up to N_ITERS times to improve the tables.
737          */
738         final int nSelectors = sendMTFValues1(nGroups, alphaSize);
739 
740         /* Compute MTF values for the selectors. */
741         sendMTFValues2(nGroups, nSelectors);
742 
743         /* Assign actual codes for the tables. */
744         sendMTFValues3(nGroups, alphaSize);
745 
746         /* Transmit the mapping table. */
747         sendMTFValues4();
748 
749         /* Now the selectors. */
750         sendMTFValues5(nGroups, nSelectors);
751 
752         /* Now the coding tables. */
753         sendMTFValues6(nGroups, alphaSize);
754 
755         /* And finally, the block data proper */
756         sendMTFValues7();
757     }
758 
759     private void sendMTFValues0(final int nGroups, final int alphaSize) {
760         final byte[][] len = this.data.sendMTFValues_len;
761         final int[] mtfFreq = this.data.mtfFreq;
762 
763         int remF = this.nMTF;
764         int gs = 0;
765 
766         for (int nPart = nGroups; nPart > 0; nPart--) {
767             final int tFreq = remF / nPart;
768             int ge = gs - 1;
769             int aFreq = 0;
770 
771             for (final int a = alphaSize - 1; (aFreq < tFreq) && (ge < a);) {
772                 aFreq += mtfFreq[++ge];
773             }
774 
775             if ((ge > gs) && (nPart != nGroups) && (nPart != 1)
776                 && (((nGroups - nPart) & 1) != 0)) {
777                 aFreq -= mtfFreq[ge--];
778             }
779 
780             final byte[] len_np = len[nPart - 1];
781             for (int v = alphaSize; --v >= 0;) {
782                 if ((v >= gs) && (v <= ge)) {
783                     len_np[v] = LESSER_ICOST;
784                 } else {
785                     len_np[v] = GREATER_ICOST;
786                 }
787             }
788 
789             gs = ge + 1;
790             remF -= aFreq;
791         }
792     }
793 
794     private int sendMTFValues1(final int nGroups, final int alphaSize) {
795         final Data dataShadow = this.data;
796         final int[][] rfreq = dataShadow.sendMTFValues_rfreq;
797         final int[] fave = dataShadow.sendMTFValues_fave;
798         final short[] cost = dataShadow.sendMTFValues_cost;
799         final char[] sfmap = dataShadow.sfmap;
800         final byte[] selector = dataShadow.selector;
801         final byte[][] len = dataShadow.sendMTFValues_len;
802         final byte[] len_0 = len[0];
803         final byte[] len_1 = len[1];
804         final byte[] len_2 = len[2];
805         final byte[] len_3 = len[3];
806         final byte[] len_4 = len[4];
807         final byte[] len_5 = len[5];
808         final int nMTFShadow = this.nMTF;
809 
810         int nSelectors = 0;
811 
812         for (int iter = 0; iter < N_ITERS; iter++) {
813             for (int t = nGroups; --t >= 0;) {
814                 fave[t] = 0;
815                 final int[] rfreqt = rfreq[t];
816                 for (int i = alphaSize; --i >= 0;) {
817                     rfreqt[i] = 0;
818                 }
819             }
820 
821             nSelectors = 0;
822 
823             for (int gs = 0; gs < this.nMTF;) {
824                 /* Set group start & end marks. */
825 
826                 /*
827                  * Calculate the cost of this group as coded by each of the
828                  * coding tables.
829                  */
830 
831                 final int ge = Math.min(gs + G_SIZE - 1, nMTFShadow - 1);
832 
833                 if (nGroups == N_GROUPS) {
834                     // unrolled version of the else-block
835 
836                     short cost0 = 0;
837                     short cost1 = 0;
838                     short cost2 = 0;
839                     short cost3 = 0;
840                     short cost4 = 0;
841                     short cost5 = 0;
842 
843                     for (int i = gs; i <= ge; i++) {
844                         final int icv = sfmap[i];
845                         cost0 += len_0[icv] & 0xff;
846                         cost1 += len_1[icv] & 0xff;
847                         cost2 += len_2[icv] & 0xff;
848                         cost3 += len_3[icv] & 0xff;
849                         cost4 += len_4[icv] & 0xff;
850                         cost5 += len_5[icv] & 0xff;
851                     }
852 
853                     cost[0] = cost0;
854                     cost[1] = cost1;
855                     cost[2] = cost2;
856                     cost[3] = cost3;
857                     cost[4] = cost4;
858                     cost[5] = cost5;
859 
860                 } else {
861                     for (int t = nGroups; --t >= 0;) {
862                         cost[t] = 0;
863                     }
864 
865                     for (int i = gs; i <= ge; i++) {
866                         final int icv = sfmap[i];
867                         for (int t = nGroups; --t >= 0;) {
868                             cost[t] += len[t][icv] & 0xff;
869                         }
870                     }
871                 }
872 
873                 /*
874                  * Find the coding table which is best for this group, and
875                  * record its identity in the selector table.
876                  */
877                 int bt = -1;
878                 for (int t = nGroups, bc = 999999999; --t >= 0;) {
879                     final int cost_t = cost[t];
880                     if (cost_t < bc) {
881                         bc = cost_t;
882                         bt = t;
883                     }
884                 }
885 
886                 fave[bt]++;
887                 selector[nSelectors] = (byte) bt;
888                 nSelectors++;
889 
890                 /*
891                  * Increment the symbol frequencies for the selected table.
892                  */
893                 final int[] rfreq_bt = rfreq[bt];
894                 for (int i = gs; i <= ge; i++) {
895                     rfreq_bt[sfmap[i]]++;
896                 }
897 
898                 gs = ge + 1;
899             }
900 
901             /*
902              * Recompute the tables based on the accumulated frequencies.
903              */
904             for (int t = 0; t < nGroups; t++) {
905                 hbMakeCodeLengths(len[t], rfreq[t], this.data, alphaSize, 20);
906             }
907         }
908 
909         return nSelectors;
910     }
911 
912     private void sendMTFValues2(final int nGroups, final int nSelectors) {
913         // assert (nGroups < 8) : nGroups;
914 
915         final Data dataShadow = this.data;
916         final byte[] pos = dataShadow.sendMTFValues2_pos;
917 
918         for (int i = nGroups; --i >= 0;) {
919             pos[i] = (byte) i;
920         }
921 
922         for (int i = 0; i < nSelectors; i++) {
923             final byte ll_i = dataShadow.selector[i];
924             byte tmp = pos[0];
925             int j = 0;
926 
927             while (ll_i != tmp) {
928                 j++;
929                 final byte tmp2 = tmp;
930                 tmp = pos[j];
931                 pos[j] = tmp2;
932             }
933 
934             pos[0] = tmp;
935             dataShadow.selectorMtf[i] = (byte) j;
936         }
937     }
938 
939     private void sendMTFValues3(final int nGroups, final int alphaSize) {
940         final int[][] code = this.data.sendMTFValues_code;
941         final byte[][] len = this.data.sendMTFValues_len;
942 
943         for (int t = 0; t < nGroups; t++) {
944             int minLen = 32;
945             int maxLen = 0;
946             final byte[] len_t = len[t];
947             for (int i = alphaSize; --i >= 0;) {
948                 final int l = len_t[i] & 0xff;
949                 if (l > maxLen) {
950                     maxLen = l;
951                 }
952                 if (l < minLen) {
953                     minLen = l;
954                 }
955             }
956 
957             // assert (maxLen <= 20) : maxLen;
958             // assert (minLen >= 1) : minLen;
959 
960             hbAssignCodes(code[t], len[t], minLen, maxLen, alphaSize);
961         }
962     }
963 
964     private void sendMTFValues4() throws IOException {
965         final boolean[] inUse = this.data.inUse;
966         final boolean[] inUse16 = this.data.sentMTFValues4_inUse16;
967 
968         for (int i = 16; --i >= 0;) {
969             inUse16[i] = false;
970             final int i16 = i * 16;
971             for (int j = 16; --j >= 0;) {
972                 if (inUse[i16 + j]) {
973                     inUse16[i] = true;
974                 }
975             }
976         }
977 
978         for (int i = 0; i < 16; i++) {
979             bsW(1, inUse16[i] ? 1 : 0);
980         }
981 
982         final OutputStream outShadow = this.out;
983         int bsLiveShadow = this.bsLive;
984         int bsBuffShadow = this.bsBuff;
985 
986         for (int i = 0; i < 16; i++) {
987             if (inUse16[i]) {
988                 final int i16 = i * 16;
989                 for (int j = 0; j < 16; j++) {
990                     // inlined: bsW(1, inUse[i16 + j] ? 1 : 0);
991                     while (bsLiveShadow >= 8) {
992                         outShadow.write(bsBuffShadow >> 24); // write 8-bit
993                         bsBuffShadow <<= 8;
994                         bsLiveShadow -= 8;
995                     }
996                     if (inUse[i16 + j]) {
997                         bsBuffShadow |= 1 << (32 - bsLiveShadow - 1);
998                     }
999                     bsLiveShadow++;
1000                 }
1001             }
1002         }
1003 
1004         this.bsBuff = bsBuffShadow;
1005         this.bsLive = bsLiveShadow;
1006     }
1007 
1008     private void sendMTFValues5(final int nGroups, final int nSelectors)
1009         throws IOException {
1010         bsW(3, nGroups);
1011         bsW(15, nSelectors);
1012 
1013         final OutputStream outShadow = this.out;
1014         final byte[] selectorMtf = this.data.selectorMtf;
1015 
1016         int bsLiveShadow = this.bsLive;
1017         int bsBuffShadow = this.bsBuff;
1018 
1019         for (int i = 0; i < nSelectors; i++) {
1020             for (int j = 0, hj = selectorMtf[i] & 0xff; j < hj; j++) {
1021                 // inlined: bsW(1, 1);
1022                 while (bsLiveShadow >= 8) {
1023                     outShadow.write(bsBuffShadow >> 24);
1024                     bsBuffShadow <<= 8;
1025                     bsLiveShadow -= 8;
1026                 }
1027                 bsBuffShadow |= 1 << (32 - bsLiveShadow - 1);
1028                 bsLiveShadow++;
1029             }
1030 
1031             // inlined: bsW(1, 0);
1032             while (bsLiveShadow >= 8) {
1033                 outShadow.write(bsBuffShadow >> 24);
1034                 bsBuffShadow <<= 8;
1035                 bsLiveShadow -= 8;
1036             }
1037             // bsBuffShadow |= 0 << (32 - bsLiveShadow - 1);
1038             bsLiveShadow++;
1039         }
1040 
1041         this.bsBuff = bsBuffShadow;
1042         this.bsLive = bsLiveShadow;
1043     }
1044 
1045     private void sendMTFValues6(final int nGroups, final int alphaSize)
1046         throws IOException {
1047         final byte[][] len = this.data.sendMTFValues_len;
1048         final OutputStream outShadow = this.out;
1049 
1050         int bsLiveShadow = this.bsLive;
1051         int bsBuffShadow = this.bsBuff;
1052 
1053         for (int t = 0; t < nGroups; t++) {
1054             final byte[] len_t = len[t];
1055             int curr = len_t[0] & 0xff;
1056 
1057             // inlined: bsW(5, curr);
1058             while (bsLiveShadow >= 8) {
1059                 outShadow.write(bsBuffShadow >> 24); // write 8-bit
1060                 bsBuffShadow <<= 8;
1061                 bsLiveShadow -= 8;
1062             }
1063             bsBuffShadow |= curr << (32 - bsLiveShadow - 5);
1064             bsLiveShadow += 5;
1065 
1066             for (int i = 0; i < alphaSize; i++) {
1067                 final int lti = len_t[i] & 0xff;
1068                 while (curr < lti) {
1069                     // inlined: bsW(2, 2);
1070                     while (bsLiveShadow >= 8) {
1071                         outShadow.write(bsBuffShadow >> 24); // write 8-bit
1072                         bsBuffShadow <<= 8;
1073                         bsLiveShadow -= 8;
1074                     }
1075                     bsBuffShadow |= 2 << (32 - bsLiveShadow - 2);
1076                     bsLiveShadow += 2;
1077 
1078                     curr++; /* 10 */
1079                 }
1080 
1081                 while (curr > lti) {
1082                     // inlined: bsW(2, 3);
1083                     while (bsLiveShadow >= 8) {
1084                         outShadow.write(bsBuffShadow >> 24); // write 8-bit
1085                         bsBuffShadow <<= 8;
1086                         bsLiveShadow -= 8;
1087                     }
1088                     bsBuffShadow |= 3 << (32 - bsLiveShadow - 2);
1089                     bsLiveShadow += 2;
1090 
1091                     curr--; /* 11 */
1092                 }
1093 
1094                 // inlined: bsW(1, 0);
1095                 while (bsLiveShadow >= 8) {
1096                     outShadow.write(bsBuffShadow >> 24); // write 8-bit
1097                     bsBuffShadow <<= 8;
1098                     bsLiveShadow -= 8;
1099                 }
1100                 // bsBuffShadow |= 0 << (32 - bsLiveShadow - 1);
1101                 bsLiveShadow++;
1102             }
1103         }
1104 
1105         this.bsBuff = bsBuffShadow;
1106         this.bsLive = bsLiveShadow;
1107     }
1108 
1109     private void sendMTFValues7() throws IOException {
1110         final Data dataShadow = this.data;
1111         final byte[][] len = dataShadow.sendMTFValues_len;
1112         final int[][] code = dataShadow.sendMTFValues_code;
1113         final OutputStream outShadow = this.out;
1114         final byte[] selector = dataShadow.selector;
1115         final char[] sfmap = dataShadow.sfmap;
1116         final int nMTFShadow = this.nMTF;
1117 
1118         int selCtr = 0;
1119 
1120         int bsLiveShadow = this.bsLive;
1121         int bsBuffShadow = this.bsBuff;
1122 
1123         for (int gs = 0; gs < nMTFShadow;) {
1124             final int ge = Math.min(gs + G_SIZE - 1, nMTFShadow - 1);
1125             final int selector_selCtr = selector[selCtr] & 0xff;
1126             final int[] code_selCtr = code[selector_selCtr];
1127             final byte[] len_selCtr = len[selector_selCtr];
1128 
1129             while (gs <= ge) {
1130                 final int sfmap_i = sfmap[gs];
1131 
1132                 //
1133                 // inlined: bsW(len_selCtr[sfmap_i] & 0xff,
1134                 // code_selCtr[sfmap_i]);
1135                 //
1136                 while (bsLiveShadow >= 8) {
1137                     outShadow.write(bsBuffShadow >> 24);
1138                     bsBuffShadow <<= 8;
1139                     bsLiveShadow -= 8;
1140                 }
1141                 final int n = len_selCtr[sfmap_i] & 0xFF;
1142                 bsBuffShadow |= code_selCtr[sfmap_i] << (32 - bsLiveShadow - n);
1143                 bsLiveShadow += n;
1144 
1145                 gs++;
1146             }
1147 
1148             gs = ge + 1;
1149             selCtr++;
1150         }
1151 
1152         this.bsBuff = bsBuffShadow;
1153         this.bsLive = bsLiveShadow;
1154     }
1155 
1156     private void moveToFrontCodeAndSend() throws IOException {
1157         bsW(24, this.data.origPtr);
1158         generateMTFValues();
1159         sendMTFValues();
1160     }
1161 
1162     private void blockSort() {
1163         blockSorter.blockSort(data, last);
1164     }
1165 
1166     /*
1167      * Performs Move-To-Front on the Burrows-Wheeler transformed
1168      * buffer, storing the MTFed data in data.sfmap in RUNA/RUNB
1169      * run-length-encoded form.
1170      *
1171      * <p>Keeps track of byte frequencies in data.mtfFreq at the same time.</p>
1172      */
1173     private void generateMTFValues() {
1174         final int lastShadow = this.last;
1175         final Data dataShadow = this.data;
1176         final boolean[] inUse = dataShadow.inUse;
1177         final byte[] block = dataShadow.block;
1178         final int[] fmap = dataShadow.fmap;
1179         final char[] sfmap = dataShadow.sfmap;
1180         final int[] mtfFreq = dataShadow.mtfFreq;
1181         final byte[] unseqToSeq = dataShadow.unseqToSeq;
1182         final byte[] yy = dataShadow.generateMTFValues_yy;
1183 
1184         // make maps
1185         int nInUseShadow = 0;
1186         for (int i = 0; i < 256; i++) {
1187             if (inUse[i]) {
1188                 unseqToSeq[i] = (byte) nInUseShadow;
1189                 nInUseShadow++;
1190             }
1191         }
1192         this.nInUse = nInUseShadow;
1193 
1194         final int eob = nInUseShadow + 1;
1195 
1196         for (int i = eob; i >= 0; i--) {
1197             mtfFreq[i] = 0;
1198         }
1199 
1200         for (int i = nInUseShadow; --i >= 0;) {
1201             yy[i] = (byte) i;
1202         }
1203 
1204         int wr = 0;
1205         int zPend = 0;
1206 
1207         for (int i = 0; i <= lastShadow; i++) {
1208             final byte ll_i = unseqToSeq[block[fmap[i]] & 0xff];
1209             byte tmp = yy[0];
1210             int j = 0;
1211 
1212             while (ll_i != tmp) {
1213                 j++;
1214                 final byte tmp2 = tmp;
1215                 tmp = yy[j];
1216                 yy[j] = tmp2;
1217             }
1218             yy[0] = tmp;
1219 
1220             if (j == 0) {
1221                 zPend++;
1222             } else {
1223                 if (zPend > 0) {
1224                     zPend--;
1225                     while (true) {
1226                         if ((zPend & 1) == 0) {
1227                             sfmap[wr] = RUNA;
1228                             wr++;
1229                             mtfFreq[RUNA]++;
1230                         } else {
1231                             sfmap[wr] = RUNB;
1232                             wr++;
1233                             mtfFreq[RUNB]++;
1234                         }
1235 
1236                         if (zPend >= 2) {
1237                             zPend = (zPend - 2) >> 1;
1238                         } else {
1239                             break;
1240                         }
1241                     }
1242                     zPend = 0;
1243                 }
1244                 sfmap[wr] = (char) (j + 1);
1245                 wr++;
1246                 mtfFreq[j + 1]++;
1247             }
1248         }
1249 
1250         if (zPend > 0) {
1251             zPend--;
1252             while (true) {
1253                 if ((zPend & 1) == 0) {
1254                     sfmap[wr] = RUNA;
1255                     wr++;
1256                     mtfFreq[RUNA]++;
1257                 } else {
1258                     sfmap[wr] = RUNB;
1259                     wr++;
1260                     mtfFreq[RUNB]++;
1261                 }
1262 
1263                 if (zPend >= 2) {
1264                     zPend = (zPend - 2) >> 1;
1265                 } else {
1266                     break;
1267                 }
1268             }
1269         }
1270 
1271         sfmap[wr] = (char) eob;
1272         mtfFreq[eob]++;
1273         this.nMTF = wr + 1;
1274     }
1275 
1276     static final class Data {
1277 
1278         // with blockSize 900k
1279         /* maps unsigned byte => "does it occur in block" */
1280         final boolean[] inUse = new boolean[256]; // 256 byte
1281         final byte[] unseqToSeq = new byte[256]; // 256 byte
1282         final int[] mtfFreq = new int[MAX_ALPHA_SIZE]; // 1032 byte
1283         final byte[] selector = new byte[MAX_SELECTORS]; // 18002 byte
1284         final byte[] selectorMtf = new byte[MAX_SELECTORS]; // 18002 byte
1285 
1286         final byte[] generateMTFValues_yy = new byte[256]; // 256 byte
1287         final byte[][] sendMTFValues_len = new byte[N_GROUPS][MAX_ALPHA_SIZE]; // 1548
1288         // byte
1289         final int[][] sendMTFValues_rfreq = new int[N_GROUPS][MAX_ALPHA_SIZE]; // 6192
1290         // byte
1291         final int[] sendMTFValues_fave = new int[N_GROUPS]; // 24 byte
1292         final short[] sendMTFValues_cost = new short[N_GROUPS]; // 12 byte
1293         final int[][] sendMTFValues_code = new int[N_GROUPS][MAX_ALPHA_SIZE]; // 6192
1294         // byte
1295         final byte[] sendMTFValues2_pos = new byte[N_GROUPS]; // 6 byte
1296         final boolean[] sentMTFValues4_inUse16 = new boolean[16]; // 16 byte
1297 
1298         final int[] heap = new int[MAX_ALPHA_SIZE + 2]; // 1040 byte
1299         final int[] weight = new int[MAX_ALPHA_SIZE * 2]; // 2064 byte
1300         final int[] parent = new int[MAX_ALPHA_SIZE * 2]; // 2064 byte
1301 
1302         // ------------
1303         // 333408 byte
1304 
1305         /* holds the RLEd block of original data starting at index 1.
1306          * After sorting the last byte added to the buffer is at index
1307          * 0. */
1308         final byte[] block; // 900021 byte
1309         /* maps index in Burrows-Wheeler transformed block => index of
1310          * byte in original block */
1311         final int[] fmap; // 3600000 byte
1312         final char[] sfmap; // 3600000 byte
1313         // ------------
1314         // 8433529 byte
1315         // ============
1316 
1317         /**
1318          * Index of original line in Burrows-Wheeler table.
1319          *
1320          * <p>This is the index in fmap that points to the last byte
1321          * of the original data.</p>
1322          */
1323         int origPtr;
1324 
1325         Data(final int blockSize100k) {
1326             final int n = blockSize100k * BZip2Constants.BASEBLOCKSIZE;
1327             this.block = new byte[(n + 1 + NUM_OVERSHOOT_BYTES)];
1328             this.fmap = new int[n];
1329             this.sfmap = new char[2 * n];
1330         }
1331 
1332     }
1333 
1334 }