View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   * http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing,
13   * software distributed under the License is distributed on an
14   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15   * KIND, either express or implied.  See the License for the
16   * specific language governing permissions and limitations
17   * under the License.
18   */
19  package org.apache.commons.compress.compressors.bzip2;
20  
21  import java.io.IOException;
22  import java.io.OutputStream;
23  
24  import org.apache.commons.compress.compressors.CompressorOutputStream;
25  
26  /**
27   * An output stream that compresses into the BZip2 format into another stream.
28   *
29   * <p>
30   * The compression requires large amounts of memory. Thus you should call the
31   * {@link #close() close()} method as soon as possible, to force
32   * <tt>BZip2CompressorOutputStream</tt> to release the allocated memory.
33   * </p>
34   *
35   * <p> You can shrink the amount of allocated memory and maybe raise
36   * the compression speed by choosing a lower blocksize, which in turn
37   * may cause a lower compression ratio. You can avoid unnecessary
38   * memory allocation by avoiding using a blocksize which is bigger
39   * than the size of the input.  </p>
40   *
41   * <p> You can compute the memory usage for compressing by the
42   * following formula: </p>
43   *
44   * <pre>
45   * &lt;code&gt;400k + (9 * blocksize)&lt;/code&gt;.
46   * </pre>
47   *
48   * <p> To get the memory required for decompression by {@link
49   * BZip2CompressorInputStream} use </p>
50   *
51   * <pre>
52   * &lt;code&gt;65k + (5 * blocksize)&lt;/code&gt;.
53   * </pre>
54   *
55   * <table width="100%" border="1" summary="Memory usage by blocksize">
56   * <tr>
57   * <th colspan="3">Memory usage by blocksize</th>
58   * </tr>
59   * <tr>
60   * <th align="right">Blocksize</th> <th align="right">Compression<br>
61   * memory usage</th> <th align="right">Decompression<br>
62   * memory usage</th>
63   * </tr>
64   * <tr>
65   * <td align="right">100k</td>
66   * <td align="right">1300k</td>
67   * <td align="right">565k</td>
68   * </tr>
69   * <tr>
70   * <td align="right">200k</td>
71   * <td align="right">2200k</td>
72   * <td align="right">1065k</td>
73   * </tr>
74   * <tr>
75   * <td align="right">300k</td>
76   * <td align="right">3100k</td>
77   * <td align="right">1565k</td>
78   * </tr>
79   * <tr>
80   * <td align="right">400k</td>
81   * <td align="right">4000k</td>
82   * <td align="right">2065k</td>
83   * </tr>
84   * <tr>
85   * <td align="right">500k</td>
86   * <td align="right">4900k</td>
87   * <td align="right">2565k</td>
88   * </tr>
89   * <tr>
90   * <td align="right">600k</td>
91   * <td align="right">5800k</td>
92   * <td align="right">3065k</td>
93   * </tr>
94   * <tr>
95   * <td align="right">700k</td>
96   * <td align="right">6700k</td>
97   * <td align="right">3565k</td>
98   * </tr>
99   * <tr>
100  * <td align="right">800k</td>
101  * <td align="right">7600k</td>
102  * <td align="right">4065k</td>
103  * </tr>
104  * <tr>
105  * <td align="right">900k</td>
106  * <td align="right">8500k</td>
107  * <td align="right">4565k</td>
108  * </tr>
109  * </table>
110  *
111  * <p>
112  * For decompression <tt>BZip2CompressorInputStream</tt> allocates less memory if the
113  * bzipped input is smaller than one block.
114  * </p>
115  *
116  * <p>
117  * Instances of this class are not threadsafe.
118  * </p>
119  *
120  * <p>
121  * TODO: Update to BZip2 1.0.1
122  * </p>
123  * @NotThreadSafe
124  */
125 public class BZip2CompressorOutputStream extends CompressorOutputStream
126     implements BZip2Constants {
127 
128     /**
129      * The minimum supported blocksize <tt> == 1</tt>.
130      */
131     public static final int MIN_BLOCKSIZE = 1;
132 
133     /**
134      * The maximum supported blocksize <tt> == 9</tt>.
135      */
136     public static final int MAX_BLOCKSIZE = 9;
137 
138     private static final int GREATER_ICOST = 15;
139     private static final int LESSER_ICOST = 0;
140 
141     private static void hbMakeCodeLengths(final byte[] len, final int[] freq,
142                                           final Data dat, final int alphaSize,
143                                           final int maxLen) {
144         /*
145          * Nodes and heap entries run from 1. Entry 0 for both the heap and
146          * nodes is a sentinel.
147          */
148         final int[] heap = dat.heap;
149         final int[] weight = dat.weight;
150         final int[] parent = dat.parent;
151 
152         for (int i = alphaSize; --i >= 0;) {
153             weight[i + 1] = (freq[i] == 0 ? 1 : freq[i]) << 8;
154         }
155 
156         for (boolean tooLong = true; tooLong;) {
157             tooLong = false;
158 
159             int nNodes = alphaSize;
160             int nHeap = 0;
161             heap[0] = 0;
162             weight[0] = 0;
163             parent[0] = -2;
164 
165             for (int i = 1; i <= alphaSize; i++) {
166                 parent[i] = -1;
167                 nHeap++;
168                 heap[nHeap] = i;
169 
170                 int zz = nHeap;
171                 int tmp = heap[zz];
172                 while (weight[tmp] < weight[heap[zz >> 1]]) {
173                     heap[zz] = heap[zz >> 1];
174                     zz >>= 1;
175                 }
176                 heap[zz] = tmp;
177             }
178 
179             while (nHeap > 1) {
180                 int n1 = heap[1];
181                 heap[1] = heap[nHeap];
182                 nHeap--;
183 
184                 int yy = 0;
185                 int zz = 1;
186                 int tmp = heap[1];
187 
188                 while (true) {
189                     yy = zz << 1;
190 
191                     if (yy > nHeap) {
192                         break;
193                     }
194 
195                     if ((yy < nHeap)
196                         && (weight[heap[yy + 1]] < weight[heap[yy]])) {
197                         yy++;
198                     }
199 
200                     if (weight[tmp] < weight[heap[yy]]) {
201                         break;
202                     }
203 
204                     heap[zz] = heap[yy];
205                     zz = yy;
206                 }
207 
208                 heap[zz] = tmp;
209 
210                 int n2 = heap[1];
211                 heap[1] = heap[nHeap];
212                 nHeap--;
213 
214                 yy = 0;
215                 zz = 1;
216                 tmp = heap[1];
217 
218                 while (true) {
219                     yy = zz << 1;
220 
221                     if (yy > nHeap) {
222                         break;
223                     }
224 
225                     if ((yy < nHeap)
226                         && (weight[heap[yy + 1]] < weight[heap[yy]])) {
227                         yy++;
228                     }
229 
230                     if (weight[tmp] < weight[heap[yy]]) {
231                         break;
232                     }
233 
234                     heap[zz] = heap[yy];
235                     zz = yy;
236                 }
237 
238                 heap[zz] = tmp;
239                 nNodes++;
240                 parent[n1] = parent[n2] = nNodes;
241 
242                 final int weight_n1 = weight[n1];
243                 final int weight_n2 = weight[n2];
244                 weight[nNodes] = ((weight_n1 & 0xffffff00)
245                                   + (weight_n2 & 0xffffff00))
246                     | (1 + (((weight_n1 & 0x000000ff)
247                              > (weight_n2 & 0x000000ff))
248                             ? (weight_n1 & 0x000000ff)
249                             : (weight_n2 & 0x000000ff)));
250 
251                 parent[nNodes] = -1;
252                 nHeap++;
253                 heap[nHeap] = nNodes;
254 
255                 tmp = 0;
256                 zz = nHeap;
257                 tmp = heap[zz];
258                 final int weight_tmp = weight[tmp];
259                 while (weight_tmp < weight[heap[zz >> 1]]) {
260                     heap[zz] = heap[zz >> 1];
261                     zz >>= 1;
262                 }
263                 heap[zz] = tmp;
264 
265             }
266 
267             for (int i = 1; i <= alphaSize; i++) {
268                 int j = 0;
269                 int k = i;
270 
271                 for (int parent_k; (parent_k = parent[k]) >= 0;) {
272                     k = parent_k;
273                     j++;
274                 }
275 
276                 len[i - 1] = (byte) j;
277                 if (j > maxLen) {
278                     tooLong = true;
279                 }
280             }
281 
282             if (tooLong) {
283                 for (int i = 1; i < alphaSize; i++) {
284                     int j = weight[i] >> 8;
285                     j = 1 + (j >> 1);
286                     weight[i] = j << 8;
287                 }
288             }
289         }
290     }
291 
292     /**
293      * Index of the last char in the block, so the block size == last + 1.
294      */
295     private int last;
296 
297     /**
298      * Always: in the range 0 .. 9. The current block size is 100000 * this
299      * number.
300      */
301     private final int blockSize100k;
302 
303     private int bsBuff;
304     private int bsLive;
305     private final CRC crc = new CRC();
306 
307     private int nInUse;
308 
309     private int nMTF;
310 
311     private int currentChar = -1;
312     private int runLength = 0;
313 
314     private int blockCRC;
315     private int combinedCRC;
316     private final int allowableBlockSize;
317 
318     /**
319      * All memory intensive stuff.
320      */
321     private Data data;
322     private BlockSort blockSorter;
323 
324     private OutputStream out;
325 
326     /**
327      * Chooses a blocksize based on the given length of the data to compress.
328      *
329      * @return The blocksize, between {@link #MIN_BLOCKSIZE} and
330      *         {@link #MAX_BLOCKSIZE} both inclusive. For a negative
331      *         <tt>inputLength</tt> this method returns <tt>MAX_BLOCKSIZE</tt>
332      *         always.
333      *
334      * @param inputLength
335      *            The length of the data which will be compressed by
336      *            <tt>BZip2CompressorOutputStream</tt>.
337      */
338     public static int chooseBlockSize(long inputLength) {
339         return (inputLength > 0) ? (int) Math
340             .min((inputLength / 132000) + 1, 9) : MAX_BLOCKSIZE;
341     }
342 
343     /**
344      * Constructs a new <tt>BZip2CompressorOutputStream</tt> with a blocksize of 900k.
345      *
346      * @param out 
347      *            the destination stream.
348      *
349      * @throws IOException
350      *             if an I/O error occurs in the specified stream.
351      * @throws NullPointerException
352      *             if <code>out == null</code>.
353      */
354     public BZip2CompressorOutputStream(final OutputStream out)
355         throws IOException {
356         this(out, MAX_BLOCKSIZE);
357     }
358 
359     /**
360      * Constructs a new <tt>BZip2CompressorOutputStream</tt> with specified blocksize.
361      *
362      * @param out
363      *            the destination stream.
364      * @param blockSize
365      *            the blockSize as 100k units.
366      *
367      * @throws IOException
368      *             if an I/O error occurs in the specified stream.
369      * @throws IllegalArgumentException
370      *             if <code>(blockSize &lt; 1) || (blockSize &gt; 9)</code>.
371      * @throws NullPointerException
372      *             if <code>out == null</code>.
373      *
374      * @see #MIN_BLOCKSIZE
375      * @see #MAX_BLOCKSIZE
376      */
377     public BZip2CompressorOutputStream(final OutputStream out, final int blockSize) throws IOException {
378         if (blockSize < 1) {
379             throw new IllegalArgumentException("blockSize(" + blockSize + ") < 1");
380         }
381         if (blockSize > 9) {
382             throw new IllegalArgumentException("blockSize(" + blockSize + ") > 9");
383         }
384 
385         this.blockSize100k = blockSize;
386         this.out = out;
387 
388         /* 20 is just a paranoia constant */
389         this.allowableBlockSize = (this.blockSize100k * BZip2Constants.BASEBLOCKSIZE) - 20;
390         init();
391     }
392 
393     @Override
394     public void write(final int b) throws IOException {
395         if (this.out != null) {
396             write0(b);
397         } else {
398             throw new IOException("closed");
399         }
400     }
401 
402     /**
403      * Writes the current byte to the buffer, run-length encoding it
404      * if it has been repeated at least four times (the first step
405      * RLEs sequences of four identical bytes).
406      *
407      * <p>Flushes the current block before writing data if it is
408      * full.</p>
409      *
410      * <p>"write to the buffer" means adding to data.buffer starting
411      * two steps "after" this.last - initially starting at index 1
412      * (not 0) - and updating this.last to point to the last index
413      * written minus 1.</p>
414      */
415     private void writeRun() throws IOException {
416         final int lastShadow = this.last;
417 
418         if (lastShadow < this.allowableBlockSize) {
419             final int currentCharShadow = this.currentChar;
420             final Data dataShadow = this.data;
421             dataShadow.inUse[currentCharShadow] = true;
422             final byte ch = (byte) currentCharShadow;
423 
424             int runLengthShadow = this.runLength;
425             this.crc.updateCRC(currentCharShadow, runLengthShadow);
426 
427             switch (runLengthShadow) {
428             case 1:
429                 dataShadow.block[lastShadow + 2] = ch;
430                 this.last = lastShadow + 1;
431                 break;
432 
433             case 2:
434                 dataShadow.block[lastShadow + 2] = ch;
435                 dataShadow.block[lastShadow + 3] = ch;
436                 this.last = lastShadow + 2;
437                 break;
438 
439             case 3: {
440                 final byte[] block = dataShadow.block;
441                 block[lastShadow + 2] = ch;
442                 block[lastShadow + 3] = ch;
443                 block[lastShadow + 4] = ch;
444                 this.last = lastShadow + 3;
445             }
446                 break;
447 
448             default: {
449                 runLengthShadow -= 4;
450                 dataShadow.inUse[runLengthShadow] = true;
451                 final byte[] block = dataShadow.block;
452                 block[lastShadow + 2] = ch;
453                 block[lastShadow + 3] = ch;
454                 block[lastShadow + 4] = ch;
455                 block[lastShadow + 5] = ch;
456                 block[lastShadow + 6] = (byte) runLengthShadow;
457                 this.last = lastShadow + 5;
458             }
459                 break;
460 
461             }
462         } else {
463             endBlock();
464             initBlock();
465             writeRun();
466         }
467     }
468 
469     /**
470      * Overriden to close the stream.
471      */
472     @Override
473     protected void finalize() throws Throwable {
474         finish();
475         super.finalize();
476     }
477 
478 
479     public void finish() throws IOException {
480         if (out != null) {
481             try {
482                 if (this.runLength > 0) {
483                     writeRun();
484                 }
485                 this.currentChar = -1;
486                 endBlock();
487                 endCompression();
488             } finally {
489                 this.out = null;
490                 this.data = null;
491                 this.blockSorter = null;
492             }
493         }
494     }
495 
496     @Override
497     public void close() throws IOException {
498         if (out != null) {
499             OutputStream outShadow = this.out;
500             finish();
501             outShadow.close();
502         }
503     }
504 
505     @Override
506     public void flush() throws IOException {
507         OutputStream outShadow = this.out;
508         if (outShadow != null) {
509             outShadow.flush();
510         }
511     }
512 
513     /**
514      * Writes magic bytes like BZ on the first position of the stream
515      * and bytes indiciating the file-format, which is 
516      * huffmanised, followed by a digit indicating blockSize100k.
517      * @throws IOException if the magic bytes could not been written
518      */
519     private void init() throws IOException {
520         bsPutUByte('B');
521         bsPutUByte('Z');
522 
523         this.data = new Data(this.blockSize100k);
524         this.blockSorter = new BlockSort(this.data);
525 
526         // huffmanised magic bytes
527         bsPutUByte('h');
528         bsPutUByte('0' + this.blockSize100k);
529 
530         this.combinedCRC = 0;
531         initBlock();
532     }
533 
534     private void initBlock() {
535         // blockNo++;
536         this.crc.initialiseCRC();
537         this.last = -1;
538         // ch = 0;
539 
540         boolean[] inUse = this.data.inUse;
541         for (int i = 256; --i >= 0;) {
542             inUse[i] = false;
543         }
544 
545     }
546 
547     private void endBlock() throws IOException {
548         this.blockCRC = this.crc.getFinalCRC();
549         this.combinedCRC = (this.combinedCRC << 1) | (this.combinedCRC >>> 31);
550         this.combinedCRC ^= this.blockCRC;
551 
552         // empty block at end of file
553         if (this.last == -1) {
554             return;
555         }
556 
557         /* sort the block and establish posn of original string */
558         blockSort();
559 
560         /*
561          * A 6-byte block header, the value chosen arbitrarily as 0x314159265359
562          * :-). A 32 bit value does not really give a strong enough guarantee
563          * that the value will not appear by chance in the compressed
564          * datastream. Worst-case probability of this event, for a 900k block,
565          * is about 2.0e-3 for 32 bits, 1.0e-5 for 40 bits and 4.0e-8 for 48
566          * bits. For a compressed file of size 100Gb -- about 100000 blocks --
567          * only a 48-bit marker will do. NB: normal compression/ decompression
568          * donot rely on these statistical properties. They are only important
569          * when trying to recover blocks from damaged files.
570          */
571         bsPutUByte(0x31);
572         bsPutUByte(0x41);
573         bsPutUByte(0x59);
574         bsPutUByte(0x26);
575         bsPutUByte(0x53);
576         bsPutUByte(0x59);
577 
578         /* Now the block's CRC, so it is in a known place. */
579         bsPutInt(this.blockCRC);
580 
581         /* Now a single bit indicating no randomisation. */
582         bsW(1, 0);
583 
584         /* Finally, block's contents proper. */
585         moveToFrontCodeAndSend();
586     }
587 
588     private void endCompression() throws IOException {
589         /*
590          * Now another magic 48-bit number, 0x177245385090, to indicate the end
591          * of the last block. (sqrt(pi), if you want to know. I did want to use
592          * e, but it contains too much repetition -- 27 18 28 18 28 46 -- for me
593          * to feel statistically comfortable. Call me paranoid.)
594          */
595         bsPutUByte(0x17);
596         bsPutUByte(0x72);
597         bsPutUByte(0x45);
598         bsPutUByte(0x38);
599         bsPutUByte(0x50);
600         bsPutUByte(0x90);
601 
602         bsPutInt(this.combinedCRC);
603         bsFinishedWithStream();
604     }
605 
606     /**
607      * Returns the blocksize parameter specified at construction time.
608      */
609     public final int getBlockSize() {
610         return this.blockSize100k;
611     }
612 
613     @Override
614     public void write(final byte[] buf, int offs, final int len)
615         throws IOException {
616         if (offs < 0) {
617             throw new IndexOutOfBoundsException("offs(" + offs + ") < 0.");
618         }
619         if (len < 0) {
620             throw new IndexOutOfBoundsException("len(" + len + ") < 0.");
621         }
622         if (offs + len > buf.length) {
623             throw new IndexOutOfBoundsException("offs(" + offs + ") + len("
624                                                 + len + ") > buf.length("
625                                                 + buf.length + ").");
626         }
627         if (this.out == null) {
628             throw new IOException("stream closed");
629         }
630 
631         for (int hi = offs + len; offs < hi;) {
632             write0(buf[offs++]);
633         }
634     }
635 
636     /**
637      * Keeps track of the last bytes written and implicitly performs
638      * run-length encoding as the first step of the bzip2 algorithm.
639      */
640     private void write0(int b) throws IOException {
641         if (this.currentChar != -1) {
642             b &= 0xff;
643             if (this.currentChar == b) {
644                 if (++this.runLength > 254) {
645                     writeRun();
646                     this.currentChar = -1;
647                     this.runLength = 0;
648                 }
649                 // else nothing to do
650             } else {
651                 writeRun();
652                 this.runLength = 1;
653                 this.currentChar = b;
654             }
655         } else {
656             this.currentChar = b & 0xff;
657             this.runLength++;
658         }
659     }
660 
661     private static void hbAssignCodes(final int[] code, final byte[] length,
662                                       final int minLen, final int maxLen,
663                                       final int alphaSize) {
664         int vec = 0;
665         for (int n = minLen; n <= maxLen; n++) {
666             for (int i = 0; i < alphaSize; i++) {
667                 if ((length[i] & 0xff) == n) {
668                     code[i] = vec;
669                     vec++;
670                 }
671             }
672             vec <<= 1;
673         }
674     }
675 
676     private void bsFinishedWithStream() throws IOException {
677         while (this.bsLive > 0) {
678             int ch = this.bsBuff >> 24;
679             this.out.write(ch); // write 8-bit
680             this.bsBuff <<= 8;
681             this.bsLive -= 8;
682         }
683     }
684 
685     private void bsW(final int n, final int v) throws IOException {
686         final OutputStream outShadow = this.out;
687         int bsLiveShadow = this.bsLive;
688         int bsBuffShadow = this.bsBuff;
689 
690         while (bsLiveShadow >= 8) {
691             outShadow.write(bsBuffShadow >> 24); // write 8-bit
692             bsBuffShadow <<= 8;
693             bsLiveShadow -= 8;
694         }
695 
696         this.bsBuff = bsBuffShadow | (v << (32 - bsLiveShadow - n));
697         this.bsLive = bsLiveShadow + n;
698     }
699 
700     private void bsPutUByte(final int c) throws IOException {
701         bsW(8, c);
702     }
703 
704     private void bsPutInt(final int u) throws IOException {
705         bsW(8, (u >> 24) & 0xff);
706         bsW(8, (u >> 16) & 0xff);
707         bsW(8, (u >> 8) & 0xff);
708         bsW(8, u & 0xff);
709     }
710 
711     private void sendMTFValues() throws IOException {
712         final byte[][] len = this.data.sendMTFValues_len;
713         final int alphaSize = this.nInUse + 2;
714 
715         for (int t = N_GROUPS; --t >= 0;) {
716             byte[] len_t = len[t];
717             for (int v = alphaSize; --v >= 0;) {
718                 len_t[v] = GREATER_ICOST;
719             }
720         }
721 
722         /* Decide how many coding tables to use */
723         // assert (this.nMTF > 0) : this.nMTF;
724         final int nGroups = (this.nMTF < 200) ? 2 : (this.nMTF < 600) ? 3
725             : (this.nMTF < 1200) ? 4 : (this.nMTF < 2400) ? 5 : 6;
726 
727         /* Generate an initial set of coding tables */
728         sendMTFValues0(nGroups, alphaSize);
729 
730         /*
731          * Iterate up to N_ITERS times to improve the tables.
732          */
733         final int nSelectors = sendMTFValues1(nGroups, alphaSize);
734 
735         /* Compute MTF values for the selectors. */
736         sendMTFValues2(nGroups, nSelectors);
737 
738         /* Assign actual codes for the tables. */
739         sendMTFValues3(nGroups, alphaSize);
740 
741         /* Transmit the mapping table. */
742         sendMTFValues4();
743 
744         /* Now the selectors. */
745         sendMTFValues5(nGroups, nSelectors);
746 
747         /* Now the coding tables. */
748         sendMTFValues6(nGroups, alphaSize);
749 
750         /* And finally, the block data proper */
751         sendMTFValues7();
752     }
753 
754     private void sendMTFValues0(final int nGroups, final int alphaSize) {
755         final byte[][] len = this.data.sendMTFValues_len;
756         final int[] mtfFreq = this.data.mtfFreq;
757 
758         int remF = this.nMTF;
759         int gs = 0;
760 
761         for (int nPart = nGroups; nPart > 0; nPart--) {
762             final int tFreq = remF / nPart;
763             int ge = gs - 1;
764             int aFreq = 0;
765 
766             for (final int a = alphaSize - 1; (aFreq < tFreq) && (ge < a);) {
767                 aFreq += mtfFreq[++ge];
768             }
769 
770             if ((ge > gs) && (nPart != nGroups) && (nPart != 1)
771                 && (((nGroups - nPart) & 1) != 0)) {
772                 aFreq -= mtfFreq[ge--];
773             }
774 
775             final byte[] len_np = len[nPart - 1];
776             for (int v = alphaSize; --v >= 0;) {
777                 if ((v >= gs) && (v <= ge)) {
778                     len_np[v] = LESSER_ICOST;
779                 } else {
780                     len_np[v] = GREATER_ICOST;
781                 }
782             }
783 
784             gs = ge + 1;
785             remF -= aFreq;
786         }
787     }
788 
789     private int sendMTFValues1(final int nGroups, final int alphaSize) {
790         final Data dataShadow = this.data;
791         final int[][] rfreq = dataShadow.sendMTFValues_rfreq;
792         final int[] fave = dataShadow.sendMTFValues_fave;
793         final short[] cost = dataShadow.sendMTFValues_cost;
794         final char[] sfmap = dataShadow.sfmap;
795         final byte[] selector = dataShadow.selector;
796         final byte[][] len = dataShadow.sendMTFValues_len;
797         final byte[] len_0 = len[0];
798         final byte[] len_1 = len[1];
799         final byte[] len_2 = len[2];
800         final byte[] len_3 = len[3];
801         final byte[] len_4 = len[4];
802         final byte[] len_5 = len[5];
803         final int nMTFShadow = this.nMTF;
804 
805         int nSelectors = 0;
806 
807         for (int iter = 0; iter < N_ITERS; iter++) {
808             for (int t = nGroups; --t >= 0;) {
809                 fave[t] = 0;
810                 int[] rfreqt = rfreq[t];
811                 for (int i = alphaSize; --i >= 0;) {
812                     rfreqt[i] = 0;
813                 }
814             }
815 
816             nSelectors = 0;
817 
818             for (int gs = 0; gs < this.nMTF;) {
819                 /* Set group start & end marks. */
820 
821                 /*
822                  * Calculate the cost of this group as coded by each of the
823                  * coding tables.
824                  */
825 
826                 final int ge = Math.min(gs + G_SIZE - 1, nMTFShadow - 1);
827 
828                 if (nGroups == N_GROUPS) {
829                     // unrolled version of the else-block
830 
831                     short cost0 = 0;
832                     short cost1 = 0;
833                     short cost2 = 0;
834                     short cost3 = 0;
835                     short cost4 = 0;
836                     short cost5 = 0;
837 
838                     for (int i = gs; i <= ge; i++) {
839                         final int icv = sfmap[i];
840                         cost0 += len_0[icv] & 0xff;
841                         cost1 += len_1[icv] & 0xff;
842                         cost2 += len_2[icv] & 0xff;
843                         cost3 += len_3[icv] & 0xff;
844                         cost4 += len_4[icv] & 0xff;
845                         cost5 += len_5[icv] & 0xff;
846                     }
847 
848                     cost[0] = cost0;
849                     cost[1] = cost1;
850                     cost[2] = cost2;
851                     cost[3] = cost3;
852                     cost[4] = cost4;
853                     cost[5] = cost5;
854 
855                 } else {
856                     for (int t = nGroups; --t >= 0;) {
857                         cost[t] = 0;
858                     }
859 
860                     for (int i = gs; i <= ge; i++) {
861                         final int icv = sfmap[i];
862                         for (int t = nGroups; --t >= 0;) {
863                             cost[t] += len[t][icv] & 0xff;
864                         }
865                     }
866                 }
867 
868                 /*
869                  * Find the coding table which is best for this group, and
870                  * record its identity in the selector table.
871                  */
872                 int bt = -1;
873                 for (int t = nGroups, bc = 999999999; --t >= 0;) {
874                     final int cost_t = cost[t];
875                     if (cost_t < bc) {
876                         bc = cost_t;
877                         bt = t;
878                     }
879                 }
880 
881                 fave[bt]++;
882                 selector[nSelectors] = (byte) bt;
883                 nSelectors++;
884 
885                 /*
886                  * Increment the symbol frequencies for the selected table.
887                  */
888                 final int[] rfreq_bt = rfreq[bt];
889                 for (int i = gs; i <= ge; i++) {
890                     rfreq_bt[sfmap[i]]++;
891                 }
892 
893                 gs = ge + 1;
894             }
895 
896             /*
897              * Recompute the tables based on the accumulated frequencies.
898              */
899             for (int t = 0; t < nGroups; t++) {
900                 hbMakeCodeLengths(len[t], rfreq[t], this.data, alphaSize, 20);
901             }
902         }
903 
904         return nSelectors;
905     }
906 
907     private void sendMTFValues2(final int nGroups, final int nSelectors) {
908         // assert (nGroups < 8) : nGroups;
909 
910         final Data dataShadow = this.data;
911         byte[] pos = dataShadow.sendMTFValues2_pos;
912 
913         for (int i = nGroups; --i >= 0;) {
914             pos[i] = (byte) i;
915         }
916 
917         for (int i = 0; i < nSelectors; i++) {
918             final byte ll_i = dataShadow.selector[i];
919             byte tmp = pos[0];
920             int j = 0;
921 
922             while (ll_i != tmp) {
923                 j++;
924                 byte tmp2 = tmp;
925                 tmp = pos[j];
926                 pos[j] = tmp2;
927             }
928 
929             pos[0] = tmp;
930             dataShadow.selectorMtf[i] = (byte) j;
931         }
932     }
933 
934     private void sendMTFValues3(final int nGroups, final int alphaSize) {
935         int[][] code = this.data.sendMTFValues_code;
936         byte[][] len = this.data.sendMTFValues_len;
937 
938         for (int t = 0; t < nGroups; t++) {
939             int minLen = 32;
940             int maxLen = 0;
941             final byte[] len_t = len[t];
942             for (int i = alphaSize; --i >= 0;) {
943                 final int l = len_t[i] & 0xff;
944                 if (l > maxLen) {
945                     maxLen = l;
946                 }
947                 if (l < minLen) {
948                     minLen = l;
949                 }
950             }
951 
952             // assert (maxLen <= 20) : maxLen;
953             // assert (minLen >= 1) : minLen;
954 
955             hbAssignCodes(code[t], len[t], minLen, maxLen, alphaSize);
956         }
957     }
958 
959     private void sendMTFValues4() throws IOException {
960         final boolean[] inUse = this.data.inUse;
961         final boolean[] inUse16 = this.data.sentMTFValues4_inUse16;
962 
963         for (int i = 16; --i >= 0;) {
964             inUse16[i] = false;
965             final int i16 = i * 16;
966             for (int j = 16; --j >= 0;) {
967                 if (inUse[i16 + j]) {
968                     inUse16[i] = true;
969                 }
970             }
971         }
972 
973         for (int i = 0; i < 16; i++) {
974             bsW(1, inUse16[i] ? 1 : 0);
975         }
976 
977         final OutputStream outShadow = this.out;
978         int bsLiveShadow = this.bsLive;
979         int bsBuffShadow = this.bsBuff;
980 
981         for (int i = 0; i < 16; i++) {
982             if (inUse16[i]) {
983                 final int i16 = i * 16;
984                 for (int j = 0; j < 16; j++) {
985                     // inlined: bsW(1, inUse[i16 + j] ? 1 : 0);
986                     while (bsLiveShadow >= 8) {
987                         outShadow.write(bsBuffShadow >> 24); // write 8-bit
988                         bsBuffShadow <<= 8;
989                         bsLiveShadow -= 8;
990                     }
991                     if (inUse[i16 + j]) {
992                         bsBuffShadow |= 1 << (32 - bsLiveShadow - 1);
993                     }
994                     bsLiveShadow++;
995                 }
996             }
997         }
998 
999         this.bsBuff = bsBuffShadow;
1000         this.bsLive = bsLiveShadow;
1001     }
1002 
1003     private void sendMTFValues5(final int nGroups, final int nSelectors)
1004         throws IOException {
1005         bsW(3, nGroups);
1006         bsW(15, nSelectors);
1007 
1008         final OutputStream outShadow = this.out;
1009         final byte[] selectorMtf = this.data.selectorMtf;
1010 
1011         int bsLiveShadow = this.bsLive;
1012         int bsBuffShadow = this.bsBuff;
1013 
1014         for (int i = 0; i < nSelectors; i++) {
1015             for (int j = 0, hj = selectorMtf[i] & 0xff; j < hj; j++) {
1016                 // inlined: bsW(1, 1);
1017                 while (bsLiveShadow >= 8) {
1018                     outShadow.write(bsBuffShadow >> 24);
1019                     bsBuffShadow <<= 8;
1020                     bsLiveShadow -= 8;
1021                 }
1022                 bsBuffShadow |= 1 << (32 - bsLiveShadow - 1);
1023                 bsLiveShadow++;
1024             }
1025 
1026             // inlined: bsW(1, 0);
1027             while (bsLiveShadow >= 8) {
1028                 outShadow.write(bsBuffShadow >> 24);
1029                 bsBuffShadow <<= 8;
1030                 bsLiveShadow -= 8;
1031             }
1032             // bsBuffShadow |= 0 << (32 - bsLiveShadow - 1);
1033             bsLiveShadow++;
1034         }
1035 
1036         this.bsBuff = bsBuffShadow;
1037         this.bsLive = bsLiveShadow;
1038     }
1039 
1040     private void sendMTFValues6(final int nGroups, final int alphaSize)
1041         throws IOException {
1042         final byte[][] len = this.data.sendMTFValues_len;
1043         final OutputStream outShadow = this.out;
1044 
1045         int bsLiveShadow = this.bsLive;
1046         int bsBuffShadow = this.bsBuff;
1047 
1048         for (int t = 0; t < nGroups; t++) {
1049             byte[] len_t = len[t];
1050             int curr = len_t[0] & 0xff;
1051 
1052             // inlined: bsW(5, curr);
1053             while (bsLiveShadow >= 8) {
1054                 outShadow.write(bsBuffShadow >> 24); // write 8-bit
1055                 bsBuffShadow <<= 8;
1056                 bsLiveShadow -= 8;
1057             }
1058             bsBuffShadow |= curr << (32 - bsLiveShadow - 5);
1059             bsLiveShadow += 5;
1060 
1061             for (int i = 0; i < alphaSize; i++) {
1062                 int lti = len_t[i] & 0xff;
1063                 while (curr < lti) {
1064                     // inlined: bsW(2, 2);
1065                     while (bsLiveShadow >= 8) {
1066                         outShadow.write(bsBuffShadow >> 24); // write 8-bit
1067                         bsBuffShadow <<= 8;
1068                         bsLiveShadow -= 8;
1069                     }
1070                     bsBuffShadow |= 2 << (32 - bsLiveShadow - 2);
1071                     bsLiveShadow += 2;
1072 
1073                     curr++; /* 10 */
1074                 }
1075 
1076                 while (curr > lti) {
1077                     // inlined: bsW(2, 3);
1078                     while (bsLiveShadow >= 8) {
1079                         outShadow.write(bsBuffShadow >> 24); // write 8-bit
1080                         bsBuffShadow <<= 8;
1081                         bsLiveShadow -= 8;
1082                     }
1083                     bsBuffShadow |= 3 << (32 - bsLiveShadow - 2);
1084                     bsLiveShadow += 2;
1085 
1086                     curr--; /* 11 */
1087                 }
1088 
1089                 // inlined: bsW(1, 0);
1090                 while (bsLiveShadow >= 8) {
1091                     outShadow.write(bsBuffShadow >> 24); // write 8-bit
1092                     bsBuffShadow <<= 8;
1093                     bsLiveShadow -= 8;
1094                 }
1095                 // bsBuffShadow |= 0 << (32 - bsLiveShadow - 1);
1096                 bsLiveShadow++;
1097             }
1098         }
1099 
1100         this.bsBuff = bsBuffShadow;
1101         this.bsLive = bsLiveShadow;
1102     }
1103 
1104     private void sendMTFValues7() throws IOException {
1105         final Data dataShadow = this.data;
1106         final byte[][] len = dataShadow.sendMTFValues_len;
1107         final int[][] code = dataShadow.sendMTFValues_code;
1108         final OutputStream outShadow = this.out;
1109         final byte[] selector = dataShadow.selector;
1110         final char[] sfmap = dataShadow.sfmap;
1111         final int nMTFShadow = this.nMTF;
1112 
1113         int selCtr = 0;
1114 
1115         int bsLiveShadow = this.bsLive;
1116         int bsBuffShadow = this.bsBuff;
1117 
1118         for (int gs = 0; gs < nMTFShadow;) {
1119             final int ge = Math.min(gs + G_SIZE - 1, nMTFShadow - 1);
1120             final int selector_selCtr = selector[selCtr] & 0xff;
1121             final int[] code_selCtr = code[selector_selCtr];
1122             final byte[] len_selCtr = len[selector_selCtr];
1123 
1124             while (gs <= ge) {
1125                 final int sfmap_i = sfmap[gs];
1126 
1127                 //
1128                 // inlined: bsW(len_selCtr[sfmap_i] & 0xff,
1129                 // code_selCtr[sfmap_i]);
1130                 //
1131                 while (bsLiveShadow >= 8) {
1132                     outShadow.write(bsBuffShadow >> 24);
1133                     bsBuffShadow <<= 8;
1134                     bsLiveShadow -= 8;
1135                 }
1136                 final int n = len_selCtr[sfmap_i] & 0xFF;
1137                 bsBuffShadow |= code_selCtr[sfmap_i] << (32 - bsLiveShadow - n);
1138                 bsLiveShadow += n;
1139 
1140                 gs++;
1141             }
1142 
1143             gs = ge + 1;
1144             selCtr++;
1145         }
1146 
1147         this.bsBuff = bsBuffShadow;
1148         this.bsLive = bsLiveShadow;
1149     }
1150 
1151     private void moveToFrontCodeAndSend() throws IOException {
1152         bsW(24, this.data.origPtr);
1153         generateMTFValues();
1154         sendMTFValues();
1155     }
1156 
1157     private void blockSort() {
1158         blockSorter.blockSort(data, last);
1159     }
1160 
1161     /*
1162      * Performs Move-To-Front on the Burrows-Wheeler transformed
1163      * buffer, storing the MTFed data in data.sfmap in RUNA/RUNB
1164      * run-length-encoded form.
1165      *
1166      * <p>Keeps track of byte frequencies in data.mtfFreq at the same time.</p>
1167      */
1168     private void generateMTFValues() {
1169         final int lastShadow = this.last;
1170         final Data dataShadow = this.data;
1171         final boolean[] inUse = dataShadow.inUse;
1172         final byte[] block = dataShadow.block;
1173         final int[] fmap = dataShadow.fmap;
1174         final char[] sfmap = dataShadow.sfmap;
1175         final int[] mtfFreq = dataShadow.mtfFreq;
1176         final byte[] unseqToSeq = dataShadow.unseqToSeq;
1177         final byte[] yy = dataShadow.generateMTFValues_yy;
1178 
1179         // make maps
1180         int nInUseShadow = 0;
1181         for (int i = 0; i < 256; i++) {
1182             if (inUse[i]) {
1183                 unseqToSeq[i] = (byte) nInUseShadow;
1184                 nInUseShadow++;
1185             }
1186         }
1187         this.nInUse = nInUseShadow;
1188 
1189         final int eob = nInUseShadow + 1;
1190 
1191         for (int i = eob; i >= 0; i--) {
1192             mtfFreq[i] = 0;
1193         }
1194 
1195         for (int i = nInUseShadow; --i >= 0;) {
1196             yy[i] = (byte) i;
1197         }
1198 
1199         int wr = 0;
1200         int zPend = 0;
1201 
1202         for (int i = 0; i <= lastShadow; i++) {
1203             final byte ll_i = unseqToSeq[block[fmap[i]] & 0xff];
1204             byte tmp = yy[0];
1205             int j = 0;
1206 
1207             while (ll_i != tmp) {
1208                 j++;
1209                 byte tmp2 = tmp;
1210                 tmp = yy[j];
1211                 yy[j] = tmp2;
1212             }
1213             yy[0] = tmp;
1214 
1215             if (j == 0) {
1216                 zPend++;
1217             } else {
1218                 if (zPend > 0) {
1219                     zPend--;
1220                     while (true) {
1221                         if ((zPend & 1) == 0) {
1222                             sfmap[wr] = RUNA;
1223                             wr++;
1224                             mtfFreq[RUNA]++;
1225                         } else {
1226                             sfmap[wr] = RUNB;
1227                             wr++;
1228                             mtfFreq[RUNB]++;
1229                         }
1230 
1231                         if (zPend >= 2) {
1232                             zPend = (zPend - 2) >> 1;
1233                         } else {
1234                             break;
1235                         }
1236                     }
1237                     zPend = 0;
1238                 }
1239                 sfmap[wr] = (char) (j + 1);
1240                 wr++;
1241                 mtfFreq[j + 1]++;
1242             }
1243         }
1244 
1245         if (zPend > 0) {
1246             zPend--;
1247             while (true) {
1248                 if ((zPend & 1) == 0) {
1249                     sfmap[wr] = RUNA;
1250                     wr++;
1251                     mtfFreq[RUNA]++;
1252                 } else {
1253                     sfmap[wr] = RUNB;
1254                     wr++;
1255                     mtfFreq[RUNB]++;
1256                 }
1257 
1258                 if (zPend >= 2) {
1259                     zPend = (zPend - 2) >> 1;
1260                 } else {
1261                     break;
1262                 }
1263             }
1264         }
1265 
1266         sfmap[wr] = (char) eob;
1267         mtfFreq[eob]++;
1268         this.nMTF = wr + 1;
1269     }
1270 
1271     static final class Data extends Object {
1272 
1273         // with blockSize 900k
1274         /* maps unsigned byte => "does it occur in block" */
1275         final boolean[] inUse = new boolean[256]; // 256 byte
1276         final byte[] unseqToSeq = new byte[256]; // 256 byte
1277         final int[] mtfFreq = new int[MAX_ALPHA_SIZE]; // 1032 byte
1278         final byte[] selector = new byte[MAX_SELECTORS]; // 18002 byte
1279         final byte[] selectorMtf = new byte[MAX_SELECTORS]; // 18002 byte
1280 
1281         final byte[] generateMTFValues_yy = new byte[256]; // 256 byte
1282         final byte[][] sendMTFValues_len = new byte[N_GROUPS][MAX_ALPHA_SIZE]; // 1548
1283         // byte
1284         final int[][] sendMTFValues_rfreq = new int[N_GROUPS][MAX_ALPHA_SIZE]; // 6192
1285         // byte
1286         final int[] sendMTFValues_fave = new int[N_GROUPS]; // 24 byte
1287         final short[] sendMTFValues_cost = new short[N_GROUPS]; // 12 byte
1288         final int[][] sendMTFValues_code = new int[N_GROUPS][MAX_ALPHA_SIZE]; // 6192
1289         // byte
1290         final byte[] sendMTFValues2_pos = new byte[N_GROUPS]; // 6 byte
1291         final boolean[] sentMTFValues4_inUse16 = new boolean[16]; // 16 byte
1292 
1293         final int[] heap = new int[MAX_ALPHA_SIZE + 2]; // 1040 byte
1294         final int[] weight = new int[MAX_ALPHA_SIZE * 2]; // 2064 byte
1295         final int[] parent = new int[MAX_ALPHA_SIZE * 2]; // 2064 byte
1296 
1297         // ------------
1298         // 333408 byte
1299 
1300         /* holds the RLEd block of original data starting at index 1.
1301          * After sorting the last byte added to the buffer is at index
1302          * 0. */
1303         final byte[] block; // 900021 byte
1304         /* maps index in Burrows-Wheeler transformed block => index of
1305          * byte in original block */
1306         final int[] fmap; // 3600000 byte
1307         final char[] sfmap; // 3600000 byte
1308         // ------------
1309         // 8433529 byte
1310         // ============
1311 
1312         /**
1313          * Index of original line in Burrows-Wheeler table.
1314          *
1315          * <p>This is the index in fmap that points to the last byte
1316          * of the original data.</p>
1317          */
1318         int origPtr;
1319 
1320         Data(int blockSize100k) {
1321             final int n = blockSize100k * BZip2Constants.BASEBLOCKSIZE;
1322             this.block = new byte[(n + 1 + NUM_OVERSHOOT_BYTES)];
1323             this.fmap = new int[n];
1324             this.sfmap = new char[2 * n];
1325         }
1326 
1327     }
1328 
1329 }