View Javadoc

1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   * http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing,
13   * software distributed under the License is distributed on an
14   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15   * KIND, either express or implied.  See the License for the
16   * specific language governing permissions and limitations
17   * under the License.
18   */
19  package org.apache.commons.compress.compressors.bzip2;
20  
21  import java.io.IOException;
22  import java.io.OutputStream;
23  
24  import org.apache.commons.compress.compressors.CompressorOutputStream;
25  
26  /**
27   * An output stream that compresses into the BZip2 format into another stream.
28   *
29   * <p>
30   * The compression requires large amounts of memory. Thus you should call the
31   * {@link #close() close()} method as soon as possible, to force
32   * <tt>BZip2CompressorOutputStream</tt> to release the allocated memory.
33   * </p>
34   *
35   * <p> You can shrink the amount of allocated memory and maybe raise
36   * the compression speed by choosing a lower blocksize, which in turn
37   * may cause a lower compression ratio. You can avoid unnecessary
38   * memory allocation by avoiding using a blocksize which is bigger
39   * than the size of the input.  </p>
40   *
41   * <p> You can compute the memory usage for compressing by the
42   * following formula: </p>
43   *
44   * <pre>
45   * &lt;code&gt;400k + (9 * blocksize)&lt;/code&gt;.
46   * </pre>
47   *
48   * <p> To get the memory required for decompression by {@link
49   * BZip2CompressorInputStream} use </p>
50   *
51   * <pre>
52   * &lt;code&gt;65k + (5 * blocksize)&lt;/code&gt;.
53   * </pre>
54   *
55   * <table width="100%" border="1">
56   * <colgroup> <col width="33%" /> <col width="33%" /> <col width="33%" />
57   * </colgroup>
58   * <tr>
59   * <th colspan="3">Memory usage by blocksize</th>
60   * </tr>
61   * <tr>
62   * <th align="right">Blocksize</th> <th align="right">Compression<br>
63   * memory usage</th> <th align="right">Decompression<br>
64   * memory usage</th>
65   * </tr>
66   * <tr>
67   * <td align="right">100k</td>
68   * <td align="right">1300k</td>
69   * <td align="right">565k</td>
70   * </tr>
71   * <tr>
72   * <td align="right">200k</td>
73   * <td align="right">2200k</td>
74   * <td align="right">1065k</td>
75   * </tr>
76   * <tr>
77   * <td align="right">300k</td>
78   * <td align="right">3100k</td>
79   * <td align="right">1565k</td>
80   * </tr>
81   * <tr>
82   * <td align="right">400k</td>
83   * <td align="right">4000k</td>
84   * <td align="right">2065k</td>
85   * </tr>
86   * <tr>
87   * <td align="right">500k</td>
88   * <td align="right">4900k</td>
89   * <td align="right">2565k</td>
90   * </tr>
91   * <tr>
92   * <td align="right">600k</td>
93   * <td align="right">5800k</td>
94   * <td align="right">3065k</td>
95   * </tr>
96   * <tr>
97   * <td align="right">700k</td>
98   * <td align="right">6700k</td>
99   * <td align="right">3565k</td>
100  * </tr>
101  * <tr>
102  * <td align="right">800k</td>
103  * <td align="right">7600k</td>
104  * <td align="right">4065k</td>
105  * </tr>
106  * <tr>
107  * <td align="right">900k</td>
108  * <td align="right">8500k</td>
109  * <td align="right">4565k</td>
110  * </tr>
111  * </table>
112  *
113  * <p>
114  * For decompression <tt>BZip2CompressorInputStream</tt> allocates less memory if the
115  * bzipped input is smaller than one block.
116  * </p>
117  *
118  * <p>
119  * Instances of this class are not threadsafe.
120  * </p>
121  *
122  * <p>
123  * TODO: Update to BZip2 1.0.1
124  * </p>
125  * @NotThreadSafe
126  */
127 public class BZip2CompressorOutputStream extends CompressorOutputStream
128     implements BZip2Constants {
129 
130     /**
131      * The minimum supported blocksize <tt> == 1</tt>.
132      */
133     public static final int MIN_BLOCKSIZE = 1;
134 
135     /**
136      * The maximum supported blocksize <tt> == 9</tt>.
137      */
138     public static final int MAX_BLOCKSIZE = 9;
139 
140     private static final int GREATER_ICOST = 15;
141     private static final int LESSER_ICOST = 0;
142 
143     private static void hbMakeCodeLengths(final byte[] len, final int[] freq,
144                                           final Data dat, final int alphaSize,
145                                           final int maxLen) {
146         /*
147          * Nodes and heap entries run from 1. Entry 0 for both the heap and
148          * nodes is a sentinel.
149          */
150         final int[] heap = dat.heap;
151         final int[] weight = dat.weight;
152         final int[] parent = dat.parent;
153 
154         for (int i = alphaSize; --i >= 0;) {
155             weight[i + 1] = (freq[i] == 0 ? 1 : freq[i]) << 8;
156         }
157 
158         for (boolean tooLong = true; tooLong;) {
159             tooLong = false;
160 
161             int nNodes = alphaSize;
162             int nHeap = 0;
163             heap[0] = 0;
164             weight[0] = 0;
165             parent[0] = -2;
166 
167             for (int i = 1; i <= alphaSize; i++) {
168                 parent[i] = -1;
169                 nHeap++;
170                 heap[nHeap] = i;
171 
172                 int zz = nHeap;
173                 int tmp = heap[zz];
174                 while (weight[tmp] < weight[heap[zz >> 1]]) {
175                     heap[zz] = heap[zz >> 1];
176                     zz >>= 1;
177                 }
178                 heap[zz] = tmp;
179             }
180 
181             while (nHeap > 1) {
182                 int n1 = heap[1];
183                 heap[1] = heap[nHeap];
184                 nHeap--;
185 
186                 int yy = 0;
187                 int zz = 1;
188                 int tmp = heap[1];
189 
190                 while (true) {
191                     yy = zz << 1;
192 
193                     if (yy > nHeap) {
194                         break;
195                     }
196 
197                     if ((yy < nHeap)
198                         && (weight[heap[yy + 1]] < weight[heap[yy]])) {
199                         yy++;
200                     }
201 
202                     if (weight[tmp] < weight[heap[yy]]) {
203                         break;
204                     }
205 
206                     heap[zz] = heap[yy];
207                     zz = yy;
208                 }
209 
210                 heap[zz] = tmp;
211 
212                 int n2 = heap[1];
213                 heap[1] = heap[nHeap];
214                 nHeap--;
215 
216                 yy = 0;
217                 zz = 1;
218                 tmp = heap[1];
219 
220                 while (true) {
221                     yy = zz << 1;
222 
223                     if (yy > nHeap) {
224                         break;
225                     }
226 
227                     if ((yy < nHeap)
228                         && (weight[heap[yy + 1]] < weight[heap[yy]])) {
229                         yy++;
230                     }
231 
232                     if (weight[tmp] < weight[heap[yy]]) {
233                         break;
234                     }
235 
236                     heap[zz] = heap[yy];
237                     zz = yy;
238                 }
239 
240                 heap[zz] = tmp;
241                 nNodes++;
242                 parent[n1] = parent[n2] = nNodes;
243 
244                 final int weight_n1 = weight[n1];
245                 final int weight_n2 = weight[n2];
246                 weight[nNodes] = ((weight_n1 & 0xffffff00)
247                                   + (weight_n2 & 0xffffff00))
248                     | (1 + (((weight_n1 & 0x000000ff)
249                              > (weight_n2 & 0x000000ff))
250                             ? (weight_n1 & 0x000000ff)
251                             : (weight_n2 & 0x000000ff)));
252 
253                 parent[nNodes] = -1;
254                 nHeap++;
255                 heap[nHeap] = nNodes;
256 
257                 tmp = 0;
258                 zz = nHeap;
259                 tmp = heap[zz];
260                 final int weight_tmp = weight[tmp];
261                 while (weight_tmp < weight[heap[zz >> 1]]) {
262                     heap[zz] = heap[zz >> 1];
263                     zz >>= 1;
264                 }
265                 heap[zz] = tmp;
266 
267             }
268 
269             for (int i = 1; i <= alphaSize; i++) {
270                 int j = 0;
271                 int k = i;
272 
273                 for (int parent_k; (parent_k = parent[k]) >= 0;) {
274                     k = parent_k;
275                     j++;
276                 }
277 
278                 len[i - 1] = (byte) j;
279                 if (j > maxLen) {
280                     tooLong = true;
281                 }
282             }
283 
284             if (tooLong) {
285                 for (int i = 1; i < alphaSize; i++) {
286                     int j = weight[i] >> 8;
287                     j = 1 + (j >> 1);
288                     weight[i] = j << 8;
289                 }
290             }
291         }
292     }
293 
294     /**
295      * Index of the last char in the block, so the block size == last + 1.
296      */
297     private int last;
298 
299     /**
300      * Always: in the range 0 .. 9. The current block size is 100000 * this
301      * number.
302      */
303     private final int blockSize100k;
304 
305     private int bsBuff;
306     private int bsLive;
307     private final CRC crc = new CRC();
308 
309     private int nInUse;
310 
311     private int nMTF;
312 
313     private int currentChar = -1;
314     private int runLength = 0;
315 
316     private int blockCRC;
317     private int combinedCRC;
318     private final int allowableBlockSize;
319 
320     /**
321      * All memory intensive stuff.
322      */
323     private Data data;
324     private BlockSort blockSorter;
325 
326     private OutputStream out;
327 
328     /**
329      * Chooses a blocksize based on the given length of the data to compress.
330      *
331      * @return The blocksize, between {@link #MIN_BLOCKSIZE} and
332      *         {@link #MAX_BLOCKSIZE} both inclusive. For a negative
333      *         <tt>inputLength</tt> this method returns <tt>MAX_BLOCKSIZE</tt>
334      *         always.
335      *
336      * @param inputLength
337      *            The length of the data which will be compressed by
338      *            <tt>BZip2CompressorOutputStream</tt>.
339      */
340     public static int chooseBlockSize(long inputLength) {
341         return (inputLength > 0) ? (int) Math
342             .min((inputLength / 132000) + 1, 9) : MAX_BLOCKSIZE;
343     }
344 
345     /**
346      * Constructs a new <tt>BZip2CompressorOutputStream</tt> with a blocksize of 900k.
347      *
348      * @param out 
349      *            the destination stream.
350      *
351      * @throws IOException
352      *             if an I/O error occurs in the specified stream.
353      * @throws NullPointerException
354      *             if <code>out == null</code>.
355      */
356     public BZip2CompressorOutputStream(final OutputStream out)
357         throws IOException {
358         this(out, MAX_BLOCKSIZE);
359     }
360 
361     /**
362      * Constructs a new <tt>BZip2CompressorOutputStream</tt> with specified blocksize.
363      *
364      * @param out
365      *            the destination stream.
366      * @param blockSize
367      *            the blockSize as 100k units.
368      *
369      * @throws IOException
370      *             if an I/O error occurs in the specified stream.
371      * @throws IllegalArgumentException
372      *             if <code>(blockSize < 1) || (blockSize > 9)</code>.
373      * @throws NullPointerException
374      *             if <code>out == null</code>.
375      *
376      * @see #MIN_BLOCKSIZE
377      * @see #MAX_BLOCKSIZE
378      */
379     public BZip2CompressorOutputStream(final OutputStream out,
380                                        final int blockSize)
381         throws IOException {
382         super();
383 
384         if (blockSize < 1) {
385             throw new IllegalArgumentException("blockSize(" + blockSize
386                                                + ") < 1");
387         }
388         if (blockSize > 9) {
389             throw new IllegalArgumentException("blockSize(" + blockSize
390                                                + ") > 9");
391         }
392 
393         this.blockSize100k = blockSize;
394         this.out = out;
395 
396         /* 20 is just a paranoia constant */
397         this.allowableBlockSize = (this.blockSize100k * BZip2Constants.BASEBLOCKSIZE) - 20;
398         init();
399     }
400 
401     /** {@inheritDoc} */
402     @Override
403     public void write(final int b) throws IOException {
404         if (this.out != null) {
405             write0(b);
406         } else {
407             throw new IOException("closed");
408         }
409     }
410 
411     /**
412      * Writes the current byte to the buffer, run-length encoding it
413      * if it has been repeated at least four times (the first step
414      * RLEs sequences of four identical bytes).
415      *
416      * <p>Flushes the current block before writing data if it is
417      * full.</p>
418      *
419      * <p>"write to the buffer" means adding to data.buffer starting
420      * two steps "after" this.last - initially starting at index 1
421      * (not 0) - and updating this.last to point to the last index
422      * written minus 1.</p>
423      */
424     private void writeRun() throws IOException {
425         final int lastShadow = this.last;
426 
427         if (lastShadow < this.allowableBlockSize) {
428             final int currentCharShadow = this.currentChar;
429             final Data dataShadow = this.data;
430             dataShadow.inUse[currentCharShadow] = true;
431             final byte ch = (byte) currentCharShadow;
432 
433             int runLengthShadow = this.runLength;
434             this.crc.updateCRC(currentCharShadow, runLengthShadow);
435 
436             switch (runLengthShadow) {
437             case 1:
438                 dataShadow.block[lastShadow + 2] = ch;
439                 this.last = lastShadow + 1;
440                 break;
441 
442             case 2:
443                 dataShadow.block[lastShadow + 2] = ch;
444                 dataShadow.block[lastShadow + 3] = ch;
445                 this.last = lastShadow + 2;
446                 break;
447 
448             case 3: {
449                 final byte[] block = dataShadow.block;
450                 block[lastShadow + 2] = ch;
451                 block[lastShadow + 3] = ch;
452                 block[lastShadow + 4] = ch;
453                 this.last = lastShadow + 3;
454             }
455                 break;
456 
457             default: {
458                 runLengthShadow -= 4;
459                 dataShadow.inUse[runLengthShadow] = true;
460                 final byte[] block = dataShadow.block;
461                 block[lastShadow + 2] = ch;
462                 block[lastShadow + 3] = ch;
463                 block[lastShadow + 4] = ch;
464                 block[lastShadow + 5] = ch;
465                 block[lastShadow + 6] = (byte) runLengthShadow;
466                 this.last = lastShadow + 5;
467             }
468                 break;
469 
470             }
471         } else {
472             endBlock();
473             initBlock();
474             writeRun();
475         }
476     }
477 
478     /**
479      * Overriden to close the stream.
480      */
481     @Override
482     protected void finalize() throws Throwable {
483         finish();
484         super.finalize();
485     }
486 
487 
488     public void finish() throws IOException {
489         if (out != null) {
490             try {
491                 if (this.runLength > 0) {
492                     writeRun();
493                 }
494                 this.currentChar = -1;
495                 endBlock();
496                 endCompression();
497             } finally {
498                 this.out = null;
499                 this.data = null;
500                 this.blockSorter = null;
501             }
502         }
503     }
504 
505     @Override
506     public void close() throws IOException {
507         if (out != null) {
508             OutputStream outShadow = this.out;
509             finish();
510             outShadow.close();
511         }
512     }
513 
514     @Override
515     public void flush() throws IOException {
516         OutputStream outShadow = this.out;
517         if (outShadow != null) {
518             outShadow.flush();
519         }
520     }
521 
522     /**
523      * Writes magic bytes like BZ on the first position of the stream
524      * and bytes indiciating the file-format, which is 
525      * huffmanised, followed by a digit indicating blockSize100k.
526      * @throws IOException if the magic bytes could not been written
527      */
528     private void init() throws IOException {
529         bsPutUByte('B');
530         bsPutUByte('Z');
531 
532         this.data = new Data(this.blockSize100k);
533         this.blockSorter = new BlockSort(this.data);
534 
535         // huffmanised magic bytes
536         bsPutUByte('h');
537         bsPutUByte('0' + this.blockSize100k);
538 
539         this.combinedCRC = 0;
540         initBlock();
541     }
542 
543     private void initBlock() {
544         // blockNo++;
545         this.crc.initialiseCRC();
546         this.last = -1;
547         // ch = 0;
548 
549         boolean[] inUse = this.data.inUse;
550         for (int i = 256; --i >= 0;) {
551             inUse[i] = false;
552         }
553 
554     }
555 
556     private void endBlock() throws IOException {
557         this.blockCRC = this.crc.getFinalCRC();
558         this.combinedCRC = (this.combinedCRC << 1) | (this.combinedCRC >>> 31);
559         this.combinedCRC ^= this.blockCRC;
560 
561         // empty block at end of file
562         if (this.last == -1) {
563             return;
564         }
565 
566         /* sort the block and establish posn of original string */
567         blockSort();
568 
569         /*
570          * A 6-byte block header, the value chosen arbitrarily as 0x314159265359
571          * :-). A 32 bit value does not really give a strong enough guarantee
572          * that the value will not appear by chance in the compressed
573          * datastream. Worst-case probability of this event, for a 900k block,
574          * is about 2.0e-3 for 32 bits, 1.0e-5 for 40 bits and 4.0e-8 for 48
575          * bits. For a compressed file of size 100Gb -- about 100000 blocks --
576          * only a 48-bit marker will do. NB: normal compression/ decompression
577          * donot rely on these statistical properties. They are only important
578          * when trying to recover blocks from damaged files.
579          */
580         bsPutUByte(0x31);
581         bsPutUByte(0x41);
582         bsPutUByte(0x59);
583         bsPutUByte(0x26);
584         bsPutUByte(0x53);
585         bsPutUByte(0x59);
586 
587         /* Now the block's CRC, so it is in a known place. */
588         bsPutInt(this.blockCRC);
589 
590         /* Now a single bit indicating no randomisation. */
591         bsW(1, 0);
592 
593         /* Finally, block's contents proper. */
594         moveToFrontCodeAndSend();
595     }
596 
597     private void endCompression() throws IOException {
598         /*
599          * Now another magic 48-bit number, 0x177245385090, to indicate the end
600          * of the last block. (sqrt(pi), if you want to know. I did want to use
601          * e, but it contains too much repetition -- 27 18 28 18 28 46 -- for me
602          * to feel statistically comfortable. Call me paranoid.)
603          */
604         bsPutUByte(0x17);
605         bsPutUByte(0x72);
606         bsPutUByte(0x45);
607         bsPutUByte(0x38);
608         bsPutUByte(0x50);
609         bsPutUByte(0x90);
610 
611         bsPutInt(this.combinedCRC);
612         bsFinishedWithStream();
613     }
614 
615     /**
616      * Returns the blocksize parameter specified at construction time.
617      */
618     public final int getBlockSize() {
619         return this.blockSize100k;
620     }
621 
622     @Override
623     public void write(final byte[] buf, int offs, final int len)
624         throws IOException {
625         if (offs < 0) {
626             throw new IndexOutOfBoundsException("offs(" + offs + ") < 0.");
627         }
628         if (len < 0) {
629             throw new IndexOutOfBoundsException("len(" + len + ") < 0.");
630         }
631         if (offs + len > buf.length) {
632             throw new IndexOutOfBoundsException("offs(" + offs + ") + len("
633                                                 + len + ") > buf.length("
634                                                 + buf.length + ").");
635         }
636         if (this.out == null) {
637             throw new IOException("stream closed");
638         }
639 
640         for (int hi = offs + len; offs < hi;) {
641             write0(buf[offs++]);
642         }
643     }
644 
645     /**
646      * Keeps track of the last bytes written and implicitly performs
647      * run-length encoding as the first step of the bzip2 algorithm.
648      */
649     private void write0(int b) throws IOException {
650         if (this.currentChar != -1) {
651             b &= 0xff;
652             if (this.currentChar == b) {
653                 if (++this.runLength > 254) {
654                     writeRun();
655                     this.currentChar = -1;
656                     this.runLength = 0;
657                 }
658                 // else nothing to do
659             } else {
660                 writeRun();
661                 this.runLength = 1;
662                 this.currentChar = b;
663             }
664         } else {
665             this.currentChar = b & 0xff;
666             this.runLength++;
667         }
668     }
669 
670     private static void hbAssignCodes(final int[] code, final byte[] length,
671                                       final int minLen, final int maxLen,
672                                       final int alphaSize) {
673         int vec = 0;
674         for (int n = minLen; n <= maxLen; n++) {
675             for (int i = 0; i < alphaSize; i++) {
676                 if ((length[i] & 0xff) == n) {
677                     code[i] = vec;
678                     vec++;
679                 }
680             }
681             vec <<= 1;
682         }
683     }
684 
685     private void bsFinishedWithStream() throws IOException {
686         while (this.bsLive > 0) {
687             int ch = this.bsBuff >> 24;
688             this.out.write(ch); // write 8-bit
689             this.bsBuff <<= 8;
690             this.bsLive -= 8;
691         }
692     }
693 
694     private void bsW(final int n, final int v) throws IOException {
695         final OutputStream outShadow = this.out;
696         int bsLiveShadow = this.bsLive;
697         int bsBuffShadow = this.bsBuff;
698 
699         while (bsLiveShadow >= 8) {
700             outShadow.write(bsBuffShadow >> 24); // write 8-bit
701             bsBuffShadow <<= 8;
702             bsLiveShadow -= 8;
703         }
704 
705         this.bsBuff = bsBuffShadow | (v << (32 - bsLiveShadow - n));
706         this.bsLive = bsLiveShadow + n;
707     }
708 
709     private void bsPutUByte(final int c) throws IOException {
710         bsW(8, c);
711     }
712 
713     private void bsPutInt(final int u) throws IOException {
714         bsW(8, (u >> 24) & 0xff);
715         bsW(8, (u >> 16) & 0xff);
716         bsW(8, (u >> 8) & 0xff);
717         bsW(8, u & 0xff);
718     }
719 
720     private void sendMTFValues() throws IOException {
721         final byte[][] len = this.data.sendMTFValues_len;
722         final int alphaSize = this.nInUse + 2;
723 
724         for (int t = N_GROUPS; --t >= 0;) {
725             byte[] len_t = len[t];
726             for (int v = alphaSize; --v >= 0;) {
727                 len_t[v] = GREATER_ICOST;
728             }
729         }
730 
731         /* Decide how many coding tables to use */
732         // assert (this.nMTF > 0) : this.nMTF;
733         final int nGroups = (this.nMTF < 200) ? 2 : (this.nMTF < 600) ? 3
734             : (this.nMTF < 1200) ? 4 : (this.nMTF < 2400) ? 5 : 6;
735 
736         /* Generate an initial set of coding tables */
737         sendMTFValues0(nGroups, alphaSize);
738 
739         /*
740          * Iterate up to N_ITERS times to improve the tables.
741          */
742         final int nSelectors = sendMTFValues1(nGroups, alphaSize);
743 
744         /* Compute MTF values for the selectors. */
745         sendMTFValues2(nGroups, nSelectors);
746 
747         /* Assign actual codes for the tables. */
748         sendMTFValues3(nGroups, alphaSize);
749 
750         /* Transmit the mapping table. */
751         sendMTFValues4();
752 
753         /* Now the selectors. */
754         sendMTFValues5(nGroups, nSelectors);
755 
756         /* Now the coding tables. */
757         sendMTFValues6(nGroups, alphaSize);
758 
759         /* And finally, the block data proper */
760         sendMTFValues7();
761     }
762 
763     private void sendMTFValues0(final int nGroups, final int alphaSize) {
764         final byte[][] len = this.data.sendMTFValues_len;
765         final int[] mtfFreq = this.data.mtfFreq;
766 
767         int remF = this.nMTF;
768         int gs = 0;
769 
770         for (int nPart = nGroups; nPart > 0; nPart--) {
771             final int tFreq = remF / nPart;
772             int ge = gs - 1;
773             int aFreq = 0;
774 
775             for (final int a = alphaSize - 1; (aFreq < tFreq) && (ge < a);) {
776                 aFreq += mtfFreq[++ge];
777             }
778 
779             if ((ge > gs) && (nPart != nGroups) && (nPart != 1)
780                 && (((nGroups - nPart) & 1) != 0)) {
781                 aFreq -= mtfFreq[ge--];
782             }
783 
784             final byte[] len_np = len[nPart - 1];
785             for (int v = alphaSize; --v >= 0;) {
786                 if ((v >= gs) && (v <= ge)) {
787                     len_np[v] = LESSER_ICOST;
788                 } else {
789                     len_np[v] = GREATER_ICOST;
790                 }
791             }
792 
793             gs = ge + 1;
794             remF -= aFreq;
795         }
796     }
797 
798     private int sendMTFValues1(final int nGroups, final int alphaSize) {
799         final Data dataShadow = this.data;
800         final int[][] rfreq = dataShadow.sendMTFValues_rfreq;
801         final int[] fave = dataShadow.sendMTFValues_fave;
802         final short[] cost = dataShadow.sendMTFValues_cost;
803         final char[] sfmap = dataShadow.sfmap;
804         final byte[] selector = dataShadow.selector;
805         final byte[][] len = dataShadow.sendMTFValues_len;
806         final byte[] len_0 = len[0];
807         final byte[] len_1 = len[1];
808         final byte[] len_2 = len[2];
809         final byte[] len_3 = len[3];
810         final byte[] len_4 = len[4];
811         final byte[] len_5 = len[5];
812         final int nMTFShadow = this.nMTF;
813 
814         int nSelectors = 0;
815 
816         for (int iter = 0; iter < N_ITERS; iter++) {
817             for (int t = nGroups; --t >= 0;) {
818                 fave[t] = 0;
819                 int[] rfreqt = rfreq[t];
820                 for (int i = alphaSize; --i >= 0;) {
821                     rfreqt[i] = 0;
822                 }
823             }
824 
825             nSelectors = 0;
826 
827             for (int gs = 0; gs < this.nMTF;) {
828                 /* Set group start & end marks. */
829 
830                 /*
831                  * Calculate the cost of this group as coded by each of the
832                  * coding tables.
833                  */
834 
835                 final int ge = Math.min(gs + G_SIZE - 1, nMTFShadow - 1);
836 
837                 if (nGroups == N_GROUPS) {
838                     // unrolled version of the else-block
839 
840                     short cost0 = 0;
841                     short cost1 = 0;
842                     short cost2 = 0;
843                     short cost3 = 0;
844                     short cost4 = 0;
845                     short cost5 = 0;
846 
847                     for (int i = gs; i <= ge; i++) {
848                         final int icv = sfmap[i];
849                         cost0 += len_0[icv] & 0xff;
850                         cost1 += len_1[icv] & 0xff;
851                         cost2 += len_2[icv] & 0xff;
852                         cost3 += len_3[icv] & 0xff;
853                         cost4 += len_4[icv] & 0xff;
854                         cost5 += len_5[icv] & 0xff;
855                     }
856 
857                     cost[0] = cost0;
858                     cost[1] = cost1;
859                     cost[2] = cost2;
860                     cost[3] = cost3;
861                     cost[4] = cost4;
862                     cost[5] = cost5;
863 
864                 } else {
865                     for (int t = nGroups; --t >= 0;) {
866                         cost[t] = 0;
867                     }
868 
869                     for (int i = gs; i <= ge; i++) {
870                         final int icv = sfmap[i];
871                         for (int t = nGroups; --t >= 0;) {
872                             cost[t] += len[t][icv] & 0xff;
873                         }
874                     }
875                 }
876 
877                 /*
878                  * Find the coding table which is best for this group, and
879                  * record its identity in the selector table.
880                  */
881                 int bt = -1;
882                 for (int t = nGroups, bc = 999999999; --t >= 0;) {
883                     final int cost_t = cost[t];
884                     if (cost_t < bc) {
885                         bc = cost_t;
886                         bt = t;
887                     }
888                 }
889 
890                 fave[bt]++;
891                 selector[nSelectors] = (byte) bt;
892                 nSelectors++;
893 
894                 /*
895                  * Increment the symbol frequencies for the selected table.
896                  */
897                 final int[] rfreq_bt = rfreq[bt];
898                 for (int i = gs; i <= ge; i++) {
899                     rfreq_bt[sfmap[i]]++;
900                 }
901 
902                 gs = ge + 1;
903             }
904 
905             /*
906              * Recompute the tables based on the accumulated frequencies.
907              */
908             for (int t = 0; t < nGroups; t++) {
909                 hbMakeCodeLengths(len[t], rfreq[t], this.data, alphaSize, 20);
910             }
911         }
912 
913         return nSelectors;
914     }
915 
916     private void sendMTFValues2(final int nGroups, final int nSelectors) {
917         // assert (nGroups < 8) : nGroups;
918 
919         final Data dataShadow = this.data;
920         byte[] pos = dataShadow.sendMTFValues2_pos;
921 
922         for (int i = nGroups; --i >= 0;) {
923             pos[i] = (byte) i;
924         }
925 
926         for (int i = 0; i < nSelectors; i++) {
927             final byte ll_i = dataShadow.selector[i];
928             byte tmp = pos[0];
929             int j = 0;
930 
931             while (ll_i != tmp) {
932                 j++;
933                 byte tmp2 = tmp;
934                 tmp = pos[j];
935                 pos[j] = tmp2;
936             }
937 
938             pos[0] = tmp;
939             dataShadow.selectorMtf[i] = (byte) j;
940         }
941     }
942 
943     private void sendMTFValues3(final int nGroups, final int alphaSize) {
944         int[][] code = this.data.sendMTFValues_code;
945         byte[][] len = this.data.sendMTFValues_len;
946 
947         for (int t = 0; t < nGroups; t++) {
948             int minLen = 32;
949             int maxLen = 0;
950             final byte[] len_t = len[t];
951             for (int i = alphaSize; --i >= 0;) {
952                 final int l = len_t[i] & 0xff;
953                 if (l > maxLen) {
954                     maxLen = l;
955                 }
956                 if (l < minLen) {
957                     minLen = l;
958                 }
959             }
960 
961             // assert (maxLen <= 20) : maxLen;
962             // assert (minLen >= 1) : minLen;
963 
964             hbAssignCodes(code[t], len[t], minLen, maxLen, alphaSize);
965         }
966     }
967 
968     private void sendMTFValues4() throws IOException {
969         final boolean[] inUse = this.data.inUse;
970         final boolean[] inUse16 = this.data.sentMTFValues4_inUse16;
971 
972         for (int i = 16; --i >= 0;) {
973             inUse16[i] = false;
974             final int i16 = i * 16;
975             for (int j = 16; --j >= 0;) {
976                 if (inUse[i16 + j]) {
977                     inUse16[i] = true;
978                 }
979             }
980         }
981 
982         for (int i = 0; i < 16; i++) {
983             bsW(1, inUse16[i] ? 1 : 0);
984         }
985 
986         final OutputStream outShadow = this.out;
987         int bsLiveShadow = this.bsLive;
988         int bsBuffShadow = this.bsBuff;
989 
990         for (int i = 0; i < 16; i++) {
991             if (inUse16[i]) {
992                 final int i16 = i * 16;
993                 for (int j = 0; j < 16; j++) {
994                     // inlined: bsW(1, inUse[i16 + j] ? 1 : 0);
995                     while (bsLiveShadow >= 8) {
996                         outShadow.write(bsBuffShadow >> 24); // write 8-bit
997                         bsBuffShadow <<= 8;
998                         bsLiveShadow -= 8;
999                     }
1000                     if (inUse[i16 + j]) {
1001                         bsBuffShadow |= 1 << (32 - bsLiveShadow - 1);
1002                     }
1003                     bsLiveShadow++;
1004                 }
1005             }
1006         }
1007 
1008         this.bsBuff = bsBuffShadow;
1009         this.bsLive = bsLiveShadow;
1010     }
1011 
1012     private void sendMTFValues5(final int nGroups, final int nSelectors)
1013         throws IOException {
1014         bsW(3, nGroups);
1015         bsW(15, nSelectors);
1016 
1017         final OutputStream outShadow = this.out;
1018         final byte[] selectorMtf = this.data.selectorMtf;
1019 
1020         int bsLiveShadow = this.bsLive;
1021         int bsBuffShadow = this.bsBuff;
1022 
1023         for (int i = 0; i < nSelectors; i++) {
1024             for (int j = 0, hj = selectorMtf[i] & 0xff; j < hj; j++) {
1025                 // inlined: bsW(1, 1);
1026                 while (bsLiveShadow >= 8) {
1027                     outShadow.write(bsBuffShadow >> 24);
1028                     bsBuffShadow <<= 8;
1029                     bsLiveShadow -= 8;
1030                 }
1031                 bsBuffShadow |= 1 << (32 - bsLiveShadow - 1);
1032                 bsLiveShadow++;
1033             }
1034 
1035             // inlined: bsW(1, 0);
1036             while (bsLiveShadow >= 8) {
1037                 outShadow.write(bsBuffShadow >> 24);
1038                 bsBuffShadow <<= 8;
1039                 bsLiveShadow -= 8;
1040             }
1041             // bsBuffShadow |= 0 << (32 - bsLiveShadow - 1);
1042             bsLiveShadow++;
1043         }
1044 
1045         this.bsBuff = bsBuffShadow;
1046         this.bsLive = bsLiveShadow;
1047     }
1048 
1049     private void sendMTFValues6(final int nGroups, final int alphaSize)
1050         throws IOException {
1051         final byte[][] len = this.data.sendMTFValues_len;
1052         final OutputStream outShadow = this.out;
1053 
1054         int bsLiveShadow = this.bsLive;
1055         int bsBuffShadow = this.bsBuff;
1056 
1057         for (int t = 0; t < nGroups; t++) {
1058             byte[] len_t = len[t];
1059             int curr = len_t[0] & 0xff;
1060 
1061             // inlined: bsW(5, curr);
1062             while (bsLiveShadow >= 8) {
1063                 outShadow.write(bsBuffShadow >> 24); // write 8-bit
1064                 bsBuffShadow <<= 8;
1065                 bsLiveShadow -= 8;
1066             }
1067             bsBuffShadow |= curr << (32 - bsLiveShadow - 5);
1068             bsLiveShadow += 5;
1069 
1070             for (int i = 0; i < alphaSize; i++) {
1071                 int lti = len_t[i] & 0xff;
1072                 while (curr < lti) {
1073                     // inlined: bsW(2, 2);
1074                     while (bsLiveShadow >= 8) {
1075                         outShadow.write(bsBuffShadow >> 24); // write 8-bit
1076                         bsBuffShadow <<= 8;
1077                         bsLiveShadow -= 8;
1078                     }
1079                     bsBuffShadow |= 2 << (32 - bsLiveShadow - 2);
1080                     bsLiveShadow += 2;
1081 
1082                     curr++; /* 10 */
1083                 }
1084 
1085                 while (curr > lti) {
1086                     // inlined: bsW(2, 3);
1087                     while (bsLiveShadow >= 8) {
1088                         outShadow.write(bsBuffShadow >> 24); // write 8-bit
1089                         bsBuffShadow <<= 8;
1090                         bsLiveShadow -= 8;
1091                     }
1092                     bsBuffShadow |= 3 << (32 - bsLiveShadow - 2);
1093                     bsLiveShadow += 2;
1094 
1095                     curr--; /* 11 */
1096                 }
1097 
1098                 // inlined: bsW(1, 0);
1099                 while (bsLiveShadow >= 8) {
1100                     outShadow.write(bsBuffShadow >> 24); // write 8-bit
1101                     bsBuffShadow <<= 8;
1102                     bsLiveShadow -= 8;
1103                 }
1104                 // bsBuffShadow |= 0 << (32 - bsLiveShadow - 1);
1105                 bsLiveShadow++;
1106             }
1107         }
1108 
1109         this.bsBuff = bsBuffShadow;
1110         this.bsLive = bsLiveShadow;
1111     }
1112 
1113     private void sendMTFValues7() throws IOException {
1114         final Data dataShadow = this.data;
1115         final byte[][] len = dataShadow.sendMTFValues_len;
1116         final int[][] code = dataShadow.sendMTFValues_code;
1117         final OutputStream outShadow = this.out;
1118         final byte[] selector = dataShadow.selector;
1119         final char[] sfmap = dataShadow.sfmap;
1120         final int nMTFShadow = this.nMTF;
1121 
1122         int selCtr = 0;
1123 
1124         int bsLiveShadow = this.bsLive;
1125         int bsBuffShadow = this.bsBuff;
1126 
1127         for (int gs = 0; gs < nMTFShadow;) {
1128             final int ge = Math.min(gs + G_SIZE - 1, nMTFShadow - 1);
1129             final int selector_selCtr = selector[selCtr] & 0xff;
1130             final int[] code_selCtr = code[selector_selCtr];
1131             final byte[] len_selCtr = len[selector_selCtr];
1132 
1133             while (gs <= ge) {
1134                 final int sfmap_i = sfmap[gs];
1135 
1136                 //
1137                 // inlined: bsW(len_selCtr[sfmap_i] & 0xff,
1138                 // code_selCtr[sfmap_i]);
1139                 //
1140                 while (bsLiveShadow >= 8) {
1141                     outShadow.write(bsBuffShadow >> 24);
1142                     bsBuffShadow <<= 8;
1143                     bsLiveShadow -= 8;
1144                 }
1145                 final int n = len_selCtr[sfmap_i] & 0xFF;
1146                 bsBuffShadow |= code_selCtr[sfmap_i] << (32 - bsLiveShadow - n);
1147                 bsLiveShadow += n;
1148 
1149                 gs++;
1150             }
1151 
1152             gs = ge + 1;
1153             selCtr++;
1154         }
1155 
1156         this.bsBuff = bsBuffShadow;
1157         this.bsLive = bsLiveShadow;
1158     }
1159 
1160     private void moveToFrontCodeAndSend() throws IOException {
1161         bsW(24, this.data.origPtr);
1162         generateMTFValues();
1163         sendMTFValues();
1164     }
1165 
1166     private void blockSort() {
1167         blockSorter.blockSort(data, last);
1168     }
1169 
1170     /*
1171      * Performs Move-To-Front on the Burrows-Wheeler transformed
1172      * buffer, storing the MTFed data in data.sfmap in RUNA/RUNB
1173      * run-length-encoded form.
1174      *
1175      * <p>Keeps track of byte frequencies in data.mtfFreq at the same time.</p>
1176      */
1177     private void generateMTFValues() {
1178         final int lastShadow = this.last;
1179         final Data dataShadow = this.data;
1180         final boolean[] inUse = dataShadow.inUse;
1181         final byte[] block = dataShadow.block;
1182         final int[] fmap = dataShadow.fmap;
1183         final char[] sfmap = dataShadow.sfmap;
1184         final int[] mtfFreq = dataShadow.mtfFreq;
1185         final byte[] unseqToSeq = dataShadow.unseqToSeq;
1186         final byte[] yy = dataShadow.generateMTFValues_yy;
1187 
1188         // make maps
1189         int nInUseShadow = 0;
1190         for (int i = 0; i < 256; i++) {
1191             if (inUse[i]) {
1192                 unseqToSeq[i] = (byte) nInUseShadow;
1193                 nInUseShadow++;
1194             }
1195         }
1196         this.nInUse = nInUseShadow;
1197 
1198         final int eob = nInUseShadow + 1;
1199 
1200         for (int i = eob; i >= 0; i--) {
1201             mtfFreq[i] = 0;
1202         }
1203 
1204         for (int i = nInUseShadow; --i >= 0;) {
1205             yy[i] = (byte) i;
1206         }
1207 
1208         int wr = 0;
1209         int zPend = 0;
1210 
1211         for (int i = 0; i <= lastShadow; i++) {
1212             final byte ll_i = unseqToSeq[block[fmap[i]] & 0xff];
1213             byte tmp = yy[0];
1214             int j = 0;
1215 
1216             while (ll_i != tmp) {
1217                 j++;
1218                 byte tmp2 = tmp;
1219                 tmp = yy[j];
1220                 yy[j] = tmp2;
1221             }
1222             yy[0] = tmp;
1223 
1224             if (j == 0) {
1225                 zPend++;
1226             } else {
1227                 if (zPend > 0) {
1228                     zPend--;
1229                     while (true) {
1230                         if ((zPend & 1) == 0) {
1231                             sfmap[wr] = RUNA;
1232                             wr++;
1233                             mtfFreq[RUNA]++;
1234                         } else {
1235                             sfmap[wr] = RUNB;
1236                             wr++;
1237                             mtfFreq[RUNB]++;
1238                         }
1239 
1240                         if (zPend >= 2) {
1241                             zPend = (zPend - 2) >> 1;
1242                         } else {
1243                             break;
1244                         }
1245                     }
1246                     zPend = 0;
1247                 }
1248                 sfmap[wr] = (char) (j + 1);
1249                 wr++;
1250                 mtfFreq[j + 1]++;
1251             }
1252         }
1253 
1254         if (zPend > 0) {
1255             zPend--;
1256             while (true) {
1257                 if ((zPend & 1) == 0) {
1258                     sfmap[wr] = RUNA;
1259                     wr++;
1260                     mtfFreq[RUNA]++;
1261                 } else {
1262                     sfmap[wr] = RUNB;
1263                     wr++;
1264                     mtfFreq[RUNB]++;
1265                 }
1266 
1267                 if (zPend >= 2) {
1268                     zPend = (zPend - 2) >> 1;
1269                 } else {
1270                     break;
1271                 }
1272             }
1273         }
1274 
1275         sfmap[wr] = (char) eob;
1276         mtfFreq[eob]++;
1277         this.nMTF = wr + 1;
1278     }
1279 
1280     static final class Data extends Object {
1281 
1282         // with blockSize 900k
1283         /* maps unsigned byte => "does it occur in block" */
1284         final boolean[] inUse = new boolean[256]; // 256 byte
1285         final byte[] unseqToSeq = new byte[256]; // 256 byte
1286         final int[] mtfFreq = new int[MAX_ALPHA_SIZE]; // 1032 byte
1287         final byte[] selector = new byte[MAX_SELECTORS]; // 18002 byte
1288         final byte[] selectorMtf = new byte[MAX_SELECTORS]; // 18002 byte
1289 
1290         final byte[] generateMTFValues_yy = new byte[256]; // 256 byte
1291         final byte[][] sendMTFValues_len = new byte[N_GROUPS][MAX_ALPHA_SIZE]; // 1548
1292         // byte
1293         final int[][] sendMTFValues_rfreq = new int[N_GROUPS][MAX_ALPHA_SIZE]; // 6192
1294         // byte
1295         final int[] sendMTFValues_fave = new int[N_GROUPS]; // 24 byte
1296         final short[] sendMTFValues_cost = new short[N_GROUPS]; // 12 byte
1297         final int[][] sendMTFValues_code = new int[N_GROUPS][MAX_ALPHA_SIZE]; // 6192
1298         // byte
1299         final byte[] sendMTFValues2_pos = new byte[N_GROUPS]; // 6 byte
1300         final boolean[] sentMTFValues4_inUse16 = new boolean[16]; // 16 byte
1301 
1302         final int[] heap = new int[MAX_ALPHA_SIZE + 2]; // 1040 byte
1303         final int[] weight = new int[MAX_ALPHA_SIZE * 2]; // 2064 byte
1304         final int[] parent = new int[MAX_ALPHA_SIZE * 2]; // 2064 byte
1305 
1306         // ------------
1307         // 333408 byte
1308 
1309         /* holds the RLEd block of original data starting at index 1.
1310          * After sorting the last byte added to the buffer is at index
1311          * 0. */
1312         final byte[] block; // 900021 byte
1313         /* maps index in Burrows-Wheeler transformed block => index of
1314          * byte in original block */
1315         final int[] fmap; // 3600000 byte
1316         final char[] sfmap; // 3600000 byte
1317         // ------------
1318         // 8433529 byte
1319         // ============
1320 
1321         /**
1322          * Index of original line in Burrows-Wheeler table.
1323          *
1324          * <p>This is the index in fmap that points to the last byte
1325          * of the original data.</p>
1326          */
1327         int origPtr;
1328 
1329         Data(int blockSize100k) {
1330             super();
1331 
1332             final int n = blockSize100k * BZip2Constants.BASEBLOCKSIZE;
1333             this.block = new byte[(n + 1 + NUM_OVERSHOOT_BYTES)];
1334             this.fmap = new int[n];
1335             this.sfmap = new char[2 * n];
1336         }
1337 
1338     }
1339 
1340 }