View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *   https://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing,
13   * software distributed under the License is distributed on an
14   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15   * KIND, either express or implied.  See the License for the
16   * specific language governing permissions and limitations
17   * under the License.
18   */
19  package org.apache.commons.compress.compressors.bzip2;
20  
21  import java.io.IOException;
22  import java.io.OutputStream;
23  import java.util.Arrays;
24  
25  import org.apache.commons.compress.compressors.CompressorOutputStream;
26  
27  /**
28   * An output stream that compresses into the BZip2 format into another stream.
29   *
30   * <p>
31   * The compression requires large amounts of memory. Thus you should call the {@link #close() close()} method as soon as possible, to force
32   * {@code BZip2CompressorOutputStream} to release the allocated memory.
33   * </p>
34   *
35   * <p>
36   * You can shrink the amount of allocated memory and maybe raise the compression speed by choosing a lower blocksize, which in turn may cause a lower
37   * compression ratio. You can avoid unnecessary memory allocation by avoiding using a blocksize which is bigger than the size of the input.
38   * </p>
39   *
40   * <p>
41   * You can compute the memory usage for compressing by the following formula:
42   * </p>
43   *
44   * <pre>
45   * &lt;code&gt;400k + (9 * blocksize)&lt;/code&gt;.
46   * </pre>
47   *
48   * <p>
49   * To get the memory required for decompression by {@link BZip2CompressorInputStream} use
50   * </p>
51   *
52   * <pre>
53   * &lt;code&gt;65k + (5 * blocksize)&lt;/code&gt;.
54   * </pre>
55   *
56   * <table style="width:100%" border="1">
57   * <caption>Memory usage by blocksize</caption>
58   * <tr>
59   * <th colspan="3">Memory usage by blocksize</th>
60   * </tr>
61   * <tr>
62   * <th style="text-align: right">Blocksize</th>
63   * <th style="text-align: right">Compression<br>
64   * memory usage</th>
65   * <th style="text-align: right">Decompression<br>
66   * memory usage</th>
67   * </tr>
68   * <tr>
69   * <td style="text-align: right">100k</td>
70   * <td style="text-align: right">1300k</td>
71   * <td style="text-align: right">565k</td>
72   * </tr>
73   * <tr>
74   * <td style="text-align: right">200k</td>
75   * <td style="text-align: right">2200k</td>
76   * <td style="text-align: right">1065k</td>
77   * </tr>
78   * <tr>
79   * <td style="text-align: right">300k</td>
80   * <td style="text-align: right">3100k</td>
81   * <td style="text-align: right">1565k</td>
82   * </tr>
83   * <tr>
84   * <td style="text-align: right">400k</td>
85   * <td style="text-align: right">4000k</td>
86   * <td style="text-align: right">2065k</td>
87   * </tr>
88   * <tr>
89   * <td style="text-align: right">500k</td>
90   * <td style="text-align: right">4900k</td>
91   * <td style="text-align: right">2565k</td>
92   * </tr>
93   * <tr>
94   * <td style="text-align: right">600k</td>
95   * <td style="text-align: right">5800k</td>
96   * <td style="text-align: right">3065k</td>
97   * </tr>
98   * <tr>
99   * <td style="text-align: right">700k</td>
100  * <td style="text-align: right">6700k</td>
101  * <td style="text-align: right">3565k</td>
102  * </tr>
103  * <tr>
104  * <td style="text-align: right">800k</td>
105  * <td style="text-align: right">7600k</td>
106  * <td style="text-align: right">4065k</td>
107  * </tr>
108  * <tr>
109  * <td style="text-align: right">900k</td>
110  * <td style="text-align: right">8500k</td>
111  * <td style="text-align: right">4565k</td>
112  * </tr>
113  * </table>
114  *
115  * <p>
116  * For decompression {@code BZip2CompressorInputStream} allocates less memory if the bzipped input is smaller than one block.
117  * </p>
118  *
119  * <p>
120  * Instances of this class are not threadsafe.
121  * </p>
122  *
123  * <p>
124  * TODO: Update to BZip2 1.0.1
125  * </p>
126  *
127  * @NotThreadSafe
128  */
129 public class BZip2CompressorOutputStream extends CompressorOutputStream<OutputStream> implements BZip2Constants {
130 
131     static final class Data {
132 
133         // with blockSize 900k
134         /* maps unsigned byte => "does it occur in block" */
135         final boolean[] inUse = new boolean[256]; // 256 byte
136         final byte[] unseqToSeq = new byte[256]; // 256 byte
137         final int[] mtfFreq = new int[MAX_ALPHA_SIZE]; // 1032 byte
138         final byte[] selector = new byte[MAX_SELECTORS]; // 18002 byte
139         final byte[] selectorMtf = new byte[MAX_SELECTORS]; // 18002 byte
140 
141         final byte[] generateMTFValues_yy = new byte[256]; // 256 byte
142         final byte[][] sendMTFValues_len = new byte[N_GROUPS][MAX_ALPHA_SIZE]; // 1548
143         // byte
144         final int[][] sendMTFValues_rfreq = new int[N_GROUPS][MAX_ALPHA_SIZE]; // 6192
145         // byte
146         final int[] sendMTFValues_fave = new int[N_GROUPS]; // 24 byte
147         final short[] sendMTFValues_cost = new short[N_GROUPS]; // 12 byte
148         final int[][] sendMTFValues_code = new int[N_GROUPS][MAX_ALPHA_SIZE]; // 6192
149         // byte
150         final byte[] sendMTFValues2_pos = new byte[N_GROUPS]; // 6 byte
151         final boolean[] sentMTFValues4_inUse16 = new boolean[16]; // 16 byte
152 
153         final int[] heap = new int[MAX_ALPHA_SIZE + 2]; // 1040 byte
154         final int[] weight = new int[MAX_ALPHA_SIZE * 2]; // 2064 byte
155         final int[] parent = new int[MAX_ALPHA_SIZE * 2]; // 2064 byte
156 
157         // ------------
158         // 333408 byte
159 
160         /*
161          * holds the RLEd block of original data starting at index 1. After sorting the last byte added to the buffer is at index 0.
162          */
163         final byte[] block; // 900021 byte
164         /*
165          * maps index in Burrows-Wheeler transformed block => index of byte in original block
166          */
167         final int[] fmap; // 3600000 byte
168         final char[] sfmap; // 3600000 byte
169         // ------------
170         // 8433529 byte
171         // ============
172 
173         /**
174          * Index of original line in Burrows-Wheeler table.
175          *
176          * <p>
177          * This is the index in fmap that points to the last byte of the original data.
178          * </p>
179          */
180         int origPtr;
181 
182         Data(final int blockSize100k) {
183             final int n = blockSize100k * BASEBLOCKSIZE;
184             this.block = new byte[n + 1 + NUM_OVERSHOOT_BYTES];
185             this.fmap = new int[n];
186             this.sfmap = new char[2 * n];
187         }
188 
189     }
190 
191     /**
192      * The minimum supported blocksize {@code  == 1}.
193      */
194     public static final int MIN_BLOCKSIZE = 1;
195 
196     /**
197      * The maximum supported blocksize {@code  == 9}.
198      */
199     public static final int MAX_BLOCKSIZE = 9;
200     private static final int GREATER_ICOST = 15;
201 
202     private static final int LESSER_ICOST = 0;
203 
204     /**
205      * Chooses a blocksize based on the given length of the data to compress.
206      *
207      * @return The blocksize, between {@link #MIN_BLOCKSIZE} and {@link #MAX_BLOCKSIZE} both inclusive. For a negative {@code inputLength} this method returns
208      *         {@code MAX_BLOCKSIZE} always.
209      *
210      * @param inputLength The length of the data which will be compressed by {@code BZip2CompressorOutputStream}.
211      */
212     public static int chooseBlockSize(final long inputLength) {
213         return inputLength > 0 ? (int) Math.min(inputLength / 132000 + 1, 9) : MAX_BLOCKSIZE;
214     }
215 
216     private static void hbAssignCodes(final int[] code, final byte[] length, final int minLen, final int maxLen, final int alphaSize) {
217         int vec = 0;
218         for (int n = minLen; n <= maxLen; n++) {
219             for (int i = 0; i < alphaSize; i++) {
220                 if ((length[i] & 0xff) == n) {
221                     code[i] = vec;
222                     vec++;
223                 }
224             }
225             vec <<= 1;
226         }
227     }
228 
229     private static void hbMakeCodeLengths(final byte[] len, final int[] freq, final Data dat, final int alphaSize, final int maxLen) {
230         /*
231          * Nodes and heap entries run from 1. Entry 0 for both the heap and nodes is a sentinel.
232          */
233         final int[] heap = dat.heap;
234         final int[] weight = dat.weight;
235         final int[] parent = dat.parent;
236 
237         for (int i = alphaSize; --i >= 0;) {
238             weight[i + 1] = (freq[i] == 0 ? 1 : freq[i]) << 8;
239         }
240 
241         for (boolean tooLong = true; tooLong;) {
242             tooLong = false;
243 
244             int nNodes = alphaSize;
245             int nHeap = 0;
246             heap[0] = 0;
247             weight[0] = 0;
248             parent[0] = -2;
249 
250             for (int i = 1; i <= alphaSize; i++) {
251                 parent[i] = -1;
252                 nHeap++;
253                 heap[nHeap] = i;
254 
255                 int zz = nHeap;
256                 final int tmp = heap[zz];
257                 while (weight[tmp] < weight[heap[zz >> 1]]) {
258                     heap[zz] = heap[zz >> 1];
259                     zz >>= 1;
260                 }
261                 heap[zz] = tmp;
262             }
263 
264             while (nHeap > 1) {
265                 final int n1 = heap[1];
266                 heap[1] = heap[nHeap];
267                 nHeap--;
268 
269                 int yy = 0;
270                 int zz = 1;
271                 int tmp = heap[1];
272 
273                 while (true) {
274                     yy = zz << 1;
275 
276                     if (yy > nHeap) {
277                         break;
278                     }
279 
280                     if (yy < nHeap && weight[heap[yy + 1]] < weight[heap[yy]]) {
281                         yy++;
282                     }
283 
284                     if (weight[tmp] < weight[heap[yy]]) {
285                         break;
286                     }
287 
288                     heap[zz] = heap[yy];
289                     zz = yy;
290                 }
291 
292                 heap[zz] = tmp;
293 
294                 final int n2 = heap[1];
295                 heap[1] = heap[nHeap];
296                 nHeap--;
297 
298                 yy = 0;
299                 zz = 1;
300                 tmp = heap[1];
301 
302                 while (true) {
303                     yy = zz << 1;
304 
305                     if (yy > nHeap) {
306                         break;
307                     }
308 
309                     if (yy < nHeap && weight[heap[yy + 1]] < weight[heap[yy]]) {
310                         yy++;
311                     }
312 
313                     if (weight[tmp] < weight[heap[yy]]) {
314                         break;
315                     }
316 
317                     heap[zz] = heap[yy];
318                     zz = yy;
319                 }
320 
321                 heap[zz] = tmp;
322                 nNodes++;
323                 parent[n1] = parent[n2] = nNodes;
324 
325                 final int weight_n1 = weight[n1];
326                 final int weight_n2 = weight[n2];
327                 weight[nNodes] = (weight_n1 & 0xffffff00) + (weight_n2 & 0xffffff00) | 1 + Math.max(weight_n1 & 0x000000ff, weight_n2 & 0x000000ff);
328 
329                 parent[nNodes] = -1;
330                 nHeap++;
331                 heap[nHeap] = nNodes;
332 
333                 tmp = 0;
334                 zz = nHeap;
335                 tmp = heap[zz];
336                 final int weight_tmp = weight[tmp];
337                 while (weight_tmp < weight[heap[zz >> 1]]) {
338                     heap[zz] = heap[zz >> 1];
339                     zz >>= 1;
340                 }
341                 heap[zz] = tmp;
342 
343             }
344 
345             for (int i = 1; i <= alphaSize; i++) {
346                 int j = 0;
347                 int k = i;
348 
349                 for (int parent_k; (parent_k = parent[k]) >= 0;) {
350                     k = parent_k;
351                     j++;
352                 }
353 
354                 len[i - 1] = (byte) j;
355                 if (j > maxLen) {
356                     tooLong = true;
357                 }
358             }
359 
360             if (tooLong) {
361                 for (int i = 1; i < alphaSize; i++) {
362                     int j = weight[i] >> 8;
363                     j = 1 + (j >> 1);
364                     weight[i] = j << 8;
365                 }
366             }
367         }
368     }
369 
370     /**
371      * Index of the last char in the block, so the block size == last + 1.
372      */
373     private int last;
374     /**
375      * Always: in the range 0 .. 9. The current block size is 100000 * this number.
376      */
377     private final int blockSize100k;
378 
379     private int bsBuff;
380 
381     private int bsLive;
382 
383     private final CRC crc = new CRC();
384     private int nInUse;
385 
386     private int nMTF;
387     private int currentChar = -1;
388     private int runLength;
389 
390     private int combinedCRC;
391 
392     private final int allowableBlockSize;
393     /**
394      * All memory intensive stuff.
395      */
396     private Data data;
397 
398     private BlockSort blockSorter;
399 
400     /**
401      * Constructs a new {@code BZip2CompressorOutputStream} with a blocksize of 900k.
402      *
403      * @param out the destination stream.
404      * @throws IOException          if an I/O error occurs in the specified stream.
405      * @throws NullPointerException if {@code out == null}.
406      */
407     public BZip2CompressorOutputStream(final OutputStream out) throws IOException {
408         this(out, MAX_BLOCKSIZE);
409     }
410 
411     /**
412      * Constructs a new {@code BZip2CompressorOutputStream} with specified blocksize.
413      *
414      * @param out       the destination stream.
415      * @param blockSize the blockSize as 100k units.
416      * @throws IOException              if an I/O error occurs in the specified stream.
417      * @throws IllegalArgumentException if {@code (blockSize &lt; 1) || (blockSize &gt; 9)}.
418      * @throws NullPointerException     if {@code out == null}.
419      * @see #MIN_BLOCKSIZE
420      * @see #MAX_BLOCKSIZE
421      */
422     public BZip2CompressorOutputStream(final OutputStream out, final int blockSize) throws IOException {
423         super(out);
424         if (blockSize < 1) {
425             throw new IllegalArgumentException("blockSize(" + blockSize + ") < 1");
426         }
427         if (blockSize > 9) {
428             throw new IllegalArgumentException("blockSize(" + blockSize + ") > 9");
429         }
430         this.blockSize100k = blockSize;
431         /* 20 is just a paranoia constant */
432         this.allowableBlockSize = this.blockSize100k * BASEBLOCKSIZE - 20;
433         init();
434     }
435 
436     private void blockSort() {
437         blockSorter.blockSort(data, last);
438     }
439 
440     private void bsFinishedWithStream() throws IOException {
441         while (this.bsLive > 0) {
442             final int ch = this.bsBuff >> 24;
443             this.out.write(ch); // write 8-bit
444             this.bsBuff <<= 8;
445             this.bsLive -= 8;
446         }
447     }
448 
449     private void bsPutInt(final int u) throws IOException {
450         bsW(8, u >> 24 & 0xff);
451         bsW(8, u >> 16 & 0xff);
452         bsW(8, u >> 8 & 0xff);
453         bsW(8, u & 0xff);
454     }
455 
456     private void bsPutUByte(final int c) throws IOException {
457         bsW(8, c);
458     }
459 
460     private void bsW(final int n, final int v) throws IOException {
461         final OutputStream outShadow = this.out;
462         int bsLiveShadow = this.bsLive;
463         int bsBuffShadow = this.bsBuff;
464 
465         while (bsLiveShadow >= 8) {
466             outShadow.write(bsBuffShadow >> 24); // write 8-bit
467             bsBuffShadow <<= 8;
468             bsLiveShadow -= 8;
469         }
470 
471         this.bsBuff = bsBuffShadow | v << 32 - bsLiveShadow - n;
472         this.bsLive = bsLiveShadow + n;
473     }
474 
475     @Override
476     public void close() throws IOException {
477         if (!isClosed()) {
478             try {
479                 finish();
480             } finally {
481                 super.close();
482             }
483         }
484     }
485 
486     private void endBlock() throws IOException {
487         final int blockCRC = this.crc.getValue();
488         this.combinedCRC = this.combinedCRC << 1 | this.combinedCRC >>> 31;
489         this.combinedCRC ^= blockCRC;
490 
491         // empty block at end of file
492         if (this.last == -1) {
493             return;
494         }
495 
496         /* sort the block and establish posn of original string */
497         blockSort();
498 
499         /*
500          * A 6-byte block header, the value chosen arbitrarily as 0x314159265359 :-). A 32 bit value does not really give a strong enough guarantee that the
501          * value will not appear by chance in the compressed data stream. Worst-case probability of this event, for a 900k block, is about 2.0e-3 for 32 bits,
502          * 1.0e-5 for 40 bits and 4.0e-8 for 48 bits. For a compressed file of size 100Gb -- about 100000 blocks -- only a 48-bit marker will do. NB: normal
503          * compression/ decompression doesn't rely on these statistical properties. They are only important when trying to recover blocks from damaged files.
504          */
505         bsPutUByte(0x31);
506         bsPutUByte(0x41);
507         bsPutUByte(0x59);
508         bsPutUByte(0x26);
509         bsPutUByte(0x53);
510         bsPutUByte(0x59);
511 
512         /* Now the block's CRC, so it is in a known place. */
513         bsPutInt(blockCRC);
514 
515         /* Now a single bit indicating no randomization. */
516         bsW(1, 0);
517 
518         /* Finally, block's contents proper. */
519         moveToFrontCodeAndSend();
520     }
521 
522     private void endCompression() throws IOException {
523         /*
524          * Now another magic 48-bit number, 0x177245385090, to indicate the end of the last block. (sqrt(pi), if you want to know. I did want to use e, but it
525          * contains too much repetition -- 27 18 28 18 28 46 -- for me to feel statistically comfortable. Call me paranoid.)
526          */
527         bsPutUByte(0x17);
528         bsPutUByte(0x72);
529         bsPutUByte(0x45);
530         bsPutUByte(0x38);
531         bsPutUByte(0x50);
532         bsPutUByte(0x90);
533 
534         bsPutInt(this.combinedCRC);
535         bsFinishedWithStream();
536     }
537 
538     @Override
539     public void finish() throws IOException {
540         if (!isClosed()) {
541             try {
542                 if (this.runLength > 0) {
543                     writeRun();
544                 }
545                 this.currentChar = -1;
546                 endBlock();
547                 endCompression();
548             } finally {
549                 this.blockSorter = null;
550                 this.data = null;
551             }
552         }
553     }
554 
555     @Override
556     public void flush() throws IOException {
557         if (out != null) {
558             super.flush();
559         }
560     }
561 
562     /*
563      * Performs Move-To-Front on the Burrows-Wheeler transformed buffer, storing the MTFed data in data.sfmap in RUNA/RUNB run-length-encoded form.
564      *
565      * <p>Keeps track of byte frequencies in data.mtfFreq at the same time.</p>
566      */
567     private void generateMTFValues() {
568         final int lastShadow = this.last;
569         final Data dataShadow = this.data;
570         final boolean[] inUse = dataShadow.inUse;
571         final byte[] block = dataShadow.block;
572         final int[] fmap = dataShadow.fmap;
573         final char[] sfmap = dataShadow.sfmap;
574         final int[] mtfFreq = dataShadow.mtfFreq;
575         final byte[] unseqToSeq = dataShadow.unseqToSeq;
576         final byte[] yy = dataShadow.generateMTFValues_yy;
577 
578         // make maps
579         int nInUseShadow = 0;
580         for (int i = 0; i < 256; i++) {
581             if (inUse[i]) {
582                 unseqToSeq[i] = (byte) nInUseShadow;
583                 nInUseShadow++;
584             }
585         }
586         this.nInUse = nInUseShadow;
587 
588         final int eob = nInUseShadow + 1;
589 
590         Arrays.fill(mtfFreq, 0, eob + 1, 0);
591 
592         for (int i = nInUseShadow; --i >= 0;) {
593             yy[i] = (byte) i;
594         }
595 
596         int wr = 0;
597         int zPend = 0;
598 
599         for (int i = 0; i <= lastShadow; i++) {
600             final byte ll_i = unseqToSeq[block[fmap[i]] & 0xff];
601             byte tmp = yy[0];
602             int j = 0;
603 
604             while (ll_i != tmp) {
605                 j++;
606                 final byte tmp2 = tmp;
607                 tmp = yy[j];
608                 yy[j] = tmp2;
609             }
610             yy[0] = tmp;
611 
612             if (j == 0) {
613                 zPend++;
614             } else {
615                 if (zPend > 0) {
616                     zPend--;
617                     while (true) {
618                         if ((zPend & 1) == 0) {
619                             sfmap[wr] = RUNA;
620                             wr++;
621                             mtfFreq[RUNA]++;
622                         } else {
623                             sfmap[wr] = RUNB;
624                             wr++;
625                             mtfFreq[RUNB]++;
626                         }
627 
628                         if (zPend < 2) {
629                             break;
630                         }
631                         zPend = zPend - 2 >> 1;
632                     }
633                     zPend = 0;
634                 }
635                 sfmap[wr] = (char) (j + 1);
636                 wr++;
637                 mtfFreq[j + 1]++;
638             }
639         }
640 
641         if (zPend > 0) {
642             zPend--;
643             while (true) {
644                 if ((zPend & 1) == 0) {
645                     sfmap[wr] = RUNA;
646                     wr++;
647                     mtfFreq[RUNA]++;
648                 } else {
649                     sfmap[wr] = RUNB;
650                     wr++;
651                     mtfFreq[RUNB]++;
652                 }
653 
654                 if (zPend < 2) {
655                     break;
656                 }
657                 zPend = zPend - 2 >> 1;
658             }
659         }
660 
661         sfmap[wr] = (char) eob;
662         mtfFreq[eob]++;
663         this.nMTF = wr + 1;
664     }
665 
666     /**
667      * Returns the blocksize parameter specified at construction time.
668      *
669      * @return the blocksize parameter specified at construction time
670      */
671     public final int getBlockSize() {
672         return this.blockSize100k;
673     }
674 
675     /**
676      * Writes magic bytes like BZ on the first position of the stream and bytes indicating the file-format, which is huffmanized, followed by a digit indicating
677      * blockSize100k.
678      *
679      * @throws IOException if the magic bytes could not been written
680      */
681     private void init() throws IOException {
682         bsPutUByte('B');
683         bsPutUByte('Z');
684 
685         this.data = new Data(this.blockSize100k);
686         this.blockSorter = new BlockSort(this.data);
687 
688         // huffmanized magic bytes
689         bsPutUByte('h');
690         bsPutUByte('0' + this.blockSize100k);
691 
692         this.combinedCRC = 0;
693         initBlock();
694     }
695 
696     private void initBlock() {
697         // blockNo++;
698         this.crc.reset();
699         this.last = -1;
700         // ch = 0;
701 
702         final boolean[] inUse = this.data.inUse;
703         for (int i = 256; --i >= 0;) {
704             inUse[i] = false;
705         }
706 
707     }
708 
709     private void moveToFrontCodeAndSend() throws IOException {
710         bsW(24, this.data.origPtr);
711         generateMTFValues();
712         sendMTFValues();
713     }
714 
715     private void sendMTFValues() throws IOException {
716         final byte[][] len = this.data.sendMTFValues_len;
717         final int alphaSize = this.nInUse + 2;
718 
719         for (int t = N_GROUPS; --t >= 0;) {
720             final byte[] len_t = len[t];
721             for (int v = alphaSize; --v >= 0;) {
722                 len_t[v] = GREATER_ICOST;
723             }
724         }
725 
726         /* Decide how many coding tables to use */
727         // assert (this.nMTF > 0) : this.nMTF;
728         final int nGroups = this.nMTF < 200 ? 2 : this.nMTF < 600 ? 3 : this.nMTF < 1200 ? 4 : this.nMTF < 2400 ? 5 : 6;
729 
730         /* Generate an initial set of coding tables */
731         sendMTFValues0(nGroups, alphaSize);
732 
733         /*
734          * Iterate up to N_ITERS times to improve the tables.
735          */
736         final int nSelectors = sendMTFValues1(nGroups, alphaSize);
737 
738         /* Compute MTF values for the selectors. */
739         sendMTFValues2(nGroups, nSelectors);
740 
741         /* Assign actual codes for the tables. */
742         sendMTFValues3(nGroups, alphaSize);
743 
744         /* Transmit the mapping table. */
745         sendMTFValues4();
746 
747         /* Now the selectors. */
748         sendMTFValues5(nGroups, nSelectors);
749 
750         /* Now the coding tables. */
751         sendMTFValues6(nGroups, alphaSize);
752 
753         /* And finally, the block data proper */
754         sendMTFValues7();
755     }
756 
757     private void sendMTFValues0(final int nGroups, final int alphaSize) {
758         final byte[][] len = this.data.sendMTFValues_len;
759         final int[] mtfFreq = this.data.mtfFreq;
760 
761         int remF = this.nMTF;
762         int gs = 0;
763 
764         for (int nPart = nGroups; nPart > 0; nPart--) {
765             final int tFreq = remF / nPart;
766             int ge = gs - 1;
767             int aFreq = 0;
768 
769             for (final int a = alphaSize - 1; aFreq < tFreq && ge < a;) {
770                 aFreq += mtfFreq[++ge];
771             }
772 
773             if (ge > gs && nPart != nGroups && nPart != 1 && (nGroups - nPart & 1) != 0) {
774                 aFreq -= mtfFreq[ge--];
775             }
776 
777             final byte[] len_np = len[nPart - 1];
778             for (int v = alphaSize; --v >= 0;) {
779                 if (v >= gs && v <= ge) {
780                     len_np[v] = LESSER_ICOST;
781                 } else {
782                     len_np[v] = GREATER_ICOST;
783                 }
784             }
785 
786             gs = ge + 1;
787             remF -= aFreq;
788         }
789     }
790 
791     private int sendMTFValues1(final int nGroups, final int alphaSize) {
792         final Data dataShadow = this.data;
793         final int[][] rfreq = dataShadow.sendMTFValues_rfreq;
794         final int[] fave = dataShadow.sendMTFValues_fave;
795         final short[] cost = dataShadow.sendMTFValues_cost;
796         final char[] sfmap = dataShadow.sfmap;
797         final byte[] selector = dataShadow.selector;
798         final byte[][] len = dataShadow.sendMTFValues_len;
799         final byte[] len_0 = len[0];
800         final byte[] len_1 = len[1];
801         final byte[] len_2 = len[2];
802         final byte[] len_3 = len[3];
803         final byte[] len_4 = len[4];
804         final byte[] len_5 = len[5];
805         final int nMTFShadow = this.nMTF;
806 
807         int nSelectors = 0;
808 
809         for (int iter = 0; iter < N_ITERS; iter++) {
810             for (int t = nGroups; --t >= 0;) {
811                 fave[t] = 0;
812                 final int[] rfreqt = rfreq[t];
813                 for (int i = alphaSize; --i >= 0;) {
814                     rfreqt[i] = 0;
815                 }
816             }
817 
818             nSelectors = 0;
819 
820             for (int gs = 0; gs < this.nMTF;) {
821                 // Set group start & end marks.
822 
823                 // Calculate the cost of this group as coded by each of the
824                 // coding tables.
825 
826                 final int ge = Math.min(gs + G_SIZE - 1, nMTFShadow - 1);
827 
828                 final byte mask = (byte) 0xff;
829                 if (nGroups == N_GROUPS) {
830                     // unrolled version of the else-block
831 
832                     short cost0 = 0;
833                     short cost1 = 0;
834                     short cost2 = 0;
835                     short cost3 = 0;
836                     short cost4 = 0;
837                     short cost5 = 0;
838 
839                     for (int i = gs; i <= ge; i++) {
840                         final int icv = sfmap[i];
841                         cost0 += (short) (len_0[icv] & mask);
842                         cost1 += (short) (len_1[icv] & mask);
843                         cost2 += (short) (len_2[icv] & mask);
844                         cost3 += (short) (len_3[icv] & mask);
845                         cost4 += (short) (len_4[icv] & mask);
846                         cost5 += (short) (len_5[icv] & mask);
847                     }
848 
849                     cost[0] = cost0;
850                     cost[1] = cost1;
851                     cost[2] = cost2;
852                     cost[3] = cost3;
853                     cost[4] = cost4;
854                     cost[5] = cost5;
855 
856                 } else {
857                     for (int t = nGroups; --t >= 0;) {
858                         cost[t] = 0;
859                     }
860 
861                     for (int i = gs; i <= ge; i++) {
862                         final int icv = sfmap[i];
863                         for (int t = nGroups; --t >= 0;) {
864                             cost[t] += (short) (len[t][icv] & mask);
865                         }
866                     }
867                 }
868 
869                 /*
870                  * Find the coding table which is best for this group, and record its identity in the selector table.
871                  */
872                 int bt = -1;
873                 for (int t = nGroups, bc = 999999999; --t >= 0;) {
874                     final int cost_t = cost[t];
875                     if (cost_t < bc) {
876                         bc = cost_t;
877                         bt = t;
878                     }
879                 }
880 
881                 fave[bt]++;
882                 selector[nSelectors] = (byte) bt;
883                 nSelectors++;
884 
885                 /*
886                  * Increment the symbol frequencies for the selected table.
887                  */
888                 final int[] rfreq_bt = rfreq[bt];
889                 for (int i = gs; i <= ge; i++) {
890                     rfreq_bt[sfmap[i]]++;
891                 }
892 
893                 gs = ge + 1;
894             }
895 
896             /*
897              * Recompute the tables based on the accumulated frequencies.
898              */
899             for (int t = 0; t < nGroups; t++) {
900                 hbMakeCodeLengths(len[t], rfreq[t], this.data, alphaSize, 20);
901             }
902         }
903 
904         return nSelectors;
905     }
906 
907     private void sendMTFValues2(final int nGroups, final int nSelectors) {
908         // assert (nGroups < 8) : nGroups;
909 
910         final Data dataShadow = this.data;
911         final byte[] pos = dataShadow.sendMTFValues2_pos;
912 
913         for (int i = nGroups; --i >= 0;) {
914             pos[i] = (byte) i;
915         }
916 
917         for (int i = 0; i < nSelectors; i++) {
918             final byte ll_i = dataShadow.selector[i];
919             byte tmp = pos[0];
920             int j = 0;
921 
922             while (ll_i != tmp) {
923                 j++;
924                 final byte tmp2 = tmp;
925                 tmp = pos[j];
926                 pos[j] = tmp2;
927             }
928 
929             pos[0] = tmp;
930             dataShadow.selectorMtf[i] = (byte) j;
931         }
932     }
933 
934     private void sendMTFValues3(final int nGroups, final int alphaSize) {
935         final int[][] code = this.data.sendMTFValues_code;
936         final byte[][] len = this.data.sendMTFValues_len;
937 
938         for (int t = 0; t < nGroups; t++) {
939             int minLen = 32;
940             int maxLen = 0;
941             final byte[] len_t = len[t];
942             for (int i = alphaSize; --i >= 0;) {
943                 final int l = len_t[i] & 0xff;
944                 if (l > maxLen) {
945                     maxLen = l;
946                 }
947                 if (l < minLen) {
948                     minLen = l;
949                 }
950             }
951 
952             // assert (maxLen <= 20) : maxLen;
953             // assert (minLen >= 1) : minLen;
954 
955             hbAssignCodes(code[t], len[t], minLen, maxLen, alphaSize);
956         }
957     }
958 
959     private void sendMTFValues4() throws IOException {
960         final boolean[] inUse = this.data.inUse;
961         final boolean[] inUse16 = this.data.sentMTFValues4_inUse16;
962 
963         for (int i = 16; --i >= 0;) {
964             inUse16[i] = false;
965             final int i16 = i * 16;
966             for (int j = 16; --j >= 0;) {
967                 if (inUse[i16 + j]) {
968                     inUse16[i] = true;
969                     break;
970                 }
971             }
972         }
973 
974         for (int i = 0; i < 16; i++) {
975             bsW(1, inUse16[i] ? 1 : 0);
976         }
977 
978         final OutputStream outShadow = this.out;
979         int bsLiveShadow = this.bsLive;
980         int bsBuffShadow = this.bsBuff;
981 
982         for (int i = 0; i < 16; i++) {
983             if (inUse16[i]) {
984                 final int i16 = i * 16;
985                 for (int j = 0; j < 16; j++) {
986                     // inlined: bsW(1, inUse[i16 + j] ? 1 : 0);
987                     while (bsLiveShadow >= 8) {
988                         outShadow.write(bsBuffShadow >> 24); // write 8-bit
989                         bsBuffShadow <<= 8;
990                         bsLiveShadow -= 8;
991                     }
992                     if (inUse[i16 + j]) {
993                         bsBuffShadow |= 1 << 32 - bsLiveShadow - 1;
994                     }
995                     bsLiveShadow++;
996                 }
997             }
998         }
999 
1000         this.bsBuff = bsBuffShadow;
1001         this.bsLive = bsLiveShadow;
1002     }
1003 
1004     private void sendMTFValues5(final int nGroups, final int nSelectors) throws IOException {
1005         bsW(3, nGroups);
1006         bsW(15, nSelectors);
1007 
1008         final OutputStream outShadow = this.out;
1009         final byte[] selectorMtf = this.data.selectorMtf;
1010 
1011         int bsLiveShadow = this.bsLive;
1012         int bsBuffShadow = this.bsBuff;
1013 
1014         for (int i = 0; i < nSelectors; i++) {
1015             for (int j = 0, hj = selectorMtf[i] & 0xff; j < hj; j++) {
1016                 // inlined: bsW(1, 1);
1017                 while (bsLiveShadow >= 8) {
1018                     outShadow.write(bsBuffShadow >> 24);
1019                     bsBuffShadow <<= 8;
1020                     bsLiveShadow -= 8;
1021                 }
1022                 bsBuffShadow |= 1 << 32 - bsLiveShadow - 1;
1023                 bsLiveShadow++;
1024             }
1025 
1026             // inlined: bsW(1, 0);
1027             while (bsLiveShadow >= 8) {
1028                 outShadow.write(bsBuffShadow >> 24);
1029                 bsBuffShadow <<= 8;
1030                 bsLiveShadow -= 8;
1031             }
1032             // bsBuffShadow |= 0 << (32 - bsLiveShadow - 1);
1033             bsLiveShadow++;
1034         }
1035 
1036         this.bsBuff = bsBuffShadow;
1037         this.bsLive = bsLiveShadow;
1038     }
1039 
1040     private void sendMTFValues6(final int nGroups, final int alphaSize) throws IOException {
1041         final byte[][] len = this.data.sendMTFValues_len;
1042         final OutputStream outShadow = this.out;
1043 
1044         int bsLiveShadow = this.bsLive;
1045         int bsBuffShadow = this.bsBuff;
1046 
1047         for (int t = 0; t < nGroups; t++) {
1048             final byte[] len_t = len[t];
1049             int curr = len_t[0] & 0xff;
1050 
1051             // inlined: bsW(5, curr);
1052             while (bsLiveShadow >= 8) {
1053                 outShadow.write(bsBuffShadow >> 24); // write 8-bit
1054                 bsBuffShadow <<= 8;
1055                 bsLiveShadow -= 8;
1056             }
1057             bsBuffShadow |= curr << 32 - bsLiveShadow - 5;
1058             bsLiveShadow += 5;
1059 
1060             for (int i = 0; i < alphaSize; i++) {
1061                 final int lti = len_t[i] & 0xff;
1062                 while (curr < lti) {
1063                     // inlined: bsW(2, 2);
1064                     while (bsLiveShadow >= 8) {
1065                         outShadow.write(bsBuffShadow >> 24); // write 8-bit
1066                         bsBuffShadow <<= 8;
1067                         bsLiveShadow -= 8;
1068                     }
1069                     bsBuffShadow |= 2 << 32 - bsLiveShadow - 2;
1070                     bsLiveShadow += 2;
1071 
1072                     curr++; /* 10 */
1073                 }
1074 
1075                 while (curr > lti) {
1076                     // inlined: bsW(2, 3);
1077                     while (bsLiveShadow >= 8) {
1078                         outShadow.write(bsBuffShadow >> 24); // write 8-bit
1079                         bsBuffShadow <<= 8;
1080                         bsLiveShadow -= 8;
1081                     }
1082                     bsBuffShadow |= 3 << 32 - bsLiveShadow - 2;
1083                     bsLiveShadow += 2;
1084 
1085                     curr--; /* 11 */
1086                 }
1087 
1088                 // inlined: bsW(1, 0);
1089                 while (bsLiveShadow >= 8) {
1090                     outShadow.write(bsBuffShadow >> 24); // write 8-bit
1091                     bsBuffShadow <<= 8;
1092                     bsLiveShadow -= 8;
1093                 }
1094                 // bsBuffShadow |= 0 << (32 - bsLiveShadow - 1);
1095                 bsLiveShadow++;
1096             }
1097         }
1098 
1099         this.bsBuff = bsBuffShadow;
1100         this.bsLive = bsLiveShadow;
1101     }
1102 
1103     private void sendMTFValues7() throws IOException {
1104         final Data dataShadow = this.data;
1105         final byte[][] len = dataShadow.sendMTFValues_len;
1106         final int[][] code = dataShadow.sendMTFValues_code;
1107         final OutputStream outShadow = this.out;
1108         final byte[] selector = dataShadow.selector;
1109         final char[] sfmap = dataShadow.sfmap;
1110         final int nMTFShadow = this.nMTF;
1111 
1112         int selCtr = 0;
1113 
1114         int bsLiveShadow = this.bsLive;
1115         int bsBuffShadow = this.bsBuff;
1116 
1117         for (int gs = 0; gs < nMTFShadow;) {
1118             final int ge = Math.min(gs + G_SIZE - 1, nMTFShadow - 1);
1119             final int selector_selCtr = selector[selCtr] & 0xff;
1120             final int[] code_selCtr = code[selector_selCtr];
1121             final byte[] len_selCtr = len[selector_selCtr];
1122 
1123             while (gs <= ge) {
1124                 final int sfmap_i = sfmap[gs];
1125 
1126                 //
1127                 // inlined: bsW(len_selCtr[sfmap_i] & 0xff,
1128                 // code_selCtr[sfmap_i]);
1129                 //
1130                 while (bsLiveShadow >= 8) {
1131                     outShadow.write(bsBuffShadow >> 24);
1132                     bsBuffShadow <<= 8;
1133                     bsLiveShadow -= 8;
1134                 }
1135                 final int n = len_selCtr[sfmap_i] & 0xFF;
1136                 bsBuffShadow |= code_selCtr[sfmap_i] << 32 - bsLiveShadow - n;
1137                 bsLiveShadow += n;
1138 
1139                 gs++;
1140             }
1141 
1142             gs = ge + 1;
1143             selCtr++;
1144         }
1145 
1146         this.bsBuff = bsBuffShadow;
1147         this.bsLive = bsLiveShadow;
1148     }
1149 
1150     @Override
1151     public void write(final byte[] buf, int offs, final int len) throws IOException {
1152         if (offs < 0) {
1153             throw new IndexOutOfBoundsException("offs(" + offs + ") < 0.");
1154         }
1155         if (len < 0) {
1156             throw new IndexOutOfBoundsException("len(" + len + ") < 0.");
1157         }
1158         if (offs + len > buf.length) {
1159             throw new IndexOutOfBoundsException("offs(" + offs + ") + len(" + len + ") > buf.length(" + buf.length + ").");
1160         }
1161         checkOpen();
1162         for (final int hi = offs + len; offs < hi;) {
1163             write0(buf[offs++]);
1164         }
1165     }
1166 
1167     @Override
1168     public void write(final int b) throws IOException {
1169         checkOpen();
1170         write0(b);
1171     }
1172 
1173     /**
1174      * Keeps track of the last bytes written and implicitly performs run-length encoding as the first step of the bzip2 algorithm.
1175      */
1176     private void write0(int b) throws IOException {
1177         if (this.currentChar != -1) {
1178             b &= 0xff;
1179             if (this.currentChar == b) {
1180                 if (++this.runLength > 254) {
1181                     writeRun();
1182                     this.currentChar = -1;
1183                     this.runLength = 0;
1184                 }
1185                 // else nothing to do
1186             } else {
1187                 writeRun();
1188                 this.runLength = 1;
1189                 this.currentChar = b;
1190             }
1191         } else {
1192             this.currentChar = b & 0xff;
1193             this.runLength++;
1194         }
1195     }
1196 
1197     /**
1198      * Writes the current byte to the buffer, run-length encoding it if it has been repeated at least four times (the first step RLEs sequences of four
1199      * identical bytes).
1200      *
1201      * <p>
1202      * Flushes the current block before writing data if it is full.
1203      * </p>
1204      *
1205      * <p>
1206      * "write to the buffer" means adding to data.buffer starting two steps "after" this.last - initially starting at index 1 (not 0) - and updating this.last
1207      * to point to the last index written minus 1.
1208      * </p>
1209      */
1210     private void writeRun() throws IOException {
1211         final int lastShadow = this.last;
1212 
1213         if (lastShadow < this.allowableBlockSize) {
1214             final int currentCharShadow = this.currentChar;
1215             final Data dataShadow = this.data;
1216             dataShadow.inUse[currentCharShadow] = true;
1217             final byte ch = (byte) currentCharShadow;
1218 
1219             int runLengthShadow = this.runLength;
1220             this.crc.update(currentCharShadow, runLengthShadow);
1221 
1222             switch (runLengthShadow) {
1223             case 1:
1224                 dataShadow.block[lastShadow + 2] = ch;
1225                 this.last = lastShadow + 1;
1226                 break;
1227 
1228             case 2:
1229                 dataShadow.block[lastShadow + 2] = ch;
1230                 dataShadow.block[lastShadow + 3] = ch;
1231                 this.last = lastShadow + 2;
1232                 break;
1233 
1234             case 3: {
1235                 final byte[] block = dataShadow.block;
1236                 block[lastShadow + 2] = ch;
1237                 block[lastShadow + 3] = ch;
1238                 block[lastShadow + 4] = ch;
1239                 this.last = lastShadow + 3;
1240             }
1241                 break;
1242 
1243             default: {
1244                 runLengthShadow -= 4;
1245                 dataShadow.inUse[runLengthShadow] = true;
1246                 final byte[] block = dataShadow.block;
1247                 block[lastShadow + 2] = ch;
1248                 block[lastShadow + 3] = ch;
1249                 block[lastShadow + 4] = ch;
1250                 block[lastShadow + 5] = ch;
1251                 block[lastShadow + 6] = (byte) runLengthShadow;
1252                 this.last = lastShadow + 5;
1253             }
1254                 break;
1255 
1256             }
1257         } else {
1258             endBlock();
1259             initBlock();
1260             writeRun();
1261         }
1262     }
1263 
1264 }