001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * https://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, 013 * software distributed under the License is distributed on an 014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 015 * KIND, either express or implied. See the License for the 016 * specific language governing permissions and limitations 017 * under the License. 018 */ 019package org.apache.commons.compress.compressors.bzip2; 020 021import java.io.IOException; 022import java.io.OutputStream; 023import java.util.Arrays; 024 025import org.apache.commons.compress.compressors.CompressorOutputStream; 026 027/** 028 * An output stream that compresses into the BZip2 format into another stream. 029 * 030 * <p> 031 * The compression requires large amounts of memory. Thus you should call the {@link #close() close()} method as soon as possible, to force 032 * {@code BZip2CompressorOutputStream} to release the allocated memory. 033 * </p> 034 * 035 * <p> 036 * You can shrink the amount of allocated memory and maybe raise the compression speed by choosing a lower blocksize, which in turn may cause a lower 037 * compression ratio. You can avoid unnecessary memory allocation by avoiding using a blocksize which is bigger than the size of the input. 038 * </p> 039 * 040 * <p> 041 * You can compute the memory usage for compressing by the following formula: 042 * </p> 043 * 044 * <pre> 045 * <code>400k + (9 * blocksize)</code>. 046 * </pre> 047 * 048 * <p> 049 * To get the memory required for decompression by {@link BZip2CompressorInputStream} use 050 * </p> 051 * 052 * <pre> 053 * <code>65k + (5 * blocksize)</code>. 054 * </pre> 055 * 056 * <table style="width:100%" border="1"> 057 * <caption>Memory usage by blocksize</caption> 058 * <tr> 059 * <th colspan="3">Memory usage by blocksize</th> 060 * </tr> 061 * <tr> 062 * <th style="text-align: right">Blocksize</th> 063 * <th style="text-align: right">Compression<br> 064 * memory usage</th> 065 * <th style="text-align: right">Decompression<br> 066 * memory usage</th> 067 * </tr> 068 * <tr> 069 * <td style="text-align: right">100k</td> 070 * <td style="text-align: right">1300k</td> 071 * <td style="text-align: right">565k</td> 072 * </tr> 073 * <tr> 074 * <td style="text-align: right">200k</td> 075 * <td style="text-align: right">2200k</td> 076 * <td style="text-align: right">1065k</td> 077 * </tr> 078 * <tr> 079 * <td style="text-align: right">300k</td> 080 * <td style="text-align: right">3100k</td> 081 * <td style="text-align: right">1565k</td> 082 * </tr> 083 * <tr> 084 * <td style="text-align: right">400k</td> 085 * <td style="text-align: right">4000k</td> 086 * <td style="text-align: right">2065k</td> 087 * </tr> 088 * <tr> 089 * <td style="text-align: right">500k</td> 090 * <td style="text-align: right">4900k</td> 091 * <td style="text-align: right">2565k</td> 092 * </tr> 093 * <tr> 094 * <td style="text-align: right">600k</td> 095 * <td style="text-align: right">5800k</td> 096 * <td style="text-align: right">3065k</td> 097 * </tr> 098 * <tr> 099 * <td style="text-align: right">700k</td> 100 * <td style="text-align: right">6700k</td> 101 * <td style="text-align: right">3565k</td> 102 * </tr> 103 * <tr> 104 * <td style="text-align: right">800k</td> 105 * <td style="text-align: right">7600k</td> 106 * <td style="text-align: right">4065k</td> 107 * </tr> 108 * <tr> 109 * <td style="text-align: right">900k</td> 110 * <td style="text-align: right">8500k</td> 111 * <td style="text-align: right">4565k</td> 112 * </tr> 113 * </table> 114 * 115 * <p> 116 * For decompression {@code BZip2CompressorInputStream} allocates less memory if the bzipped input is smaller than one block. 117 * </p> 118 * 119 * <p> 120 * Instances of this class are not threadsafe. 121 * </p> 122 * 123 * <p> 124 * TODO: Update to BZip2 1.0.1 125 * </p> 126 * 127 * @NotThreadSafe 128 */ 129public class BZip2CompressorOutputStream extends CompressorOutputStream<OutputStream> implements BZip2Constants { 130 131 static final class Data { 132 133 // with blockSize 900k 134 /* maps unsigned byte => "does it occur in block" */ 135 final boolean[] inUse = new boolean[256]; // 256 byte 136 final byte[] unseqToSeq = new byte[256]; // 256 byte 137 final int[] mtfFreq = new int[MAX_ALPHA_SIZE]; // 1032 byte 138 final byte[] selector = new byte[MAX_SELECTORS]; // 18002 byte 139 final byte[] selectorMtf = new byte[MAX_SELECTORS]; // 18002 byte 140 141 final byte[] generateMTFValues_yy = new byte[256]; // 256 byte 142 final byte[][] sendMTFValues_len = new byte[N_GROUPS][MAX_ALPHA_SIZE]; // 1548 143 // byte 144 final int[][] sendMTFValues_rfreq = new int[N_GROUPS][MAX_ALPHA_SIZE]; // 6192 145 // byte 146 final int[] sendMTFValues_fave = new int[N_GROUPS]; // 24 byte 147 final short[] sendMTFValues_cost = new short[N_GROUPS]; // 12 byte 148 final int[][] sendMTFValues_code = new int[N_GROUPS][MAX_ALPHA_SIZE]; // 6192 149 // byte 150 final byte[] sendMTFValues2_pos = new byte[N_GROUPS]; // 6 byte 151 final boolean[] sentMTFValues4_inUse16 = new boolean[16]; // 16 byte 152 153 final int[] heap = new int[MAX_ALPHA_SIZE + 2]; // 1040 byte 154 final int[] weight = new int[MAX_ALPHA_SIZE * 2]; // 2064 byte 155 final int[] parent = new int[MAX_ALPHA_SIZE * 2]; // 2064 byte 156 157 // ------------ 158 // 333408 byte 159 160 /* 161 * holds the RLEd block of original data starting at index 1. After sorting the last byte added to the buffer is at index 0. 162 */ 163 final byte[] block; // 900021 byte 164 /* 165 * maps index in Burrows-Wheeler transformed block => index of byte in original block 166 */ 167 final int[] fmap; // 3600000 byte 168 final char[] sfmap; // 3600000 byte 169 // ------------ 170 // 8433529 byte 171 // ============ 172 173 /** 174 * Index of original line in Burrows-Wheeler table. 175 * 176 * <p> 177 * This is the index in fmap that points to the last byte of the original data. 178 * </p> 179 */ 180 int origPtr; 181 182 Data(final int blockSize100k) { 183 final int n = blockSize100k * BASEBLOCKSIZE; 184 this.block = new byte[n + 1 + NUM_OVERSHOOT_BYTES]; 185 this.fmap = new int[n]; 186 this.sfmap = new char[2 * n]; 187 } 188 189 } 190 191 /** 192 * The minimum supported blocksize {@code == 1}. 193 */ 194 public static final int MIN_BLOCKSIZE = 1; 195 196 /** 197 * The maximum supported blocksize {@code == 9}. 198 */ 199 public static final int MAX_BLOCKSIZE = 9; 200 private static final int GREATER_ICOST = 15; 201 202 private static final int LESSER_ICOST = 0; 203 204 /** 205 * Chooses a blocksize based on the given length of the data to compress. 206 * 207 * @return The blocksize, between {@link #MIN_BLOCKSIZE} and {@link #MAX_BLOCKSIZE} both inclusive. For a negative {@code inputLength} this method returns 208 * {@code MAX_BLOCKSIZE} always. 209 * 210 * @param inputLength The length of the data which will be compressed by {@code BZip2CompressorOutputStream}. 211 */ 212 public static int chooseBlockSize(final long inputLength) { 213 return inputLength > 0 ? (int) Math.min(inputLength / 132000 + 1, 9) : MAX_BLOCKSIZE; 214 } 215 216 private static void hbAssignCodes(final int[] code, final byte[] length, final int minLen, final int maxLen, final int alphaSize) { 217 int vec = 0; 218 for (int n = minLen; n <= maxLen; n++) { 219 for (int i = 0; i < alphaSize; i++) { 220 if ((length[i] & 0xff) == n) { 221 code[i] = vec; 222 vec++; 223 } 224 } 225 vec <<= 1; 226 } 227 } 228 229 private static void hbMakeCodeLengths(final byte[] len, final int[] freq, final Data dat, final int alphaSize, final int maxLen) { 230 /* 231 * Nodes and heap entries run from 1. Entry 0 for both the heap and nodes is a sentinel. 232 */ 233 final int[] heap = dat.heap; 234 final int[] weight = dat.weight; 235 final int[] parent = dat.parent; 236 237 for (int i = alphaSize; --i >= 0;) { 238 weight[i + 1] = (freq[i] == 0 ? 1 : freq[i]) << 8; 239 } 240 241 for (boolean tooLong = true; tooLong;) { 242 tooLong = false; 243 244 int nNodes = alphaSize; 245 int nHeap = 0; 246 heap[0] = 0; 247 weight[0] = 0; 248 parent[0] = -2; 249 250 for (int i = 1; i <= alphaSize; i++) { 251 parent[i] = -1; 252 nHeap++; 253 heap[nHeap] = i; 254 255 int zz = nHeap; 256 final int tmp = heap[zz]; 257 while (weight[tmp] < weight[heap[zz >> 1]]) { 258 heap[zz] = heap[zz >> 1]; 259 zz >>= 1; 260 } 261 heap[zz] = tmp; 262 } 263 264 while (nHeap > 1) { 265 final int n1 = heap[1]; 266 heap[1] = heap[nHeap]; 267 nHeap--; 268 269 int yy = 0; 270 int zz = 1; 271 int tmp = heap[1]; 272 273 while (true) { 274 yy = zz << 1; 275 276 if (yy > nHeap) { 277 break; 278 } 279 280 if (yy < nHeap && weight[heap[yy + 1]] < weight[heap[yy]]) { 281 yy++; 282 } 283 284 if (weight[tmp] < weight[heap[yy]]) { 285 break; 286 } 287 288 heap[zz] = heap[yy]; 289 zz = yy; 290 } 291 292 heap[zz] = tmp; 293 294 final int n2 = heap[1]; 295 heap[1] = heap[nHeap]; 296 nHeap--; 297 298 yy = 0; 299 zz = 1; 300 tmp = heap[1]; 301 302 while (true) { 303 yy = zz << 1; 304 305 if (yy > nHeap) { 306 break; 307 } 308 309 if (yy < nHeap && weight[heap[yy + 1]] < weight[heap[yy]]) { 310 yy++; 311 } 312 313 if (weight[tmp] < weight[heap[yy]]) { 314 break; 315 } 316 317 heap[zz] = heap[yy]; 318 zz = yy; 319 } 320 321 heap[zz] = tmp; 322 nNodes++; 323 parent[n1] = parent[n2] = nNodes; 324 325 final int weight_n1 = weight[n1]; 326 final int weight_n2 = weight[n2]; 327 weight[nNodes] = (weight_n1 & 0xffffff00) + (weight_n2 & 0xffffff00) | 1 + Math.max(weight_n1 & 0x000000ff, weight_n2 & 0x000000ff); 328 329 parent[nNodes] = -1; 330 nHeap++; 331 heap[nHeap] = nNodes; 332 333 tmp = 0; 334 zz = nHeap; 335 tmp = heap[zz]; 336 final int weight_tmp = weight[tmp]; 337 while (weight_tmp < weight[heap[zz >> 1]]) { 338 heap[zz] = heap[zz >> 1]; 339 zz >>= 1; 340 } 341 heap[zz] = tmp; 342 343 } 344 345 for (int i = 1; i <= alphaSize; i++) { 346 int j = 0; 347 int k = i; 348 349 for (int parent_k; (parent_k = parent[k]) >= 0;) { 350 k = parent_k; 351 j++; 352 } 353 354 len[i - 1] = (byte) j; 355 if (j > maxLen) { 356 tooLong = true; 357 } 358 } 359 360 if (tooLong) { 361 for (int i = 1; i < alphaSize; i++) { 362 int j = weight[i] >> 8; 363 j = 1 + (j >> 1); 364 weight[i] = j << 8; 365 } 366 } 367 } 368 } 369 370 /** 371 * Index of the last char in the block, so the block size == last + 1. 372 */ 373 private int last; 374 /** 375 * Always: in the range 0 .. 9. The current block size is 100000 * this number. 376 */ 377 private final int blockSize100k; 378 379 private int bsBuff; 380 381 private int bsLive; 382 383 private final CRC crc = new CRC(); 384 private int nInUse; 385 386 private int nMTF; 387 private int currentChar = -1; 388 private int runLength; 389 390 private int combinedCRC; 391 392 private final int allowableBlockSize; 393 /** 394 * All memory intensive stuff. 395 */ 396 private Data data; 397 398 private BlockSort blockSorter; 399 400 /** 401 * Constructs a new {@code BZip2CompressorOutputStream} with a blocksize of 900k. 402 * 403 * @param out the destination stream. 404 * @throws IOException if an I/O error occurs in the specified stream. 405 * @throws NullPointerException if {@code out == null}. 406 */ 407 public BZip2CompressorOutputStream(final OutputStream out) throws IOException { 408 this(out, MAX_BLOCKSIZE); 409 } 410 411 /** 412 * Constructs a new {@code BZip2CompressorOutputStream} with specified blocksize. 413 * 414 * @param out the destination stream. 415 * @param blockSize the blockSize as 100k units. 416 * @throws IOException if an I/O error occurs in the specified stream. 417 * @throws IllegalArgumentException if {@code (blockSize < 1) || (blockSize > 9)}. 418 * @throws NullPointerException if {@code out == null}. 419 * @see #MIN_BLOCKSIZE 420 * @see #MAX_BLOCKSIZE 421 */ 422 public BZip2CompressorOutputStream(final OutputStream out, final int blockSize) throws IOException { 423 super(out); 424 if (blockSize < 1) { 425 throw new IllegalArgumentException("blockSize(" + blockSize + ") < 1"); 426 } 427 if (blockSize > 9) { 428 throw new IllegalArgumentException("blockSize(" + blockSize + ") > 9"); 429 } 430 this.blockSize100k = blockSize; 431 /* 20 is just a paranoia constant */ 432 this.allowableBlockSize = this.blockSize100k * BASEBLOCKSIZE - 20; 433 init(); 434 } 435 436 private void blockSort() { 437 blockSorter.blockSort(data, last); 438 } 439 440 private void bsFinishedWithStream() throws IOException { 441 while (this.bsLive > 0) { 442 final int ch = this.bsBuff >> 24; 443 this.out.write(ch); // write 8-bit 444 this.bsBuff <<= 8; 445 this.bsLive -= 8; 446 } 447 } 448 449 private void bsPutInt(final int u) throws IOException { 450 bsW(8, u >> 24 & 0xff); 451 bsW(8, u >> 16 & 0xff); 452 bsW(8, u >> 8 & 0xff); 453 bsW(8, u & 0xff); 454 } 455 456 private void bsPutUByte(final int c) throws IOException { 457 bsW(8, c); 458 } 459 460 private void bsW(final int n, final int v) throws IOException { 461 final OutputStream outShadow = this.out; 462 int bsLiveShadow = this.bsLive; 463 int bsBuffShadow = this.bsBuff; 464 465 while (bsLiveShadow >= 8) { 466 outShadow.write(bsBuffShadow >> 24); // write 8-bit 467 bsBuffShadow <<= 8; 468 bsLiveShadow -= 8; 469 } 470 471 this.bsBuff = bsBuffShadow | v << 32 - bsLiveShadow - n; 472 this.bsLive = bsLiveShadow + n; 473 } 474 475 @Override 476 public void close() throws IOException { 477 if (!isClosed()) { 478 try { 479 finish(); 480 } finally { 481 super.close(); 482 } 483 } 484 } 485 486 private void endBlock() throws IOException { 487 final int blockCRC = this.crc.getValue(); 488 this.combinedCRC = this.combinedCRC << 1 | this.combinedCRC >>> 31; 489 this.combinedCRC ^= blockCRC; 490 491 // empty block at end of file 492 if (this.last == -1) { 493 return; 494 } 495 496 /* sort the block and establish posn of original string */ 497 blockSort(); 498 499 /* 500 * A 6-byte block header, the value chosen arbitrarily as 0x314159265359 :-). A 32 bit value does not really give a strong enough guarantee that the 501 * value will not appear by chance in the compressed data stream. Worst-case probability of this event, for a 900k block, is about 2.0e-3 for 32 bits, 502 * 1.0e-5 for 40 bits and 4.0e-8 for 48 bits. For a compressed file of size 100Gb -- about 100000 blocks -- only a 48-bit marker will do. NB: normal 503 * compression/ decompression doesn't rely on these statistical properties. They are only important when trying to recover blocks from damaged files. 504 */ 505 bsPutUByte(0x31); 506 bsPutUByte(0x41); 507 bsPutUByte(0x59); 508 bsPutUByte(0x26); 509 bsPutUByte(0x53); 510 bsPutUByte(0x59); 511 512 /* Now the block's CRC, so it is in a known place. */ 513 bsPutInt(blockCRC); 514 515 /* Now a single bit indicating no randomization. */ 516 bsW(1, 0); 517 518 /* Finally, block's contents proper. */ 519 moveToFrontCodeAndSend(); 520 } 521 522 private void endCompression() throws IOException { 523 /* 524 * Now another magic 48-bit number, 0x177245385090, to indicate the end of the last block. (sqrt(pi), if you want to know. I did want to use e, but it 525 * contains too much repetition -- 27 18 28 18 28 46 -- for me to feel statistically comfortable. Call me paranoid.) 526 */ 527 bsPutUByte(0x17); 528 bsPutUByte(0x72); 529 bsPutUByte(0x45); 530 bsPutUByte(0x38); 531 bsPutUByte(0x50); 532 bsPutUByte(0x90); 533 534 bsPutInt(this.combinedCRC); 535 bsFinishedWithStream(); 536 } 537 538 @Override 539 public void finish() throws IOException { 540 if (!isClosed()) { 541 try { 542 if (this.runLength > 0) { 543 writeRun(); 544 } 545 this.currentChar = -1; 546 endBlock(); 547 endCompression(); 548 } finally { 549 this.blockSorter = null; 550 this.data = null; 551 } 552 } 553 } 554 555 @Override 556 public void flush() throws IOException { 557 if (out != null) { 558 super.flush(); 559 } 560 } 561 562 /* 563 * Performs Move-To-Front on the Burrows-Wheeler transformed buffer, storing the MTFed data in data.sfmap in RUNA/RUNB run-length-encoded form. 564 * 565 * <p>Keeps track of byte frequencies in data.mtfFreq at the same time.</p> 566 */ 567 private void generateMTFValues() { 568 final int lastShadow = this.last; 569 final Data dataShadow = this.data; 570 final boolean[] inUse = dataShadow.inUse; 571 final byte[] block = dataShadow.block; 572 final int[] fmap = dataShadow.fmap; 573 final char[] sfmap = dataShadow.sfmap; 574 final int[] mtfFreq = dataShadow.mtfFreq; 575 final byte[] unseqToSeq = dataShadow.unseqToSeq; 576 final byte[] yy = dataShadow.generateMTFValues_yy; 577 578 // make maps 579 int nInUseShadow = 0; 580 for (int i = 0; i < 256; i++) { 581 if (inUse[i]) { 582 unseqToSeq[i] = (byte) nInUseShadow; 583 nInUseShadow++; 584 } 585 } 586 this.nInUse = nInUseShadow; 587 588 final int eob = nInUseShadow + 1; 589 590 Arrays.fill(mtfFreq, 0, eob + 1, 0); 591 592 for (int i = nInUseShadow; --i >= 0;) { 593 yy[i] = (byte) i; 594 } 595 596 int wr = 0; 597 int zPend = 0; 598 599 for (int i = 0; i <= lastShadow; i++) { 600 final byte ll_i = unseqToSeq[block[fmap[i]] & 0xff]; 601 byte tmp = yy[0]; 602 int j = 0; 603 604 while (ll_i != tmp) { 605 j++; 606 final byte tmp2 = tmp; 607 tmp = yy[j]; 608 yy[j] = tmp2; 609 } 610 yy[0] = tmp; 611 612 if (j == 0) { 613 zPend++; 614 } else { 615 if (zPend > 0) { 616 zPend--; 617 while (true) { 618 if ((zPend & 1) == 0) { 619 sfmap[wr] = RUNA; 620 wr++; 621 mtfFreq[RUNA]++; 622 } else { 623 sfmap[wr] = RUNB; 624 wr++; 625 mtfFreq[RUNB]++; 626 } 627 628 if (zPend < 2) { 629 break; 630 } 631 zPend = zPend - 2 >> 1; 632 } 633 zPend = 0; 634 } 635 sfmap[wr] = (char) (j + 1); 636 wr++; 637 mtfFreq[j + 1]++; 638 } 639 } 640 641 if (zPend > 0) { 642 zPend--; 643 while (true) { 644 if ((zPend & 1) == 0) { 645 sfmap[wr] = RUNA; 646 wr++; 647 mtfFreq[RUNA]++; 648 } else { 649 sfmap[wr] = RUNB; 650 wr++; 651 mtfFreq[RUNB]++; 652 } 653 654 if (zPend < 2) { 655 break; 656 } 657 zPend = zPend - 2 >> 1; 658 } 659 } 660 661 sfmap[wr] = (char) eob; 662 mtfFreq[eob]++; 663 this.nMTF = wr + 1; 664 } 665 666 /** 667 * Returns the blocksize parameter specified at construction time. 668 * 669 * @return the blocksize parameter specified at construction time 670 */ 671 public final int getBlockSize() { 672 return this.blockSize100k; 673 } 674 675 /** 676 * Writes magic bytes like BZ on the first position of the stream and bytes indicating the file-format, which is huffmanized, followed by a digit indicating 677 * blockSize100k. 678 * 679 * @throws IOException if the magic bytes could not been written 680 */ 681 private void init() throws IOException { 682 bsPutUByte('B'); 683 bsPutUByte('Z'); 684 685 this.data = new Data(this.blockSize100k); 686 this.blockSorter = new BlockSort(this.data); 687 688 // huffmanized magic bytes 689 bsPutUByte('h'); 690 bsPutUByte('0' + this.blockSize100k); 691 692 this.combinedCRC = 0; 693 initBlock(); 694 } 695 696 private void initBlock() { 697 // blockNo++; 698 this.crc.reset(); 699 this.last = -1; 700 // ch = 0; 701 702 final boolean[] inUse = this.data.inUse; 703 for (int i = 256; --i >= 0;) { 704 inUse[i] = false; 705 } 706 707 } 708 709 private void moveToFrontCodeAndSend() throws IOException { 710 bsW(24, this.data.origPtr); 711 generateMTFValues(); 712 sendMTFValues(); 713 } 714 715 private void sendMTFValues() throws IOException { 716 final byte[][] len = this.data.sendMTFValues_len; 717 final int alphaSize = this.nInUse + 2; 718 719 for (int t = N_GROUPS; --t >= 0;) { 720 final byte[] len_t = len[t]; 721 for (int v = alphaSize; --v >= 0;) { 722 len_t[v] = GREATER_ICOST; 723 } 724 } 725 726 /* Decide how many coding tables to use */ 727 // assert (this.nMTF > 0) : this.nMTF; 728 final int nGroups = this.nMTF < 200 ? 2 : this.nMTF < 600 ? 3 : this.nMTF < 1200 ? 4 : this.nMTF < 2400 ? 5 : 6; 729 730 /* Generate an initial set of coding tables */ 731 sendMTFValues0(nGroups, alphaSize); 732 733 /* 734 * Iterate up to N_ITERS times to improve the tables. 735 */ 736 final int nSelectors = sendMTFValues1(nGroups, alphaSize); 737 738 /* Compute MTF values for the selectors. */ 739 sendMTFValues2(nGroups, nSelectors); 740 741 /* Assign actual codes for the tables. */ 742 sendMTFValues3(nGroups, alphaSize); 743 744 /* Transmit the mapping table. */ 745 sendMTFValues4(); 746 747 /* Now the selectors. */ 748 sendMTFValues5(nGroups, nSelectors); 749 750 /* Now the coding tables. */ 751 sendMTFValues6(nGroups, alphaSize); 752 753 /* And finally, the block data proper */ 754 sendMTFValues7(); 755 } 756 757 private void sendMTFValues0(final int nGroups, final int alphaSize) { 758 final byte[][] len = this.data.sendMTFValues_len; 759 final int[] mtfFreq = this.data.mtfFreq; 760 761 int remF = this.nMTF; 762 int gs = 0; 763 764 for (int nPart = nGroups; nPart > 0; nPart--) { 765 final int tFreq = remF / nPart; 766 int ge = gs - 1; 767 int aFreq = 0; 768 769 for (final int a = alphaSize - 1; aFreq < tFreq && ge < a;) { 770 aFreq += mtfFreq[++ge]; 771 } 772 773 if (ge > gs && nPart != nGroups && nPart != 1 && (nGroups - nPart & 1) != 0) { 774 aFreq -= mtfFreq[ge--]; 775 } 776 777 final byte[] len_np = len[nPart - 1]; 778 for (int v = alphaSize; --v >= 0;) { 779 if (v >= gs && v <= ge) { 780 len_np[v] = LESSER_ICOST; 781 } else { 782 len_np[v] = GREATER_ICOST; 783 } 784 } 785 786 gs = ge + 1; 787 remF -= aFreq; 788 } 789 } 790 791 private int sendMTFValues1(final int nGroups, final int alphaSize) { 792 final Data dataShadow = this.data; 793 final int[][] rfreq = dataShadow.sendMTFValues_rfreq; 794 final int[] fave = dataShadow.sendMTFValues_fave; 795 final short[] cost = dataShadow.sendMTFValues_cost; 796 final char[] sfmap = dataShadow.sfmap; 797 final byte[] selector = dataShadow.selector; 798 final byte[][] len = dataShadow.sendMTFValues_len; 799 final byte[] len_0 = len[0]; 800 final byte[] len_1 = len[1]; 801 final byte[] len_2 = len[2]; 802 final byte[] len_3 = len[3]; 803 final byte[] len_4 = len[4]; 804 final byte[] len_5 = len[5]; 805 final int nMTFShadow = this.nMTF; 806 807 int nSelectors = 0; 808 809 for (int iter = 0; iter < N_ITERS; iter++) { 810 for (int t = nGroups; --t >= 0;) { 811 fave[t] = 0; 812 final int[] rfreqt = rfreq[t]; 813 for (int i = alphaSize; --i >= 0;) { 814 rfreqt[i] = 0; 815 } 816 } 817 818 nSelectors = 0; 819 820 for (int gs = 0; gs < this.nMTF;) { 821 // Set group start & end marks. 822 823 // Calculate the cost of this group as coded by each of the 824 // coding tables. 825 826 final int ge = Math.min(gs + G_SIZE - 1, nMTFShadow - 1); 827 828 final byte mask = (byte) 0xff; 829 if (nGroups == N_GROUPS) { 830 // unrolled version of the else-block 831 832 short cost0 = 0; 833 short cost1 = 0; 834 short cost2 = 0; 835 short cost3 = 0; 836 short cost4 = 0; 837 short cost5 = 0; 838 839 for (int i = gs; i <= ge; i++) { 840 final int icv = sfmap[i]; 841 cost0 += (short) (len_0[icv] & mask); 842 cost1 += (short) (len_1[icv] & mask); 843 cost2 += (short) (len_2[icv] & mask); 844 cost3 += (short) (len_3[icv] & mask); 845 cost4 += (short) (len_4[icv] & mask); 846 cost5 += (short) (len_5[icv] & mask); 847 } 848 849 cost[0] = cost0; 850 cost[1] = cost1; 851 cost[2] = cost2; 852 cost[3] = cost3; 853 cost[4] = cost4; 854 cost[5] = cost5; 855 856 } else { 857 for (int t = nGroups; --t >= 0;) { 858 cost[t] = 0; 859 } 860 861 for (int i = gs; i <= ge; i++) { 862 final int icv = sfmap[i]; 863 for (int t = nGroups; --t >= 0;) { 864 cost[t] += (short) (len[t][icv] & mask); 865 } 866 } 867 } 868 869 /* 870 * Find the coding table which is best for this group, and record its identity in the selector table. 871 */ 872 int bt = -1; 873 for (int t = nGroups, bc = 999999999; --t >= 0;) { 874 final int cost_t = cost[t]; 875 if (cost_t < bc) { 876 bc = cost_t; 877 bt = t; 878 } 879 } 880 881 fave[bt]++; 882 selector[nSelectors] = (byte) bt; 883 nSelectors++; 884 885 /* 886 * Increment the symbol frequencies for the selected table. 887 */ 888 final int[] rfreq_bt = rfreq[bt]; 889 for (int i = gs; i <= ge; i++) { 890 rfreq_bt[sfmap[i]]++; 891 } 892 893 gs = ge + 1; 894 } 895 896 /* 897 * Recompute the tables based on the accumulated frequencies. 898 */ 899 for (int t = 0; t < nGroups; t++) { 900 hbMakeCodeLengths(len[t], rfreq[t], this.data, alphaSize, 20); 901 } 902 } 903 904 return nSelectors; 905 } 906 907 private void sendMTFValues2(final int nGroups, final int nSelectors) { 908 // assert (nGroups < 8) : nGroups; 909 910 final Data dataShadow = this.data; 911 final byte[] pos = dataShadow.sendMTFValues2_pos; 912 913 for (int i = nGroups; --i >= 0;) { 914 pos[i] = (byte) i; 915 } 916 917 for (int i = 0; i < nSelectors; i++) { 918 final byte ll_i = dataShadow.selector[i]; 919 byte tmp = pos[0]; 920 int j = 0; 921 922 while (ll_i != tmp) { 923 j++; 924 final byte tmp2 = tmp; 925 tmp = pos[j]; 926 pos[j] = tmp2; 927 } 928 929 pos[0] = tmp; 930 dataShadow.selectorMtf[i] = (byte) j; 931 } 932 } 933 934 private void sendMTFValues3(final int nGroups, final int alphaSize) { 935 final int[][] code = this.data.sendMTFValues_code; 936 final byte[][] len = this.data.sendMTFValues_len; 937 938 for (int t = 0; t < nGroups; t++) { 939 int minLen = 32; 940 int maxLen = 0; 941 final byte[] len_t = len[t]; 942 for (int i = alphaSize; --i >= 0;) { 943 final int l = len_t[i] & 0xff; 944 if (l > maxLen) { 945 maxLen = l; 946 } 947 if (l < minLen) { 948 minLen = l; 949 } 950 } 951 952 // assert (maxLen <= 20) : maxLen; 953 // assert (minLen >= 1) : minLen; 954 955 hbAssignCodes(code[t], len[t], minLen, maxLen, alphaSize); 956 } 957 } 958 959 private void sendMTFValues4() throws IOException { 960 final boolean[] inUse = this.data.inUse; 961 final boolean[] inUse16 = this.data.sentMTFValues4_inUse16; 962 963 for (int i = 16; --i >= 0;) { 964 inUse16[i] = false; 965 final int i16 = i * 16; 966 for (int j = 16; --j >= 0;) { 967 if (inUse[i16 + j]) { 968 inUse16[i] = true; 969 break; 970 } 971 } 972 } 973 974 for (int i = 0; i < 16; i++) { 975 bsW(1, inUse16[i] ? 1 : 0); 976 } 977 978 final OutputStream outShadow = this.out; 979 int bsLiveShadow = this.bsLive; 980 int bsBuffShadow = this.bsBuff; 981 982 for (int i = 0; i < 16; i++) { 983 if (inUse16[i]) { 984 final int i16 = i * 16; 985 for (int j = 0; j < 16; j++) { 986 // inlined: bsW(1, inUse[i16 + j] ? 1 : 0); 987 while (bsLiveShadow >= 8) { 988 outShadow.write(bsBuffShadow >> 24); // write 8-bit 989 bsBuffShadow <<= 8; 990 bsLiveShadow -= 8; 991 } 992 if (inUse[i16 + j]) { 993 bsBuffShadow |= 1 << 32 - bsLiveShadow - 1; 994 } 995 bsLiveShadow++; 996 } 997 } 998 } 999 1000 this.bsBuff = bsBuffShadow; 1001 this.bsLive = bsLiveShadow; 1002 } 1003 1004 private void sendMTFValues5(final int nGroups, final int nSelectors) throws IOException { 1005 bsW(3, nGroups); 1006 bsW(15, nSelectors); 1007 1008 final OutputStream outShadow = this.out; 1009 final byte[] selectorMtf = this.data.selectorMtf; 1010 1011 int bsLiveShadow = this.bsLive; 1012 int bsBuffShadow = this.bsBuff; 1013 1014 for (int i = 0; i < nSelectors; i++) { 1015 for (int j = 0, hj = selectorMtf[i] & 0xff; j < hj; j++) { 1016 // inlined: bsW(1, 1); 1017 while (bsLiveShadow >= 8) { 1018 outShadow.write(bsBuffShadow >> 24); 1019 bsBuffShadow <<= 8; 1020 bsLiveShadow -= 8; 1021 } 1022 bsBuffShadow |= 1 << 32 - bsLiveShadow - 1; 1023 bsLiveShadow++; 1024 } 1025 1026 // inlined: bsW(1, 0); 1027 while (bsLiveShadow >= 8) { 1028 outShadow.write(bsBuffShadow >> 24); 1029 bsBuffShadow <<= 8; 1030 bsLiveShadow -= 8; 1031 } 1032 // bsBuffShadow |= 0 << (32 - bsLiveShadow - 1); 1033 bsLiveShadow++; 1034 } 1035 1036 this.bsBuff = bsBuffShadow; 1037 this.bsLive = bsLiveShadow; 1038 } 1039 1040 private void sendMTFValues6(final int nGroups, final int alphaSize) throws IOException { 1041 final byte[][] len = this.data.sendMTFValues_len; 1042 final OutputStream outShadow = this.out; 1043 1044 int bsLiveShadow = this.bsLive; 1045 int bsBuffShadow = this.bsBuff; 1046 1047 for (int t = 0; t < nGroups; t++) { 1048 final byte[] len_t = len[t]; 1049 int curr = len_t[0] & 0xff; 1050 1051 // inlined: bsW(5, curr); 1052 while (bsLiveShadow >= 8) { 1053 outShadow.write(bsBuffShadow >> 24); // write 8-bit 1054 bsBuffShadow <<= 8; 1055 bsLiveShadow -= 8; 1056 } 1057 bsBuffShadow |= curr << 32 - bsLiveShadow - 5; 1058 bsLiveShadow += 5; 1059 1060 for (int i = 0; i < alphaSize; i++) { 1061 final int lti = len_t[i] & 0xff; 1062 while (curr < lti) { 1063 // inlined: bsW(2, 2); 1064 while (bsLiveShadow >= 8) { 1065 outShadow.write(bsBuffShadow >> 24); // write 8-bit 1066 bsBuffShadow <<= 8; 1067 bsLiveShadow -= 8; 1068 } 1069 bsBuffShadow |= 2 << 32 - bsLiveShadow - 2; 1070 bsLiveShadow += 2; 1071 1072 curr++; /* 10 */ 1073 } 1074 1075 while (curr > lti) { 1076 // inlined: bsW(2, 3); 1077 while (bsLiveShadow >= 8) { 1078 outShadow.write(bsBuffShadow >> 24); // write 8-bit 1079 bsBuffShadow <<= 8; 1080 bsLiveShadow -= 8; 1081 } 1082 bsBuffShadow |= 3 << 32 - bsLiveShadow - 2; 1083 bsLiveShadow += 2; 1084 1085 curr--; /* 11 */ 1086 } 1087 1088 // inlined: bsW(1, 0); 1089 while (bsLiveShadow >= 8) { 1090 outShadow.write(bsBuffShadow >> 24); // write 8-bit 1091 bsBuffShadow <<= 8; 1092 bsLiveShadow -= 8; 1093 } 1094 // bsBuffShadow |= 0 << (32 - bsLiveShadow - 1); 1095 bsLiveShadow++; 1096 } 1097 } 1098 1099 this.bsBuff = bsBuffShadow; 1100 this.bsLive = bsLiveShadow; 1101 } 1102 1103 private void sendMTFValues7() throws IOException { 1104 final Data dataShadow = this.data; 1105 final byte[][] len = dataShadow.sendMTFValues_len; 1106 final int[][] code = dataShadow.sendMTFValues_code; 1107 final OutputStream outShadow = this.out; 1108 final byte[] selector = dataShadow.selector; 1109 final char[] sfmap = dataShadow.sfmap; 1110 final int nMTFShadow = this.nMTF; 1111 1112 int selCtr = 0; 1113 1114 int bsLiveShadow = this.bsLive; 1115 int bsBuffShadow = this.bsBuff; 1116 1117 for (int gs = 0; gs < nMTFShadow;) { 1118 final int ge = Math.min(gs + G_SIZE - 1, nMTFShadow - 1); 1119 final int selector_selCtr = selector[selCtr] & 0xff; 1120 final int[] code_selCtr = code[selector_selCtr]; 1121 final byte[] len_selCtr = len[selector_selCtr]; 1122 1123 while (gs <= ge) { 1124 final int sfmap_i = sfmap[gs]; 1125 1126 // 1127 // inlined: bsW(len_selCtr[sfmap_i] & 0xff, 1128 // code_selCtr[sfmap_i]); 1129 // 1130 while (bsLiveShadow >= 8) { 1131 outShadow.write(bsBuffShadow >> 24); 1132 bsBuffShadow <<= 8; 1133 bsLiveShadow -= 8; 1134 } 1135 final int n = len_selCtr[sfmap_i] & 0xFF; 1136 bsBuffShadow |= code_selCtr[sfmap_i] << 32 - bsLiveShadow - n; 1137 bsLiveShadow += n; 1138 1139 gs++; 1140 } 1141 1142 gs = ge + 1; 1143 selCtr++; 1144 } 1145 1146 this.bsBuff = bsBuffShadow; 1147 this.bsLive = bsLiveShadow; 1148 } 1149 1150 @Override 1151 public void write(final byte[] buf, int offs, final int len) throws IOException { 1152 if (offs < 0) { 1153 throw new IndexOutOfBoundsException("offs(" + offs + ") < 0."); 1154 } 1155 if (len < 0) { 1156 throw new IndexOutOfBoundsException("len(" + len + ") < 0."); 1157 } 1158 if (offs + len > buf.length) { 1159 throw new IndexOutOfBoundsException("offs(" + offs + ") + len(" + len + ") > buf.length(" + buf.length + ")."); 1160 } 1161 checkOpen(); 1162 for (final int hi = offs + len; offs < hi;) { 1163 write0(buf[offs++]); 1164 } 1165 } 1166 1167 @Override 1168 public void write(final int b) throws IOException { 1169 checkOpen(); 1170 write0(b); 1171 } 1172 1173 /** 1174 * Keeps track of the last bytes written and implicitly performs run-length encoding as the first step of the bzip2 algorithm. 1175 */ 1176 private void write0(int b) throws IOException { 1177 if (this.currentChar != -1) { 1178 b &= 0xff; 1179 if (this.currentChar == b) { 1180 if (++this.runLength > 254) { 1181 writeRun(); 1182 this.currentChar = -1; 1183 this.runLength = 0; 1184 } 1185 // else nothing to do 1186 } else { 1187 writeRun(); 1188 this.runLength = 1; 1189 this.currentChar = b; 1190 } 1191 } else { 1192 this.currentChar = b & 0xff; 1193 this.runLength++; 1194 } 1195 } 1196 1197 /** 1198 * Writes the current byte to the buffer, run-length encoding it if it has been repeated at least four times (the first step RLEs sequences of four 1199 * identical bytes). 1200 * 1201 * <p> 1202 * Flushes the current block before writing data if it is full. 1203 * </p> 1204 * 1205 * <p> 1206 * "write to the buffer" means adding to data.buffer starting two steps "after" this.last - initially starting at index 1 (not 0) - and updating this.last 1207 * to point to the last index written minus 1. 1208 * </p> 1209 */ 1210 private void writeRun() throws IOException { 1211 final int lastShadow = this.last; 1212 1213 if (lastShadow < this.allowableBlockSize) { 1214 final int currentCharShadow = this.currentChar; 1215 final Data dataShadow = this.data; 1216 dataShadow.inUse[currentCharShadow] = true; 1217 final byte ch = (byte) currentCharShadow; 1218 1219 int runLengthShadow = this.runLength; 1220 this.crc.update(currentCharShadow, runLengthShadow); 1221 1222 switch (runLengthShadow) { 1223 case 1: 1224 dataShadow.block[lastShadow + 2] = ch; 1225 this.last = lastShadow + 1; 1226 break; 1227 1228 case 2: 1229 dataShadow.block[lastShadow + 2] = ch; 1230 dataShadow.block[lastShadow + 3] = ch; 1231 this.last = lastShadow + 2; 1232 break; 1233 1234 case 3: { 1235 final byte[] block = dataShadow.block; 1236 block[lastShadow + 2] = ch; 1237 block[lastShadow + 3] = ch; 1238 block[lastShadow + 4] = ch; 1239 this.last = lastShadow + 3; 1240 } 1241 break; 1242 1243 default: { 1244 runLengthShadow -= 4; 1245 dataShadow.inUse[runLengthShadow] = true; 1246 final byte[] block = dataShadow.block; 1247 block[lastShadow + 2] = ch; 1248 block[lastShadow + 3] = ch; 1249 block[lastShadow + 4] = ch; 1250 block[lastShadow + 5] = ch; 1251 block[lastShadow + 6] = (byte) runLengthShadow; 1252 this.last = lastShadow + 5; 1253 } 1254 break; 1255 1256 } 1257 } else { 1258 endBlock(); 1259 initBlock(); 1260 writeRun(); 1261 } 1262 } 1263 1264}