001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, 013 * software distributed under the License is distributed on an 014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 015 * KIND, either express or implied. See the License for the 016 * specific language governing permissions and limitations 017 * under the License. 018 */ 019package org.apache.commons.compress.compressors.bzip2; 020 021import java.io.IOException; 022import java.io.OutputStream; 023import java.util.Arrays; 024 025import org.apache.commons.compress.compressors.CompressorOutputStream; 026 027/** 028 * An output stream that compresses into the BZip2 format into another stream. 029 * 030 * <p> 031 * The compression requires large amounts of memory. Thus you should call the {@link #close() close()} method as soon as possible, to force 032 * {@code BZip2CompressorOutputStream} to release the allocated memory. 033 * </p> 034 * 035 * <p> 036 * You can shrink the amount of allocated memory and maybe raise the compression speed by choosing a lower blocksize, which in turn may cause a lower 037 * compression ratio. You can avoid unnecessary memory allocation by avoiding using a blocksize which is bigger than the size of the input. 038 * </p> 039 * 040 * <p> 041 * You can compute the memory usage for compressing by the following formula: 042 * </p> 043 * 044 * <pre> 045 * <code>400k + (9 * blocksize)</code>. 046 * </pre> 047 * 048 * <p> 049 * To get the memory required for decompression by {@link BZip2CompressorInputStream} use 050 * </p> 051 * 052 * <pre> 053 * <code>65k + (5 * blocksize)</code>. 054 * </pre> 055 * 056 * <table style="width:100%" border="1"> 057 * <caption>Memory usage by blocksize</caption> 058 * <tr> 059 * <th colspan="3">Memory usage by blocksize</th> 060 * </tr> 061 * <tr> 062 * <th style="text-align: right">Blocksize</th> 063 * <th style="text-align: right">Compression<br> 064 * memory usage</th> 065 * <th style="text-align: right">Decompression<br> 066 * memory usage</th> 067 * </tr> 068 * <tr> 069 * <td style="text-align: right">100k</td> 070 * <td style="text-align: right">1300k</td> 071 * <td style="text-align: right">565k</td> 072 * </tr> 073 * <tr> 074 * <td style="text-align: right">200k</td> 075 * <td style="text-align: right">2200k</td> 076 * <td style="text-align: right">1065k</td> 077 * </tr> 078 * <tr> 079 * <td style="text-align: right">300k</td> 080 * <td style="text-align: right">3100k</td> 081 * <td style="text-align: right">1565k</td> 082 * </tr> 083 * <tr> 084 * <td style="text-align: right">400k</td> 085 * <td style="text-align: right">4000k</td> 086 * <td style="text-align: right">2065k</td> 087 * </tr> 088 * <tr> 089 * <td style="text-align: right">500k</td> 090 * <td style="text-align: right">4900k</td> 091 * <td style="text-align: right">2565k</td> 092 * </tr> 093 * <tr> 094 * <td style="text-align: right">600k</td> 095 * <td style="text-align: right">5800k</td> 096 * <td style="text-align: right">3065k</td> 097 * </tr> 098 * <tr> 099 * <td style="text-align: right">700k</td> 100 * <td style="text-align: right">6700k</td> 101 * <td style="text-align: right">3565k</td> 102 * </tr> 103 * <tr> 104 * <td style="text-align: right">800k</td> 105 * <td style="text-align: right">7600k</td> 106 * <td style="text-align: right">4065k</td> 107 * </tr> 108 * <tr> 109 * <td style="text-align: right">900k</td> 110 * <td style="text-align: right">8500k</td> 111 * <td style="text-align: right">4565k</td> 112 * </tr> 113 * </table> 114 * 115 * <p> 116 * For decompression {@code BZip2CompressorInputStream} allocates less memory if the bzipped input is smaller than one block. 117 * </p> 118 * 119 * <p> 120 * Instances of this class are not threadsafe. 121 * </p> 122 * 123 * <p> 124 * TODO: Update to BZip2 1.0.1 125 * </p> 126 * 127 * @NotThreadSafe 128 */ 129public class BZip2CompressorOutputStream extends CompressorOutputStream implements BZip2Constants { 130 131 static final class Data { 132 133 // with blockSize 900k 134 /* maps unsigned byte => "does it occur in block" */ 135 final boolean[] inUse = new boolean[256]; // 256 byte 136 final byte[] unseqToSeq = new byte[256]; // 256 byte 137 final int[] mtfFreq = new int[MAX_ALPHA_SIZE]; // 1032 byte 138 final byte[] selector = new byte[MAX_SELECTORS]; // 18002 byte 139 final byte[] selectorMtf = new byte[MAX_SELECTORS]; // 18002 byte 140 141 final byte[] generateMTFValues_yy = new byte[256]; // 256 byte 142 final byte[][] sendMTFValues_len = new byte[N_GROUPS][MAX_ALPHA_SIZE]; // 1548 143 // byte 144 final int[][] sendMTFValues_rfreq = new int[N_GROUPS][MAX_ALPHA_SIZE]; // 6192 145 // byte 146 final int[] sendMTFValues_fave = new int[N_GROUPS]; // 24 byte 147 final short[] sendMTFValues_cost = new short[N_GROUPS]; // 12 byte 148 final int[][] sendMTFValues_code = new int[N_GROUPS][MAX_ALPHA_SIZE]; // 6192 149 // byte 150 final byte[] sendMTFValues2_pos = new byte[N_GROUPS]; // 6 byte 151 final boolean[] sentMTFValues4_inUse16 = new boolean[16]; // 16 byte 152 153 final int[] heap = new int[MAX_ALPHA_SIZE + 2]; // 1040 byte 154 final int[] weight = new int[MAX_ALPHA_SIZE * 2]; // 2064 byte 155 final int[] parent = new int[MAX_ALPHA_SIZE * 2]; // 2064 byte 156 157 // ------------ 158 // 333408 byte 159 160 /* 161 * holds the RLEd block of original data starting at index 1. After sorting the last byte added to the buffer is at index 0. 162 */ 163 final byte[] block; // 900021 byte 164 /* 165 * maps index in Burrows-Wheeler transformed block => index of byte in original block 166 */ 167 final int[] fmap; // 3600000 byte 168 final char[] sfmap; // 3600000 byte 169 // ------------ 170 // 8433529 byte 171 // ============ 172 173 /** 174 * Index of original line in Burrows-Wheeler table. 175 * 176 * <p> 177 * This is the index in fmap that points to the last byte of the original data. 178 * </p> 179 */ 180 int origPtr; 181 182 Data(final int blockSize100k) { 183 final int n = blockSize100k * BZip2Constants.BASEBLOCKSIZE; 184 this.block = new byte[n + 1 + NUM_OVERSHOOT_BYTES]; 185 this.fmap = new int[n]; 186 this.sfmap = new char[2 * n]; 187 } 188 189 } 190 191 /** 192 * The minimum supported blocksize {@code == 1}. 193 */ 194 public static final int MIN_BLOCKSIZE = 1; 195 196 /** 197 * The maximum supported blocksize {@code == 9}. 198 */ 199 public static final int MAX_BLOCKSIZE = 9; 200 private static final int GREATER_ICOST = 15; 201 202 private static final int LESSER_ICOST = 0; 203 204 /** 205 * Chooses a blocksize based on the given length of the data to compress. 206 * 207 * @return The blocksize, between {@link #MIN_BLOCKSIZE} and {@link #MAX_BLOCKSIZE} both inclusive. For a negative {@code inputLength} this method returns 208 * {@code MAX_BLOCKSIZE} always. 209 * 210 * @param inputLength The length of the data which will be compressed by {@code BZip2CompressorOutputStream}. 211 */ 212 public static int chooseBlockSize(final long inputLength) { 213 return inputLength > 0 ? (int) Math.min(inputLength / 132000 + 1, 9) : MAX_BLOCKSIZE; 214 } 215 216 private static void hbAssignCodes(final int[] code, final byte[] length, final int minLen, final int maxLen, final int alphaSize) { 217 int vec = 0; 218 for (int n = minLen; n <= maxLen; n++) { 219 for (int i = 0; i < alphaSize; i++) { 220 if ((length[i] & 0xff) == n) { 221 code[i] = vec; 222 vec++; 223 } 224 } 225 vec <<= 1; 226 } 227 } 228 229 private static void hbMakeCodeLengths(final byte[] len, final int[] freq, final Data dat, final int alphaSize, final int maxLen) { 230 /* 231 * Nodes and heap entries run from 1. Entry 0 for both the heap and nodes is a sentinel. 232 */ 233 final int[] heap = dat.heap; 234 final int[] weight = dat.weight; 235 final int[] parent = dat.parent; 236 237 for (int i = alphaSize; --i >= 0;) { 238 weight[i + 1] = (freq[i] == 0 ? 1 : freq[i]) << 8; 239 } 240 241 for (boolean tooLong = true; tooLong;) { 242 tooLong = false; 243 244 int nNodes = alphaSize; 245 int nHeap = 0; 246 heap[0] = 0; 247 weight[0] = 0; 248 parent[0] = -2; 249 250 for (int i = 1; i <= alphaSize; i++) { 251 parent[i] = -1; 252 nHeap++; 253 heap[nHeap] = i; 254 255 int zz = nHeap; 256 final int tmp = heap[zz]; 257 while (weight[tmp] < weight[heap[zz >> 1]]) { 258 heap[zz] = heap[zz >> 1]; 259 zz >>= 1; 260 } 261 heap[zz] = tmp; 262 } 263 264 while (nHeap > 1) { 265 final int n1 = heap[1]; 266 heap[1] = heap[nHeap]; 267 nHeap--; 268 269 int yy = 0; 270 int zz = 1; 271 int tmp = heap[1]; 272 273 while (true) { 274 yy = zz << 1; 275 276 if (yy > nHeap) { 277 break; 278 } 279 280 if (yy < nHeap && weight[heap[yy + 1]] < weight[heap[yy]]) { 281 yy++; 282 } 283 284 if (weight[tmp] < weight[heap[yy]]) { 285 break; 286 } 287 288 heap[zz] = heap[yy]; 289 zz = yy; 290 } 291 292 heap[zz] = tmp; 293 294 final int n2 = heap[1]; 295 heap[1] = heap[nHeap]; 296 nHeap--; 297 298 yy = 0; 299 zz = 1; 300 tmp = heap[1]; 301 302 while (true) { 303 yy = zz << 1; 304 305 if (yy > nHeap) { 306 break; 307 } 308 309 if (yy < nHeap && weight[heap[yy + 1]] < weight[heap[yy]]) { 310 yy++; 311 } 312 313 if (weight[tmp] < weight[heap[yy]]) { 314 break; 315 } 316 317 heap[zz] = heap[yy]; 318 zz = yy; 319 } 320 321 heap[zz] = tmp; 322 nNodes++; 323 parent[n1] = parent[n2] = nNodes; 324 325 final int weight_n1 = weight[n1]; 326 final int weight_n2 = weight[n2]; 327 weight[nNodes] = (weight_n1 & 0xffffff00) + (weight_n2 & 0xffffff00) | 1 + Math.max(weight_n1 & 0x000000ff, weight_n2 & 0x000000ff); 328 329 parent[nNodes] = -1; 330 nHeap++; 331 heap[nHeap] = nNodes; 332 333 tmp = 0; 334 zz = nHeap; 335 tmp = heap[zz]; 336 final int weight_tmp = weight[tmp]; 337 while (weight_tmp < weight[heap[zz >> 1]]) { 338 heap[zz] = heap[zz >> 1]; 339 zz >>= 1; 340 } 341 heap[zz] = tmp; 342 343 } 344 345 for (int i = 1; i <= alphaSize; i++) { 346 int j = 0; 347 int k = i; 348 349 for (int parent_k; (parent_k = parent[k]) >= 0;) { 350 k = parent_k; 351 j++; 352 } 353 354 len[i - 1] = (byte) j; 355 if (j > maxLen) { 356 tooLong = true; 357 } 358 } 359 360 if (tooLong) { 361 for (int i = 1; i < alphaSize; i++) { 362 int j = weight[i] >> 8; 363 j = 1 + (j >> 1); 364 weight[i] = j << 8; 365 } 366 } 367 } 368 } 369 370 /** 371 * Index of the last char in the block, so the block size == last + 1. 372 */ 373 private int last; 374 /** 375 * Always: in the range 0 .. 9. The current block size is 100000 * this number. 376 */ 377 private final int blockSize100k; 378 379 private int bsBuff; 380 381 private int bsLive; 382 383 private final CRC crc = new CRC(); 384 private int nInUse; 385 386 private int nMTF; 387 private int currentChar = -1; 388 private int runLength; 389 390 private int combinedCRC; 391 392 private final int allowableBlockSize; 393 /** 394 * All memory intensive stuff. 395 */ 396 private Data data; 397 398 private BlockSort blockSorter; 399 400 private OutputStream out; 401 402 private volatile boolean closed; 403 404 /** 405 * Constructs a new {@code BZip2CompressorOutputStream} with a blocksize of 900k. 406 * 407 * @param out the destination stream. 408 * 409 * @throws IOException if an I/O error occurs in the specified stream. 410 * @throws NullPointerException if {@code out == null}. 411 */ 412 public BZip2CompressorOutputStream(final OutputStream out) throws IOException { 413 this(out, MAX_BLOCKSIZE); 414 } 415 416 /** 417 * Constructs a new {@code BZip2CompressorOutputStream} with specified blocksize. 418 * 419 * @param out the destination stream. 420 * @param blockSize the blockSize as 100k units. 421 * 422 * @throws IOException if an I/O error occurs in the specified stream. 423 * @throws IllegalArgumentException if {@code (blockSize < 1) || (blockSize > 9)}. 424 * @throws NullPointerException if {@code out == null}. 425 * 426 * @see #MIN_BLOCKSIZE 427 * @see #MAX_BLOCKSIZE 428 */ 429 public BZip2CompressorOutputStream(final OutputStream out, final int blockSize) throws IOException { 430 if (blockSize < 1) { 431 throw new IllegalArgumentException("blockSize(" + blockSize + ") < 1"); 432 } 433 if (blockSize > 9) { 434 throw new IllegalArgumentException("blockSize(" + blockSize + ") > 9"); 435 } 436 437 this.blockSize100k = blockSize; 438 this.out = out; 439 440 /* 20 is just a paranoia constant */ 441 this.allowableBlockSize = this.blockSize100k * BZip2Constants.BASEBLOCKSIZE - 20; 442 init(); 443 } 444 445 private void blockSort() { 446 blockSorter.blockSort(data, last); 447 } 448 449 private void bsFinishedWithStream() throws IOException { 450 while (this.bsLive > 0) { 451 final int ch = this.bsBuff >> 24; 452 this.out.write(ch); // write 8-bit 453 this.bsBuff <<= 8; 454 this.bsLive -= 8; 455 } 456 } 457 458 private void bsPutInt(final int u) throws IOException { 459 bsW(8, u >> 24 & 0xff); 460 bsW(8, u >> 16 & 0xff); 461 bsW(8, u >> 8 & 0xff); 462 bsW(8, u & 0xff); 463 } 464 465 private void bsPutUByte(final int c) throws IOException { 466 bsW(8, c); 467 } 468 469 private void bsW(final int n, final int v) throws IOException { 470 final OutputStream outShadow = this.out; 471 int bsLiveShadow = this.bsLive; 472 int bsBuffShadow = this.bsBuff; 473 474 while (bsLiveShadow >= 8) { 475 outShadow.write(bsBuffShadow >> 24); // write 8-bit 476 bsBuffShadow <<= 8; 477 bsLiveShadow -= 8; 478 } 479 480 this.bsBuff = bsBuffShadow | v << 32 - bsLiveShadow - n; 481 this.bsLive = bsLiveShadow + n; 482 } 483 484 @Override 485 public void close() throws IOException { 486 if (!closed) { 487 try (OutputStream outShadow = this.out) { 488 finish(); 489 } 490 } 491 } 492 493 private void endBlock() throws IOException { 494 final int blockCRC = this.crc.getValue(); 495 this.combinedCRC = this.combinedCRC << 1 | this.combinedCRC >>> 31; 496 this.combinedCRC ^= blockCRC; 497 498 // empty block at end of file 499 if (this.last == -1) { 500 return; 501 } 502 503 /* sort the block and establish posn of original string */ 504 blockSort(); 505 506 /* 507 * A 6-byte block header, the value chosen arbitrarily as 0x314159265359 :-). A 32 bit value does not really give a strong enough guarantee that the 508 * value will not appear by chance in the compressed data stream. Worst-case probability of this event, for a 900k block, is about 2.0e-3 for 32 bits, 509 * 1.0e-5 for 40 bits and 4.0e-8 for 48 bits. For a compressed file of size 100Gb -- about 100000 blocks -- only a 48-bit marker will do. NB: normal 510 * compression/ decompression doesn't rely on these statistical properties. They are only important when trying to recover blocks from damaged files. 511 */ 512 bsPutUByte(0x31); 513 bsPutUByte(0x41); 514 bsPutUByte(0x59); 515 bsPutUByte(0x26); 516 bsPutUByte(0x53); 517 bsPutUByte(0x59); 518 519 /* Now the block's CRC, so it is in a known place. */ 520 bsPutInt(blockCRC); 521 522 /* Now a single bit indicating no randomisation. */ 523 bsW(1, 0); 524 525 /* Finally, block's contents proper. */ 526 moveToFrontCodeAndSend(); 527 } 528 529 private void endCompression() throws IOException { 530 /* 531 * Now another magic 48-bit number, 0x177245385090, to indicate the end of the last block. (sqrt(pi), if you want to know. I did want to use e, but it 532 * contains too much repetition -- 27 18 28 18 28 46 -- for me to feel statistically comfortable. Call me paranoid.) 533 */ 534 bsPutUByte(0x17); 535 bsPutUByte(0x72); 536 bsPutUByte(0x45); 537 bsPutUByte(0x38); 538 bsPutUByte(0x50); 539 bsPutUByte(0x90); 540 541 bsPutInt(this.combinedCRC); 542 bsFinishedWithStream(); 543 } 544 545 public void finish() throws IOException { 546 if (!closed) { 547 closed = true; 548 try { 549 if (this.runLength > 0) { 550 writeRun(); 551 } 552 this.currentChar = -1; 553 endBlock(); 554 endCompression(); 555 } finally { 556 this.out = null; 557 this.blockSorter = null; 558 this.data = null; 559 } 560 } 561 } 562 563 @Override 564 public void flush() throws IOException { 565 final OutputStream outShadow = this.out; 566 if (outShadow != null) { 567 outShadow.flush(); 568 } 569 } 570 571 /* 572 * Performs Move-To-Front on the Burrows-Wheeler transformed buffer, storing the MTFed data in data.sfmap in RUNA/RUNB run-length-encoded form. 573 * 574 * <p>Keeps track of byte frequencies in data.mtfFreq at the same time.</p> 575 */ 576 private void generateMTFValues() { 577 final int lastShadow = this.last; 578 final Data dataShadow = this.data; 579 final boolean[] inUse = dataShadow.inUse; 580 final byte[] block = dataShadow.block; 581 final int[] fmap = dataShadow.fmap; 582 final char[] sfmap = dataShadow.sfmap; 583 final int[] mtfFreq = dataShadow.mtfFreq; 584 final byte[] unseqToSeq = dataShadow.unseqToSeq; 585 final byte[] yy = dataShadow.generateMTFValues_yy; 586 587 // make maps 588 int nInUseShadow = 0; 589 for (int i = 0; i < 256; i++) { 590 if (inUse[i]) { 591 unseqToSeq[i] = (byte) nInUseShadow; 592 nInUseShadow++; 593 } 594 } 595 this.nInUse = nInUseShadow; 596 597 final int eob = nInUseShadow + 1; 598 599 Arrays.fill(mtfFreq, 0, eob + 1, 0); 600 601 for (int i = nInUseShadow; --i >= 0;) { 602 yy[i] = (byte) i; 603 } 604 605 int wr = 0; 606 int zPend = 0; 607 608 for (int i = 0; i <= lastShadow; i++) { 609 final byte ll_i = unseqToSeq[block[fmap[i]] & 0xff]; 610 byte tmp = yy[0]; 611 int j = 0; 612 613 while (ll_i != tmp) { 614 j++; 615 final byte tmp2 = tmp; 616 tmp = yy[j]; 617 yy[j] = tmp2; 618 } 619 yy[0] = tmp; 620 621 if (j == 0) { 622 zPend++; 623 } else { 624 if (zPend > 0) { 625 zPend--; 626 while (true) { 627 if ((zPend & 1) == 0) { 628 sfmap[wr] = RUNA; 629 wr++; 630 mtfFreq[RUNA]++; 631 } else { 632 sfmap[wr] = RUNB; 633 wr++; 634 mtfFreq[RUNB]++; 635 } 636 637 if (zPend < 2) { 638 break; 639 } 640 zPend = zPend - 2 >> 1; 641 } 642 zPend = 0; 643 } 644 sfmap[wr] = (char) (j + 1); 645 wr++; 646 mtfFreq[j + 1]++; 647 } 648 } 649 650 if (zPend > 0) { 651 zPend--; 652 while (true) { 653 if ((zPend & 1) == 0) { 654 sfmap[wr] = RUNA; 655 wr++; 656 mtfFreq[RUNA]++; 657 } else { 658 sfmap[wr] = RUNB; 659 wr++; 660 mtfFreq[RUNB]++; 661 } 662 663 if (zPend < 2) { 664 break; 665 } 666 zPend = zPend - 2 >> 1; 667 } 668 } 669 670 sfmap[wr] = (char) eob; 671 mtfFreq[eob]++; 672 this.nMTF = wr + 1; 673 } 674 675 /** 676 * Returns the blocksize parameter specified at construction time. 677 * 678 * @return the blocksize parameter specified at construction time 679 */ 680 public final int getBlockSize() { 681 return this.blockSize100k; 682 } 683 684 /** 685 * Writes magic bytes like BZ on the first position of the stream and bytes indicating the file-format, which is huffmanized, followed by a digit indicating 686 * blockSize100k. 687 * 688 * @throws IOException if the magic bytes could not been written 689 */ 690 private void init() throws IOException { 691 bsPutUByte('B'); 692 bsPutUByte('Z'); 693 694 this.data = new Data(this.blockSize100k); 695 this.blockSorter = new BlockSort(this.data); 696 697 // huffmanized magic bytes 698 bsPutUByte('h'); 699 bsPutUByte('0' + this.blockSize100k); 700 701 this.combinedCRC = 0; 702 initBlock(); 703 } 704 705 private void initBlock() { 706 // blockNo++; 707 this.crc.reset(); 708 this.last = -1; 709 // ch = 0; 710 711 final boolean[] inUse = this.data.inUse; 712 for (int i = 256; --i >= 0;) { 713 inUse[i] = false; 714 } 715 716 } 717 718 private void moveToFrontCodeAndSend() throws IOException { 719 bsW(24, this.data.origPtr); 720 generateMTFValues(); 721 sendMTFValues(); 722 } 723 724 private void sendMTFValues() throws IOException { 725 final byte[][] len = this.data.sendMTFValues_len; 726 final int alphaSize = this.nInUse + 2; 727 728 for (int t = N_GROUPS; --t >= 0;) { 729 final byte[] len_t = len[t]; 730 for (int v = alphaSize; --v >= 0;) { 731 len_t[v] = GREATER_ICOST; 732 } 733 } 734 735 /* Decide how many coding tables to use */ 736 // assert (this.nMTF > 0) : this.nMTF; 737 final int nGroups = this.nMTF < 200 ? 2 : this.nMTF < 600 ? 3 : this.nMTF < 1200 ? 4 : this.nMTF < 2400 ? 5 : 6; 738 739 /* Generate an initial set of coding tables */ 740 sendMTFValues0(nGroups, alphaSize); 741 742 /* 743 * Iterate up to N_ITERS times to improve the tables. 744 */ 745 final int nSelectors = sendMTFValues1(nGroups, alphaSize); 746 747 /* Compute MTF values for the selectors. */ 748 sendMTFValues2(nGroups, nSelectors); 749 750 /* Assign actual codes for the tables. */ 751 sendMTFValues3(nGroups, alphaSize); 752 753 /* Transmit the mapping table. */ 754 sendMTFValues4(); 755 756 /* Now the selectors. */ 757 sendMTFValues5(nGroups, nSelectors); 758 759 /* Now the coding tables. */ 760 sendMTFValues6(nGroups, alphaSize); 761 762 /* And finally, the block data proper */ 763 sendMTFValues7(); 764 } 765 766 private void sendMTFValues0(final int nGroups, final int alphaSize) { 767 final byte[][] len = this.data.sendMTFValues_len; 768 final int[] mtfFreq = this.data.mtfFreq; 769 770 int remF = this.nMTF; 771 int gs = 0; 772 773 for (int nPart = nGroups; nPart > 0; nPart--) { 774 final int tFreq = remF / nPart; 775 int ge = gs - 1; 776 int aFreq = 0; 777 778 for (final int a = alphaSize - 1; aFreq < tFreq && ge < a;) { 779 aFreq += mtfFreq[++ge]; 780 } 781 782 if (ge > gs && nPart != nGroups && nPart != 1 && (nGroups - nPart & 1) != 0) { 783 aFreq -= mtfFreq[ge--]; 784 } 785 786 final byte[] len_np = len[nPart - 1]; 787 for (int v = alphaSize; --v >= 0;) { 788 if (v >= gs && v <= ge) { 789 len_np[v] = LESSER_ICOST; 790 } else { 791 len_np[v] = GREATER_ICOST; 792 } 793 } 794 795 gs = ge + 1; 796 remF -= aFreq; 797 } 798 } 799 800 private int sendMTFValues1(final int nGroups, final int alphaSize) { 801 final Data dataShadow = this.data; 802 final int[][] rfreq = dataShadow.sendMTFValues_rfreq; 803 final int[] fave = dataShadow.sendMTFValues_fave; 804 final short[] cost = dataShadow.sendMTFValues_cost; 805 final char[] sfmap = dataShadow.sfmap; 806 final byte[] selector = dataShadow.selector; 807 final byte[][] len = dataShadow.sendMTFValues_len; 808 final byte[] len_0 = len[0]; 809 final byte[] len_1 = len[1]; 810 final byte[] len_2 = len[2]; 811 final byte[] len_3 = len[3]; 812 final byte[] len_4 = len[4]; 813 final byte[] len_5 = len[5]; 814 final int nMTFShadow = this.nMTF; 815 816 int nSelectors = 0; 817 818 for (int iter = 0; iter < N_ITERS; iter++) { 819 for (int t = nGroups; --t >= 0;) { 820 fave[t] = 0; 821 final int[] rfreqt = rfreq[t]; 822 for (int i = alphaSize; --i >= 0;) { 823 rfreqt[i] = 0; 824 } 825 } 826 827 nSelectors = 0; 828 829 for (int gs = 0; gs < this.nMTF;) { 830 // Set group start & end marks. 831 832 // Calculate the cost of this group as coded by each of the 833 // coding tables. 834 835 final int ge = Math.min(gs + G_SIZE - 1, nMTFShadow - 1); 836 837 final byte mask = (byte) 0xff; 838 if (nGroups == N_GROUPS) { 839 // unrolled version of the else-block 840 841 short cost0 = 0; 842 short cost1 = 0; 843 short cost2 = 0; 844 short cost3 = 0; 845 short cost4 = 0; 846 short cost5 = 0; 847 848 for (int i = gs; i <= ge; i++) { 849 final int icv = sfmap[i]; 850 cost0 += (short) (len_0[icv] & mask); 851 cost1 += (short) (len_1[icv] & mask); 852 cost2 += (short) (len_2[icv] & mask); 853 cost3 += (short) (len_3[icv] & mask); 854 cost4 += (short) (len_4[icv] & mask); 855 cost5 += (short) (len_5[icv] & mask); 856 } 857 858 cost[0] = cost0; 859 cost[1] = cost1; 860 cost[2] = cost2; 861 cost[3] = cost3; 862 cost[4] = cost4; 863 cost[5] = cost5; 864 865 } else { 866 for (int t = nGroups; --t >= 0;) { 867 cost[t] = 0; 868 } 869 870 for (int i = gs; i <= ge; i++) { 871 final int icv = sfmap[i]; 872 for (int t = nGroups; --t >= 0;) { 873 cost[t] += (short) (len[t][icv] & mask); 874 } 875 } 876 } 877 878 /* 879 * Find the coding table which is best for this group, and record its identity in the selector table. 880 */ 881 int bt = -1; 882 for (int t = nGroups, bc = 999999999; --t >= 0;) { 883 final int cost_t = cost[t]; 884 if (cost_t < bc) { 885 bc = cost_t; 886 bt = t; 887 } 888 } 889 890 fave[bt]++; 891 selector[nSelectors] = (byte) bt; 892 nSelectors++; 893 894 /* 895 * Increment the symbol frequencies for the selected table. 896 */ 897 final int[] rfreq_bt = rfreq[bt]; 898 for (int i = gs; i <= ge; i++) { 899 rfreq_bt[sfmap[i]]++; 900 } 901 902 gs = ge + 1; 903 } 904 905 /* 906 * Recompute the tables based on the accumulated frequencies. 907 */ 908 for (int t = 0; t < nGroups; t++) { 909 hbMakeCodeLengths(len[t], rfreq[t], this.data, alphaSize, 20); 910 } 911 } 912 913 return nSelectors; 914 } 915 916 private void sendMTFValues2(final int nGroups, final int nSelectors) { 917 // assert (nGroups < 8) : nGroups; 918 919 final Data dataShadow = this.data; 920 final byte[] pos = dataShadow.sendMTFValues2_pos; 921 922 for (int i = nGroups; --i >= 0;) { 923 pos[i] = (byte) i; 924 } 925 926 for (int i = 0; i < nSelectors; i++) { 927 final byte ll_i = dataShadow.selector[i]; 928 byte tmp = pos[0]; 929 int j = 0; 930 931 while (ll_i != tmp) { 932 j++; 933 final byte tmp2 = tmp; 934 tmp = pos[j]; 935 pos[j] = tmp2; 936 } 937 938 pos[0] = tmp; 939 dataShadow.selectorMtf[i] = (byte) j; 940 } 941 } 942 943 private void sendMTFValues3(final int nGroups, final int alphaSize) { 944 final int[][] code = this.data.sendMTFValues_code; 945 final byte[][] len = this.data.sendMTFValues_len; 946 947 for (int t = 0; t < nGroups; t++) { 948 int minLen = 32; 949 int maxLen = 0; 950 final byte[] len_t = len[t]; 951 for (int i = alphaSize; --i >= 0;) { 952 final int l = len_t[i] & 0xff; 953 if (l > maxLen) { 954 maxLen = l; 955 } 956 if (l < minLen) { 957 minLen = l; 958 } 959 } 960 961 // assert (maxLen <= 20) : maxLen; 962 // assert (minLen >= 1) : minLen; 963 964 hbAssignCodes(code[t], len[t], minLen, maxLen, alphaSize); 965 } 966 } 967 968 private void sendMTFValues4() throws IOException { 969 final boolean[] inUse = this.data.inUse; 970 final boolean[] inUse16 = this.data.sentMTFValues4_inUse16; 971 972 for (int i = 16; --i >= 0;) { 973 inUse16[i] = false; 974 final int i16 = i * 16; 975 for (int j = 16; --j >= 0;) { 976 if (inUse[i16 + j]) { 977 inUse16[i] = true; 978 break; 979 } 980 } 981 } 982 983 for (int i = 0; i < 16; i++) { 984 bsW(1, inUse16[i] ? 1 : 0); 985 } 986 987 final OutputStream outShadow = this.out; 988 int bsLiveShadow = this.bsLive; 989 int bsBuffShadow = this.bsBuff; 990 991 for (int i = 0; i < 16; i++) { 992 if (inUse16[i]) { 993 final int i16 = i * 16; 994 for (int j = 0; j < 16; j++) { 995 // inlined: bsW(1, inUse[i16 + j] ? 1 : 0); 996 while (bsLiveShadow >= 8) { 997 outShadow.write(bsBuffShadow >> 24); // write 8-bit 998 bsBuffShadow <<= 8; 999 bsLiveShadow -= 8; 1000 } 1001 if (inUse[i16 + j]) { 1002 bsBuffShadow |= 1 << 32 - bsLiveShadow - 1; 1003 } 1004 bsLiveShadow++; 1005 } 1006 } 1007 } 1008 1009 this.bsBuff = bsBuffShadow; 1010 this.bsLive = bsLiveShadow; 1011 } 1012 1013 private void sendMTFValues5(final int nGroups, final int nSelectors) throws IOException { 1014 bsW(3, nGroups); 1015 bsW(15, nSelectors); 1016 1017 final OutputStream outShadow = this.out; 1018 final byte[] selectorMtf = this.data.selectorMtf; 1019 1020 int bsLiveShadow = this.bsLive; 1021 int bsBuffShadow = this.bsBuff; 1022 1023 for (int i = 0; i < nSelectors; i++) { 1024 for (int j = 0, hj = selectorMtf[i] & 0xff; j < hj; j++) { 1025 // inlined: bsW(1, 1); 1026 while (bsLiveShadow >= 8) { 1027 outShadow.write(bsBuffShadow >> 24); 1028 bsBuffShadow <<= 8; 1029 bsLiveShadow -= 8; 1030 } 1031 bsBuffShadow |= 1 << 32 - bsLiveShadow - 1; 1032 bsLiveShadow++; 1033 } 1034 1035 // inlined: bsW(1, 0); 1036 while (bsLiveShadow >= 8) { 1037 outShadow.write(bsBuffShadow >> 24); 1038 bsBuffShadow <<= 8; 1039 bsLiveShadow -= 8; 1040 } 1041 // bsBuffShadow |= 0 << (32 - bsLiveShadow - 1); 1042 bsLiveShadow++; 1043 } 1044 1045 this.bsBuff = bsBuffShadow; 1046 this.bsLive = bsLiveShadow; 1047 } 1048 1049 private void sendMTFValues6(final int nGroups, final int alphaSize) throws IOException { 1050 final byte[][] len = this.data.sendMTFValues_len; 1051 final OutputStream outShadow = this.out; 1052 1053 int bsLiveShadow = this.bsLive; 1054 int bsBuffShadow = this.bsBuff; 1055 1056 for (int t = 0; t < nGroups; t++) { 1057 final byte[] len_t = len[t]; 1058 int curr = len_t[0] & 0xff; 1059 1060 // inlined: bsW(5, curr); 1061 while (bsLiveShadow >= 8) { 1062 outShadow.write(bsBuffShadow >> 24); // write 8-bit 1063 bsBuffShadow <<= 8; 1064 bsLiveShadow -= 8; 1065 } 1066 bsBuffShadow |= curr << 32 - bsLiveShadow - 5; 1067 bsLiveShadow += 5; 1068 1069 for (int i = 0; i < alphaSize; i++) { 1070 final int lti = len_t[i] & 0xff; 1071 while (curr < lti) { 1072 // inlined: bsW(2, 2); 1073 while (bsLiveShadow >= 8) { 1074 outShadow.write(bsBuffShadow >> 24); // write 8-bit 1075 bsBuffShadow <<= 8; 1076 bsLiveShadow -= 8; 1077 } 1078 bsBuffShadow |= 2 << 32 - bsLiveShadow - 2; 1079 bsLiveShadow += 2; 1080 1081 curr++; /* 10 */ 1082 } 1083 1084 while (curr > lti) { 1085 // inlined: bsW(2, 3); 1086 while (bsLiveShadow >= 8) { 1087 outShadow.write(bsBuffShadow >> 24); // write 8-bit 1088 bsBuffShadow <<= 8; 1089 bsLiveShadow -= 8; 1090 } 1091 bsBuffShadow |= 3 << 32 - bsLiveShadow - 2; 1092 bsLiveShadow += 2; 1093 1094 curr--; /* 11 */ 1095 } 1096 1097 // inlined: bsW(1, 0); 1098 while (bsLiveShadow >= 8) { 1099 outShadow.write(bsBuffShadow >> 24); // write 8-bit 1100 bsBuffShadow <<= 8; 1101 bsLiveShadow -= 8; 1102 } 1103 // bsBuffShadow |= 0 << (32 - bsLiveShadow - 1); 1104 bsLiveShadow++; 1105 } 1106 } 1107 1108 this.bsBuff = bsBuffShadow; 1109 this.bsLive = bsLiveShadow; 1110 } 1111 1112 private void sendMTFValues7() throws IOException { 1113 final Data dataShadow = this.data; 1114 final byte[][] len = dataShadow.sendMTFValues_len; 1115 final int[][] code = dataShadow.sendMTFValues_code; 1116 final OutputStream outShadow = this.out; 1117 final byte[] selector = dataShadow.selector; 1118 final char[] sfmap = dataShadow.sfmap; 1119 final int nMTFShadow = this.nMTF; 1120 1121 int selCtr = 0; 1122 1123 int bsLiveShadow = this.bsLive; 1124 int bsBuffShadow = this.bsBuff; 1125 1126 for (int gs = 0; gs < nMTFShadow;) { 1127 final int ge = Math.min(gs + G_SIZE - 1, nMTFShadow - 1); 1128 final int selector_selCtr = selector[selCtr] & 0xff; 1129 final int[] code_selCtr = code[selector_selCtr]; 1130 final byte[] len_selCtr = len[selector_selCtr]; 1131 1132 while (gs <= ge) { 1133 final int sfmap_i = sfmap[gs]; 1134 1135 // 1136 // inlined: bsW(len_selCtr[sfmap_i] & 0xff, 1137 // code_selCtr[sfmap_i]); 1138 // 1139 while (bsLiveShadow >= 8) { 1140 outShadow.write(bsBuffShadow >> 24); 1141 bsBuffShadow <<= 8; 1142 bsLiveShadow -= 8; 1143 } 1144 final int n = len_selCtr[sfmap_i] & 0xFF; 1145 bsBuffShadow |= code_selCtr[sfmap_i] << 32 - bsLiveShadow - n; 1146 bsLiveShadow += n; 1147 1148 gs++; 1149 } 1150 1151 gs = ge + 1; 1152 selCtr++; 1153 } 1154 1155 this.bsBuff = bsBuffShadow; 1156 this.bsLive = bsLiveShadow; 1157 } 1158 1159 @Override 1160 public void write(final byte[] buf, int offs, final int len) throws IOException { 1161 if (offs < 0) { 1162 throw new IndexOutOfBoundsException("offs(" + offs + ") < 0."); 1163 } 1164 if (len < 0) { 1165 throw new IndexOutOfBoundsException("len(" + len + ") < 0."); 1166 } 1167 if (offs + len > buf.length) { 1168 throw new IndexOutOfBoundsException("offs(" + offs + ") + len(" + len + ") > buf.length(" + buf.length + ")."); 1169 } 1170 if (closed) { 1171 throw new IOException("Stream closed"); 1172 } 1173 1174 for (final int hi = offs + len; offs < hi;) { 1175 write0(buf[offs++]); 1176 } 1177 } 1178 1179 @Override 1180 public void write(final int b) throws IOException { 1181 if (closed) { 1182 throw new IOException("Closed"); 1183 } 1184 write0(b); 1185 } 1186 1187 /** 1188 * Keeps track of the last bytes written and implicitly performs run-length encoding as the first step of the bzip2 algorithm. 1189 */ 1190 private void write0(int b) throws IOException { 1191 if (this.currentChar != -1) { 1192 b &= 0xff; 1193 if (this.currentChar == b) { 1194 if (++this.runLength > 254) { 1195 writeRun(); 1196 this.currentChar = -1; 1197 this.runLength = 0; 1198 } 1199 // else nothing to do 1200 } else { 1201 writeRun(); 1202 this.runLength = 1; 1203 this.currentChar = b; 1204 } 1205 } else { 1206 this.currentChar = b & 0xff; 1207 this.runLength++; 1208 } 1209 } 1210 1211 /** 1212 * Writes the current byte to the buffer, run-length encoding it if it has been repeated at least four times (the first step RLEs sequences of four 1213 * identical bytes). 1214 * 1215 * <p> 1216 * Flushes the current block before writing data if it is full. 1217 * </p> 1218 * 1219 * <p> 1220 * "write to the buffer" means adding to data.buffer starting two steps "after" this.last - initially starting at index 1 (not 0) - and updating this.last 1221 * to point to the last index written minus 1. 1222 * </p> 1223 */ 1224 private void writeRun() throws IOException { 1225 final int lastShadow = this.last; 1226 1227 if (lastShadow < this.allowableBlockSize) { 1228 final int currentCharShadow = this.currentChar; 1229 final Data dataShadow = this.data; 1230 dataShadow.inUse[currentCharShadow] = true; 1231 final byte ch = (byte) currentCharShadow; 1232 1233 int runLengthShadow = this.runLength; 1234 this.crc.update(currentCharShadow, runLengthShadow); 1235 1236 switch (runLengthShadow) { 1237 case 1: 1238 dataShadow.block[lastShadow + 2] = ch; 1239 this.last = lastShadow + 1; 1240 break; 1241 1242 case 2: 1243 dataShadow.block[lastShadow + 2] = ch; 1244 dataShadow.block[lastShadow + 3] = ch; 1245 this.last = lastShadow + 2; 1246 break; 1247 1248 case 3: { 1249 final byte[] block = dataShadow.block; 1250 block[lastShadow + 2] = ch; 1251 block[lastShadow + 3] = ch; 1252 block[lastShadow + 4] = ch; 1253 this.last = lastShadow + 3; 1254 } 1255 break; 1256 1257 default: { 1258 runLengthShadow -= 4; 1259 dataShadow.inUse[runLengthShadow] = true; 1260 final byte[] block = dataShadow.block; 1261 block[lastShadow + 2] = ch; 1262 block[lastShadow + 3] = ch; 1263 block[lastShadow + 4] = ch; 1264 block[lastShadow + 5] = ch; 1265 block[lastShadow + 6] = (byte) runLengthShadow; 1266 this.last = lastShadow + 5; 1267 } 1268 break; 1269 1270 } 1271 } else { 1272 endBlock(); 1273 initBlock(); 1274 writeRun(); 1275 } 1276 } 1277 1278}