001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, 013 * software distributed under the License is distributed on an 014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 015 * KIND, either express or implied. See the License for the 016 * specific language governing permissions and limitations 017 * under the License. 018 */ 019package org.apache.commons.compress.compressors.bzip2; 020 021import java.io.IOException; 022import java.io.OutputStream; 023import java.util.Arrays; 024 025import org.apache.commons.compress.compressors.CompressorOutputStream; 026import org.apache.commons.io.IOUtils; 027 028/** 029 * An output stream that compresses into the BZip2 format into another stream. 030 * 031 * <p> 032 * The compression requires large amounts of memory. Thus you should call the {@link #close() close()} method as soon as possible, to force 033 * {@code BZip2CompressorOutputStream} to release the allocated memory. 034 * </p> 035 * 036 * <p> 037 * You can shrink the amount of allocated memory and maybe raise the compression speed by choosing a lower blocksize, which in turn may cause a lower 038 * compression ratio. You can avoid unnecessary memory allocation by avoiding using a blocksize which is bigger than the size of the input. 039 * </p> 040 * 041 * <p> 042 * You can compute the memory usage for compressing by the following formula: 043 * </p> 044 * 045 * <pre> 046 * <code>400k + (9 * blocksize)</code>. 047 * </pre> 048 * 049 * <p> 050 * To get the memory required for decompression by {@link BZip2CompressorInputStream} use 051 * </p> 052 * 053 * <pre> 054 * <code>65k + (5 * blocksize)</code>. 055 * </pre> 056 * 057 * <table style="width:100%" border="1"> 058 * <caption>Memory usage by blocksize</caption> 059 * <tr> 060 * <th colspan="3">Memory usage by blocksize</th> 061 * </tr> 062 * <tr> 063 * <th style="text-align: right">Blocksize</th> 064 * <th style="text-align: right">Compression<br> 065 * memory usage</th> 066 * <th style="text-align: right">Decompression<br> 067 * memory usage</th> 068 * </tr> 069 * <tr> 070 * <td style="text-align: right">100k</td> 071 * <td style="text-align: right">1300k</td> 072 * <td style="text-align: right">565k</td> 073 * </tr> 074 * <tr> 075 * <td style="text-align: right">200k</td> 076 * <td style="text-align: right">2200k</td> 077 * <td style="text-align: right">1065k</td> 078 * </tr> 079 * <tr> 080 * <td style="text-align: right">300k</td> 081 * <td style="text-align: right">3100k</td> 082 * <td style="text-align: right">1565k</td> 083 * </tr> 084 * <tr> 085 * <td style="text-align: right">400k</td> 086 * <td style="text-align: right">4000k</td> 087 * <td style="text-align: right">2065k</td> 088 * </tr> 089 * <tr> 090 * <td style="text-align: right">500k</td> 091 * <td style="text-align: right">4900k</td> 092 * <td style="text-align: right">2565k</td> 093 * </tr> 094 * <tr> 095 * <td style="text-align: right">600k</td> 096 * <td style="text-align: right">5800k</td> 097 * <td style="text-align: right">3065k</td> 098 * </tr> 099 * <tr> 100 * <td style="text-align: right">700k</td> 101 * <td style="text-align: right">6700k</td> 102 * <td style="text-align: right">3565k</td> 103 * </tr> 104 * <tr> 105 * <td style="text-align: right">800k</td> 106 * <td style="text-align: right">7600k</td> 107 * <td style="text-align: right">4065k</td> 108 * </tr> 109 * <tr> 110 * <td style="text-align: right">900k</td> 111 * <td style="text-align: right">8500k</td> 112 * <td style="text-align: right">4565k</td> 113 * </tr> 114 * </table> 115 * 116 * <p> 117 * For decompression {@code BZip2CompressorInputStream} allocates less memory if the bzipped input is smaller than one block. 118 * </p> 119 * 120 * <p> 121 * Instances of this class are not threadsafe. 122 * </p> 123 * 124 * <p> 125 * TODO: Update to BZip2 1.0.1 126 * </p> 127 * 128 * @NotThreadSafe 129 */ 130public class BZip2CompressorOutputStream extends CompressorOutputStream<OutputStream> implements BZip2Constants { 131 132 static final class Data { 133 134 // with blockSize 900k 135 /* maps unsigned byte => "does it occur in block" */ 136 final boolean[] inUse = new boolean[256]; // 256 byte 137 final byte[] unseqToSeq = new byte[256]; // 256 byte 138 final int[] mtfFreq = new int[MAX_ALPHA_SIZE]; // 1032 byte 139 final byte[] selector = new byte[MAX_SELECTORS]; // 18002 byte 140 final byte[] selectorMtf = new byte[MAX_SELECTORS]; // 18002 byte 141 142 final byte[] generateMTFValues_yy = new byte[256]; // 256 byte 143 final byte[][] sendMTFValues_len = new byte[N_GROUPS][MAX_ALPHA_SIZE]; // 1548 144 // byte 145 final int[][] sendMTFValues_rfreq = new int[N_GROUPS][MAX_ALPHA_SIZE]; // 6192 146 // byte 147 final int[] sendMTFValues_fave = new int[N_GROUPS]; // 24 byte 148 final short[] sendMTFValues_cost = new short[N_GROUPS]; // 12 byte 149 final int[][] sendMTFValues_code = new int[N_GROUPS][MAX_ALPHA_SIZE]; // 6192 150 // byte 151 final byte[] sendMTFValues2_pos = new byte[N_GROUPS]; // 6 byte 152 final boolean[] sentMTFValues4_inUse16 = new boolean[16]; // 16 byte 153 154 final int[] heap = new int[MAX_ALPHA_SIZE + 2]; // 1040 byte 155 final int[] weight = new int[MAX_ALPHA_SIZE * 2]; // 2064 byte 156 final int[] parent = new int[MAX_ALPHA_SIZE * 2]; // 2064 byte 157 158 // ------------ 159 // 333408 byte 160 161 /* 162 * holds the RLEd block of original data starting at index 1. After sorting the last byte added to the buffer is at index 0. 163 */ 164 final byte[] block; // 900021 byte 165 /* 166 * maps index in Burrows-Wheeler transformed block => index of byte in original block 167 */ 168 final int[] fmap; // 3600000 byte 169 final char[] sfmap; // 3600000 byte 170 // ------------ 171 // 8433529 byte 172 // ============ 173 174 /** 175 * Index of original line in Burrows-Wheeler table. 176 * 177 * <p> 178 * This is the index in fmap that points to the last byte of the original data. 179 * </p> 180 */ 181 int origPtr; 182 183 Data(final int blockSize100k) { 184 final int n = blockSize100k * BASEBLOCKSIZE; 185 this.block = new byte[n + 1 + NUM_OVERSHOOT_BYTES]; 186 this.fmap = new int[n]; 187 this.sfmap = new char[2 * n]; 188 } 189 190 } 191 192 /** 193 * The minimum supported blocksize {@code == 1}. 194 */ 195 public static final int MIN_BLOCKSIZE = 1; 196 197 /** 198 * The maximum supported blocksize {@code == 9}. 199 */ 200 public static final int MAX_BLOCKSIZE = 9; 201 private static final int GREATER_ICOST = 15; 202 203 private static final int LESSER_ICOST = 0; 204 205 /** 206 * Chooses a blocksize based on the given length of the data to compress. 207 * 208 * @return The blocksize, between {@link #MIN_BLOCKSIZE} and {@link #MAX_BLOCKSIZE} both inclusive. For a negative {@code inputLength} this method returns 209 * {@code MAX_BLOCKSIZE} always. 210 * 211 * @param inputLength The length of the data which will be compressed by {@code BZip2CompressorOutputStream}. 212 */ 213 public static int chooseBlockSize(final long inputLength) { 214 return inputLength > 0 ? (int) Math.min(inputLength / 132000 + 1, 9) : MAX_BLOCKSIZE; 215 } 216 217 private static void hbAssignCodes(final int[] code, final byte[] length, final int minLen, final int maxLen, final int alphaSize) { 218 int vec = 0; 219 for (int n = minLen; n <= maxLen; n++) { 220 for (int i = 0; i < alphaSize; i++) { 221 if ((length[i] & 0xff) == n) { 222 code[i] = vec; 223 vec++; 224 } 225 } 226 vec <<= 1; 227 } 228 } 229 230 private static void hbMakeCodeLengths(final byte[] len, final int[] freq, final Data dat, final int alphaSize, final int maxLen) { 231 /* 232 * Nodes and heap entries run from 1. Entry 0 for both the heap and nodes is a sentinel. 233 */ 234 final int[] heap = dat.heap; 235 final int[] weight = dat.weight; 236 final int[] parent = dat.parent; 237 238 for (int i = alphaSize; --i >= 0;) { 239 weight[i + 1] = (freq[i] == 0 ? 1 : freq[i]) << 8; 240 } 241 242 for (boolean tooLong = true; tooLong;) { 243 tooLong = false; 244 245 int nNodes = alphaSize; 246 int nHeap = 0; 247 heap[0] = 0; 248 weight[0] = 0; 249 parent[0] = -2; 250 251 for (int i = 1; i <= alphaSize; i++) { 252 parent[i] = -1; 253 nHeap++; 254 heap[nHeap] = i; 255 256 int zz = nHeap; 257 final int tmp = heap[zz]; 258 while (weight[tmp] < weight[heap[zz >> 1]]) { 259 heap[zz] = heap[zz >> 1]; 260 zz >>= 1; 261 } 262 heap[zz] = tmp; 263 } 264 265 while (nHeap > 1) { 266 final int n1 = heap[1]; 267 heap[1] = heap[nHeap]; 268 nHeap--; 269 270 int yy = 0; 271 int zz = 1; 272 int tmp = heap[1]; 273 274 while (true) { 275 yy = zz << 1; 276 277 if (yy > nHeap) { 278 break; 279 } 280 281 if (yy < nHeap && weight[heap[yy + 1]] < weight[heap[yy]]) { 282 yy++; 283 } 284 285 if (weight[tmp] < weight[heap[yy]]) { 286 break; 287 } 288 289 heap[zz] = heap[yy]; 290 zz = yy; 291 } 292 293 heap[zz] = tmp; 294 295 final int n2 = heap[1]; 296 heap[1] = heap[nHeap]; 297 nHeap--; 298 299 yy = 0; 300 zz = 1; 301 tmp = heap[1]; 302 303 while (true) { 304 yy = zz << 1; 305 306 if (yy > nHeap) { 307 break; 308 } 309 310 if (yy < nHeap && weight[heap[yy + 1]] < weight[heap[yy]]) { 311 yy++; 312 } 313 314 if (weight[tmp] < weight[heap[yy]]) { 315 break; 316 } 317 318 heap[zz] = heap[yy]; 319 zz = yy; 320 } 321 322 heap[zz] = tmp; 323 nNodes++; 324 parent[n1] = parent[n2] = nNodes; 325 326 final int weight_n1 = weight[n1]; 327 final int weight_n2 = weight[n2]; 328 weight[nNodes] = (weight_n1 & 0xffffff00) + (weight_n2 & 0xffffff00) | 1 + Math.max(weight_n1 & 0x000000ff, weight_n2 & 0x000000ff); 329 330 parent[nNodes] = -1; 331 nHeap++; 332 heap[nHeap] = nNodes; 333 334 tmp = 0; 335 zz = nHeap; 336 tmp = heap[zz]; 337 final int weight_tmp = weight[tmp]; 338 while (weight_tmp < weight[heap[zz >> 1]]) { 339 heap[zz] = heap[zz >> 1]; 340 zz >>= 1; 341 } 342 heap[zz] = tmp; 343 344 } 345 346 for (int i = 1; i <= alphaSize; i++) { 347 int j = 0; 348 int k = i; 349 350 for (int parent_k; (parent_k = parent[k]) >= 0;) { 351 k = parent_k; 352 j++; 353 } 354 355 len[i - 1] = (byte) j; 356 if (j > maxLen) { 357 tooLong = true; 358 } 359 } 360 361 if (tooLong) { 362 for (int i = 1; i < alphaSize; i++) { 363 int j = weight[i] >> 8; 364 j = 1 + (j >> 1); 365 weight[i] = j << 8; 366 } 367 } 368 } 369 } 370 371 /** 372 * Index of the last char in the block, so the block size == last + 1. 373 */ 374 private int last; 375 /** 376 * Always: in the range 0 .. 9. The current block size is 100000 * this number. 377 */ 378 private final int blockSize100k; 379 380 private int bsBuff; 381 382 private int bsLive; 383 384 private final CRC crc = new CRC(); 385 private int nInUse; 386 387 private int nMTF; 388 private int currentChar = -1; 389 private int runLength; 390 391 private int combinedCRC; 392 393 private final int allowableBlockSize; 394 /** 395 * All memory intensive stuff. 396 */ 397 private Data data; 398 399 private BlockSort blockSorter; 400 401 private volatile boolean closed; 402 403 /** 404 * Constructs a new {@code BZip2CompressorOutputStream} with a blocksize of 900k. 405 * 406 * @param out the destination stream. 407 * 408 * @throws IOException if an I/O error occurs in the specified stream. 409 * @throws NullPointerException if {@code out == null}. 410 */ 411 public BZip2CompressorOutputStream(final OutputStream out) throws IOException { 412 this(out, MAX_BLOCKSIZE); 413 } 414 415 /** 416 * Constructs a new {@code BZip2CompressorOutputStream} with specified blocksize. 417 * 418 * @param out the destination stream. 419 * @param blockSize the blockSize as 100k units. 420 * 421 * @throws IOException if an I/O error occurs in the specified stream. 422 * @throws IllegalArgumentException if {@code (blockSize < 1) || (blockSize > 9)}. 423 * @throws NullPointerException if {@code out == null}. 424 * 425 * @see #MIN_BLOCKSIZE 426 * @see #MAX_BLOCKSIZE 427 */ 428 public BZip2CompressorOutputStream(final OutputStream out, final int blockSize) throws IOException { 429 super(out); 430 if (blockSize < 1) { 431 throw new IllegalArgumentException("blockSize(" + blockSize + ") < 1"); 432 } 433 if (blockSize > 9) { 434 throw new IllegalArgumentException("blockSize(" + blockSize + ") > 9"); 435 } 436 this.blockSize100k = blockSize; 437 /* 20 is just a paranoia constant */ 438 this.allowableBlockSize = this.blockSize100k * BASEBLOCKSIZE - 20; 439 init(); 440 } 441 442 private void blockSort() { 443 blockSorter.blockSort(data, last); 444 } 445 446 private void bsFinishedWithStream() throws IOException { 447 while (this.bsLive > 0) { 448 final int ch = this.bsBuff >> 24; 449 this.out.write(ch); // write 8-bit 450 this.bsBuff <<= 8; 451 this.bsLive -= 8; 452 } 453 } 454 455 private void bsPutInt(final int u) throws IOException { 456 bsW(8, u >> 24 & 0xff); 457 bsW(8, u >> 16 & 0xff); 458 bsW(8, u >> 8 & 0xff); 459 bsW(8, u & 0xff); 460 } 461 462 private void bsPutUByte(final int c) throws IOException { 463 bsW(8, c); 464 } 465 466 private void bsW(final int n, final int v) throws IOException { 467 final OutputStream outShadow = this.out; 468 int bsLiveShadow = this.bsLive; 469 int bsBuffShadow = this.bsBuff; 470 471 while (bsLiveShadow >= 8) { 472 outShadow.write(bsBuffShadow >> 24); // write 8-bit 473 bsBuffShadow <<= 8; 474 bsLiveShadow -= 8; 475 } 476 477 this.bsBuff = bsBuffShadow | v << 32 - bsLiveShadow - n; 478 this.bsLive = bsLiveShadow + n; 479 } 480 481 private void checkClosed() throws IOException { 482 if (closed) { 483 throw new IOException("Stream closed"); 484 } 485 } 486 487 @Override 488 public void close() throws IOException { 489 if (!closed) { 490 try { 491 finish(); 492 } finally { 493 IOUtils.close(out); 494 } 495 } 496 } 497 498 private void endBlock() throws IOException { 499 final int blockCRC = this.crc.getValue(); 500 this.combinedCRC = this.combinedCRC << 1 | this.combinedCRC >>> 31; 501 this.combinedCRC ^= blockCRC; 502 503 // empty block at end of file 504 if (this.last == -1) { 505 return; 506 } 507 508 /* sort the block and establish posn of original string */ 509 blockSort(); 510 511 /* 512 * A 6-byte block header, the value chosen arbitrarily as 0x314159265359 :-). A 32 bit value does not really give a strong enough guarantee that the 513 * value will not appear by chance in the compressed data stream. Worst-case probability of this event, for a 900k block, is about 2.0e-3 for 32 bits, 514 * 1.0e-5 for 40 bits and 4.0e-8 for 48 bits. For a compressed file of size 100Gb -- about 100000 blocks -- only a 48-bit marker will do. NB: normal 515 * compression/ decompression doesn't rely on these statistical properties. They are only important when trying to recover blocks from damaged files. 516 */ 517 bsPutUByte(0x31); 518 bsPutUByte(0x41); 519 bsPutUByte(0x59); 520 bsPutUByte(0x26); 521 bsPutUByte(0x53); 522 bsPutUByte(0x59); 523 524 /* Now the block's CRC, so it is in a known place. */ 525 bsPutInt(blockCRC); 526 527 /* Now a single bit indicating no randomization. */ 528 bsW(1, 0); 529 530 /* Finally, block's contents proper. */ 531 moveToFrontCodeAndSend(); 532 } 533 534 private void endCompression() throws IOException { 535 /* 536 * Now another magic 48-bit number, 0x177245385090, to indicate the end of the last block. (sqrt(pi), if you want to know. I did want to use e, but it 537 * contains too much repetition -- 27 18 28 18 28 46 -- for me to feel statistically comfortable. Call me paranoid.) 538 */ 539 bsPutUByte(0x17); 540 bsPutUByte(0x72); 541 bsPutUByte(0x45); 542 bsPutUByte(0x38); 543 bsPutUByte(0x50); 544 bsPutUByte(0x90); 545 546 bsPutInt(this.combinedCRC); 547 bsFinishedWithStream(); 548 } 549 550 public void finish() throws IOException { 551 if (!closed) { 552 closed = true; 553 try { 554 if (this.runLength > 0) { 555 writeRun(); 556 } 557 this.currentChar = -1; 558 endBlock(); 559 endCompression(); 560 } finally { 561 this.blockSorter = null; 562 this.data = null; 563 } 564 } 565 } 566 567 @Override 568 public void flush() throws IOException { 569 if (out != null) { 570 super.flush(); 571 } 572 } 573 574 /* 575 * Performs Move-To-Front on the Burrows-Wheeler transformed buffer, storing the MTFed data in data.sfmap in RUNA/RUNB run-length-encoded form. 576 * 577 * <p>Keeps track of byte frequencies in data.mtfFreq at the same time.</p> 578 */ 579 private void generateMTFValues() { 580 final int lastShadow = this.last; 581 final Data dataShadow = this.data; 582 final boolean[] inUse = dataShadow.inUse; 583 final byte[] block = dataShadow.block; 584 final int[] fmap = dataShadow.fmap; 585 final char[] sfmap = dataShadow.sfmap; 586 final int[] mtfFreq = dataShadow.mtfFreq; 587 final byte[] unseqToSeq = dataShadow.unseqToSeq; 588 final byte[] yy = dataShadow.generateMTFValues_yy; 589 590 // make maps 591 int nInUseShadow = 0; 592 for (int i = 0; i < 256; i++) { 593 if (inUse[i]) { 594 unseqToSeq[i] = (byte) nInUseShadow; 595 nInUseShadow++; 596 } 597 } 598 this.nInUse = nInUseShadow; 599 600 final int eob = nInUseShadow + 1; 601 602 Arrays.fill(mtfFreq, 0, eob + 1, 0); 603 604 for (int i = nInUseShadow; --i >= 0;) { 605 yy[i] = (byte) i; 606 } 607 608 int wr = 0; 609 int zPend = 0; 610 611 for (int i = 0; i <= lastShadow; i++) { 612 final byte ll_i = unseqToSeq[block[fmap[i]] & 0xff]; 613 byte tmp = yy[0]; 614 int j = 0; 615 616 while (ll_i != tmp) { 617 j++; 618 final byte tmp2 = tmp; 619 tmp = yy[j]; 620 yy[j] = tmp2; 621 } 622 yy[0] = tmp; 623 624 if (j == 0) { 625 zPend++; 626 } else { 627 if (zPend > 0) { 628 zPend--; 629 while (true) { 630 if ((zPend & 1) == 0) { 631 sfmap[wr] = RUNA; 632 wr++; 633 mtfFreq[RUNA]++; 634 } else { 635 sfmap[wr] = RUNB; 636 wr++; 637 mtfFreq[RUNB]++; 638 } 639 640 if (zPend < 2) { 641 break; 642 } 643 zPend = zPend - 2 >> 1; 644 } 645 zPend = 0; 646 } 647 sfmap[wr] = (char) (j + 1); 648 wr++; 649 mtfFreq[j + 1]++; 650 } 651 } 652 653 if (zPend > 0) { 654 zPend--; 655 while (true) { 656 if ((zPend & 1) == 0) { 657 sfmap[wr] = RUNA; 658 wr++; 659 mtfFreq[RUNA]++; 660 } else { 661 sfmap[wr] = RUNB; 662 wr++; 663 mtfFreq[RUNB]++; 664 } 665 666 if (zPend < 2) { 667 break; 668 } 669 zPend = zPend - 2 >> 1; 670 } 671 } 672 673 sfmap[wr] = (char) eob; 674 mtfFreq[eob]++; 675 this.nMTF = wr + 1; 676 } 677 678 /** 679 * Returns the blocksize parameter specified at construction time. 680 * 681 * @return the blocksize parameter specified at construction time 682 */ 683 public final int getBlockSize() { 684 return this.blockSize100k; 685 } 686 687 /** 688 * Writes magic bytes like BZ on the first position of the stream and bytes indicating the file-format, which is huffmanized, followed by a digit indicating 689 * blockSize100k. 690 * 691 * @throws IOException if the magic bytes could not been written 692 */ 693 private void init() throws IOException { 694 bsPutUByte('B'); 695 bsPutUByte('Z'); 696 697 this.data = new Data(this.blockSize100k); 698 this.blockSorter = new BlockSort(this.data); 699 700 // huffmanized magic bytes 701 bsPutUByte('h'); 702 bsPutUByte('0' + this.blockSize100k); 703 704 this.combinedCRC = 0; 705 initBlock(); 706 } 707 708 private void initBlock() { 709 // blockNo++; 710 this.crc.reset(); 711 this.last = -1; 712 // ch = 0; 713 714 final boolean[] inUse = this.data.inUse; 715 for (int i = 256; --i >= 0;) { 716 inUse[i] = false; 717 } 718 719 } 720 721 private void moveToFrontCodeAndSend() throws IOException { 722 bsW(24, this.data.origPtr); 723 generateMTFValues(); 724 sendMTFValues(); 725 } 726 727 private void sendMTFValues() throws IOException { 728 final byte[][] len = this.data.sendMTFValues_len; 729 final int alphaSize = this.nInUse + 2; 730 731 for (int t = N_GROUPS; --t >= 0;) { 732 final byte[] len_t = len[t]; 733 for (int v = alphaSize; --v >= 0;) { 734 len_t[v] = GREATER_ICOST; 735 } 736 } 737 738 /* Decide how many coding tables to use */ 739 // assert (this.nMTF > 0) : this.nMTF; 740 final int nGroups = this.nMTF < 200 ? 2 : this.nMTF < 600 ? 3 : this.nMTF < 1200 ? 4 : this.nMTF < 2400 ? 5 : 6; 741 742 /* Generate an initial set of coding tables */ 743 sendMTFValues0(nGroups, alphaSize); 744 745 /* 746 * Iterate up to N_ITERS times to improve the tables. 747 */ 748 final int nSelectors = sendMTFValues1(nGroups, alphaSize); 749 750 /* Compute MTF values for the selectors. */ 751 sendMTFValues2(nGroups, nSelectors); 752 753 /* Assign actual codes for the tables. */ 754 sendMTFValues3(nGroups, alphaSize); 755 756 /* Transmit the mapping table. */ 757 sendMTFValues4(); 758 759 /* Now the selectors. */ 760 sendMTFValues5(nGroups, nSelectors); 761 762 /* Now the coding tables. */ 763 sendMTFValues6(nGroups, alphaSize); 764 765 /* And finally, the block data proper */ 766 sendMTFValues7(); 767 } 768 769 private void sendMTFValues0(final int nGroups, final int alphaSize) { 770 final byte[][] len = this.data.sendMTFValues_len; 771 final int[] mtfFreq = this.data.mtfFreq; 772 773 int remF = this.nMTF; 774 int gs = 0; 775 776 for (int nPart = nGroups; nPart > 0; nPart--) { 777 final int tFreq = remF / nPart; 778 int ge = gs - 1; 779 int aFreq = 0; 780 781 for (final int a = alphaSize - 1; aFreq < tFreq && ge < a;) { 782 aFreq += mtfFreq[++ge]; 783 } 784 785 if (ge > gs && nPart != nGroups && nPart != 1 && (nGroups - nPart & 1) != 0) { 786 aFreq -= mtfFreq[ge--]; 787 } 788 789 final byte[] len_np = len[nPart - 1]; 790 for (int v = alphaSize; --v >= 0;) { 791 if (v >= gs && v <= ge) { 792 len_np[v] = LESSER_ICOST; 793 } else { 794 len_np[v] = GREATER_ICOST; 795 } 796 } 797 798 gs = ge + 1; 799 remF -= aFreq; 800 } 801 } 802 803 private int sendMTFValues1(final int nGroups, final int alphaSize) { 804 final Data dataShadow = this.data; 805 final int[][] rfreq = dataShadow.sendMTFValues_rfreq; 806 final int[] fave = dataShadow.sendMTFValues_fave; 807 final short[] cost = dataShadow.sendMTFValues_cost; 808 final char[] sfmap = dataShadow.sfmap; 809 final byte[] selector = dataShadow.selector; 810 final byte[][] len = dataShadow.sendMTFValues_len; 811 final byte[] len_0 = len[0]; 812 final byte[] len_1 = len[1]; 813 final byte[] len_2 = len[2]; 814 final byte[] len_3 = len[3]; 815 final byte[] len_4 = len[4]; 816 final byte[] len_5 = len[5]; 817 final int nMTFShadow = this.nMTF; 818 819 int nSelectors = 0; 820 821 for (int iter = 0; iter < N_ITERS; iter++) { 822 for (int t = nGroups; --t >= 0;) { 823 fave[t] = 0; 824 final int[] rfreqt = rfreq[t]; 825 for (int i = alphaSize; --i >= 0;) { 826 rfreqt[i] = 0; 827 } 828 } 829 830 nSelectors = 0; 831 832 for (int gs = 0; gs < this.nMTF;) { 833 // Set group start & end marks. 834 835 // Calculate the cost of this group as coded by each of the 836 // coding tables. 837 838 final int ge = Math.min(gs + G_SIZE - 1, nMTFShadow - 1); 839 840 final byte mask = (byte) 0xff; 841 if (nGroups == N_GROUPS) { 842 // unrolled version of the else-block 843 844 short cost0 = 0; 845 short cost1 = 0; 846 short cost2 = 0; 847 short cost3 = 0; 848 short cost4 = 0; 849 short cost5 = 0; 850 851 for (int i = gs; i <= ge; i++) { 852 final int icv = sfmap[i]; 853 cost0 += (short) (len_0[icv] & mask); 854 cost1 += (short) (len_1[icv] & mask); 855 cost2 += (short) (len_2[icv] & mask); 856 cost3 += (short) (len_3[icv] & mask); 857 cost4 += (short) (len_4[icv] & mask); 858 cost5 += (short) (len_5[icv] & mask); 859 } 860 861 cost[0] = cost0; 862 cost[1] = cost1; 863 cost[2] = cost2; 864 cost[3] = cost3; 865 cost[4] = cost4; 866 cost[5] = cost5; 867 868 } else { 869 for (int t = nGroups; --t >= 0;) { 870 cost[t] = 0; 871 } 872 873 for (int i = gs; i <= ge; i++) { 874 final int icv = sfmap[i]; 875 for (int t = nGroups; --t >= 0;) { 876 cost[t] += (short) (len[t][icv] & mask); 877 } 878 } 879 } 880 881 /* 882 * Find the coding table which is best for this group, and record its identity in the selector table. 883 */ 884 int bt = -1; 885 for (int t = nGroups, bc = 999999999; --t >= 0;) { 886 final int cost_t = cost[t]; 887 if (cost_t < bc) { 888 bc = cost_t; 889 bt = t; 890 } 891 } 892 893 fave[bt]++; 894 selector[nSelectors] = (byte) bt; 895 nSelectors++; 896 897 /* 898 * Increment the symbol frequencies for the selected table. 899 */ 900 final int[] rfreq_bt = rfreq[bt]; 901 for (int i = gs; i <= ge; i++) { 902 rfreq_bt[sfmap[i]]++; 903 } 904 905 gs = ge + 1; 906 } 907 908 /* 909 * Recompute the tables based on the accumulated frequencies. 910 */ 911 for (int t = 0; t < nGroups; t++) { 912 hbMakeCodeLengths(len[t], rfreq[t], this.data, alphaSize, 20); 913 } 914 } 915 916 return nSelectors; 917 } 918 919 private void sendMTFValues2(final int nGroups, final int nSelectors) { 920 // assert (nGroups < 8) : nGroups; 921 922 final Data dataShadow = this.data; 923 final byte[] pos = dataShadow.sendMTFValues2_pos; 924 925 for (int i = nGroups; --i >= 0;) { 926 pos[i] = (byte) i; 927 } 928 929 for (int i = 0; i < nSelectors; i++) { 930 final byte ll_i = dataShadow.selector[i]; 931 byte tmp = pos[0]; 932 int j = 0; 933 934 while (ll_i != tmp) { 935 j++; 936 final byte tmp2 = tmp; 937 tmp = pos[j]; 938 pos[j] = tmp2; 939 } 940 941 pos[0] = tmp; 942 dataShadow.selectorMtf[i] = (byte) j; 943 } 944 } 945 946 private void sendMTFValues3(final int nGroups, final int alphaSize) { 947 final int[][] code = this.data.sendMTFValues_code; 948 final byte[][] len = this.data.sendMTFValues_len; 949 950 for (int t = 0; t < nGroups; t++) { 951 int minLen = 32; 952 int maxLen = 0; 953 final byte[] len_t = len[t]; 954 for (int i = alphaSize; --i >= 0;) { 955 final int l = len_t[i] & 0xff; 956 if (l > maxLen) { 957 maxLen = l; 958 } 959 if (l < minLen) { 960 minLen = l; 961 } 962 } 963 964 // assert (maxLen <= 20) : maxLen; 965 // assert (minLen >= 1) : minLen; 966 967 hbAssignCodes(code[t], len[t], minLen, maxLen, alphaSize); 968 } 969 } 970 971 private void sendMTFValues4() throws IOException { 972 final boolean[] inUse = this.data.inUse; 973 final boolean[] inUse16 = this.data.sentMTFValues4_inUse16; 974 975 for (int i = 16; --i >= 0;) { 976 inUse16[i] = false; 977 final int i16 = i * 16; 978 for (int j = 16; --j >= 0;) { 979 if (inUse[i16 + j]) { 980 inUse16[i] = true; 981 break; 982 } 983 } 984 } 985 986 for (int i = 0; i < 16; i++) { 987 bsW(1, inUse16[i] ? 1 : 0); 988 } 989 990 final OutputStream outShadow = this.out; 991 int bsLiveShadow = this.bsLive; 992 int bsBuffShadow = this.bsBuff; 993 994 for (int i = 0; i < 16; i++) { 995 if (inUse16[i]) { 996 final int i16 = i * 16; 997 for (int j = 0; j < 16; j++) { 998 // inlined: bsW(1, inUse[i16 + j] ? 1 : 0); 999 while (bsLiveShadow >= 8) { 1000 outShadow.write(bsBuffShadow >> 24); // write 8-bit 1001 bsBuffShadow <<= 8; 1002 bsLiveShadow -= 8; 1003 } 1004 if (inUse[i16 + j]) { 1005 bsBuffShadow |= 1 << 32 - bsLiveShadow - 1; 1006 } 1007 bsLiveShadow++; 1008 } 1009 } 1010 } 1011 1012 this.bsBuff = bsBuffShadow; 1013 this.bsLive = bsLiveShadow; 1014 } 1015 1016 private void sendMTFValues5(final int nGroups, final int nSelectors) throws IOException { 1017 bsW(3, nGroups); 1018 bsW(15, nSelectors); 1019 1020 final OutputStream outShadow = this.out; 1021 final byte[] selectorMtf = this.data.selectorMtf; 1022 1023 int bsLiveShadow = this.bsLive; 1024 int bsBuffShadow = this.bsBuff; 1025 1026 for (int i = 0; i < nSelectors; i++) { 1027 for (int j = 0, hj = selectorMtf[i] & 0xff; j < hj; j++) { 1028 // inlined: bsW(1, 1); 1029 while (bsLiveShadow >= 8) { 1030 outShadow.write(bsBuffShadow >> 24); 1031 bsBuffShadow <<= 8; 1032 bsLiveShadow -= 8; 1033 } 1034 bsBuffShadow |= 1 << 32 - bsLiveShadow - 1; 1035 bsLiveShadow++; 1036 } 1037 1038 // inlined: bsW(1, 0); 1039 while (bsLiveShadow >= 8) { 1040 outShadow.write(bsBuffShadow >> 24); 1041 bsBuffShadow <<= 8; 1042 bsLiveShadow -= 8; 1043 } 1044 // bsBuffShadow |= 0 << (32 - bsLiveShadow - 1); 1045 bsLiveShadow++; 1046 } 1047 1048 this.bsBuff = bsBuffShadow; 1049 this.bsLive = bsLiveShadow; 1050 } 1051 1052 private void sendMTFValues6(final int nGroups, final int alphaSize) throws IOException { 1053 final byte[][] len = this.data.sendMTFValues_len; 1054 final OutputStream outShadow = this.out; 1055 1056 int bsLiveShadow = this.bsLive; 1057 int bsBuffShadow = this.bsBuff; 1058 1059 for (int t = 0; t < nGroups; t++) { 1060 final byte[] len_t = len[t]; 1061 int curr = len_t[0] & 0xff; 1062 1063 // inlined: bsW(5, curr); 1064 while (bsLiveShadow >= 8) { 1065 outShadow.write(bsBuffShadow >> 24); // write 8-bit 1066 bsBuffShadow <<= 8; 1067 bsLiveShadow -= 8; 1068 } 1069 bsBuffShadow |= curr << 32 - bsLiveShadow - 5; 1070 bsLiveShadow += 5; 1071 1072 for (int i = 0; i < alphaSize; i++) { 1073 final int lti = len_t[i] & 0xff; 1074 while (curr < lti) { 1075 // inlined: bsW(2, 2); 1076 while (bsLiveShadow >= 8) { 1077 outShadow.write(bsBuffShadow >> 24); // write 8-bit 1078 bsBuffShadow <<= 8; 1079 bsLiveShadow -= 8; 1080 } 1081 bsBuffShadow |= 2 << 32 - bsLiveShadow - 2; 1082 bsLiveShadow += 2; 1083 1084 curr++; /* 10 */ 1085 } 1086 1087 while (curr > lti) { 1088 // inlined: bsW(2, 3); 1089 while (bsLiveShadow >= 8) { 1090 outShadow.write(bsBuffShadow >> 24); // write 8-bit 1091 bsBuffShadow <<= 8; 1092 bsLiveShadow -= 8; 1093 } 1094 bsBuffShadow |= 3 << 32 - bsLiveShadow - 2; 1095 bsLiveShadow += 2; 1096 1097 curr--; /* 11 */ 1098 } 1099 1100 // inlined: bsW(1, 0); 1101 while (bsLiveShadow >= 8) { 1102 outShadow.write(bsBuffShadow >> 24); // write 8-bit 1103 bsBuffShadow <<= 8; 1104 bsLiveShadow -= 8; 1105 } 1106 // bsBuffShadow |= 0 << (32 - bsLiveShadow - 1); 1107 bsLiveShadow++; 1108 } 1109 } 1110 1111 this.bsBuff = bsBuffShadow; 1112 this.bsLive = bsLiveShadow; 1113 } 1114 1115 private void sendMTFValues7() throws IOException { 1116 final Data dataShadow = this.data; 1117 final byte[][] len = dataShadow.sendMTFValues_len; 1118 final int[][] code = dataShadow.sendMTFValues_code; 1119 final OutputStream outShadow = this.out; 1120 final byte[] selector = dataShadow.selector; 1121 final char[] sfmap = dataShadow.sfmap; 1122 final int nMTFShadow = this.nMTF; 1123 1124 int selCtr = 0; 1125 1126 int bsLiveShadow = this.bsLive; 1127 int bsBuffShadow = this.bsBuff; 1128 1129 for (int gs = 0; gs < nMTFShadow;) { 1130 final int ge = Math.min(gs + G_SIZE - 1, nMTFShadow - 1); 1131 final int selector_selCtr = selector[selCtr] & 0xff; 1132 final int[] code_selCtr = code[selector_selCtr]; 1133 final byte[] len_selCtr = len[selector_selCtr]; 1134 1135 while (gs <= ge) { 1136 final int sfmap_i = sfmap[gs]; 1137 1138 // 1139 // inlined: bsW(len_selCtr[sfmap_i] & 0xff, 1140 // code_selCtr[sfmap_i]); 1141 // 1142 while (bsLiveShadow >= 8) { 1143 outShadow.write(bsBuffShadow >> 24); 1144 bsBuffShadow <<= 8; 1145 bsLiveShadow -= 8; 1146 } 1147 final int n = len_selCtr[sfmap_i] & 0xFF; 1148 bsBuffShadow |= code_selCtr[sfmap_i] << 32 - bsLiveShadow - n; 1149 bsLiveShadow += n; 1150 1151 gs++; 1152 } 1153 1154 gs = ge + 1; 1155 selCtr++; 1156 } 1157 1158 this.bsBuff = bsBuffShadow; 1159 this.bsLive = bsLiveShadow; 1160 } 1161 1162 @Override 1163 public void write(final byte[] buf, int offs, final int len) throws IOException { 1164 if (offs < 0) { 1165 throw new IndexOutOfBoundsException("offs(" + offs + ") < 0."); 1166 } 1167 if (len < 0) { 1168 throw new IndexOutOfBoundsException("len(" + len + ") < 0."); 1169 } 1170 if (offs + len > buf.length) { 1171 throw new IndexOutOfBoundsException("offs(" + offs + ") + len(" + len + ") > buf.length(" + buf.length + ")."); 1172 } 1173 checkClosed(); 1174 for (final int hi = offs + len; offs < hi;) { 1175 write0(buf[offs++]); 1176 } 1177 } 1178 1179 @Override 1180 public void write(final int b) throws IOException { 1181 checkClosed(); 1182 write0(b); 1183 } 1184 1185 /** 1186 * Keeps track of the last bytes written and implicitly performs run-length encoding as the first step of the bzip2 algorithm. 1187 */ 1188 private void write0(int b) throws IOException { 1189 if (this.currentChar != -1) { 1190 b &= 0xff; 1191 if (this.currentChar == b) { 1192 if (++this.runLength > 254) { 1193 writeRun(); 1194 this.currentChar = -1; 1195 this.runLength = 0; 1196 } 1197 // else nothing to do 1198 } else { 1199 writeRun(); 1200 this.runLength = 1; 1201 this.currentChar = b; 1202 } 1203 } else { 1204 this.currentChar = b & 0xff; 1205 this.runLength++; 1206 } 1207 } 1208 1209 /** 1210 * Writes the current byte to the buffer, run-length encoding it if it has been repeated at least four times (the first step RLEs sequences of four 1211 * identical bytes). 1212 * 1213 * <p> 1214 * Flushes the current block before writing data if it is full. 1215 * </p> 1216 * 1217 * <p> 1218 * "write to the buffer" means adding to data.buffer starting two steps "after" this.last - initially starting at index 1 (not 0) - and updating this.last 1219 * to point to the last index written minus 1. 1220 * </p> 1221 */ 1222 private void writeRun() throws IOException { 1223 final int lastShadow = this.last; 1224 1225 if (lastShadow < this.allowableBlockSize) { 1226 final int currentCharShadow = this.currentChar; 1227 final Data dataShadow = this.data; 1228 dataShadow.inUse[currentCharShadow] = true; 1229 final byte ch = (byte) currentCharShadow; 1230 1231 int runLengthShadow = this.runLength; 1232 this.crc.update(currentCharShadow, runLengthShadow); 1233 1234 switch (runLengthShadow) { 1235 case 1: 1236 dataShadow.block[lastShadow + 2] = ch; 1237 this.last = lastShadow + 1; 1238 break; 1239 1240 case 2: 1241 dataShadow.block[lastShadow + 2] = ch; 1242 dataShadow.block[lastShadow + 3] = ch; 1243 this.last = lastShadow + 2; 1244 break; 1245 1246 case 3: { 1247 final byte[] block = dataShadow.block; 1248 block[lastShadow + 2] = ch; 1249 block[lastShadow + 3] = ch; 1250 block[lastShadow + 4] = ch; 1251 this.last = lastShadow + 3; 1252 } 1253 break; 1254 1255 default: { 1256 runLengthShadow -= 4; 1257 dataShadow.inUse[runLengthShadow] = true; 1258 final byte[] block = dataShadow.block; 1259 block[lastShadow + 2] = ch; 1260 block[lastShadow + 3] = ch; 1261 block[lastShadow + 4] = ch; 1262 block[lastShadow + 5] = ch; 1263 block[lastShadow + 6] = (byte) runLengthShadow; 1264 this.last = lastShadow + 5; 1265 } 1266 break; 1267 1268 } 1269 } else { 1270 endBlock(); 1271 initBlock(); 1272 writeRun(); 1273 } 1274 } 1275 1276}