001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017package org.apache.commons.compress.harmony.pack200; 018 019import java.io.IOException; 020import java.io.OutputStream; 021import java.util.ArrayList; 022import java.util.Arrays; 023import java.util.HashMap; 024import java.util.List; 025import java.util.Map; 026import java.util.stream.IntStream; 027 028/** 029 * Abstract superclass for a set of bands 030 */ 031public abstract class BandSet { 032 033 /** 034 * Results obtained by trying different Codecs to encode a band 035 */ 036 public class BandAnalysisResults { 037 038 // The number of Codecs tried so far 039 private int numCodecsTried; 040 041 // The number of bytes saved by using betterCodec instead of the default codec 042 private int saved; 043 044 // Extra metadata to pass to the segment header (to be appended to the 045 // band_headers band) 046 private int[] extraMetadata; 047 048 // The results of encoding the band with betterCodec 049 private byte[] encodedBand; 050 051 // The best Codec found so far, or should be null if the default is the 052 // best so far 053 private Codec betterCodec; 054 055 } 056 057 /** 058 * BandData represents information about a band, e.g. largest value etc and is used in the heuristics that calculate whether an alternative Codec could make 059 * the encoded band smaller. 060 */ 061 public class BandData { 062 063 private final int[] band; 064 private int smallest = Integer.MAX_VALUE; 065 private int largest = Integer.MIN_VALUE; 066 private int smallestDelta; 067 private int largestDelta; 068 069 private int deltaIsAscending; 070 private int smallDeltaCount; 071 072 private double averageAbsoluteDelta; 073 private double averageAbsoluteValue; 074 075 private Map<Integer, Integer> distinctValues; 076 077 /** 078 * Constructs a new instance of BandData. The band is then analysed. 079 * 080 * @param band - the band of integers 081 */ 082 public BandData(final int[] band) { 083 this.band = band; 084 final Integer one = Integer.valueOf(1); 085 for (int i = 0; i < band.length; i++) { 086 if (band[i] < smallest) { 087 smallest = band[i]; 088 } 089 if (band[i] > largest) { 090 largest = band[i]; 091 } 092 if (i != 0) { 093 final int delta = band[i] - band[i - 1]; 094 if (delta < smallestDelta) { 095 smallestDelta = delta; 096 } 097 if (delta > largestDelta) { 098 largestDelta = delta; 099 } 100 if (delta >= 0) { 101 deltaIsAscending++; 102 } 103 averageAbsoluteDelta += (double) Math.abs(delta) / (double) (band.length - 1); 104 if (Math.abs(delta) < 256) { 105 smallDeltaCount++; 106 } 107 } else { 108 smallestDelta = band[0]; 109 largestDelta = band[0]; 110 } 111 averageAbsoluteValue += (double) Math.abs(band[i]) / (double) band.length; 112 if (effort > 3) { // do calculations needed to consider population codec 113 if (distinctValues == null) { 114 distinctValues = new HashMap<>(); 115 } 116 final Integer value = Integer.valueOf(band[i]); 117 Integer count = distinctValues.get(value); 118 if (count == null) { 119 count = one; 120 } else { 121 count = Integer.valueOf(count.intValue() + 1); 122 } 123 distinctValues.put(value, count); 124 } 125 } 126 } 127 128 /** 129 * Returns true if any band elements are negative. 130 * 131 * @return true if any band elements are negative. 132 */ 133 public boolean anyNegatives() { 134 return smallest < 0; 135 } 136 137 /** 138 * Returns true if the band deltas are mainly positive (heuristic). 139 * 140 * @return true if the band deltas are mainly positive (heuristic). 141 */ 142 public boolean mainlyPositiveDeltas() { 143 // Note: the value below has been tuned - please test carefully if changing it 144 return (float) deltaIsAscending / (float) band.length > 0.95F; 145 } 146 147 /** 148 * Returns true if the deltas between adjacent band elements are mainly small (heuristic). 149 * 150 * @return true if the deltas between adjacent band elements are mainly small (heuristic). 151 */ 152 public boolean mainlySmallDeltas() { 153 // Note: the value below has been tuned - please test carefully if changing it 154 return (float) smallDeltaCount / (float) band.length > 0.7F; 155 } 156 157 /** 158 * Returns the total number of distinct values found in the band. 159 * 160 * @return the total number of distinct values found in the band. 161 */ 162 public int numDistinctValues() { 163 if (distinctValues == null) { 164 return band.length; 165 } 166 return distinctValues.size(); 167 } 168 169 /** 170 * Returns true if the band is well correlated (i.e. would be suitable for a delta encoding) (heuristic). 171 * 172 * @return true if the band is well correlated (i.e. would be suitable for a delta encoding) (heuristic). 173 */ 174 public boolean wellCorrelated() { 175 // Note: the value below has been tuned - please test carefully if changing it 176 return averageAbsoluteDelta * 3.1 < averageAbsoluteValue; 177 } 178 179 } 180 181 private static final byte[] EMPTY_BYTE_ARRAY = {}; 182 183 // Minimum size of band for each effort level where we consider alternative codecs 184 // Note: these values have been tuned - please test carefully if changing them 185 private static final int[] effortThresholds = { 0, 0, 1000, 500, 100, 100, 100, 100, 100, 0 }; 186 187 protected final SegmentHeader segmentHeader; 188 final int effort; 189 190 private long[] canonicalLargest; 191 192 private long[] canonicalSmallest; 193 194 /** 195 * Constructs a new BandSet. 196 * 197 * @param effort - the packing effort to be used (must be 1-9) 198 * @param header - the segment header 199 */ 200 public BandSet(final int effort, final SegmentHeader header) { 201 this.effort = effort; 202 this.segmentHeader = header; 203 } 204 205 private BandAnalysisResults analyseBand(final String name, final int[] band, final BHSDCodec defaultCodec) throws Pack200Exception { 206 207 final BandAnalysisResults results = new BandAnalysisResults(); 208 209 if (canonicalLargest == null) { 210 canonicalLargest = new long[116]; 211 canonicalSmallest = new long[116]; 212 for (int i = 1; i < canonicalLargest.length; i++) { 213 canonicalLargest[i] = CodecEncoding.getCanonicalCodec(i).largest(); 214 canonicalSmallest[i] = CodecEncoding.getCanonicalCodec(i).smallest(); 215 } 216 } 217 final BandData bandData = new BandData(band); 218 219 // Check that there is a reasonable saving to be made 220 final byte[] encoded = defaultCodec.encode(band); 221 results.encodedBand = encoded; 222 223 // Note: these values have been tuned - please test carefully if changing them 224 if (encoded.length <= band.length + 23 - 2 * effort) { // TODO: tweak 225 return results; 226 } 227 228 // Check if we can use BYTE1 as that's a 1:1 mapping if we can 229 if (!bandData.anyNegatives() && bandData.largest <= Codec.BYTE1.largest()) { 230 results.encodedBand = Codec.BYTE1.encode(band); 231 results.betterCodec = Codec.BYTE1; 232 return results; 233 } 234 235 // Consider a population codec (but can't be nested) 236 if (effort > 3 && !name.equals("POPULATION")) { 237 final int numDistinctValues = bandData.numDistinctValues(); 238 final float distinctValuesAsProportion = (float) numDistinctValues / (float) band.length; 239 240 // Note: these values have been tuned - please test carefully if changing them 241 if (numDistinctValues < 100 || distinctValuesAsProportion < 0.02 || effort > 6 && distinctValuesAsProportion < 0.04) { // TODO: tweak 242 encodeWithPopulationCodec(name, band, defaultCodec, bandData, results); 243 if (timeToStop(results)) { 244 return results; 245 } 246 } 247 } 248 249 final List<BHSDCodec[]> codecFamiliesToTry = new ArrayList<>(); 250 251 // See if the deltas are mainly small increments 252 if (bandData.mainlyPositiveDeltas() && bandData.mainlySmallDeltas()) { 253 codecFamiliesToTry.add(CanonicalCodecFamilies.deltaUnsignedCodecs2); 254 } 255 256 if (bandData.wellCorrelated()) { // Try delta encodings 257 if (bandData.mainlyPositiveDeltas()) { 258 codecFamiliesToTry.add(CanonicalCodecFamilies.deltaUnsignedCodecs1); 259 codecFamiliesToTry.add(CanonicalCodecFamilies.deltaUnsignedCodecs3); 260 codecFamiliesToTry.add(CanonicalCodecFamilies.deltaUnsignedCodecs4); 261 codecFamiliesToTry.add(CanonicalCodecFamilies.deltaUnsignedCodecs5); 262 codecFamiliesToTry.add(CanonicalCodecFamilies.nonDeltaUnsignedCodecs1); 263 codecFamiliesToTry.add(CanonicalCodecFamilies.nonDeltaUnsignedCodecs3); 264 codecFamiliesToTry.add(CanonicalCodecFamilies.nonDeltaUnsignedCodecs4); 265 codecFamiliesToTry.add(CanonicalCodecFamilies.nonDeltaUnsignedCodecs5); 266 codecFamiliesToTry.add(CanonicalCodecFamilies.nonDeltaUnsignedCodecs2); 267 } else { 268 codecFamiliesToTry.add(CanonicalCodecFamilies.deltaSignedCodecs1); 269 codecFamiliesToTry.add(CanonicalCodecFamilies.deltaSignedCodecs3); 270 codecFamiliesToTry.add(CanonicalCodecFamilies.deltaSignedCodecs2); 271 codecFamiliesToTry.add(CanonicalCodecFamilies.deltaSignedCodecs4); 272 codecFamiliesToTry.add(CanonicalCodecFamilies.deltaSignedCodecs5); 273 codecFamiliesToTry.add(CanonicalCodecFamilies.nonDeltaSignedCodecs1); 274 codecFamiliesToTry.add(CanonicalCodecFamilies.nonDeltaSignedCodecs2); 275 } 276 } else if (bandData.anyNegatives()) { 277 codecFamiliesToTry.add(CanonicalCodecFamilies.nonDeltaSignedCodecs1); 278 codecFamiliesToTry.add(CanonicalCodecFamilies.nonDeltaSignedCodecs2); 279 codecFamiliesToTry.add(CanonicalCodecFamilies.deltaSignedCodecs1); 280 codecFamiliesToTry.add(CanonicalCodecFamilies.deltaSignedCodecs2); 281 codecFamiliesToTry.add(CanonicalCodecFamilies.deltaSignedCodecs3); 282 codecFamiliesToTry.add(CanonicalCodecFamilies.deltaSignedCodecs4); 283 codecFamiliesToTry.add(CanonicalCodecFamilies.deltaSignedCodecs5); 284 } else { 285 codecFamiliesToTry.add(CanonicalCodecFamilies.nonDeltaUnsignedCodecs1); 286 codecFamiliesToTry.add(CanonicalCodecFamilies.nonDeltaUnsignedCodecs3); 287 codecFamiliesToTry.add(CanonicalCodecFamilies.nonDeltaUnsignedCodecs4); 288 codecFamiliesToTry.add(CanonicalCodecFamilies.nonDeltaUnsignedCodecs5); 289 codecFamiliesToTry.add(CanonicalCodecFamilies.nonDeltaUnsignedCodecs2); 290 codecFamiliesToTry.add(CanonicalCodecFamilies.deltaUnsignedCodecs1); 291 codecFamiliesToTry.add(CanonicalCodecFamilies.deltaUnsignedCodecs3); 292 codecFamiliesToTry.add(CanonicalCodecFamilies.deltaUnsignedCodecs4); 293 codecFamiliesToTry.add(CanonicalCodecFamilies.deltaUnsignedCodecs5); 294 } 295 if (name.equalsIgnoreCase("cpint")) { 296 System.out.print(""); 297 } 298 299 for (final BHSDCodec[] family : codecFamiliesToTry) { 300 tryCodecs(name, band, defaultCodec, bandData, results, encoded, family); 301 if (timeToStop(results)) { 302 break; 303 } 304 } 305 306 return results; 307 } 308 309 /** 310 * Converts a list of ConstantPoolEntrys to an int[] array of their indices 311 * 312 * @param list conversion source. 313 * @return conversion result. 314 */ 315 protected int[] cpEntryListToArray(final List<? extends ConstantPoolEntry> list) { 316 final int[] array = new int[list.size()]; 317 for (int i = 0; i < array.length; i++) { 318 array[i] = list.get(i).getIndex(); 319 if (array[i] < 0) { 320 throw new IllegalArgumentException("Index should be > 0"); 321 } 322 } 323 return array; 324 } 325 326 /** 327 * Converts a list of ConstantPoolEntrys or nulls to an int[] array of their indices +1 (or 0 for nulls) 328 * 329 * @param list conversion source. 330 * @return conversion result. 331 */ 332 protected int[] cpEntryOrNullListToArray(final List<? extends ConstantPoolEntry> list) { 333 final int[] array = new int[list.size()]; 334 for (int j = 0; j < array.length; j++) { 335 final ConstantPoolEntry cpEntry = list.get(j); 336 array[j] = cpEntry == null ? 0 : cpEntry.getIndex() + 1; 337 if (cpEntry != null && cpEntry.getIndex() < 0) { 338 throw new IllegalArgumentException("Index should be > 0"); 339 } 340 } 341 return array; 342 } 343 344 /** 345 * Encode a band of integers. The default codec may be used, but other Codecs are considered if effort is greater than 1. 346 * 347 * @param name - name of the band (used for debugging) 348 * @param ints - the band 349 * @param defaultCodec - the default Codec 350 * @return the encoded band 351 * @throws Pack200Exception TODO 352 */ 353 public byte[] encodeBandInt(final String name, final int[] ints, final BHSDCodec defaultCodec) throws Pack200Exception { 354 byte[] encodedBand = null; 355 // Useful for debugging 356// if (ints.length > 0) { 357// System.out.println("encoding " + name + " " + ints.length); 358// } 359 if (effort > 1 && ints.length >= effortThresholds[effort]) { 360 final BandAnalysisResults results = analyseBand(name, ints, defaultCodec); 361 final Codec betterCodec = results.betterCodec; 362 encodedBand = results.encodedBand; 363 if (betterCodec != null) { 364 if (betterCodec instanceof BHSDCodec) { 365 final int[] specifierBand = CodecEncoding.getSpecifier(betterCodec, defaultCodec); 366 int specifier = specifierBand[0]; 367 if (specifierBand.length > 1) { 368 for (int i = 1; i < specifierBand.length; i++) { 369 segmentHeader.appendBandCodingSpecifier(specifierBand[i]); 370 } 371 } 372 if (defaultCodec.isSigned()) { 373 specifier = -1 - specifier; 374 } else { 375 specifier += defaultCodec.getL(); 376 } 377 final byte[] specifierEncoded = defaultCodec.encode(new int[] { specifier }); 378 final byte[] band = new byte[specifierEncoded.length + encodedBand.length]; 379 System.arraycopy(specifierEncoded, 0, band, 0, specifierEncoded.length); 380 System.arraycopy(encodedBand, 0, band, specifierEncoded.length, encodedBand.length); 381 return band; 382 } 383 if (betterCodec instanceof PopulationCodec) { 384 IntStream.of(results.extraMetadata).forEach(segmentHeader::appendBandCodingSpecifier); 385 return encodedBand; 386 } 387 if (betterCodec instanceof RunCodec) { 388 389 } 390 } 391 } 392 393 // If we get here then we've decided to use the default codec. 394 if (ints.length > 0) { 395 if (encodedBand == null) { 396 encodedBand = defaultCodec.encode(ints); 397 } 398 final int first = ints[0]; 399 if (defaultCodec.getB() != 1) { 400 if (defaultCodec.isSigned() && first >= -256 && first <= -1) { 401 final int specifier = -1 - CodecEncoding.getSpecifierForDefaultCodec(defaultCodec); 402 final byte[] specifierEncoded = defaultCodec.encode(new int[] { specifier }); 403 final byte[] band = new byte[specifierEncoded.length + encodedBand.length]; 404 System.arraycopy(specifierEncoded, 0, band, 0, specifierEncoded.length); 405 System.arraycopy(encodedBand, 0, band, specifierEncoded.length, encodedBand.length); 406 return band; 407 } 408 if (!defaultCodec.isSigned() && first >= defaultCodec.getL() && first <= defaultCodec.getL() + 255) { 409 final int specifier = CodecEncoding.getSpecifierForDefaultCodec(defaultCodec) + defaultCodec.getL(); 410 final byte[] specifierEncoded = defaultCodec.encode(new int[] { specifier }); 411 final byte[] band = new byte[specifierEncoded.length + encodedBand.length]; 412 System.arraycopy(specifierEncoded, 0, band, 0, specifierEncoded.length); 413 System.arraycopy(encodedBand, 0, band, specifierEncoded.length, encodedBand.length); 414 return band; 415 } 416 } 417 return encodedBand; 418 } 419 return EMPTY_BYTE_ARRAY; 420 } 421 422 /** 423 * Encode a band of longs (values are split into their high and low 32 bits and then encoded as two separate bands 424 * 425 * @param name - name of the band (for debugging purposes) 426 * @param flags - the band 427 * @param loCodec - Codec for the low 32-bits band 428 * @param hiCodec - Codec for the high 32-bits band 429 * @param haveHiFlags - ignores the high band if true as all values would be zero 430 * @return the encoded band 431 * @throws Pack200Exception TODO 432 */ 433 protected byte[] encodeFlags(final String name, final long[] flags, final BHSDCodec loCodec, final BHSDCodec hiCodec, final boolean haveHiFlags) 434 throws Pack200Exception { 435 if (!haveHiFlags) { 436 final int[] loBits = new int[flags.length]; 437 Arrays.setAll(loBits, i -> (int) flags[i]); 438 return encodeBandInt(name, loBits, loCodec); 439 } 440 final int[] hiBits = new int[flags.length]; 441 final int[] loBits = new int[flags.length]; 442 for (int i = 0; i < flags.length; i++) { 443 final long l = flags[i]; 444 hiBits[i] = (int) (l >> 32); 445 loBits[i] = (int) l; 446 } 447 final byte[] hi = encodeBandInt(name, hiBits, hiCodec); 448 final byte[] lo = encodeBandInt(name, loBits, loCodec); 449 final byte[] total = new byte[hi.length + lo.length]; 450 System.arraycopy(hi, 0, total, 0, hi.length); 451 System.arraycopy(lo, 0, total, hi.length + 1, lo.length); 452 return total; 453 } 454 455// This could be useful if further enhancements are done but is not currently used 456// 457// private void encodeWithRunCodec(String name, int[] band, int index, 458// BHSDCodec defaultCodec, BandData bandData, 459// BandAnalysisResults results) throws Pack200Exception { 460// int[] firstBand = new int[index]; 461// int[] secondBand = new int[band.length - index]; 462// System.arraycopy(band, 0, firstBand, 0, index); 463// System.arraycopy(band, index, secondBand, 0, secondBand.length); 464// BandAnalysisResults firstResults = analyseBand(name + "A", firstBand, defaultCodec); 465// BandAnalysisResults secondResults = analyseBand(name + "B", secondBand, defaultCodec); 466// int specifier = 117; 467// byte[] specifierEncoded = defaultCodec.encode(new int[] {specifier}); 468// int totalLength = firstResults.encodedBand.length + secondResults.encodedBand.length + specifierEncoded.length + 4; // TODO actual 469// if (totalLength < results.encodedBand.length) { 470// System.out.println("using run codec"); 471// results.saved += results.encodedBand.length - totalLength; 472// byte[] encodedBand = new byte[specifierEncoded.length + firstResults.encodedBand.length + secondResults.encodedBand.length]; 473// System.arraycopy(specifierEncoded, 0, encodedBand, 0, specifierEncoded.length); 474// System.arraycopy(firstResults.encodedBand, 0, encodedBand, specifierEncoded.length, firstResults.encodedBand.length); 475// System.arraycopy(secondResults.encodedBand, 0, encodedBand, specifierEncoded.length + firstResults.encodedBand.length, 476// secondResults.encodedBand.length); 477// results.encodedBand = encodedBand; 478// results.betterCodec = new RunCodec(index, firstResults.betterCodec, secondResults.betterCodec); 479// } 480// } 481 482 protected byte[] encodeFlags(final String name, final long[][] flags, final BHSDCodec loCodec, final BHSDCodec hiCodec, final boolean haveHiFlags) 483 throws Pack200Exception { 484 return encodeFlags(name, flatten(flags), loCodec, hiCodec, haveHiFlags); 485 } 486 487 /** 488 * Encode a single value with the given Codec 489 * 490 * @param value - the value to encode 491 * @param codec - Codec to use 492 * @return the encoded value 493 * @throws Pack200Exception TODO 494 */ 495 public byte[] encodeScalar(final int value, final BHSDCodec codec) throws Pack200Exception { 496 return codec.encode(value); 497 } 498 499 /** 500 * Encode a band without considering other Codecs 501 * 502 * @param band - the band 503 * @param codec - the Codec to use 504 * @return the encoded band 505 * @throws Pack200Exception TODO 506 */ 507 public byte[] encodeScalar(final int[] band, final BHSDCodec codec) throws Pack200Exception { 508 return codec.encode(band); 509 } 510 511 private void encodeWithPopulationCodec(final String name, final int[] band, final BHSDCodec defaultCodec, final BandData bandData, 512 final BandAnalysisResults results) throws Pack200Exception { 513 results.numCodecsTried += 3; // quite a bit more effort to try this codec 514 final Map<Integer, Integer> distinctValues = bandData.distinctValues; 515 516 final List<Integer> favored = new ArrayList<>(); 517 distinctValues.forEach((k, v) -> { 518 if (v.intValue() > 2 || distinctValues.size() < 256) { // TODO: tweak 519 favored.add(k); 520 } 521 }); 522 523 // Sort the favored list with the most commonly occurring first 524 if (distinctValues.size() > 255) { 525 favored.sort((arg0, arg1) -> distinctValues.get(arg1).compareTo(distinctValues.get(arg0))); 526 } 527 528 final Map<Integer, Integer> favoredToIndex = new HashMap<>(); 529 for (int i = 0; i < favored.size(); i++) { 530 favoredToIndex.put(favored.get(i), Integer.valueOf(i)); 531 } 532 533 final IntList unfavoured = new IntList(); 534 final int[] tokens = new int[band.length]; 535 for (int i = 0; i < band.length; i++) { 536 final Integer favouredIndex = favoredToIndex.get(Integer.valueOf(band[i])); 537 if (favouredIndex == null) { 538 tokens[i] = 0; 539 unfavoured.add(band[i]); 540 } else { 541 tokens[i] = favouredIndex.intValue() + 1; 542 } 543 } 544 favored.add(favored.get(favored.size() - 1)); // repeat last value 545 final int[] favouredBand = integerListToArray(favored); 546 final int[] unfavouredBand = unfavoured.toArray(); 547 548 // Analyse the three bands to get the best codec 549 final BandAnalysisResults favouredResults = analyseBand("POPULATION", favouredBand, defaultCodec); 550 final BandAnalysisResults unfavouredResults = analyseBand("POPULATION", unfavouredBand, defaultCodec); 551 552 int tdefL = 0; 553 int l = 0; 554 Codec tokenCodec = null; 555 byte[] tokensEncoded; 556 final int k = favored.size() - 1; 557 if (k < 256) { 558 tdefL = 1; 559 tokensEncoded = Codec.BYTE1.encode(tokens); 560 } else { 561 final BandAnalysisResults tokenResults = analyseBand("POPULATION", tokens, defaultCodec); 562 tokenCodec = tokenResults.betterCodec; 563 tokensEncoded = tokenResults.encodedBand; 564 if (tokenCodec == null) { 565 tokenCodec = defaultCodec; 566 } 567 l = ((BHSDCodec) tokenCodec).getL(); 568 final int h = ((BHSDCodec) tokenCodec).getH(); 569 final int s = ((BHSDCodec) tokenCodec).getS(); 570 final int b = ((BHSDCodec) tokenCodec).getB(); 571 final int d = ((BHSDCodec) tokenCodec).isDelta() ? 1 : 0; 572 if (s == 0 && d == 0) { 573 boolean canUseTDefL = true; 574 if (b > 1) { 575 final BHSDCodec oneLowerB = new BHSDCodec(b - 1, h); 576 if (oneLowerB.largest() >= k) { 577 canUseTDefL = false; 578 } 579 } 580 if (canUseTDefL) { 581 switch (l) { 582 case 4: 583 tdefL = 1; 584 break; 585 case 8: 586 tdefL = 2; 587 break; 588 case 16: 589 tdefL = 3; 590 break; 591 case 32: 592 tdefL = 4; 593 break; 594 case 64: 595 tdefL = 5; 596 break; 597 case 128: 598 tdefL = 6; 599 break; 600 case 192: 601 tdefL = 7; 602 break; 603 case 224: 604 tdefL = 8; 605 break; 606 case 240: 607 tdefL = 9; 608 break; 609 case 248: 610 tdefL = 10; 611 break; 612 case 252: 613 tdefL = 11; 614 break; 615 } 616 } 617 } 618 } 619 620 final byte[] favouredEncoded = favouredResults.encodedBand; 621 final byte[] unfavouredEncoded = unfavouredResults.encodedBand; 622 final Codec favouredCodec = favouredResults.betterCodec; 623 final Codec unfavouredCodec = unfavouredResults.betterCodec; 624 625 int specifier = 141 + (favouredCodec == null ? 1 : 0) + 4 * tdefL + (unfavouredCodec == null ? 2 : 0); 626 final IntList extraBandMetadata = new IntList(3); 627 if (favouredCodec != null) { 628 IntStream.of(CodecEncoding.getSpecifier(favouredCodec, null)).forEach(extraBandMetadata::add); 629 } 630 if (tdefL == 0) { 631 IntStream.of(CodecEncoding.getSpecifier(tokenCodec, null)).forEach(extraBandMetadata::add); 632 } 633 if (unfavouredCodec != null) { 634 IntStream.of(CodecEncoding.getSpecifier(unfavouredCodec, null)).forEach(extraBandMetadata::add); 635 } 636 final int[] extraMetadata = extraBandMetadata.toArray(); 637 final byte[] extraMetadataEncoded = Codec.UNSIGNED5.encode(extraMetadata); 638 if (defaultCodec.isSigned()) { 639 specifier = -1 - specifier; 640 } else { 641 specifier += defaultCodec.getL(); 642 } 643 final byte[] firstValueEncoded = defaultCodec.encode(new int[] { specifier }); 644 final int totalBandLength = firstValueEncoded.length + favouredEncoded.length + tokensEncoded.length + unfavouredEncoded.length; 645 646 if (totalBandLength + extraMetadataEncoded.length < results.encodedBand.length) { 647 results.saved += results.encodedBand.length - (totalBandLength + extraMetadataEncoded.length); 648 final byte[] encodedBand = new byte[totalBandLength]; 649 System.arraycopy(firstValueEncoded, 0, encodedBand, 0, firstValueEncoded.length); 650 System.arraycopy(favouredEncoded, 0, encodedBand, firstValueEncoded.length, favouredEncoded.length); 651 System.arraycopy(tokensEncoded, 0, encodedBand, firstValueEncoded.length + favouredEncoded.length, tokensEncoded.length); 652 System.arraycopy(unfavouredEncoded, 0, encodedBand, firstValueEncoded.length + favouredEncoded.length + tokensEncoded.length, 653 unfavouredEncoded.length); 654 results.encodedBand = encodedBand; 655 results.extraMetadata = extraMetadata; 656 if (l != 0) { 657 results.betterCodec = new PopulationCodec(favouredCodec, l, unfavouredCodec); 658 } else { 659 results.betterCodec = new PopulationCodec(favouredCodec, tokenCodec, unfavouredCodec); 660 } 661 } 662 } 663 664 /* 665 * Flatten a 2-dimension array into a 1-dimension array 666 */ 667 private long[] flatten(final long[][] flags) { 668 int totalSize = 0; 669 for (final long[] flag : flags) { 670 totalSize += flag.length; 671 } 672 final long[] flatArray = new long[totalSize]; 673 int index = 0; 674 for (final long[] flag : flags) { 675 for (final long element : flag) { 676 flatArray[index] = element; 677 index++; 678 } 679 } 680 return flatArray; 681 } 682 683 /** 684 * Converts a list of Integers to an int[] array. 685 * 686 * @param integerList conversion source. 687 * @return conversion result. 688 */ 689 protected int[] integerListToArray(final List<Integer> integerList) { 690 return integerList.stream().mapToInt(Integer::intValue).toArray(); 691 } 692 693 /** 694 * Converts a list of Longs to an long[] array. 695 * 696 * @param longList conversion source. 697 * @return conversion result. 698 */ 699 protected long[] longListToArray(final List<Long> longList) { 700 return longList.stream().mapToLong(Long::longValue).toArray(); 701 } 702 703 /** 704 * Write the packed set of bands to the given output stream 705 * 706 * @param out TODO 707 * @throws IOException If an I/O error occurs. 708 * @throws Pack200Exception TODO 709 */ 710 public abstract void pack(OutputStream out) throws IOException, Pack200Exception; 711 712 private boolean timeToStop(final BandAnalysisResults results) { 713 // if tried more than effort number of codecs for this band then return 714 // Note: these values have been tuned - please test carefully if changing them 715 if (effort > 6) { 716 return results.numCodecsTried >= effort * 2; 717 } 718 return results.numCodecsTried >= effort; 719 // May want to also check how much we've saved if performance needs improving, e.g. saved more than effort*2 % 720 // || (float) results.saved/(float) results.encodedBand.length > (float) effort * 2/100; 721 } 722 723 private void tryCodecs(final String name, final int[] band, final BHSDCodec defaultCodec, final BandData bandData, final BandAnalysisResults results, 724 final byte[] encoded, final BHSDCodec[] potentialCodecs) throws Pack200Exception { 725 for (final BHSDCodec potential : potentialCodecs) { 726 if (potential.equals(defaultCodec)) { 727 return; // don't try codecs with greater cardinality in the same 'family' as the default codec as there 728 // won't be any savings 729 } 730 if (potential.isDelta()) { 731 if (potential.largest() >= bandData.largestDelta && potential.smallest() <= bandData.smallestDelta && potential.largest() >= bandData.largest 732 && potential.smallest() <= bandData.smallest) { 733 // TODO: can represent some negative deltas with overflow 734 final byte[] encoded2 = potential.encode(band); 735 results.numCodecsTried++; 736 final byte[] specifierEncoded = defaultCodec.encode(CodecEncoding.getSpecifier(potential, null)); 737 final int saved = encoded.length - encoded2.length - specifierEncoded.length; 738 if (saved > results.saved) { 739 results.betterCodec = potential; 740 results.encodedBand = encoded2; 741 results.saved = saved; 742 } 743 } 744 } else if (potential.largest() >= bandData.largest && potential.smallest() <= bandData.smallest) { 745 final byte[] encoded2 = potential.encode(band); 746 results.numCodecsTried++; 747 final byte[] specifierEncoded = defaultCodec.encode(CodecEncoding.getSpecifier(potential, null)); 748 final int saved = encoded.length - encoded2.length - specifierEncoded.length; 749 if (saved > results.saved) { 750 results.betterCodec = potential; 751 results.encodedBand = encoded2; 752 results.saved = saved; 753 } 754 } 755 if (timeToStop(results)) { 756 return; 757 } 758 } 759 } 760 761}