PopulationCodec.java

  1. /*
  2.  *  Licensed to the Apache Software Foundation (ASF) under one or more
  3.  *  contributor license agreements.  See the NOTICE file distributed with
  4.  *  this work for additional information regarding copyright ownership.
  5.  *  The ASF licenses this file to You under the Apache License, Version 2.0
  6.  *  (the "License"); you may not use this file except in compliance with
  7.  *  the License.  You may obtain a copy of the License at
  8.  *
  9.  *     http://www.apache.org/licenses/LICENSE-2.0
  10.  *
  11.  *  Unless required by applicable law or agreed to in writing, software
  12.  *  distributed under the License is distributed on an "AS IS" BASIS,
  13.  *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14.  *  See the License for the specific language governing permissions and
  15.  *  limitations under the License.
  16.  */
  17. package org.apache.commons.compress.harmony.pack200;

  18. import java.io.IOException;
  19. import java.io.InputStream;
  20. import java.util.Arrays;

  21. /**
  22.  * A PopulationCodec is a Codec that is well suited to encoding data that shows statistical or repetitive patterns, containing for example a few numbers which
  23.  * are repeated a lot throughout the set, but not necessarily sequentially.
  24.  */
  25. public class PopulationCodec extends Codec {

  26.     private final Codec favouredCodec;
  27.     private Codec tokenCodec;
  28.     private final Codec unfavouredCodec;
  29.     private int l;
  30.     private int[] favoured;

  31.     public PopulationCodec(final Codec favouredCodec, final Codec tokenCodec, final Codec unvafouredCodec) {
  32.         this.favouredCodec = favouredCodec;
  33.         this.tokenCodec = tokenCodec;
  34.         this.unfavouredCodec = unvafouredCodec;
  35.     }

  36.     public PopulationCodec(final Codec favouredCodec, final int l, final Codec unfavouredCodec) {
  37.         if (l >= 256 || l <= 0) {
  38.             throw new IllegalArgumentException("L must be between 1..255");
  39.         }
  40.         this.favouredCodec = favouredCodec;
  41.         this.l = l;
  42.         this.unfavouredCodec = unfavouredCodec;
  43.     }

  44.     @Override
  45.     public int decode(final InputStream in) throws IOException, Pack200Exception {
  46.         throw new Pack200Exception("Population encoding does not work unless the number of elements are known");
  47.     }

  48.     @Override
  49.     public int decode(final InputStream in, final long last) throws IOException, Pack200Exception {
  50.         throw new Pack200Exception("Population encoding does not work unless the number of elements are known");
  51.     }

  52.     @Override
  53.     public int[] decodeInts(final int n, final InputStream in) throws IOException, Pack200Exception {
  54.         lastBandLength = 0;
  55.         favoured = new int[check(n, in)]; // there must be <= n values, but probably a lot
  56.         // less
  57.         int[] result;
  58.         // read table of favorites first
  59.         int smallest = Integer.MAX_VALUE, absoluteSmallest;
  60.         int last = 0;
  61.         int value = 0, absoluteValue;
  62.         int k = -1;
  63.         while (true) {
  64.             value = favouredCodec.decode(in, last);
  65.             if (k > -1 && (value == smallest || value == last)) {
  66.                 break;
  67.             }
  68.             favoured[++k] = value;
  69.             absoluteSmallest = Math.abs(smallest);
  70.             absoluteValue = Math.abs(value);
  71.             if (absoluteSmallest > absoluteValue) {
  72.                 smallest = value;
  73.             } else if (absoluteSmallest == absoluteValue) {
  74.                 // ensure that -X and +X -> +X
  75.                 smallest = absoluteSmallest;
  76.             }
  77.             last = value;
  78.         }
  79.         lastBandLength += k;
  80.         // if tokenCodec needs to be derived from the T, L and K values
  81.         if (tokenCodec == null) {
  82.             if (k < 256) {
  83.                 tokenCodec = BYTE1;
  84.             } else {
  85.                 // if k >= 256, b >= 2
  86.                 int b = 1;
  87.                 BHSDCodec codec;
  88.                 while (++b < 5) {
  89.                     codec = new BHSDCodec(b, 256 - l, 0);
  90.                     if (codec.encodes(k)) {
  91.                         tokenCodec = codec;
  92.                         break;
  93.                     }
  94.                 }
  95.                 if (tokenCodec == null) {
  96.                     throw new Pack200Exception("Cannot calculate token codec from " + k + " and " + l);
  97.                 }
  98.             }
  99.         }
  100.         // read favorites
  101.         lastBandLength += n;
  102.         result = tokenCodec.decodeInts(n, in);
  103.         // read unfavorites
  104.         last = 0;
  105.         for (int i = 0; i < n; i++) {
  106.             final int index = result[i];
  107.             if (index == 0) {
  108.                 lastBandLength++;
  109.                 result[i] = last = unfavouredCodec.decode(in, last);
  110.             } else {
  111.                 result[i] = favoured[index - 1];
  112.             }
  113.         }
  114.         return result;
  115.     }

  116.     @Override
  117.     public byte[] encode(final int value) throws Pack200Exception {
  118.         throw new Pack200Exception("Population encoding does not work unless the number of elements are known");
  119.     }

  120.     @Override
  121.     public byte[] encode(final int value, final int last) throws Pack200Exception {
  122.         throw new Pack200Exception("Population encoding does not work unless the number of elements are known");
  123.     }

  124.     public byte[] encode(final int[] favoured, final int[] tokens, final int[] unfavoured) throws Pack200Exception {
  125.         final int[] favoured2 = Arrays.copyOf(favoured, favoured.length + 1);
  126.         favoured2[favoured2.length - 1] = favoured[favoured.length - 1]; // repeat last value;
  127.         final byte[] favouredEncoded = favouredCodec.encode(favoured2);
  128.         final byte[] tokensEncoded = tokenCodec.encode(tokens);
  129.         final byte[] unfavouredEncoded = unfavouredCodec.encode(unfavoured);
  130.         final byte[] band = new byte[favouredEncoded.length + tokensEncoded.length + unfavouredEncoded.length];
  131.         System.arraycopy(favouredEncoded, 0, band, 0, favouredEncoded.length);
  132.         System.arraycopy(tokensEncoded, 0, band, favouredEncoded.length, tokensEncoded.length);
  133.         System.arraycopy(unfavouredEncoded, 0, band, favouredEncoded.length + tokensEncoded.length, unfavouredEncoded.length);
  134.         return band;
  135.     }

  136.     public int[] getFavoured() {
  137.         return favoured;
  138.     }

  139.     public Codec getFavouredCodec() {
  140.         return favouredCodec;
  141.     }

  142.     public Codec getTokenCodec() {
  143.         return tokenCodec;
  144.     }

  145.     public Codec getUnfavouredCodec() {
  146.         return unfavouredCodec;
  147.     }
  148. }