001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * https://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, 013 * software distributed under the License is distributed on an 014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 015 * KIND, either express or implied. See the License for the 016 * specific language governing permissions and limitations 017 * under the License. 018 */ 019package org.apache.commons.compress.harmony.pack200; 020 021import java.io.IOException; 022import java.io.InputStream; 023import java.util.Arrays; 024 025/** 026 * A PopulationCodec is a Codec that is well suited to encoding data that shows statistical or repetitive patterns, containing for example a few numbers which 027 * are repeated a lot throughout the set, but not necessarily sequentially. 028 */ 029public class PopulationCodec extends Codec { 030 031 private final Codec favouredCodec; 032 private Codec tokenCodec; 033 private final Codec unfavouredCodec; 034 private int l; 035 private int[] favoured; 036 037 public PopulationCodec(final Codec favouredCodec, final Codec tokenCodec, final Codec unvafouredCodec) { 038 this.favouredCodec = favouredCodec; 039 this.tokenCodec = tokenCodec; 040 this.unfavouredCodec = unvafouredCodec; 041 } 042 043 public PopulationCodec(final Codec favouredCodec, final int l, final Codec unfavouredCodec) { 044 if (l >= 256 || l <= 0) { 045 throw new IllegalArgumentException("L must be between 1..255"); 046 } 047 this.favouredCodec = favouredCodec; 048 this.l = l; 049 this.unfavouredCodec = unfavouredCodec; 050 } 051 052 @Override 053 public int decode(final InputStream in) throws IOException, Pack200Exception { 054 throw new Pack200Exception("Population encoding does not work unless the number of elements are known"); 055 } 056 057 @Override 058 public int decode(final InputStream in, final long last) throws IOException, Pack200Exception { 059 throw new Pack200Exception("Population encoding does not work unless the number of elements are known"); 060 } 061 062 @Override 063 public int[] decodeInts(final int n, final InputStream in) throws IOException, Pack200Exception { 064 lastBandLength = 0; 065 favoured = new int[check(n, in)]; // there must be <= n values, but probably a lot 066 // less 067 final int[] result; 068 // read table of favorites first 069 int smallest = Integer.MAX_VALUE; 070 int absoluteSmallest; 071 int last = 0; 072 int value = 0; 073 int absoluteValue; 074 int k = -1; 075 while (true) { 076 value = favouredCodec.decode(in, last); 077 if (k > -1 && (value == smallest || value == last)) { 078 break; 079 } 080 favoured[++k] = value; 081 absoluteSmallest = Math.abs(smallest); 082 absoluteValue = Math.abs(value); 083 if (absoluteSmallest > absoluteValue) { 084 smallest = value; 085 } else if (absoluteSmallest == absoluteValue) { 086 // ensure that -X and +X -> +X 087 smallest = absoluteSmallest; 088 } 089 last = value; 090 } 091 lastBandLength += k; 092 // if tokenCodec needs to be derived from the T, L and K values 093 if (tokenCodec == null) { 094 if (k < 256) { 095 tokenCodec = BYTE1; 096 } else { 097 // if k >= 256, b >= 2 098 int b = 1; 099 BHSDCodec codec; 100 while (++b < 5) { 101 codec = new BHSDCodec(b, 256 - l, 0); 102 if (codec.encodes(k)) { 103 tokenCodec = codec; 104 break; 105 } 106 } 107 if (tokenCodec == null) { 108 throw new Pack200Exception("Cannot calculate token codec from " + k + " and " + l); 109 } 110 } 111 } 112 // read favorites 113 lastBandLength += n; 114 result = tokenCodec.decodeInts(n, in); 115 // read unfavorites 116 last = 0; 117 for (int i = 0; i < n; i++) { 118 final int index = result[i]; 119 if (index == 0) { 120 lastBandLength++; 121 result[i] = last = unfavouredCodec.decode(in, last); 122 } else { 123 result[i] = favoured[index - 1]; 124 } 125 } 126 return result; 127 } 128 129 @Override 130 public byte[] encode(final int value) throws Pack200Exception { 131 throw new Pack200Exception("Population encoding does not work unless the number of elements are known"); 132 } 133 134 @Override 135 public byte[] encode(final int value, final int last) throws Pack200Exception { 136 throw new Pack200Exception("Population encoding does not work unless the number of elements are known"); 137 } 138 139 public byte[] encode(final int[] favoured, final int[] tokens, final int[] unfavoured) throws Pack200Exception { 140 final int[] favoured2 = Arrays.copyOf(favoured, favoured.length + 1); 141 favoured2[favoured2.length - 1] = favoured[favoured.length - 1]; // repeat last value; 142 final byte[] favouredEncoded = favouredCodec.encode(favoured2); 143 final byte[] tokensEncoded = tokenCodec.encode(tokens); 144 final byte[] unfavouredEncoded = unfavouredCodec.encode(unfavoured); 145 final byte[] band = new byte[favouredEncoded.length + tokensEncoded.length + unfavouredEncoded.length]; 146 System.arraycopy(favouredEncoded, 0, band, 0, favouredEncoded.length); 147 System.arraycopy(tokensEncoded, 0, band, favouredEncoded.length, tokensEncoded.length); 148 System.arraycopy(unfavouredEncoded, 0, band, favouredEncoded.length + tokensEncoded.length, unfavouredEncoded.length); 149 return band; 150 } 151 152 public int[] getFavoured() { 153 return favoured; 154 } 155 156 public Codec getFavouredCodec() { 157 return favouredCodec; 158 } 159 160 public Codec getTokenCodec() { 161 return tokenCodec; 162 } 163 164 public Codec getUnfavouredCodec() { 165 return unfavouredCodec; 166 } 167}