View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   * http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing,
13   * software distributed under the License is distributed on an
14   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15   * KIND, either express or implied.  See the License for the
16   * specific language governing permissions and limitations
17   * under the License.
18   */
19  package org.apache.commons.compress.compressors.lz77support;
20  
21  /**
22   * Parameters of the {@link LZ77Compressor compressor}.
23   */
24  public final class Parameters {
25      /**
26       * Builder for {@link Parameters} instances.
27       */
28      public static class Builder {
29          private final int windowSize;
30          private int minBackReferenceLength, maxBackReferenceLength, maxOffset, maxLiteralLength;
31          private Integer niceBackReferenceLength, maxCandidates, lazyThreshold;
32          private Boolean lazyMatches;
33  
34          private Builder(final int windowSize) {
35              if (windowSize < 2 || !isPowerOfTwo(windowSize)) {
36                  throw new IllegalArgumentException("windowSize must be a power of two");
37              }
38              this.windowSize = windowSize;
39              minBackReferenceLength = TRUE_MIN_BACK_REFERENCE_LENGTH;
40              maxBackReferenceLength = windowSize - 1;
41              maxOffset = windowSize - 1;
42              maxLiteralLength = windowSize;
43          }
44  
45          /**
46           * Creates the {@link Parameters} instance.
47           *
48           * @return the configured {@link Parameters} instance.
49           */
50          public Parameters build() {
51              // default settings tuned for a compromise of good compression and acceptable speed
52              final int niceLen = niceBackReferenceLength != null ? niceBackReferenceLength : Math.max(minBackReferenceLength, maxBackReferenceLength / 2);
53              final int candidates = maxCandidates != null ? maxCandidates : Math.max(256, windowSize / 128);
54              final boolean lazy = lazyMatches == null || lazyMatches;
55              final int threshold = lazy ? lazyThreshold != null ? lazyThreshold : niceLen : minBackReferenceLength;
56  
57              return new Parameters(windowSize, minBackReferenceLength, maxBackReferenceLength, maxOffset, maxLiteralLength, niceLen, candidates, lazy,
58                      threshold);
59          }
60  
61          /**
62           * Changes the default setting for "nice back-reference length" and "maximum number of candidates" for improved compression ratio at the cost of
63           * compression speed.
64           *
65           * <p>
66           * Use this method after configuring "maximum back-reference length".
67           * </p>
68           *
69           * @return the builder
70           */
71          public Builder tunedForCompressionRatio() {
72              niceBackReferenceLength = lazyThreshold = maxBackReferenceLength;
73              maxCandidates = Math.max(32, windowSize / 16);
74              lazyMatches = true;
75              return this;
76          }
77  
78          /**
79           * Changes the default setting for "nice back-reference length" and "maximum number of candidates" for improved compression speed at the cost of
80           * compression ratio.
81           *
82           * <p>
83           * Use this method after configuring "maximum back-reference length".
84           * </p>
85           *
86           * @return the builder
87           */
88          public Builder tunedForSpeed() {
89              niceBackReferenceLength = Math.max(minBackReferenceLength, maxBackReferenceLength / 8);
90              maxCandidates = Math.max(32, windowSize / 1024);
91              lazyMatches = false;
92              lazyThreshold = minBackReferenceLength;
93              return this;
94          }
95  
96          /**
97           * Sets whether lazy matching should be performed.
98           *
99           * <p>
100          * Lazy matching means that after a back-reference for a certain position has been found the compressor will try to find a longer match for the next
101          * position.
102          * </p>
103          *
104          * <p>
105          * Lazy matching is enabled by default and disabled when tuning for speed.
106          * </p>
107          *
108          * @param lazy whether lazy matching should be performed
109          * @return the builder
110          */
111         public Builder withLazyMatching(final boolean lazy) {
112             lazyMatches = lazy;
113             return this;
114         }
115 
116         /**
117          * Sets the threshold for lazy matching.
118          *
119          * <p>
120          * Even if lazy matching is enabled it will not be performed if the length of the back-reference found for the current position is longer than this
121          * value.
122          * </p>
123          *
124          * @param threshold the threshold for lazy matching
125          * @return the builder
126          */
127         public Builder withLazyThreshold(final int threshold) {
128             lazyThreshold = threshold;
129             return this;
130         }
131 
132         /**
133          * Sets the maximal length of a back-reference.
134          *
135          * <p>
136          * It is recommended to not use this method directly but rather tune a pre-configured builder created by a format specific factory like
137          * {@link org.apache.commons.compress.compressors.snappy.SnappyCompressorOutputStream#createParameterBuilder}.
138          * </p>
139          *
140          * @param maxBackReferenceLength maximal length of a back-reference found. A value smaller than {@code minBackReferenceLength} is interpreted as
141          *                               {@code minBackReferenceLength}. {@code maxBackReferenceLength} is capped at {@code windowSize - 1}.
142          * @return the builder
143          */
144         public Builder withMaxBackReferenceLength(final int maxBackReferenceLength) {
145             this.maxBackReferenceLength = maxBackReferenceLength < minBackReferenceLength ? minBackReferenceLength
146                     : Math.min(maxBackReferenceLength, windowSize - 1);
147             return this;
148         }
149 
150         /**
151          * Sets the maximal length of a literal block.
152          *
153          * <p>
154          * It is recommended to not use this method directly but rather tune a pre-configured builder created by a format specific factory like
155          * {@link org.apache.commons.compress.compressors.snappy.SnappyCompressorOutputStream#createParameterBuilder}.
156          * </p>
157          *
158          * @param maxLiteralLength maximal length of a literal block. Negative numbers and 0 as well as values bigger than {@code windowSize} are interpreted as
159          *                         {@code windowSize}.
160          * @return the builder
161          */
162         public Builder withMaxLiteralLength(final int maxLiteralLength) {
163             this.maxLiteralLength = maxLiteralLength < 1 ? windowSize : Math.min(maxLiteralLength, windowSize);
164             return this;
165         }
166 
167         /**
168          * Sets the maximum number of back-reference candidates that should be consulted.
169          *
170          * <p>
171          * This settings can be used to tune the tradeoff between compression speed and compression ratio.
172          * </p>
173          *
174          * @param maxCandidates maximum number of back-reference candidates
175          * @return the builder
176          */
177         public Builder withMaxNumberOfCandidates(final int maxCandidates) {
178             this.maxCandidates = maxCandidates;
179             return this;
180         }
181 
182         /**
183          * Sets the maximal offset of a back-reference.
184          *
185          * <p>
186          * It is recommended to not use this method directly but rather tune a pre-configured builder created by a format specific factory like
187          * {@link org.apache.commons.compress.compressors.snappy.SnappyCompressorOutputStream#createParameterBuilder}.
188          * </p>
189          *
190          * @param maxOffset maximal offset of a back-reference. A non-positive value as well as values bigger than {@code windowSize - 1} are interpreted as
191          *                  <code>windowSize
192          * - 1</code>    .
193          * @return the builder
194          */
195         public Builder withMaxOffset(final int maxOffset) {
196             this.maxOffset = maxOffset < 1 ? windowSize - 1 : Math.min(maxOffset, windowSize - 1);
197             return this;
198         }
199 
200         /**
201          * Sets the minimal length of a back-reference.
202          *
203          * <p>
204          * Ensures {@code maxBackReferenceLength} is not smaller than {@code minBackReferenceLength}.
205          *
206          * <p>
207          * It is recommended to not use this method directly but rather tune a pre-configured builder created by a format specific factory like
208          * {@link org.apache.commons.compress.compressors.snappy.SnappyCompressorOutputStream#createParameterBuilder}.
209          * </p>
210          *
211          * @param minBackReferenceLength the minimal length of a back-reference found. A true minimum of 3 is hard-coded inside of this implementation but
212          *                               bigger lengths can be configured.
213          * @throws IllegalArgumentException if {@code windowSize} is smaller than {@code minBackReferenceLength}.
214          * @return the builder
215          */
216         public Builder withMinBackReferenceLength(final int minBackReferenceLength) {
217             this.minBackReferenceLength = Math.max(TRUE_MIN_BACK_REFERENCE_LENGTH, minBackReferenceLength);
218             if (windowSize < this.minBackReferenceLength) {
219                 throw new IllegalArgumentException("minBackReferenceLength can't be bigger than windowSize");
220             }
221             if (maxBackReferenceLength < this.minBackReferenceLength) {
222                 maxBackReferenceLength = this.minBackReferenceLength;
223             }
224             return this;
225         }
226 
227         /**
228          * Sets the "nice length" of a back-reference.
229          *
230          * <p>
231          * When a back-references if this size has been found, stop searching for longer back-references.
232          * </p>
233          *
234          * <p>
235          * This settings can be used to tune the tradeoff between compression speed and compression ratio.
236          * </p>
237          *
238          * @param niceLen the "nice length" of a back-reference
239          * @return the builder
240          */
241         public Builder withNiceBackReferenceLength(final int niceLen) {
242             niceBackReferenceLength = niceLen;
243             return this;
244         }
245     }
246 
247     /**
248      * The hard-coded absolute minimal length of a back-reference.
249      */
250     public static final int TRUE_MIN_BACK_REFERENCE_LENGTH = LZ77Compressor.NUMBER_OF_BYTES_IN_HASH;
251 
252     /**
253      * Initializes the builder for the compressor's parameters with a {@code minBackReferenceLength} of 3 and {@code max*Length} equal to
254      * {@code windowSize - 1}.
255      *
256      * <p>
257      * It is recommended to not use this method directly but rather tune a pre-configured builder created by a format specific factory like
258      * {@link org.apache.commons.compress.compressors.snappy.SnappyCompressorOutputStream#createParameterBuilder}.
259      * </p>
260      *
261      * @param windowSize the size of the sliding window - this determines the maximum offset a back-reference can take. Must be a power of two.
262      * @throws IllegalArgumentException if windowSize is not a power of two.
263      * @return a builder configured for the given window size
264      */
265     public static Builder builder(final int windowSize) {
266         return new Builder(windowSize);
267     }
268 
269     private static boolean isPowerOfTwo(final int x) {
270         // pre-condition: x > 0
271         return (x & x - 1) == 0;
272     }
273 
274     private final int windowSize, minBackReferenceLength, maxBackReferenceLength, maxOffset, maxLiteralLength, niceBackReferenceLength, maxCandidates,
275             lazyThreshold;
276 
277     private final boolean lazyMatching;
278 
279     private Parameters(final int windowSize, final int minBackReferenceLength, final int maxBackReferenceLength, final int maxOffset,
280             final int maxLiteralLength, final int niceBackReferenceLength, final int maxCandidates, final boolean lazyMatching, final int lazyThreshold) {
281         this.windowSize = windowSize;
282         this.minBackReferenceLength = minBackReferenceLength;
283         this.maxBackReferenceLength = maxBackReferenceLength;
284         this.maxOffset = maxOffset;
285         this.maxLiteralLength = maxLiteralLength;
286         this.niceBackReferenceLength = niceBackReferenceLength;
287         this.maxCandidates = maxCandidates;
288         this.lazyMatching = lazyMatching;
289         this.lazyThreshold = lazyThreshold;
290     }
291 
292     /**
293      * Gets whether to perform lazy matching.
294      *
295      * @return whether to perform lazy matching
296      */
297     public boolean getLazyMatching() {
298         return lazyMatching;
299     }
300 
301     /**
302      * Gets the threshold for lazy matching.
303      *
304      * @return the threshold for lazy matching
305      */
306     public int getLazyMatchingThreshold() {
307         return lazyThreshold;
308     }
309 
310     /**
311      * Gets the maximal length of a back-reference found.
312      *
313      * @return the maximal length of a back-reference found
314      */
315     public int getMaxBackReferenceLength() {
316         return maxBackReferenceLength;
317     }
318 
319     /**
320      * Gets the maximum number of back-reference candidates to consider.
321      *
322      * @return the maximum number of back-reference candidates to consider
323      */
324     public int getMaxCandidates() {
325         return maxCandidates;
326     }
327 
328     /**
329      * Gets the maximal length of a literal block.
330      *
331      * @return the maximal length of a literal block
332      */
333     public int getMaxLiteralLength() {
334         return maxLiteralLength;
335     }
336 
337     /**
338      * Gets the maximal offset of a back-reference found.
339      *
340      * @return the maximal offset of a back-reference found
341      */
342     public int getMaxOffset() {
343         return maxOffset;
344     }
345 
346     /**
347      * Gets the minimal length of a back-reference found.
348      *
349      * @return the minimal length of a back-reference found
350      */
351     public int getMinBackReferenceLength() {
352         return minBackReferenceLength;
353     }
354 
355     /**
356      * Gets the length of a back-reference that is considered nice enough to stop searching for longer ones.
357      *
358      * @return the length of a back-reference that is considered nice enough to stop searching
359      */
360     public int getNiceBackReferenceLength() {
361         return niceBackReferenceLength;
362     }
363 
364     /**
365      * Gets the size of the sliding window - this determines the maximum offset a back-reference can take.
366      *
367      * @return the size of the sliding window
368      */
369     public int getWindowSize() {
370         return windowSize;
371     }
372 }