View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   * http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing,
13   * software distributed under the License is distributed on an
14   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15   * KIND, either express or implied.  See the License for the
16   * specific language governing permissions and limitations
17   * under the License.
18   */
19  package org.apache.commons.compress.compressors.bzip2;
20  
21  import java.util.Arrays;
22  import java.util.BitSet;
23  
24  /**
25   * Encapsulates the Burrows-Wheeler sorting algorithm needed by {@link BZip2CompressorOutputStream}.
26   *
27   * <p>
28   * This class is based on a Java port of Julian Seward's blocksort.c in his libbzip2
29   * </p>
30   *
31   * <p>
32   * The Burrows-Wheeler transform is a reversible transform of the original data that is supposed to group similar bytes close to each other. The idea is to sort
33   * all permutations of the input and only keep the last byte of each permutation. E.g. for "Commons Compress" you'd get:
34   * </p>
35   *
36   * <pre>
37   *  CompressCommons
38   * Commons Compress
39   * CompressCommons
40   * essCommons Compr
41   * mmons CompressCo
42   * mons CompressCom
43   * mpressCommons Co
44   * ns CompressCommo
45   * ommons CompressC
46   * ompressCommons C
47   * ons CompressComm
48   * pressCommons Com
49   * ressCommons Comp
50   * s CompressCommon
51   * sCommons Compres
52   * ssCommons Compre
53   * </pre>
54   *
55   * <p>
56   * Which results in a new text "ss romooCCmmpnse", in adition the index of the first line that contained the original text is kept - in this case it is 1. The
57   * idea is that in a long English text all permutations that start with "he" are likely suffixes of a "the" and thus they end in "t" leading to a larger block
58   * of "t"s that can better be compressed by the subsequent Move-to-Front, run-length und Huffman encoding steps.
59   * </p>
60   *
61   * <p>
62   * For more information see for example:
63   * </p>
64   * <ul>
65   * <li><a href="http://www.hpl.hp.com/techreports/Compaq-DEC/SRC-RR-124.pdf">Burrows, M. and Wheeler, D.: A Block-sorting Lossless Data Compression
66   * Algorithm</a></li>
67   * <li><a href="http://webglimpse.net/pubs/suffix.pdf">Manber, U. and Myers, G.: Suffix arrays: A new method for on-line string searches</a></li>
68   * <li><a href="http://www.cs.tufts.edu/~nr/comp150fp/archive/bob-sedgewick/fast-strings.pdf">Bentley, J.L. and Sedgewick, R.: Fast Algorithms for Sorting and
69   * Searching Strings</a></li>
70   * </ul>
71   *
72   * @NotThreadSafe
73   */
74  final class BlockSort {
75  
76      /*
77       * Some of the constructs used in the C code cannot be ported literally to Java - for example macros, unsigned types. Some code has been hand-tuned to
78       * improve performance. In order to avoid memory pressure some structures are reused for several blocks and some memory is even shared between sorting and
79       * the MTF stage even though either algorithm uses it for its own purpose.
80       *
81       * Comments preserved from the actual C code are prefixed with "LBZ2:".
82       */
83  
84      /*
85       * 2012-05-20 Stefan Bodewig:
86       *
87       * This class seems to mix several revisions of libbzip2's code. The mainSort function and those used by it look closer to the 0.9.5 version but show some
88       * variations introduced later. At the same time the logic of Compress 1.4 to randomize the block on bad input has been dropped after libbzip2 0.9.0 and
89       * replaced by a fallback sorting algorithm.
90       *
91       * I've added the fallbackSort function of 1.0.6 and tried to integrate it with the existing code without touching too much. I've also removed the now
92       * unused randomization code.
93       */
94  
95      private static final int FTAB_LENGTH = 65537; // 262148 byte
96  
97      /*
98       * LBZ2: If you are ever unlucky/improbable enough to get a stack overflow whilst sorting, increase the following constant and try again. In practice I have
99       * never seen the stack go above 27 elems, so the following limit seems very generous.
100      */
101     private static final int QSORT_STACK_SIZE = 1000;
102 
103     private static final int FALLBACK_QSORT_STACK_SIZE = 100;
104 
105     private static final int STACK_SIZE = Math.max(QSORT_STACK_SIZE, FALLBACK_QSORT_STACK_SIZE);
106 
107     private static final int FALLBACK_QSORT_SMALL_THRESH = 10;
108     /*
109      * LBZ2: Knuth's increments seem to work better than Incerpi-Sedgewick here. Possibly because the number of elems to sort is usually small, typically &lt;=
110      * 20.
111      */
112     private static final int[] INCS = { 1, 4, 13, 40, 121, 364, 1093, 3280, 9841, 29524, 88573, 265720, 797161, 2391484 };
113     private static final int SMALL_THRESH = 20;
114 
115     private static final int DEPTH_THRESH = 10;
116     private static final int WORK_FACTOR = 30;
117     private static final int SETMASK = 1 << 21;
118 
119     private static final int CLEARMASK = ~SETMASK;
120 
121     private static int med3(final int a, final int b, final int c) {
122         return a < b ? b < c ? b : a < c ? c : a : b > c ? b : a > c ? c : a;
123     }
124 
125     private static void vswap(final int[] fmap, int p1, int p2, int n) {
126         n += p1;
127         while (p1 < n) {
128             final int t = fmap[p1];
129             fmap[p1++] = fmap[p2];
130             fmap[p2++] = t;
131         }
132     }
133 
134     /*
135      * Used when sorting. If too many long comparisons happen, we stop sorting, and use fallbackSort instead.
136      */
137     private int workDone;
138 
139     private int workLimit;
140 
141     private boolean firstAttempt;
142 
143     private final int[] stack_ll = new int[STACK_SIZE]; // 4000 byte
144 
145     private final int[] stack_hh = new int[STACK_SIZE]; // 4000 byte
146 
147     /*---------------------------------------------*/
148 
149     /*---------------------------------------------*/
150     /*--- LBZ2: Fallback O(N log(N)^2) sorting        ---*/
151     /*--- algorithm, for repetitive blocks      ---*/
152     /*---------------------------------------------*/
153 
154     /*
155      * This is the fallback sorting algorithm libbzip2 1.0.6 uses for repetitive or very short inputs.
156      *
157      * The idea is inspired by Manber-Myers string suffix sorting algorithm. First a bucket sort places each permutation of the block into a bucket based on its
158      * first byte. Permutations are represented by pointers to their first character kept in (partially) sorted order inside the array ftab.
159      *
160      * The next step visits all buckets in order and performs a quicksort on all permutations of the bucket based on the index of the bucket the second byte of
161      * the permutation belongs to, thereby forming new buckets. When arrived here the permutations are sorted up to the second character and we have buckets of
162      * permutations that are identical up to two characters.
163      *
164      * Repeat the step of quicksorting each bucket, now based on the bucket holding the sequence of the third and forth character leading to four byte buckets.
165      * Repeat this doubling of bucket sizes until all buckets only contain single permutations or the bucket size exceeds the block size.
166      *
167      * I.e.
168      *
169      * "abraba" form three buckets for the chars "a", "b", and "r" in the first step with
170      *
171      * fmap = { 'a:' 5, 3, 0, 'b:' 4, 1, 'r', 2 }
172      *
173      * when looking at the bucket of "a"s the second characters are in the buckets that start with fmap-index 0 (rolled over), 3 and 3 respectively, forming two
174      * new buckets "aa" and "ab", so we get
175      *
176      * fmap = { 'aa:' 5, 'ab:' 3, 0, 'ba:' 4, 'br': 1, 'ra:' 2 }
177      *
178      * since the last bucket only contained a single item it didn't have to be sorted at all.
179      *
180      * There now is just one bucket with more than one permutation that remains to be sorted. For the permutation that starts with index 3 the third and forth
181      * char are in bucket 'aa' at index 0 and for the one starting at block index 0 they are in bucket 'ra' with sort index 5. The fully sorted order then
182      * becomes.
183      *
184      * fmap = { 5, 3, 0, 4, 1, 2 }
185      */
186 
187     private final int[] stack_dd = new int[QSORT_STACK_SIZE]; // 4000 byte
188 
189     private final int[] mainSort_runningOrder = new int[256]; // 1024 byte
190 
191     private final int[] mainSort_copy = new int[256]; // 1024 byte
192 
193     private final boolean[] mainSort_bigDone = new boolean[256]; // 256 byte
194 
195     private final int[] ftab = new int[FTAB_LENGTH]; // 262148 byte
196 
197     /**
198      * Array instance identical to Data's sfmap, both are used only temporarily and indepently, so we do not need to allocate additional memory.
199      */
200     private final char[] quadrant;
201 
202     private int[] eclass;
203 
204     /*---------------------------------------------*/
205 
206     BlockSort(final BZip2CompressorOutputStream.Data data) {
207         this.quadrant = data.sfmap;
208     }
209 
210     void blockSort(final BZip2CompressorOutputStream.Data data, final int last) {
211         this.workLimit = WORK_FACTOR * last;
212         this.workDone = 0;
213         this.firstAttempt = true;
214 
215         if (last + 1 < 10000) {
216             fallbackSort(data, last);
217         } else {
218             mainSort(data, last);
219 
220             if (this.firstAttempt && this.workDone > this.workLimit) {
221                 fallbackSort(data, last);
222             }
223         }
224 
225         final int[] fmap = data.fmap;
226         data.origPtr = -1;
227         for (int i = 0; i <= last; i++) {
228             if (fmap[i] == 0) {
229                 data.origPtr = i;
230                 break;
231             }
232         }
233 
234         // assert (data.origPtr != -1) : data.origPtr;
235     }
236 
237     /*
238      * The C code uses an array of ints (each int holding 32 flags) to represents the bucket-start flags (bhtab). It also contains optimizations to skip over 32
239      * consecutively set or consecutively unset bits on word boundaries at once. For now I've chosen to use the simpler but potentially slower code using BitSet
240      * - also in the hope that using the BitSet#nextXXX methods may be fast enough.
241      */
242 
243     /**
244      * @param fmap   points to the index of the starting point of a permutation inside the block of data in the current partially sorted order
245      * @param eclass points from the index of a character inside the block to the first index in fmap that contains the bucket of its suffix that is sorted in
246      *               this step.
247      * @param loSt   lower boundary of the fmap-interval to be sorted
248      * @param hiSt   upper boundary of the fmap-interval to be sorted
249      */
250     private void fallbackQSort3(final int[] fmap, final int[] eclass, final int loSt, final int hiSt) {
251         int lo, unLo, ltLo, hi, unHi, gtHi, n;
252 
253         long r = 0;
254         int sp = 0;
255         fpush(sp++, loSt, hiSt);
256 
257         while (sp > 0) {
258             final int[] s = fpop(--sp);
259             lo = s[0];
260             hi = s[1];
261 
262             if (hi - lo < FALLBACK_QSORT_SMALL_THRESH) {
263                 fallbackSimpleSort(fmap, eclass, lo, hi);
264                 continue;
265             }
266 
267             /*
268              * LBZ2: Random partitioning. Median of 3 sometimes fails to avoid bad cases. Median of 9 seems to help but looks rather expensive. This too seems
269              * to work but is cheaper. Guidance for the magic constants 7621 and 32768 is taken from Sedgewick's algorithms book, chapter 35.
270              */
271             r = (r * 7621 + 1) % 32768;
272             final long r3 = r % 3;
273             final long med;
274             if (r3 == 0) {
275                 med = eclass[fmap[lo]];
276             } else if (r3 == 1) {
277                 med = eclass[fmap[lo + hi >>> 1]];
278             } else {
279                 med = eclass[fmap[hi]];
280             }
281 
282             unLo = ltLo = lo;
283             unHi = gtHi = hi;
284 
285             // looks like the ternary partition attributed to Wegner
286             // in the cited Sedgewick paper
287             while (true) {
288                 while (true) {
289                     if (unLo > unHi) {
290                         break;
291                     }
292                     n = eclass[fmap[unLo]] - (int) med;
293                     if (n == 0) {
294                         fswap(fmap, unLo, ltLo);
295                         ltLo++;
296                         unLo++;
297                         continue;
298                     }
299                     if (n > 0) {
300                         break;
301                     }
302                     unLo++;
303                 }
304                 while (true) {
305                     if (unLo > unHi) {
306                         break;
307                     }
308                     n = eclass[fmap[unHi]] - (int) med;
309                     if (n == 0) {
310                         fswap(fmap, unHi, gtHi);
311                         gtHi--;
312                         unHi--;
313                         continue;
314                     }
315                     if (n < 0) {
316                         break;
317                     }
318                     unHi--;
319                 }
320                 if (unLo > unHi) {
321                     break;
322                 }
323                 fswap(fmap, unLo, unHi);
324                 unLo++;
325                 unHi--;
326             }
327 
328             if (gtHi < ltLo) {
329                 continue;
330             }
331 
332             n = Math.min(ltLo - lo, unLo - ltLo);
333             fvswap(fmap, lo, unLo - n, n);
334             int m = Math.min(hi - gtHi, gtHi - unHi);
335             fvswap(fmap, unHi + 1, hi - m + 1, m);
336 
337             n = lo + unLo - ltLo - 1;
338             m = hi - (gtHi - unHi) + 1;
339 
340             if (n - lo > hi - m) {
341                 fpush(sp++, lo, n);
342                 fpush(sp++, m, hi);
343             } else {
344                 fpush(sp++, m, hi);
345                 fpush(sp++, lo, n);
346             }
347         }
348     }
349 
350     /*---------------------------------------------*/
351 
352     /**
353      * @param fmap   points to the index of the starting point of a permutation inside the block of data in the current partially sorted order
354      * @param eclass points from the index of a character inside the block to the first index in fmap that contains the bucket of its suffix that is sorted in
355      *               this step.
356      * @param lo     lower boundary of the fmap-interval to be sorted
357      * @param hi     upper boundary of the fmap-interval to be sorted
358      */
359     private void fallbackSimpleSort(final int[] fmap, final int[] eclass, final int lo, final int hi) {
360         if (lo == hi) {
361             return;
362         }
363 
364         int j;
365         if (hi - lo > 3) {
366             for (int i = hi - 4; i >= lo; i--) {
367                 final int tmp = fmap[i];
368                 final int ec_tmp = eclass[tmp];
369                 for (j = i + 4; j <= hi && ec_tmp > eclass[fmap[j]]; j += 4) {
370                     fmap[j - 4] = fmap[j];
371                 }
372                 fmap[j - 4] = tmp;
373             }
374         }
375 
376         for (int i = hi - 1; i >= lo; i--) {
377             final int tmp = fmap[i];
378             final int ec_tmp = eclass[tmp];
379             for (j = i + 1; j <= hi && ec_tmp > eclass[fmap[j]]; j++) {
380                 fmap[j - 1] = fmap[j];
381             }
382             fmap[j - 1] = tmp;
383         }
384     }
385 
386     /**
387      * Adapt fallbackSort to the expected interface of the rest of the code, in particular deal with the fact that block starts at offset 1 (in libbzip2 1.0.6
388      * it starts at 0).
389      */
390     void fallbackSort(final BZip2CompressorOutputStream.Data data, final int last) {
391         data.block[0] = data.block[last + 1];
392         fallbackSort(data.fmap, data.block, last + 1);
393         for (int i = 0; i < last + 1; i++) {
394             --data.fmap[i];
395         }
396         for (int i = 0; i < last + 1; i++) {
397             if (data.fmap[i] == -1) {
398                 data.fmap[i] = last;
399                 break;
400             }
401         }
402     }
403 
404     /*--
405        LBZ2: The following is an implementation of
406        an elegant 3-way quicksort for strings,
407        described in a paper "Fast Algorithms for
408        Sorting and Searching Strings", by Robert
409        Sedgewick and Jon L. Bentley.
410     --*/
411 
412     /**
413      * @param fmap   points to the index of the starting point of a permutation inside the block of data in the current partially sorted order
414      * @param block  the original data
415      * @param nblock size of the block
416      */
417     void fallbackSort(final int[] fmap, final byte[] block, final int nblock) {
418         final int[] ftab = new int[257];
419         int H, i, j, k, l, r, cc, cc1;
420         int nNotDone;
421         final int nBhtab;
422         final int[] eclass = getEclass();
423 
424         for (i = 0; i < nblock; i++) {
425             eclass[i] = 0;
426         }
427         /*--
428           LBZ2: Initial 1-char radix sort to generate
429           initial fmap and initial BH bits.
430           --*/
431         for (i = 0; i < nblock; i++) {
432             ftab[block[i] & 0xff]++;
433         }
434         for (i = 1; i < 257; i++) {
435             ftab[i] += ftab[i - 1];
436         }
437 
438         for (i = 0; i < nblock; i++) {
439             j = block[i] & 0xff;
440             k = ftab[j] - 1;
441             ftab[j] = k;
442             fmap[k] = i;
443         }
444 
445         nBhtab = 64 + nblock;
446         final BitSet bhtab = new BitSet(nBhtab);
447         for (i = 0; i < 256; i++) {
448             bhtab.set(ftab[i]);
449         }
450 
451         /*--
452           LBZ2: Inductively refine the buckets.  Kind-of an
453           "exponential radix sort" (!), inspired by the
454           Manber-Myers suffix array construction algorithm.
455           --*/
456 
457         /*-- LBZ2: set sentinel bits for block-end detection --*/
458         for (i = 0; i < 32; i++) {
459             bhtab.set(nblock + 2 * i);
460             bhtab.clear(nblock + 2 * i + 1);
461         }
462 
463         /*-- LBZ2: the log(N) loop --*/
464         H = 1;
465         while (true) {
466 
467             j = 0;
468             for (i = 0; i < nblock; i++) {
469                 if (bhtab.get(i)) {
470                     j = i;
471                 }
472                 k = fmap[i] - H;
473                 if (k < 0) {
474                     k += nblock;
475                 }
476                 eclass[k] = j;
477             }
478 
479             nNotDone = 0;
480             r = -1;
481             while (true) {
482 
483                 /*-- LBZ2: find the next non-singleton bucket --*/
484                 k = r + 1;
485                 k = bhtab.nextClearBit(k);
486                 l = k - 1;
487                 if (l >= nblock) {
488                     break;
489                 }
490                 k = bhtab.nextSetBit(k + 1);
491                 r = k - 1;
492                 if (r >= nblock) {
493                     break;
494                 }
495 
496                 /*-- LBZ2: now [l, r] bracket current bucket --*/
497                 if (r > l) {
498                     nNotDone += r - l + 1;
499                     fallbackQSort3(fmap, eclass, l, r);
500 
501                     /*-- LBZ2: scan bucket and generate header bits-- */
502                     cc = -1;
503                     for (i = l; i <= r; i++) {
504                         cc1 = eclass[fmap[i]];
505                         if (cc != cc1) {
506                             bhtab.set(i);
507                             cc = cc1;
508                         }
509                     }
510                 }
511             }
512 
513             H *= 2;
514             if (H > nblock || nNotDone == 0) {
515                 break;
516             }
517         }
518     }
519 
520     private int[] fpop(final int sp) {
521         return new int[] { stack_ll[sp], stack_hh[sp] };
522     }
523 
524     private void fpush(final int sp, final int lz, final int hz) {
525         stack_ll[sp] = lz;
526         stack_hh[sp] = hz;
527     }
528 
529     /**
530      * swaps two values in fmap
531      */
532     private void fswap(final int[] fmap, final int zz1, final int zz2) {
533         final int zztmp = fmap[zz1];
534         fmap[zz1] = fmap[zz2];
535         fmap[zz2] = zztmp;
536     }
537 
538     /**
539      * swaps two intervals starting at yyp1 and yyp2 of length yyn inside fmap.
540      */
541     private void fvswap(final int[] fmap, int yyp1, int yyp2, int yyn) {
542         while (yyn > 0) {
543             fswap(fmap, yyp1, yyp2);
544             yyp1++;
545             yyp2++;
546             yyn--;
547         }
548     }
549 
550     private int[] getEclass() {
551         if (eclass == null) {
552             eclass = new int[quadrant.length / 2];
553         }
554         return eclass;
555     }
556 
557     /**
558      * Method "mainQSort3", file "blocksort.c", BZip2 1.0.2
559      */
560     private void mainQSort3(final BZip2CompressorOutputStream.Data dataShadow, final int loSt, final int hiSt, final int dSt, final int last) {
561         final int[] stack_ll = this.stack_ll;
562         final int[] stack_hh = this.stack_hh;
563         final int[] stack_dd = this.stack_dd;
564         final int[] fmap = dataShadow.fmap;
565         final byte[] block = dataShadow.block;
566 
567         stack_ll[0] = loSt;
568         stack_hh[0] = hiSt;
569         stack_dd[0] = dSt;
570 
571         for (int sp = 1; --sp >= 0;) {
572             final int lo = stack_ll[sp];
573             final int hi = stack_hh[sp];
574             final int d = stack_dd[sp];
575 
576             if (hi - lo < SMALL_THRESH || d > DEPTH_THRESH) {
577                 if (mainSimpleSort(dataShadow, lo, hi, d, last)) {
578                     return;
579                 }
580             } else {
581                 final int d1 = d + 1;
582                 final int med = med3(block[fmap[lo] + d1] & 0xff, block[fmap[hi] + d1] & 0xff, block[fmap[lo + hi >>> 1] + d1] & 0xff);
583 
584                 int unLo = lo;
585                 int unHi = hi;
586                 int ltLo = lo;
587                 int gtHi = hi;
588 
589                 while (true) {
590                     while (unLo <= unHi) {
591                         final int n = (block[fmap[unLo] + d1] & 0xff) - med;
592                         if (n == 0) {
593                             final int temp = fmap[unLo];
594                             fmap[unLo++] = fmap[ltLo];
595                             fmap[ltLo++] = temp;
596                         } else if (n < 0) {
597                             unLo++;
598                         } else {
599                             break;
600                         }
601                     }
602 
603                     while (unLo <= unHi) {
604                         final int n = (block[fmap[unHi] + d1] & 0xff) - med;
605                         if (n == 0) {
606                             final int temp = fmap[unHi];
607                             fmap[unHi--] = fmap[gtHi];
608                             fmap[gtHi--] = temp;
609                         } else if (n > 0) {
610                             unHi--;
611                         } else {
612                             break;
613                         }
614                     }
615 
616                     if (unLo > unHi) {
617                         break;
618                     }
619                     final int temp = fmap[unLo];
620                     fmap[unLo++] = fmap[unHi];
621                     fmap[unHi--] = temp;
622                 }
623 
624                 if (gtHi < ltLo) {
625                     stack_ll[sp] = lo;
626                     stack_hh[sp] = hi;
627                     stack_dd[sp] = d1;
628                 } else {
629                     int n = Math.min(ltLo - lo, unLo - ltLo);
630                     vswap(fmap, lo, unLo - n, n);
631                     int m = Math.min(hi - gtHi, gtHi - unHi);
632                     vswap(fmap, unLo, hi - m + 1, m);
633 
634                     n = lo + unLo - ltLo - 1;
635                     m = hi - (gtHi - unHi) + 1;
636 
637                     stack_ll[sp] = lo;
638                     stack_hh[sp] = n;
639                     stack_dd[sp] = d;
640                     sp++;
641 
642                     stack_ll[sp] = n + 1;
643                     stack_hh[sp] = m - 1;
644                     stack_dd[sp] = d1;
645                     sp++;
646 
647                     stack_ll[sp] = m;
648                     stack_hh[sp] = hi;
649                     stack_dd[sp] = d;
650                 }
651                 sp++;
652             }
653         }
654     }
655 
656     /**
657      * This is the most hammered method of this class.
658      *
659      * <p>
660      * This is the version using unrolled loops. Normally I never use such ones in Java code. The unrolling has shown a noticeable performance improvement on
661      * JRE 1.4.2 (Linux i586 / HotSpot Client). Of course it depends on the JIT compiler of the vm.
662      * </p>
663      */
664     private boolean mainSimpleSort(final BZip2CompressorOutputStream.Data dataShadow, final int lo, final int hi, final int d, final int lastShadow) {
665         final int bigN = hi - lo + 1;
666         if (bigN < 2) {
667             return this.firstAttempt && this.workDone > this.workLimit;
668         }
669 
670         int hp = 0;
671         while (INCS[hp] < bigN) {
672             hp++;
673         }
674 
675         final int[] fmap = dataShadow.fmap;
676         final char[] quadrant = this.quadrant;
677         final byte[] block = dataShadow.block;
678         final int lastPlus1 = lastShadow + 1;
679         final boolean firstAttemptShadow = this.firstAttempt;
680         final int workLimitShadow = this.workLimit;
681         int workDoneShadow = this.workDone;
682 
683         // Following block contains unrolled code which could be shortened by
684         // coding it in additional loops.
685 
686         HP: while (--hp >= 0) {
687             final int h = INCS[hp];
688             final int mj = lo + h - 1;
689 
690             for (int i = lo + h; i <= hi;) {
691                 // copy
692                 for (int k = 3; i <= hi && --k >= 0; i++) {
693                     final int v = fmap[i];
694                     final int vd = v + d;
695                     int j = i;
696 
697                     // for (int a;
698                     // (j > mj) && mainGtU((a = fmap[j - h]) + d, vd,
699                     // block, quadrant, lastShadow);
700                     // j -= h) {
701                     // fmap[j] = a;
702                     // }
703                     //
704                     // unrolled version:
705 
706                     // start inline mainGTU
707                     boolean onceRunned = false;
708                     int a = 0;
709 
710                     HAMMER: while (true) {
711                         if (onceRunned) {
712                             fmap[j] = a;
713                             if ((j -= h) <= mj) { // NOSONAR
714                                 break HAMMER;
715                             }
716                         } else {
717                             onceRunned = true;
718                         }
719 
720                         a = fmap[j - h];
721                         int i1 = a + d;
722                         int i2 = vd;
723 
724                         // following could be done in a loop, but
725                         // unrolled it for performance:
726                         if (block[i1 + 1] == block[i2 + 1]) {
727                             if (block[i1 + 2] == block[i2 + 2]) {
728                                 if (block[i1 + 3] == block[i2 + 3]) {
729                                     if (block[i1 + 4] == block[i2 + 4]) {
730                                         if (block[i1 + 5] == block[i2 + 5]) {
731                                             if (block[i1 += 6] == block[i2 += 6]) { // NOSONAR
732                                                 int x = lastShadow;
733                                                 X: while (x > 0) {
734                                                     x -= 4;
735                                                     if (block[i1 + 1] == block[i2 + 1]) {
736                                                         if (quadrant[i1] == quadrant[i2]) {
737                                                             if (block[i1 + 2] == block[i2 + 2]) {
738                                                                 if (quadrant[i1 + 1] == quadrant[i2 + 1]) {
739                                                                     if (block[i1 + 3] == block[i2 + 3]) {
740                                                                         if (quadrant[i1 + 2] == quadrant[i2 + 2]) {
741                                                                             if (block[i1 + 4] == block[i2 + 4]) {
742                                                                                 if (quadrant[i1 + 3] == quadrant[i2 + 3]) {
743                                                                                     if ((i1 += 4) >= lastPlus1) { // NOSONAR
744                                                                                         i1 -= lastPlus1;
745                                                                                     }
746                                                                                     if ((i2 += 4) >= lastPlus1) { // NOSONAR
747                                                                                         i2 -= lastPlus1;
748                                                                                     }
749                                                                                     workDoneShadow++;
750                                                                                     continue X;
751                                                                                 }
752                                                                                 if (quadrant[i1 + 3] > quadrant[i2 + 3]) {
753                                                                                     continue HAMMER;
754                                                                                 }
755                                                                                 break HAMMER;
756                                                                             }
757                                                                             if ((block[i1 + 4] & 0xff) > (block[i2 + 4] & 0xff)) {
758                                                                                 continue HAMMER;
759                                                                             }
760                                                                             break HAMMER;
761                                                                         }
762                                                                         if (quadrant[i1 + 2] > quadrant[i2 + 2]) {
763                                                                             continue HAMMER;
764                                                                         }
765                                                                         break HAMMER;
766                                                                     }
767                                                                     if ((block[i1 + 3] & 0xff) > (block[i2 + 3] & 0xff)) {
768                                                                         continue HAMMER;
769                                                                     }
770                                                                     break HAMMER;
771                                                                 }
772                                                                 if (quadrant[i1 + 1] > quadrant[i2 + 1]) {
773                                                                     continue HAMMER;
774                                                                 }
775                                                                 break HAMMER;
776                                                             }
777                                                             if ((block[i1 + 2] & 0xff) > (block[i2 + 2] & 0xff)) {
778                                                                 continue HAMMER;
779                                                             }
780                                                             break HAMMER;
781                                                         }
782                                                         if (quadrant[i1] > quadrant[i2]) {
783                                                             continue HAMMER;
784                                                         }
785                                                         break HAMMER;
786                                                     }
787                                                     if ((block[i1 + 1] & 0xff) > (block[i2 + 1] & 0xff)) {
788                                                         continue HAMMER;
789                                                     }
790                                                     break HAMMER;
791 
792                                                 }
793                                                 break HAMMER;
794                                             } // while x > 0
795                                             if ((block[i1] & 0xff) > (block[i2] & 0xff)) {
796                                                 continue HAMMER;
797                                             }
798                                             break HAMMER;
799                                         }
800                                         if ((block[i1 + 5] & 0xff) > (block[i2 + 5] & 0xff)) {
801                                             continue HAMMER;
802                                         }
803                                         break HAMMER;
804                                     }
805                                     if ((block[i1 + 4] & 0xff) > (block[i2 + 4] & 0xff)) {
806                                         continue HAMMER;
807                                     }
808                                     break HAMMER;
809                                 }
810                                 if ((block[i1 + 3] & 0xff) > (block[i2 + 3] & 0xff)) {
811                                     continue HAMMER;
812                                 }
813                                 break HAMMER;
814                             }
815                             if ((block[i1 + 2] & 0xff) > (block[i2 + 2] & 0xff)) {
816                                 continue HAMMER;
817                             }
818                             break HAMMER;
819                         }
820                         if ((block[i1 + 1] & 0xff) > (block[i2 + 1] & 0xff)) {
821                             continue HAMMER;
822                         }
823                         break HAMMER;
824 
825                     } // HAMMER
826                       // end inline mainGTU
827 
828                     fmap[j] = v;
829                 }
830 
831                 if (firstAttemptShadow && i <= hi && workDoneShadow > workLimitShadow) {
832                     break HP;
833                 }
834             }
835         }
836 
837         this.workDone = workDoneShadow;
838         return firstAttemptShadow && workDoneShadow > workLimitShadow;
839     }
840 
841     void mainSort(final BZip2CompressorOutputStream.Data dataShadow, final int lastShadow) {
842         final int[] runningOrder = this.mainSort_runningOrder;
843         final int[] copy = this.mainSort_copy;
844         final boolean[] bigDone = this.mainSort_bigDone;
845         final int[] ftab = this.ftab;
846         final byte[] block = dataShadow.block;
847         final int[] fmap = dataShadow.fmap;
848         final char[] quadrant = this.quadrant;
849         final int workLimitShadow = this.workLimit;
850         final boolean firstAttemptShadow = this.firstAttempt;
851 
852         // LBZ2: Set up the 2-byte frequency table
853         Arrays.fill(ftab, 0);
854 
855         /*
856          * In the various block-sized structures, live data runs from 0 to last+NUM_OVERSHOOT_BYTES inclusive. First, set up the overshoot area for block.
857          */
858         for (int i = 0; i < BZip2Constants.NUM_OVERSHOOT_BYTES; i++) {
859             block[lastShadow + i + 2] = block[i % (lastShadow + 1) + 1];
860         }
861         for (int i = lastShadow + BZip2Constants.NUM_OVERSHOOT_BYTES + 1; --i >= 0;) {
862             quadrant[i] = 0;
863         }
864         block[0] = block[lastShadow + 1];
865 
866         // LBZ2: Complete the initial radix sort:
867 
868         int c1 = block[0] & 0xff;
869         for (int i = 0; i <= lastShadow; i++) {
870             final int c2 = block[i + 1] & 0xff;
871             ftab[(c1 << 8) + c2]++;
872             c1 = c2;
873         }
874 
875         for (int i = 1; i <= 65536; i++) {
876             ftab[i] += ftab[i - 1];
877         }
878 
879         c1 = block[1] & 0xff;
880         for (int i = 0; i < lastShadow; i++) {
881             final int c2 = block[i + 2] & 0xff;
882             fmap[--ftab[(c1 << 8) + c2]] = i;
883             c1 = c2;
884         }
885 
886         fmap[--ftab[((block[lastShadow + 1] & 0xff) << 8) + (block[1] & 0xff)]] = lastShadow;
887 
888         /*
889          * LBZ2: Now ftab contains the first loc of every small bucket. Calculate the running order, from smallest to largest big bucket.
890          */
891         for (int i = 256; --i >= 0;) {
892             bigDone[i] = false;
893             runningOrder[i] = i;
894         }
895 
896         // h = 364, 121, 40, 13, 4, 1
897         for (int h = 364; h != 1;) { // NOSONAR
898             h /= 3;
899             for (int i = h; i <= 255; i++) {
900                 final int vv = runningOrder[i];
901                 final int a = ftab[vv + 1 << 8] - ftab[vv << 8];
902                 final int b = h - 1;
903                 int j = i;
904                 for (int ro = runningOrder[j - h]; ftab[ro + 1 << 8] - ftab[ro << 8] > a; ro = runningOrder[j - h]) {
905                     runningOrder[j] = ro;
906                     j -= h;
907                     if (j <= b) {
908                         break;
909                     }
910                 }
911                 runningOrder[j] = vv;
912             }
913         }
914 
915         /*
916          * LBZ2: The main sorting loop.
917          */
918         for (int i = 0; i <= 255; i++) {
919             /*
920              * LBZ2: Process big buckets, starting with the least full.
921              */
922             final int ss = runningOrder[i];
923 
924             // Step 1:
925             /*
926              * LBZ2: Complete the big bucket [ss] by quicksorting any unsorted small buckets [ss, j]. Hopefully previous pointer-scanning phases have already
927              * completed many of the small buckets [ss, j], so we don't have to sort them at all.
928              */
929             for (int j = 0; j <= 255; j++) {
930                 final int sb = (ss << 8) + j;
931                 final int ftab_sb = ftab[sb];
932                 if ((ftab_sb & SETMASK) != SETMASK) {
933                     final int lo = ftab_sb & CLEARMASK;
934                     final int hi = (ftab[sb + 1] & CLEARMASK) - 1;
935                     if (hi > lo) {
936                         mainQSort3(dataShadow, lo, hi, 2, lastShadow);
937                         if (firstAttemptShadow && this.workDone > workLimitShadow) {
938                             return;
939                         }
940                     }
941                     ftab[sb] = ftab_sb | SETMASK;
942                 }
943             }
944 
945             // Step 2:
946             // LBZ2: Now scan this big bucket so as to synthesise the
947             // sorted order for small buckets [t, ss] for all t != ss.
948 
949             for (int j = 0; j <= 255; j++) {
950                 copy[j] = ftab[(j << 8) + ss] & CLEARMASK;
951             }
952 
953             for (int j = ftab[ss << 8] & CLEARMASK, hj = ftab[ss + 1 << 8] & CLEARMASK; j < hj; j++) {
954                 final int fmap_j = fmap[j];
955                 c1 = block[fmap_j] & 0xff;
956                 if (!bigDone[c1]) {
957                     fmap[copy[c1]] = fmap_j == 0 ? lastShadow : fmap_j - 1;
958                     copy[c1]++;
959                 }
960             }
961 
962             for (int j = 256; --j >= 0;) {
963                 ftab[(j << 8) + ss] |= SETMASK;
964             }
965 
966             // Step 3:
967             /*
968              * LBZ2: The ss big bucket is now done. Record this fact, and update the quadrant descriptors. Remember to update quadrants in the overshoot area
969              * too, if necessary. The "if (i < 255)" test merely skips this updating for the last bucket processed, since updating for the last bucket is
970              * pointless.
971              */
972             bigDone[ss] = true;
973 
974             if (i < 255) {
975                 final int bbStart = ftab[ss << 8] & CLEARMASK;
976                 final int bbSize = (ftab[ss + 1 << 8] & CLEARMASK) - bbStart;
977                 int shifts = 0;
978 
979                 while (bbSize >> shifts > 65534) {
980                     shifts++;
981                 }
982 
983                 for (int j = 0; j < bbSize; j++) {
984                     final int a2update = fmap[bbStart + j];
985                     final char qVal = (char) (j >> shifts);
986                     quadrant[a2update] = qVal;
987                     if (a2update < BZip2Constants.NUM_OVERSHOOT_BYTES) {
988                         quadrant[a2update + lastShadow + 1] = qVal;
989                     }
990                 }
991             }
992 
993         }
994     }
995 
996 }