View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   * http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing,
13   * software distributed under the License is distributed on an
14   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15   * KIND, either express or implied.  See the License for the
16   * specific language governing permissions and limitations
17   * under the License.
18   */
19  package org.apache.commons.compress.compressors.bzip2;
20  
21  import java.util.BitSet;
22  
23  /**
24   * Encapsulates the Burrows-Wheeler sorting algorithm needed by {@link
25   * BZip2CompressorOutputStream}.
26   *
27   * <p>This class is based on a Java port of Julian Seward's
28   * blocksort.c in his libbzip2</p>
29   *
30   * <p>The Burrows-Wheeler transform is a reversible transform of the
31   * original data that is supposed to group similar bytes close to
32   * each other.  The idea is to sort all permutations of the input and
33   * only keep the last byte of each permutation.  E.g. for "Commons
34   * Compress" you'd get:</p>
35   *
36   * <pre>
37   *  CompressCommons
38   * Commons Compress
39   * CompressCommons 
40   * essCommons Compr
41   * mmons CompressCo
42   * mons CompressCom
43   * mpressCommons Co
44   * ns CompressCommo
45   * ommons CompressC
46   * ompressCommons C
47   * ons CompressComm
48   * pressCommons Com
49   * ressCommons Comp
50   * s CompressCommon
51   * sCommons Compres
52   * ssCommons Compre
53   * </pre>
54   *
55   * <p>Which results in a new text "ss romooCCmmpnse", in adition the
56   * index of the first line that contained the original text is kept -
57   * in this case it is 1.  The idea is that in a long English text all
58   * permutations that start with "he" are likely suffixes of a "the" and
59   * thus they end in "t" leading to a larger block of "t"s that can
60   * better be compressed by the subsequent Move-to-Front, run-length
61   * und Huffman encoding steps.</p>
62   *
63   * <p>For more information see for example:</p>
64   * <ul>
65   *   <li><a
66   *   href="http://www.hpl.hp.com/techreports/Compaq-DEC/SRC-RR-124.pdf">Burrows,
67   *   M. and Wheeler, D.: A Block-sorting Lossless Data Compression
68   *   Algorithm</a></li>
69   *   <li><a href="http://webglimpse.net/pubs/suffix.pdf">Manber, U. and
70   *   Myers, G.: Suffix arrays: A new method for on-line string
71   *   searches</a></li>
72   *   <li><a
73   *   href="http://www.cs.tufts.edu/~nr/comp150fp/archive/bob-sedgewick/fast-strings.pdf">Bentley,
74   *   J.L. and Sedgewick, R.: Fast Algorithms for Sorting and Searching
75   *   Strings</a></li>
76   * </ul>
77   *
78   * @NotThreadSafe
79   */
80  class BlockSort {
81  
82      /*
83       * Some of the constructs used in the C code cannot be ported
84       * literally to Java - for example macros, unsigned types.  Some
85       * code has been hand-tuned to improve performance.  In order to
86       * avoid memory pressure some structures are reused for several
87       * blocks and some memory is even shared between sorting and the
88       * MTF stage even though either algorithm uses it for its own
89       * purpose.
90       *
91       * Comments preserved from the actual C code are prefixed with
92       * "LBZ2:".
93       */
94  
95      /*
96       * 2012-05-20 Stefan Bodewig:
97       *
98       * This class seems to mix several revisions of libbzip2's code.
99       * The mainSort function and those used by it look closer to the
100      * 0.9.5 version but show some variations introduced later.  At
101      * the same time the logic of Compress 1.4 to randomize the block
102      * on bad input has been dropped after libbzip2 0.9.0 and replaced
103      * by a fallback sorting algorithm.
104      *
105      * I've added the fallbackSort function of 1.0.6 and tried to
106      * integrate it with the existing code without touching too much.
107      * I've also removed the now unused randomization code.
108      */
109 
110     /*
111      * LBZ2: If you are ever unlucky/improbable enough to get a stack
112      * overflow whilst sorting, increase the following constant and
113      * try again. In practice I have never seen the stack go above 27
114      * elems, so the following limit seems very generous.
115      */
116     private static final int QSORT_STACK_SIZE = 1000;
117 
118     private static final int FALLBACK_QSORT_STACK_SIZE = 100;
119 
120     private static final int STACK_SIZE =
121         QSORT_STACK_SIZE < FALLBACK_QSORT_STACK_SIZE
122         ? FALLBACK_QSORT_STACK_SIZE : QSORT_STACK_SIZE;
123 
124     /*
125      * Used when sorting. If too many long comparisons happen, we stop sorting,
126      * and use fallbackSort instead.
127      */
128     private int workDone;
129     private int workLimit;
130     private boolean firstAttempt;
131 
132     private final int[] stack_ll = new int[STACK_SIZE]; // 4000 byte
133     private final int[] stack_hh = new int[STACK_SIZE]; // 4000 byte
134     private final int[] stack_dd = new int[QSORT_STACK_SIZE]; // 4000 byte
135 
136     private final int[] mainSort_runningOrder = new int[256]; // 1024 byte
137     private final int[] mainSort_copy = new int[256]; // 1024 byte
138     private final boolean[] mainSort_bigDone = new boolean[256]; // 256 byte
139 
140     private final int[] ftab = new int[65537]; // 262148 byte
141 
142     /**
143      * Array instance identical to Data's sfmap, both are used only
144      * temporarily and indepently, so we do not need to allocate
145      * additional memory.
146      */
147     private final char[] quadrant;
148 
149     BlockSort(final BZip2CompressorOutputStream.Data data) {
150         this.quadrant = data.sfmap;
151     }
152 
153     void blockSort(final BZip2CompressorOutputStream.Data data, final int last) {
154         this.workLimit = WORK_FACTOR * last;
155         this.workDone = 0;
156         this.firstAttempt = true;
157 
158         if (last + 1 < 10000) {
159             fallbackSort(data, last);
160         } else {
161             mainSort(data, last);
162 
163             if (this.firstAttempt && (this.workDone > this.workLimit)) {
164                 fallbackSort(data, last);
165             }
166         }
167 
168         final int[] fmap = data.fmap;
169         data.origPtr = -1;
170         for (int i = 0; i <= last; i++) {
171             if (fmap[i] == 0) {
172                 data.origPtr = i;
173                 break;
174             }
175         }
176 
177         // assert (data.origPtr != -1) : data.origPtr;
178     }
179 
180     /**
181      * Adapt fallbackSort to the expected interface of the rest of the
182      * code, in particular deal with the fact that block starts at
183      * offset 1 (in libbzip2 1.0.6 it starts at 0).
184      */
185     final void fallbackSort(final BZip2CompressorOutputStream.Data data,
186                             final int last) {
187         data.block[0] = data.block[last + 1];
188         fallbackSort(data.fmap, data.block, last + 1);
189         for (int i = 0; i < last + 1; i++) {
190             --data.fmap[i];
191         }
192         for (int i = 0; i < last + 1; i++) {
193             if (data.fmap[i] == -1) {
194                 data.fmap[i] = last;
195                 break;
196             }
197         }
198     }
199 
200 /*---------------------------------------------*/
201 
202 /*---------------------------------------------*/
203 /*--- LBZ2: Fallback O(N log(N)^2) sorting        ---*/
204 /*--- algorithm, for repetitive blocks      ---*/
205 /*---------------------------------------------*/
206 
207     /*
208      * This is the fallback sorting algorithm libbzip2 1.0.6 uses for
209      * repetitive or very short inputs.
210      *
211      * The idea is inspired by Manber-Myers string suffix sorting
212      * algorithm.  First a bucket sort places each permutation of the
213      * block into a bucket based on its first byte.  Permutations are
214      * represented by pointers to their first character kept in
215      * (partially) sorted order inside the array ftab.
216      *
217      * The next step visits all buckets in order and performs a
218      * quicksort on all permutations of the bucket based on the index
219      * of the bucket the second byte of the permutation belongs to,
220      * thereby forming new buckets.  When arrived here the
221      * permutations are sorted up to the second character and we have
222      * buckets of permutations that are identical up to two
223      * characters.
224      *
225      * Repeat the step of quicksorting each bucket, now based on the
226      * bucket holding the sequence of the third and forth character
227      * leading to four byte buckets.  Repeat this doubling of bucket
228      * sizes until all buckets only contain single permutations or the
229      * bucket size exceeds the block size.
230      *
231      * I.e.
232      *
233      * "abraba" form three buckets for the chars "a", "b", and "r" in
234      * the first step with
235      *
236      * fmap = { 'a:' 5, 3, 0, 'b:' 4, 1, 'r', 2 }
237      *
238      * when looking at the bucket of "a"s the second characters are in
239      * the buckets that start with fmap-index 0 (rolled over), 3 and 3
240      * respectively, forming two new buckets "aa" and "ab", so we get
241      *
242      * fmap = { 'aa:' 5, 'ab:' 3, 0, 'ba:' 4, 'br': 1, 'ra:' 2 }
243      *
244      * since the last bucket only contained a single item it didn't
245      * have to be sorted at all.
246      *
247      * There now is just one bucket with more than one permutation
248      * that remains to be sorted.  For the permutation that starts
249      * with index 3 the third and forth char are in bucket 'aa' at
250      * index 0 and for the one starting at block index 0 they are in
251      * bucket 'ra' with sort index 5.  The fully sorted order then becomes.
252      *
253      * fmap = { 5, 3, 0, 4, 1, 2 }
254      * 
255      */
256 
257     /**
258      * @param fmap points to the index of the starting point of a
259      *        permutation inside the block of data in the current
260      *        partially sorted order
261      * @param eclass points from the index of a character inside the
262      *        block to the first index in fmap that contains the
263      *        bucket of its suffix that is sorted in this step.
264      * @param lo lower boundary of the fmap-interval to be sorted 
265      * @param hi upper boundary of the fmap-interval to be sorted 
266      */
267     private void fallbackSimpleSort(final int[] fmap, 
268                                     final int[] eclass, 
269                                     final int lo, 
270                                     final int hi) {
271         if (lo == hi) {
272             return;
273         }
274 
275         int j;
276         if (hi - lo > 3) {
277             for (int i = hi - 4; i >= lo; i--) {
278                 final int tmp = fmap[i];
279                 final int ec_tmp = eclass[tmp];
280                 for (j = i + 4; j <= hi && ec_tmp > eclass[fmap[j]];
281                      j += 4) {
282                     fmap[j - 4] = fmap[j];
283                 }
284                 fmap[j - 4] = tmp;
285             }
286         }
287 
288         for (int i = hi - 1; i >= lo; i--) {
289             final int tmp = fmap[i];
290             final int ec_tmp = eclass[tmp];
291             for (j = i + 1; j <= hi && ec_tmp > eclass[fmap[j]]; j++) {
292                 fmap[j - 1] = fmap[j];
293             }
294             fmap[j-1] = tmp;
295         }
296     }
297 
298     private static final int FALLBACK_QSORT_SMALL_THRESH = 10;
299 
300     /**
301      * swaps two values in fmap
302      */
303     private void fswap(final int[] fmap, final int zz1, final int zz2) {
304         final int zztmp = fmap[zz1];
305         fmap[zz1] = fmap[zz2];
306         fmap[zz2] = zztmp;
307     }
308 
309     /**
310      * swaps two intervals starting at yyp1 and yyp2 of length yyn inside fmap.
311      */
312     private void fvswap(final int[] fmap, int yyp1, int yyp2, int yyn) {
313         while (yyn > 0) {
314             fswap(fmap, yyp1, yyp2);
315             yyp1++; yyp2++; yyn--;
316         }
317     }
318 
319     private int fmin(final int a, final int b) {
320         return a < b ? a : b;
321     }
322 
323     private void fpush(final int sp, final int lz, final int hz) {
324         stack_ll[sp] = lz;
325         stack_hh[sp] = hz;
326     }
327 
328     private int[] fpop(final int sp) {
329         return new int[] { stack_ll[sp], stack_hh[sp] };
330     }
331 
332     /**
333      * @param fmap points to the index of the starting point of a
334      *        permutation inside the block of data in the current
335      *        partially sorted order
336      * @param eclass points from the index of a character inside the
337      *        block to the first index in fmap that contains the
338      *        bucket of its suffix that is sorted in this step.
339      * @param loSt lower boundary of the fmap-interval to be sorted 
340      * @param hiSt upper boundary of the fmap-interval to be sorted 
341      */
342     private void fallbackQSort3(final int[] fmap, 
343                                 final int[] eclass, 
344                                 final int loSt, 
345                                 final int hiSt) {
346         int lo, unLo, ltLo, hi, unHi, gtHi, n;
347 
348         long r = 0;
349         int sp = 0;
350         fpush(sp++, loSt, hiSt);
351 
352         while (sp > 0) {
353             final int[] s = fpop(--sp);
354             lo = s[0]; hi = s[1];
355 
356             if (hi - lo < FALLBACK_QSORT_SMALL_THRESH) {
357                 fallbackSimpleSort(fmap, eclass, lo, hi);
358                 continue;
359             }
360 
361             /* LBZ2: Random partitioning.  Median of 3 sometimes fails to
362                avoid bad cases.  Median of 9 seems to help but 
363                looks rather expensive.  This too seems to work but
364                is cheaper.  Guidance for the magic constants 
365                7621 and 32768 is taken from Sedgewick's algorithms
366                book, chapter 35.
367             */
368             r = ((r * 7621) + 1) % 32768;
369             final long r3 = r % 3;
370             long med;
371             if (r3 == 0) {
372                 med = eclass[fmap[lo]]; 
373             } else if (r3 == 1) {
374                 med = eclass[fmap[(lo + hi) >>> 1]];
375             } else {
376                 med = eclass[fmap[hi]];
377             }
378 
379             unLo = ltLo = lo;
380             unHi = gtHi = hi;
381 
382             // looks like the ternary partition attributed to Wegner
383             // in the cited Sedgewick paper
384             while (true) {
385                 while (true) {
386                     if (unLo > unHi) {
387                         break;
388                     }
389                     n = eclass[fmap[unLo]] - (int) med;
390                     if (n == 0) { 
391                         fswap(fmap, unLo, ltLo); 
392                         ltLo++; unLo++; 
393                         continue; 
394                     }
395                     if (n > 0) {
396                         break;
397                     }
398                     unLo++;
399                 }
400                 while (true) {
401                     if (unLo > unHi) {
402                         break;
403                     }
404                     n = eclass[fmap[unHi]] - (int) med;
405                     if (n == 0) {
406                         fswap(fmap, unHi, gtHi); 
407                         gtHi--; unHi--; 
408                         continue; 
409                     }
410                     if (n < 0) {
411                         break;
412                     }
413                     unHi--;
414                 }
415                 if (unLo > unHi) {
416                     break;
417                 }
418                 fswap(fmap, unLo, unHi); unLo++; unHi--;
419             }
420 
421             if (gtHi < ltLo) {
422                 continue;
423             }
424 
425             n = fmin(ltLo - lo, unLo - ltLo);
426             fvswap(fmap, lo, unLo - n, n);
427             int m = fmin(hi - gtHi, gtHi - unHi);
428             fvswap(fmap, unHi + 1, hi - m + 1, m);
429 
430             n = lo + unLo - ltLo - 1;
431             m = hi - (gtHi - unHi) + 1;
432 
433             if (n - lo > hi - m) {
434                 fpush(sp++, lo, n);
435                 fpush(sp++, m, hi);
436             } else {
437                 fpush(sp++, m, hi);
438                 fpush(sp++, lo, n);
439             }
440         }
441     }
442 
443 
444 /*---------------------------------------------*/
445 
446     private int[] eclass;
447 
448     private int[] getEclass() {
449         if (eclass == null) {
450             eclass = new int[quadrant.length / 2];
451         }
452         return eclass;
453     }
454 
455     /*
456      * The C code uses an array of ints (each int holding 32 flags) to
457      * represents the bucket-start flags (bhtab).  It also contains
458      * optimizations to skip over 32 consecutively set or
459      * consecutively unset bits on word boundaries at once.  For now
460      * I've chosen to use the simpler but potentially slower code
461      * using BitSet - also in the hope that using the BitSet#nextXXX
462      * methods may be fast enough.
463      */
464 
465     /**
466      * @param fmap points to the index of the starting point of a
467      *        permutation inside the block of data in the current
468      *        partially sorted order
469      * @param block the original data
470      * @param nblock size of the block
471      * @param off offset of first byte to sort in block
472      */
473     final void fallbackSort(final int[] fmap, final byte[] block, final int nblock) {
474         final int[] ftab = new int[257];
475         int H, i, j, k, l, r, cc, cc1;
476         int nNotDone;
477         int nBhtab;
478         final int[] eclass = getEclass();
479 
480         for (i = 0; i < nblock; i++) {
481             eclass[i] = 0;
482         }
483         /*--
484           LBZ2: Initial 1-char radix sort to generate
485           initial fmap and initial BH bits.
486           --*/
487         for (i = 0; i < nblock; i++) {
488             ftab[block[i] & 0xff]++;
489         }
490         for (i = 1; i < 257;    i++) {
491             ftab[i] += ftab[i - 1];
492         }
493 
494         for (i = 0; i < nblock; i++) {
495             j = block[i] & 0xff;
496             k = ftab[j] - 1;
497             ftab[j] = k;
498             fmap[k] = i;
499         }
500 
501         nBhtab = 64 + nblock;
502         final BitSet bhtab = new BitSet(nBhtab);
503         for (i = 0; i < 256; i++) {
504             bhtab.set(ftab[i]);
505         }
506 
507         /*--
508           LBZ2: Inductively refine the buckets.  Kind-of an
509           "exponential radix sort" (!), inspired by the
510           Manber-Myers suffix array construction algorithm.
511           --*/
512 
513         /*-- LBZ2: set sentinel bits for block-end detection --*/
514         for (i = 0; i < 32; i++) { 
515             bhtab.set(nblock + 2 * i);
516             bhtab.clear(nblock + 2 * i + 1);
517         }
518 
519         /*-- LBZ2: the log(N) loop --*/
520         H = 1;
521         while (true) {
522 
523             j = 0;
524             for (i = 0; i < nblock; i++) {
525                 if (bhtab.get(i)) {
526                     j = i;
527                 }
528                 k = fmap[i] - H;
529                 if (k < 0) {
530                     k += nblock;
531                 }
532                 eclass[k] = j;
533             }
534 
535             nNotDone = 0;
536             r = -1;
537             while (true) {
538 
539                 /*-- LBZ2: find the next non-singleton bucket --*/
540                 k = r + 1;
541                 k = bhtab.nextClearBit(k);
542                 l = k - 1;
543                 if (l >= nblock) {
544                     break;
545                 }
546                 k = bhtab.nextSetBit(k + 1);
547                 r = k - 1;
548                 if (r >= nblock) {
549                     break;
550                 }
551 
552                 /*-- LBZ2: now [l, r] bracket current bucket --*/
553                 if (r > l) {
554                     nNotDone += (r - l + 1);
555                     fallbackQSort3(fmap, eclass, l, r);
556 
557                     /*-- LBZ2: scan bucket and generate header bits-- */
558                     cc = -1;
559                     for (i = l; i <= r; i++) {
560                         cc1 = eclass[fmap[i]];
561                         if (cc != cc1) {
562                             bhtab.set(i);
563                             cc = cc1;
564                         }
565                     }
566                 }
567             }
568 
569             H *= 2;
570             if (H > nblock || nNotDone == 0) {
571                 break;
572             }
573         }
574     }
575 
576 /*---------------------------------------------*/
577 
578     /*
579      * LBZ2: Knuth's increments seem to work better than Incerpi-Sedgewick here.
580      * Possibly because the number of elems to sort is usually small, typically
581      * &lt;= 20.
582      */
583     private static final int[] INCS = { 1, 4, 13, 40, 121, 364, 1093, 3280,
584                                         9841, 29524, 88573, 265720, 797161,
585                                         2391484 };
586 
587     /**
588      * This is the most hammered method of this class.
589      *
590      * <p>
591      * This is the version using unrolled loops. Normally I never use such ones
592      * in Java code. The unrolling has shown a noticable performance improvement
593      * on JRE 1.4.2 (Linux i586 / HotSpot Client). Of course it depends on the
594      * JIT compiler of the vm.
595      * </p>
596      */
597     private boolean mainSimpleSort(final BZip2CompressorOutputStream.Data dataShadow,
598                                    final int lo, final int hi, final int d,
599                                    final int lastShadow) {
600         final int bigN = hi - lo + 1;
601         if (bigN < 2) {
602             return this.firstAttempt && (this.workDone > this.workLimit);
603         }
604 
605         int hp = 0;
606         while (INCS[hp] < bigN) {
607             hp++;
608         }
609 
610         final int[] fmap = dataShadow.fmap;
611         final char[] quadrant = this.quadrant;
612         final byte[] block = dataShadow.block;
613         final int lastPlus1 = lastShadow + 1;
614         final boolean firstAttemptShadow = this.firstAttempt;
615         final int workLimitShadow = this.workLimit;
616         int workDoneShadow = this.workDone;
617 
618         // Following block contains unrolled code which could be shortened by
619         // coding it in additional loops.
620 
621         HP: while (--hp >= 0) {
622             final int h = INCS[hp];
623             final int mj = lo + h - 1;
624 
625             for (int i = lo + h; i <= hi;) {
626                 // copy
627                 for (int k = 3; (i <= hi) && (--k >= 0); i++) {
628                     final int v = fmap[i];
629                     final int vd = v + d;
630                     int j = i;
631 
632                     // for (int a;
633                     // (j > mj) && mainGtU((a = fmap[j - h]) + d, vd,
634                     // block, quadrant, lastShadow);
635                     // j -= h) {
636                     // fmap[j] = a;
637                     // }
638                     //
639                     // unrolled version:
640 
641                     // start inline mainGTU
642                     boolean onceRunned = false;
643                     int a = 0;
644 
645                     HAMMER: while (true) {
646                         if (onceRunned) {
647                             fmap[j] = a;
648                             if ((j -= h) <= mj) { //NOSONAR
649                                 break HAMMER;
650                             }
651                         } else {
652                             onceRunned = true;
653                         }
654 
655                         a = fmap[j - h];
656                         int i1 = a + d;
657                         int i2 = vd;
658 
659                         // following could be done in a loop, but
660                         // unrolled it for performance:
661                         if (block[i1 + 1] == block[i2 + 1]) {
662                             if (block[i1 + 2] == block[i2 + 2]) {
663                                 if (block[i1 + 3] == block[i2 + 3]) {
664                                     if (block[i1 + 4] == block[i2 + 4]) {
665                                         if (block[i1 + 5] == block[i2 + 5]) {
666                                             if (block[(i1 += 6)] == block[(i2 += 6)]) { //NOSONAR
667                                                 int x = lastShadow;
668                                                 X: while (x > 0) {
669                                                     x -= 4;
670 
671                                                     if (block[i1 + 1] == block[i2 + 1]) {
672                                                         if (quadrant[i1] == quadrant[i2]) {
673                                                             if (block[i1 + 2] == block[i2 + 2]) {
674                                                                 if (quadrant[i1 + 1] == quadrant[i2 + 1]) {
675                                                                     if (block[i1 + 3] == block[i2 + 3]) {
676                                                                         if (quadrant[i1 + 2] == quadrant[i2 + 2]) {
677                                                                             if (block[i1 + 4] == block[i2 + 4]) {
678                                                                                 if (quadrant[i1 + 3] == quadrant[i2 + 3]) {
679                                                                                     if ((i1 += 4) >= lastPlus1) { //NOSONAR
680                                                                                         i1 -= lastPlus1;
681                                                                                     }
682                                                                                     if ((i2 += 4) >= lastPlus1) { //NOSONAR
683                                                                                         i2 -= lastPlus1;
684                                                                                     }
685                                                                                     workDoneShadow++;
686                                                                                     continue X;
687                                                                                 } else if ((quadrant[i1 + 3] > quadrant[i2 + 3])) {
688                                                                                     continue HAMMER;
689                                                                                 } else {
690                                                                                     break HAMMER;
691                                                                                 }
692                                                                             } else if ((block[i1 + 4] & 0xff) > (block[i2 + 4] & 0xff)) {
693                                                                                 continue HAMMER;
694                                                                             } else {
695                                                                                 break HAMMER;
696                                                                             }
697                                                                         } else if ((quadrant[i1 + 2] > quadrant[i2 + 2])) {
698                                                                             continue HAMMER;
699                                                                         } else {
700                                                                             break HAMMER;
701                                                                         }
702                                                                     } else if ((block[i1 + 3] & 0xff) > (block[i2 + 3] & 0xff)) {
703                                                                         continue HAMMER;
704                                                                     } else {
705                                                                         break HAMMER;
706                                                                     }
707                                                                 } else if ((quadrant[i1 + 1] > quadrant[i2 + 1])) {
708                                                                     continue HAMMER;
709                                                                 } else {
710                                                                     break HAMMER;
711                                                                 }
712                                                             } else if ((block[i1 + 2] & 0xff) > (block[i2 + 2] & 0xff)) {
713                                                                 continue HAMMER;
714                                                             } else {
715                                                                 break HAMMER;
716                                                             }
717                                                         } else if ((quadrant[i1] > quadrant[i2])) {
718                                                             continue HAMMER;
719                                                         } else {
720                                                             break HAMMER;
721                                                         }
722                                                     } else if ((block[i1 + 1] & 0xff) > (block[i2 + 1] & 0xff)) {
723                                                         continue HAMMER;
724                                                     } else {
725                                                         break HAMMER;
726                                                     }
727 
728                                                 }
729                                                 break HAMMER;
730                                             } // while x > 0
731                                             if ((block[i1] & 0xff) > (block[i2] & 0xff)) {
732                                                 continue HAMMER;
733                                             }
734                                             break HAMMER;
735                                         } else if ((block[i1 + 5] & 0xff) > (block[i2 + 5] & 0xff)) {
736                                             continue HAMMER;
737                                         } else {
738                                             break HAMMER;
739                                         }
740                                     } else if ((block[i1 + 4] & 0xff) > (block[i2 + 4] & 0xff)) {
741                                         continue HAMMER;
742                                     } else {
743                                         break HAMMER;
744                                     }
745                                 } else if ((block[i1 + 3] & 0xff) > (block[i2 + 3] & 0xff)) {
746                                     continue HAMMER;
747                                 } else {
748                                     break HAMMER;
749                                 }
750                             } else if ((block[i1 + 2] & 0xff) > (block[i2 + 2] & 0xff)) {
751                                 continue HAMMER;
752                             } else {
753                                 break HAMMER;
754                             }
755                         } else if ((block[i1 + 1] & 0xff) > (block[i2 + 1] & 0xff)) {
756                             continue HAMMER;
757                         } else {
758                             break HAMMER;
759                         }
760 
761                     } // HAMMER
762                     // end inline mainGTU
763 
764                     fmap[j] = v;
765                 }
766 
767                 if (firstAttemptShadow && (i <= hi)
768                     && (workDoneShadow > workLimitShadow)) {
769                     break HP;
770                 }
771             }
772         }
773 
774         this.workDone = workDoneShadow;
775         return firstAttemptShadow && (workDoneShadow > workLimitShadow);
776     }
777 
778 /*--
779    LBZ2: The following is an implementation of
780    an elegant 3-way quicksort for strings,
781    described in a paper "Fast Algorithms for
782    Sorting and Searching Strings", by Robert
783    Sedgewick and Jon L. Bentley.
784 --*/
785 
786     private static void vswap(final int[] fmap, int p1, int p2, int n) {
787         n += p1;
788         while (p1 < n) {
789             final int t = fmap[p1];
790             fmap[p1++] = fmap[p2];
791             fmap[p2++] = t;
792         }
793     }
794 
795     private static byte med3(final byte a, final byte b, final byte c) {
796         return (a < b) ? (b < c ? b : a < c ? c : a) : (b > c ? b : a > c ? c
797                                                         : a);
798     }
799 
800     private static final int SMALL_THRESH = 20;
801     private static final int DEPTH_THRESH = 10;
802     private static final int WORK_FACTOR = 30;
803 
804     /**
805      * Method "mainQSort3", file "blocksort.c", BZip2 1.0.2
806      */
807     private void mainQSort3(final BZip2CompressorOutputStream.Data dataShadow,
808                             final int loSt, final int hiSt, final int dSt,
809                             final int last) {
810         final int[] stack_ll = this.stack_ll;
811         final int[] stack_hh = this.stack_hh;
812         final int[] stack_dd = this.stack_dd;
813         final int[] fmap = dataShadow.fmap;
814         final byte[] block = dataShadow.block;
815 
816         stack_ll[0] = loSt;
817         stack_hh[0] = hiSt;
818         stack_dd[0] = dSt;
819 
820         for (int sp = 1; --sp >= 0;) {
821             final int lo = stack_ll[sp];
822             final int hi = stack_hh[sp];
823             final int d = stack_dd[sp];
824 
825             if ((hi - lo < SMALL_THRESH) || (d > DEPTH_THRESH)) {
826                 if (mainSimpleSort(dataShadow, lo, hi, d, last)) {
827                     return;
828                 }
829             } else {
830                 final int d1 = d + 1;
831                 final int med = med3(block[fmap[lo] + d1],
832                                      block[fmap[hi] + d1], block[fmap[(lo + hi) >>> 1] + d1]) & 0xff;
833 
834                 int unLo = lo;
835                 int unHi = hi;
836                 int ltLo = lo;
837                 int gtHi = hi;
838 
839                 while (true) {
840                     while (unLo <= unHi) {
841                         final int n = (block[fmap[unLo] + d1] & 0xff)
842                             - med;
843                         if (n == 0) {
844                             final int temp = fmap[unLo];
845                             fmap[unLo++] = fmap[ltLo];
846                             fmap[ltLo++] = temp;
847                         } else if (n < 0) {
848                             unLo++;
849                         } else {
850                             break;
851                         }
852                     }
853 
854                     while (unLo <= unHi) {
855                         final int n = (block[fmap[unHi] + d1] & 0xff)
856                             - med;
857                         if (n == 0) {
858                             final int temp = fmap[unHi];
859                             fmap[unHi--] = fmap[gtHi];
860                             fmap[gtHi--] = temp;
861                         } else if (n > 0) {
862                             unHi--;
863                         } else {
864                             break;
865                         }
866                     }
867 
868                     if (unLo <= unHi) {
869                         final int temp = fmap[unLo];
870                         fmap[unLo++] = fmap[unHi];
871                         fmap[unHi--] = temp;
872                     } else {
873                         break;
874                     }
875                 }
876 
877                 if (gtHi < ltLo) {
878                     stack_ll[sp] = lo;
879                     stack_hh[sp] = hi;
880                     stack_dd[sp] = d1;
881                     sp++;
882                 } else {
883                     int n = ((ltLo - lo) < (unLo - ltLo)) ? (ltLo - lo)
884                         : (unLo - ltLo);
885                     vswap(fmap, lo, unLo - n, n);
886                     int m = ((hi - gtHi) < (gtHi - unHi)) ? (hi - gtHi)
887                         : (gtHi - unHi);
888                     vswap(fmap, unLo, hi - m + 1, m);
889 
890                     n = lo + unLo - ltLo - 1;
891                     m = hi - (gtHi - unHi) + 1;
892 
893                     stack_ll[sp] = lo;
894                     stack_hh[sp] = n;
895                     stack_dd[sp] = d;
896                     sp++;
897 
898                     stack_ll[sp] = n + 1;
899                     stack_hh[sp] = m - 1;
900                     stack_dd[sp] = d1;
901                     sp++;
902 
903                     stack_ll[sp] = m;
904                     stack_hh[sp] = hi;
905                     stack_dd[sp] = d;
906                     sp++;
907                 }
908             }
909         }
910     }
911 
912     private static final int SETMASK = (1 << 21);
913     private static final int CLEARMASK = (~SETMASK);
914 
915     final void mainSort(final BZip2CompressorOutputStream.Data dataShadow,
916                         final int lastShadow) {
917         final int[] runningOrder = this.mainSort_runningOrder;
918         final int[] copy = this.mainSort_copy;
919         final boolean[] bigDone = this.mainSort_bigDone;
920         final int[] ftab = this.ftab;
921         final byte[] block = dataShadow.block;
922         final int[] fmap = dataShadow.fmap;
923         final char[] quadrant = this.quadrant;
924         final int workLimitShadow = this.workLimit;
925         final boolean firstAttemptShadow = this.firstAttempt;
926 
927         // LBZ2: Set up the 2-byte frequency table
928         for (int i = 65537; --i >= 0;) {
929             ftab[i] = 0;
930         }
931 
932         /*
933          * In the various block-sized structures, live data runs from 0 to
934          * last+NUM_OVERSHOOT_BYTES inclusive. First, set up the overshoot area
935          * for block.
936          */
937         for (int i = 0; i < BZip2Constants.NUM_OVERSHOOT_BYTES; i++) {
938             block[lastShadow + i + 2] = block[(i % (lastShadow + 1)) + 1];
939         }
940         for (int i = lastShadow + BZip2Constants.NUM_OVERSHOOT_BYTES +1; --i >= 0;) {
941             quadrant[i] = 0;
942         }
943         block[0] = block[lastShadow + 1];
944 
945         // LBZ2: Complete the initial radix sort:
946 
947         int c1 = block[0] & 0xff;
948         for (int i = 0; i <= lastShadow; i++) {
949             final int c2 = block[i + 1] & 0xff;
950             ftab[(c1 << 8) + c2]++;
951             c1 = c2;
952         }
953 
954         for (int i = 1; i <= 65536; i++) {
955             ftab[i] += ftab[i - 1];
956         }
957 
958         c1 = block[1] & 0xff;
959         for (int i = 0; i < lastShadow; i++) {
960             final int c2 = block[i + 2] & 0xff;
961             fmap[--ftab[(c1 << 8) + c2]] = i;
962             c1 = c2;
963         }
964 
965         fmap[--ftab[((block[lastShadow + 1] & 0xff) << 8) + (block[1] & 0xff)]] = lastShadow;
966 
967         /*
968          * LBZ2: Now ftab contains the first loc of every small bucket. Calculate the
969          * running order, from smallest to largest big bucket.
970          */
971         for (int i = 256; --i >= 0;) {
972             bigDone[i] = false;
973             runningOrder[i] = i;
974         }
975 
976         // h = 364, 121, 40, 13, 4, 1
977         for (int h = 364; h != 1;) { //NOSONAR
978             h /= 3;
979             for (int i = h; i <= 255; i++) {
980                 final int vv = runningOrder[i];
981                 final int a = ftab[(vv + 1) << 8] - ftab[vv << 8];
982                 final int b = h - 1;
983                 int j = i;
984                 for (int ro = runningOrder[j - h]; (ftab[(ro + 1) << 8] - ftab[ro << 8]) > a; ro = runningOrder[j
985                                                                                                                 - h]) {
986                     runningOrder[j] = ro;
987                     j -= h;
988                     if (j <= b) {
989                         break;
990                     }
991                 }
992                 runningOrder[j] = vv;
993             }
994         }
995 
996         /*
997          * LBZ2: The main sorting loop.
998          */
999         for (int i = 0; i <= 255; i++) {
1000             /*
1001              * LBZ2: Process big buckets, starting with the least full.
1002              */
1003             final int ss = runningOrder[i];
1004 
1005             // Step 1:
1006             /*
1007              * LBZ2: Complete the big bucket [ss] by quicksorting any unsorted small
1008              * buckets [ss, j]. Hopefully previous pointer-scanning phases have
1009              * already completed many of the small buckets [ss, j], so we don't
1010              * have to sort them at all.
1011              */
1012             for (int j = 0; j <= 255; j++) {
1013                 final int sb = (ss << 8) + j;
1014                 final int ftab_sb = ftab[sb];
1015                 if ((ftab_sb & SETMASK) != SETMASK) {
1016                     final int lo = ftab_sb & CLEARMASK;
1017                     final int hi = (ftab[sb + 1] & CLEARMASK) - 1;
1018                     if (hi > lo) {
1019                         mainQSort3(dataShadow, lo, hi, 2, lastShadow);
1020                         if (firstAttemptShadow
1021                             && (this.workDone > workLimitShadow)) {
1022                             return;
1023                         }
1024                     }
1025                     ftab[sb] = ftab_sb | SETMASK;
1026                 }
1027             }
1028 
1029             // Step 2:
1030             // LBZ2: Now scan this big bucket so as to synthesise the
1031             // sorted order for small buckets [t, ss] for all t != ss.
1032 
1033             for (int j = 0; j <= 255; j++) {
1034                 copy[j] = ftab[(j << 8) + ss] & CLEARMASK;
1035             }
1036 
1037             for (int j = ftab[ss << 8] & CLEARMASK, hj = (ftab[(ss + 1) << 8] & CLEARMASK); j < hj; j++) {
1038                 final int fmap_j = fmap[j];
1039                 c1 = block[fmap_j] & 0xff;
1040                 if (!bigDone[c1]) {
1041                     fmap[copy[c1]] = (fmap_j == 0) ? lastShadow : (fmap_j - 1);
1042                     copy[c1]++;
1043                 }
1044             }
1045 
1046             for (int j = 256; --j >= 0;) {
1047                 ftab[(j << 8) + ss] |= SETMASK;
1048             }
1049 
1050             // Step 3:
1051             /*
1052              * LBZ2: The ss big bucket is now done. Record this fact, and update the
1053              * quadrant descriptors. Remember to update quadrants in the
1054              * overshoot area too, if necessary. The "if (i < 255)" test merely
1055              * skips this updating for the last bucket processed, since updating
1056              * for the last bucket is pointless.
1057              */
1058             bigDone[ss] = true;
1059 
1060             if (i < 255) {
1061                 final int bbStart = ftab[ss << 8] & CLEARMASK;
1062                 final int bbSize = (ftab[(ss + 1) << 8] & CLEARMASK) - bbStart;
1063                 int shifts = 0;
1064 
1065                 while ((bbSize >> shifts) > 65534) {
1066                     shifts++;
1067                 }
1068 
1069                 for (int j = 0; j < bbSize; j++) {
1070                     final int a2update = fmap[bbStart + j];
1071                     final char qVal = (char) (j >> shifts);
1072                     quadrant[a2update] = qVal;
1073                     if (a2update < BZip2Constants.NUM_OVERSHOOT_BYTES) {
1074                         quadrant[a2update + lastShadow + 1] = qVal;
1075                     }
1076                 }
1077             }
1078 
1079         }
1080     }
1081 
1082 }