001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.commons.collections4.sequence;
018
019import java.util.List;
020
021import org.apache.commons.collections4.Equator;
022import org.apache.commons.collections4.functors.DefaultEquator;
023
024/**
025 * This class allows to compare two objects sequences.
026 * <p>
027 * The two sequences can hold any object type, as only the <code>equals</code>
028 * method is used to compare the elements of the sequences. It is guaranteed
029 * that the comparisons will always be done as <code>o1.equals(o2)</code> where
030 * <code>o1</code> belongs to the first sequence and <code>o2</code> belongs to
031 * the second sequence. This can be important if subclassing is used for some
032 * elements in the first sequence and the <code>equals</code> method is
033 * specialized.
034 * <p>
035 * Comparison can be seen from two points of view: either as giving the smallest
036 * modification allowing to transform the first sequence into the second one, or
037 * as giving the longest sequence which is a subsequence of both initial
038 * sequences. The <code>equals</code> method is used to compare objects, so any
039 * object can be put into sequences. Modifications include deleting, inserting
040 * or keeping one object, starting from the beginning of the first sequence.
041 * <p>
042 * This class implements the comparison algorithm, which is the very efficient
043 * algorithm from Eugene W. Myers
044 * <a href="http://www.cis.upenn.edu/~bcpierce/courses/dd/papers/diff.ps">
045 * An O(ND) Difference Algorithm and Its Variations</a>. This algorithm produces
046 * the shortest possible
047 * {@link EditScript edit script}
048 * containing all the
049 * {@link EditCommand commands}
050 * needed to transform the first sequence into the second one.
051 *
052 * @see EditScript
053 * @see EditCommand
054 * @see CommandVisitor
055 *
056 * @since 4.0
057 * @version $Id: SequencesComparator.html 972421 2015-11-14 20:00:04Z tn $
058 */
059public class SequencesComparator<T> {
060
061    /** First sequence. */
062    private final List<T> sequence1;
063
064    /** Second sequence. */
065    private final List<T> sequence2;
066
067    /** The equator used for testing object equality. */
068    private final Equator<? super T> equator;
069
070    /** Temporary variables. */
071    private final int[] vDown;
072    private final int[] vUp;
073
074    /**
075     * Simple constructor.
076     * <p>
077     * Creates a new instance of SequencesComparator using a {@link DefaultEquator}.
078     * <p>
079     * It is <em>guaranteed</em> that the comparisons will always be done as
080     * <code>o1.equals(o2)</code> where <code>o1</code> belongs to the first
081     * sequence and <code>o2</code> belongs to the second sequence. This can be
082     * important if subclassing is used for some elements in the first sequence
083     * and the <code>equals</code> method is specialized.
084     *
085     * @param sequence1  first sequence to be compared
086     * @param sequence2  second sequence to be compared
087     */
088    public SequencesComparator(final List<T> sequence1, final List<T> sequence2) {
089        this(sequence1, sequence2, DefaultEquator.defaultEquator());
090    }
091
092    /**
093     * Simple constructor.
094     * <p>
095     * Creates a new instance of SequencesComparator with a custom {@link Equator}.
096     * <p>
097     * It is <em>guaranteed</em> that the comparisons will always be done as
098     * <code>Equator.equate(o1, o2)</code> where <code>o1</code> belongs to the first
099     * sequence and <code>o2</code> belongs to the second sequence.
100     *
101     * @param sequence1  first sequence to be compared
102     * @param sequence2  second sequence to be compared
103     * @param equator  the equator to use for testing object equality
104     */
105    public SequencesComparator(final List<T> sequence1, final List<T> sequence2, final Equator<? super T> equator) {
106        this.sequence1 = sequence1;
107        this.sequence2 = sequence2;
108        this.equator = equator;
109
110        final int size = sequence1.size() + sequence2.size() + 2;
111        vDown = new int[size];
112        vUp   = new int[size];
113    }
114
115    /**
116     * Get the {@link EditScript} object.
117     * <p>
118     * It is guaranteed that the objects embedded in the {@link InsertCommand
119     * insert commands} come from the second sequence and that the objects
120     * embedded in either the {@link DeleteCommand delete commands} or
121     * {@link KeepCommand keep commands} come from the first sequence. This can
122     * be important if subclassing is used for some elements in the first
123     * sequence and the <code>equals</code> method is specialized.
124     *
125     * @return the edit script resulting from the comparison of the two
126     *         sequences
127     */
128    public EditScript<T> getScript() {
129        final EditScript<T> script = new EditScript<T>();
130        buildScript(0, sequence1.size(), 0, sequence2.size(), script);
131        return script;
132    }
133
134    /**
135     * Build a snake.
136     *
137     * @param start  the value of the start of the snake
138     * @param diag  the value of the diagonal of the snake
139     * @param end1  the value of the end of the first sequence to be compared
140     * @param end2  the value of the end of the second sequence to be compared
141     * @return the snake built
142     */
143    private Snake buildSnake(final int start, final int diag, final int end1, final int end2) {
144        int end = start;
145        while (end - diag < end2
146                && end < end1
147                && equator.equate(sequence1.get(end), sequence2.get(end - diag))) {
148            ++end;
149        }
150        return new Snake(start, end, diag);
151    }
152
153    /**
154     * Get the middle snake corresponding to two subsequences of the
155     * main sequences.
156     * <p>
157     * The snake is found using the MYERS Algorithm (this algorithms has
158     * also been implemented in the GNU diff program). This algorithm is
159     * explained in Eugene Myers article:
160     * <a href="http://www.cs.arizona.edu/people/gene/PAPERS/diff.ps">
161     * An O(ND) Difference Algorithm and Its Variations</a>.
162     *
163     * @param start1  the begin of the first sequence to be compared
164     * @param end1  the end of the first sequence to be compared
165     * @param start2  the begin of the second sequence to be compared
166     * @param end2  the end of the second sequence to be compared
167     * @return the middle snake
168     */
169    private Snake getMiddleSnake(final int start1, final int end1, final int start2, final int end2) {
170        // Myers Algorithm
171        // Initialisations
172        final int m = end1 - start1;
173        final int n = end2 - start2;
174        if (m == 0 || n == 0) {
175            return null;
176        }
177
178        final int delta  = m - n;
179        final int sum    = n + m;
180        final int offset = (sum % 2 == 0 ? sum : sum + 1) / 2;
181        vDown[1+offset] = start1;
182        vUp[1+offset]   = end1 + 1;
183
184        for (int d = 0; d <= offset ; ++d) {
185            // Down
186            for (int k = -d; k <= d; k += 2) {
187                // First step
188
189                final int i = k + offset;
190                if (k == -d || k != d && vDown[i-1] < vDown[i+1]) {
191                    vDown[i] = vDown[i+1];
192                } else {
193                    vDown[i] = vDown[i-1] + 1;
194                }
195
196                int x = vDown[i];
197                int y = x - start1 + start2 - k;
198
199                while (x < end1 && y < end2 && equator.equate(sequence1.get(x), sequence2.get(y))) {
200                    vDown[i] = ++x;
201                    ++y;
202                }
203                // Second step
204                if (delta % 2 != 0 && delta - d <= k && k <= delta + d) {
205                    if (vUp[i-delta] <= vDown[i]) {
206                        return buildSnake(vUp[i-delta], k + start1 - start2, end1, end2);
207                    }
208                }
209            }
210
211            // Up
212            for (int k = delta - d; k <= delta + d; k += 2) {
213                // First step
214                final int i = k + offset - delta;
215                if (k == delta - d
216                        || k != delta + d && vUp[i+1] <= vUp[i-1]) {
217                    vUp[i] = vUp[i+1] - 1;
218                } else {
219                    vUp[i] = vUp[i-1];
220                }
221
222                int x = vUp[i] - 1;
223                int y = x - start1 + start2 - k;
224                while (x >= start1 && y >= start2
225                        && equator.equate(sequence1.get(x), sequence2.get(y))) {
226                    vUp[i] = x--;
227                    y--;
228                }
229                // Second step
230                if (delta % 2 == 0 && -d <= k && k <= d ) {
231                    if (vUp[i] <= vDown[i + delta]) {
232                        return buildSnake(vUp[i], k + start1 - start2, end1, end2);
233                    }
234                }
235            }
236        }
237
238        // this should not happen
239        throw new RuntimeException("Internal Error");
240    }
241
242
243    /**
244     * Build an edit script.
245     *
246     * @param start1  the begin of the first sequence to be compared
247     * @param end1  the end of the first sequence to be compared
248     * @param start2  the begin of the second sequence to be compared
249     * @param end2  the end of the second sequence to be compared
250     * @param script the edited script
251     */
252    private void buildScript(final int start1, final int end1, final int start2, final int end2,
253                             final EditScript<T> script) {
254
255        final Snake middle = getMiddleSnake(start1, end1, start2, end2);
256
257        if (middle == null
258                || middle.getStart() == end1 && middle.getDiag() == end1 - end2
259                || middle.getEnd() == start1 && middle.getDiag() == start1 - start2) {
260
261            int i = start1;
262            int j = start2;
263            while (i < end1 || j < end2) {
264                if (i < end1 && j < end2 && equator.equate(sequence1.get(i), sequence2.get(j))) {
265                    script.append(new KeepCommand<T>(sequence1.get(i)));
266                    ++i;
267                    ++j;
268                } else {
269                    if (end1 - start1 > end2 - start2) {
270                        script.append(new DeleteCommand<T>(sequence1.get(i)));
271                        ++i;
272                    } else {
273                        script.append(new InsertCommand<T>(sequence2.get(j)));
274                        ++j;
275                    }
276                }
277            }
278
279        } else {
280
281            buildScript(start1, middle.getStart(),
282                        start2, middle.getStart() - middle.getDiag(),
283                        script);
284            for (int i = middle.getStart(); i < middle.getEnd(); ++i) {
285                script.append(new KeepCommand<T>(sequence1.get(i)));
286            }
287            buildScript(middle.getEnd(), end1,
288                        middle.getEnd() - middle.getDiag(), end2,
289                        script);
290        }
291    }
292
293    /**
294     * This class is a simple placeholder to hold the end part of a path
295     * under construction in a {@link SequencesComparator SequencesComparator}.
296     */
297    private static class Snake {
298
299        /** Start index. */
300        private final int start;
301
302        /** End index. */
303        private final int end;
304
305        /** Diagonal number. */
306        private final int diag;
307
308        /**
309         * Simple constructor. Creates a new instance of Snake with specified indices.
310         *
311         * @param start  start index of the snake
312         * @param end  end index of the snake
313         * @param diag  diagonal number
314         */
315        public Snake(final int start, final int end, final int diag) {
316            this.start = start;
317            this.end   = end;
318            this.diag  = diag;
319        }
320
321        /**
322         * Get the start index of the snake.
323         *
324         * @return start index of the snake
325         */
326        public int getStart() {
327            return start;
328        }
329
330        /**
331         * Get the end index of the snake.
332         *
333         * @return end index of the snake
334         */
335        public int getEnd() {
336            return end;
337        }
338
339        /**
340         * Get the diagonal number of the snake.
341         *
342         * @return diagonal number of the snake
343         */
344        public int getDiag() {
345            return diag;
346        }
347    }
348}