001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.commons.collections4.sequence;
018
019import java.util.List;
020
021import org.apache.commons.collections4.Equator;
022import org.apache.commons.collections4.functors.DefaultEquator;
023
024/**
025 * This class allows to compare two objects sequences.
026 * <p>
027 * The two sequences can hold any object type, as only the <code>equals</code>
028 * method is used to compare the elements of the sequences. It is guaranteed
029 * that the comparisons will always be done as <code>o1.equals(o2)</code> where
030 * <code>o1</code> belongs to the first sequence and <code>o2</code> belongs to
031 * the second sequence. This can be important if subclassing is used for some
032 * elements in the first sequence and the <code>equals</code> method is
033 * specialized.
034 * <p>
035 * Comparison can be seen from two points of view: either as giving the smallest
036 * modification allowing to transform the first sequence into the second one, or
037 * as giving the longest sequence which is a subsequence of both initial
038 * sequences. The <code>equals</code> method is used to compare objects, so any
039 * object can be put into sequences. Modifications include deleting, inserting
040 * or keeping one object, starting from the beginning of the first sequence.
041 * <p>
042 * This class implements the comparison algorithm, which is the very efficient
043 * algorithm from Eugene W. Myers
044 * <a href="http://www.cis.upenn.edu/~bcpierce/courses/dd/papers/diff.ps">
045 * An O(ND) Difference Algorithm and Its Variations</a>. This algorithm produces
046 * the shortest possible
047 * {@link EditScript edit script}
048 * containing all the
049 * {@link EditCommand commands}
050 * needed to transform the first sequence into the second one.
051 *
052 * @see EditScript
053 * @see EditCommand
054 * @see CommandVisitor
055 *
056 * @since 4.0
057 */
058public class SequencesComparator<T> {
059
060    /** First sequence. */
061    private final List<T> sequence1;
062
063    /** Second sequence. */
064    private final List<T> sequence2;
065
066    /** The equator used for testing object equality. */
067    private final Equator<? super T> equator;
068
069    /** Temporary variables. */
070    private final int[] vDown;
071    private final int[] vUp;
072
073    /**
074     * Simple constructor.
075     * <p>
076     * Creates a new instance of SequencesComparator using a {@link DefaultEquator}.
077     * <p>
078     * It is <em>guaranteed</em> that the comparisons will always be done as
079     * <code>o1.equals(o2)</code> where <code>o1</code> belongs to the first
080     * sequence and <code>o2</code> belongs to the second sequence. This can be
081     * important if subclassing is used for some elements in the first sequence
082     * and the <code>equals</code> method is specialized.
083     *
084     * @param sequence1  first sequence to be compared
085     * @param sequence2  second sequence to be compared
086     */
087    public SequencesComparator(final List<T> sequence1, final List<T> sequence2) {
088        this(sequence1, sequence2, DefaultEquator.defaultEquator());
089    }
090
091    /**
092     * Simple constructor.
093     * <p>
094     * Creates a new instance of SequencesComparator with a custom {@link Equator}.
095     * <p>
096     * It is <em>guaranteed</em> that the comparisons will always be done as
097     * <code>Equator.equate(o1, o2)</code> where <code>o1</code> belongs to the first
098     * sequence and <code>o2</code> belongs to the second sequence.
099     *
100     * @param sequence1  first sequence to be compared
101     * @param sequence2  second sequence to be compared
102     * @param equator  the equator to use for testing object equality
103     */
104    public SequencesComparator(final List<T> sequence1, final List<T> sequence2, final Equator<? super T> equator) {
105        this.sequence1 = sequence1;
106        this.sequence2 = sequence2;
107        this.equator = equator;
108
109        final int size = sequence1.size() + sequence2.size() + 2;
110        vDown = new int[size];
111        vUp   = new int[size];
112    }
113
114    /**
115     * Get the {@link EditScript} object.
116     * <p>
117     * It is guaranteed that the objects embedded in the {@link InsertCommand
118     * insert commands} come from the second sequence and that the objects
119     * embedded in either the {@link DeleteCommand delete commands} or
120     * {@link KeepCommand keep commands} come from the first sequence. This can
121     * be important if subclassing is used for some elements in the first
122     * sequence and the <code>equals</code> method is specialized.
123     *
124     * @return the edit script resulting from the comparison of the two
125     *         sequences
126     */
127    public EditScript<T> getScript() {
128        final EditScript<T> script = new EditScript<>();
129        buildScript(0, sequence1.size(), 0, sequence2.size(), script);
130        return script;
131    }
132
133    /**
134     * Build a snake.
135     *
136     * @param start  the value of the start of the snake
137     * @param diag  the value of the diagonal of the snake
138     * @param end1  the value of the end of the first sequence to be compared
139     * @param end2  the value of the end of the second sequence to be compared
140     * @return the snake built
141     */
142    private Snake buildSnake(final int start, final int diag, final int end1, final int end2) {
143        int end = start;
144        while (end - diag < end2
145                && end < end1
146                && equator.equate(sequence1.get(end), sequence2.get(end - diag))) {
147            ++end;
148        }
149        return new Snake(start, end, diag);
150    }
151
152    /**
153     * Get the middle snake corresponding to two subsequences of the
154     * main sequences.
155     * <p>
156     * The snake is found using the MYERS Algorithm (this algorithms has
157     * also been implemented in the GNU diff program). This algorithm is
158     * explained in Eugene Myers article:
159     * <a href="http://www.cs.arizona.edu/people/gene/PAPERS/diff.ps">
160     * An O(ND) Difference Algorithm and Its Variations</a>.
161     *
162     * @param start1  the begin of the first sequence to be compared
163     * @param end1  the end of the first sequence to be compared
164     * @param start2  the begin of the second sequence to be compared
165     * @param end2  the end of the second sequence to be compared
166     * @return the middle snake
167     */
168    private Snake getMiddleSnake(final int start1, final int end1, final int start2, final int end2) {
169        // Myers Algorithm
170        // Initialisations
171        final int m = end1 - start1;
172        final int n = end2 - start2;
173        if (m == 0 || n == 0) {
174            return null;
175        }
176
177        final int delta  = m - n;
178        final int sum    = n + m;
179        final int offset = (sum % 2 == 0 ? sum : sum + 1) / 2;
180        vDown[1+offset] = start1;
181        vUp[1+offset]   = end1 + 1;
182
183        for (int d = 0; d <= offset ; ++d) {
184            // Down
185            for (int k = -d; k <= d; k += 2) {
186                // First step
187
188                final int i = k + offset;
189                if (k == -d || k != d && vDown[i-1] < vDown[i+1]) {
190                    vDown[i] = vDown[i+1];
191                } else {
192                    vDown[i] = vDown[i-1] + 1;
193                }
194
195                int x = vDown[i];
196                int y = x - start1 + start2 - k;
197
198                while (x < end1 && y < end2 && equator.equate(sequence1.get(x), sequence2.get(y))) {
199                    vDown[i] = ++x;
200                    ++y;
201                }
202                // Second step
203                if (delta % 2 != 0 && delta - d <= k && k <= delta + d) {
204                    if (vUp[i-delta] <= vDown[i]) { // NOPMD
205                        return buildSnake(vUp[i-delta], k + start1 - start2, end1, end2);
206                    }
207                }
208            }
209
210            // Up
211            for (int k = delta - d; k <= delta + d; k += 2) {
212                // First step
213                final int i = k + offset - delta;
214                if (k == delta - d
215                        || k != delta + d && vUp[i+1] <= vUp[i-1]) {
216                    vUp[i] = vUp[i+1] - 1;
217                } else {
218                    vUp[i] = vUp[i-1];
219                }
220
221                int x = vUp[i] - 1;
222                int y = x - start1 + start2 - k;
223                while (x >= start1 && y >= start2
224                        && equator.equate(sequence1.get(x), sequence2.get(y))) {
225                    vUp[i] = x--;
226                    y--;
227                }
228                // Second step
229                if (delta % 2 == 0 && -d <= k && k <= d ) {
230                    if (vUp[i] <= vDown[i + delta]) { // NOPMD
231                        return buildSnake(vUp[i], k + start1 - start2, end1, end2);
232                    }
233                }
234            }
235        }
236
237        // this should not happen
238        throw new RuntimeException("Internal Error");
239    }
240
241
242    /**
243     * Build an edit script.
244     *
245     * @param start1  the begin of the first sequence to be compared
246     * @param end1  the end of the first sequence to be compared
247     * @param start2  the begin of the second sequence to be compared
248     * @param end2  the end of the second sequence to be compared
249     * @param script the edited script
250     */
251    private void buildScript(final int start1, final int end1, final int start2, final int end2,
252                             final EditScript<T> script) {
253
254        final Snake middle = getMiddleSnake(start1, end1, start2, end2);
255
256        if (middle == null
257                || middle.getStart() == end1 && middle.getDiag() == end1 - end2
258                || middle.getEnd() == start1 && middle.getDiag() == start1 - start2) {
259
260            int i = start1;
261            int j = start2;
262            while (i < end1 || j < end2) {
263                if (i < end1 && j < end2 && equator.equate(sequence1.get(i), sequence2.get(j))) {
264                    script.append(new KeepCommand<>(sequence1.get(i)));
265                    ++i;
266                    ++j;
267                } else {
268                    if (end1 - start1 > end2 - start2) {
269                        script.append(new DeleteCommand<>(sequence1.get(i)));
270                        ++i;
271                    } else {
272                        script.append(new InsertCommand<>(sequence2.get(j)));
273                        ++j;
274                    }
275                }
276            }
277
278        } else {
279
280            buildScript(start1, middle.getStart(),
281                        start2, middle.getStart() - middle.getDiag(),
282                        script);
283            for (int i = middle.getStart(); i < middle.getEnd(); ++i) {
284                script.append(new KeepCommand<>(sequence1.get(i)));
285            }
286            buildScript(middle.getEnd(), end1,
287                        middle.getEnd() - middle.getDiag(), end2,
288                        script);
289        }
290    }
291
292    /**
293     * This class is a simple placeholder to hold the end part of a path
294     * under construction in a {@link SequencesComparator SequencesComparator}.
295     */
296    private static class Snake {
297
298        /** Start index. */
299        private final int start;
300
301        /** End index. */
302        private final int end;
303
304        /** Diagonal number. */
305        private final int diag;
306
307        /**
308         * Simple constructor. Creates a new instance of Snake with specified indices.
309         *
310         * @param start  start index of the snake
311         * @param end  end index of the snake
312         * @param diag  diagonal number
313         */
314        public Snake(final int start, final int end, final int diag) {
315            this.start = start;
316            this.end   = end;
317            this.diag  = diag;
318        }
319
320        /**
321         * Get the start index of the snake.
322         *
323         * @return start index of the snake
324         */
325        public int getStart() {
326            return start;
327        }
328
329        /**
330         * Get the end index of the snake.
331         *
332         * @return end index of the snake
333         */
334        public int getEnd() {
335            return end;
336        }
337
338        /**
339         * Get the diagonal number of the snake.
340         *
341         * @return diagonal number of the snake
342         */
343        public int getDiag() {
344            return diag;
345        }
346    }
347}