1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17 package org.apache.commons.collections4.sequence;
18
19 import java.util.List;
20
21 import org.apache.commons.collections4.Equator;
22 import org.apache.commons.collections4.functors.DefaultEquator;
23
24 /**
25 * This class allows to compare two objects sequences.
26 * <p>
27 * The two sequences can hold any object type, as only the <code>equals</code>
28 * method is used to compare the elements of the sequences. It is guaranteed
29 * that the comparisons will always be done as <code>o1.equals(o2)</code> where
30 * <code>o1</code> belongs to the first sequence and <code>o2</code> belongs to
31 * the second sequence. This can be important if subclassing is used for some
32 * elements in the first sequence and the <code>equals</code> method is
33 * specialized.
34 * </p>
35 * <p>
36 * Comparison can be seen from two points of view: either as giving the smallest
37 * modification allowing to transform the first sequence into the second one, or
38 * as giving the longest sequence which is a subsequence of both initial
39 * sequences. The <code>equals</code> method is used to compare objects, so any
40 * object can be put into sequences. Modifications include deleting, inserting
41 * or keeping one object, starting from the beginning of the first sequence.
42 * </p>
43 * <p>
44 * This class implements the comparison algorithm, which is the very efficient
45 * algorithm from Eugene W. Myers
46 * <a href="http://www.cis.upenn.edu/~bcpierce/courses/dd/papers/diff.ps">
47 * An O(ND) Difference Algorithm and Its Variations</a>. This algorithm produces
48 * the shortest possible
49 * {@link EditScript edit script}
50 * containing all the
51 * {@link EditCommand commands}
52 * needed to transform the first sequence into the second one.
53 * </p>
54 *
55 * @see EditScript
56 * @see EditCommand
57 * @see CommandVisitor
58 *
59 * @since 4.0
60 */
61 public class SequencesComparator<T> {
62
63 /** First sequence. */
64 private final List<T> sequence1;
65
66 /** Second sequence. */
67 private final List<T> sequence2;
68
69 /** The equator used for testing object equality. */
70 private final Equator<? super T> equator;
71
72 /** Temporary variables. */
73 private final int[] vDown;
74 private final int[] vUp;
75
76 /**
77 * Simple constructor.
78 * <p>
79 * Creates a new instance of SequencesComparator using a {@link DefaultEquator}.
80 * <p>
81 * It is <em>guaranteed</em> that the comparisons will always be done as
82 * <code>o1.equals(o2)</code> where <code>o1</code> belongs to the first
83 * sequence and <code>o2</code> belongs to the second sequence. This can be
84 * important if subclassing is used for some elements in the first sequence
85 * and the <code>equals</code> method is specialized.
86 *
87 * @param sequence1 first sequence to be compared
88 * @param sequence2 second sequence to be compared
89 */
90 public SequencesComparator(final List<T> sequence1, final List<T> sequence2) {
91 this(sequence1, sequence2, DefaultEquator.defaultEquator());
92 }
93
94 /**
95 * Simple constructor.
96 * <p>
97 * Creates a new instance of SequencesComparator with a custom {@link Equator}.
98 * <p>
99 * It is <em>guaranteed</em> that the comparisons will always be done as
100 * <code>Equator.equate(o1, o2)</code> where <code>o1</code> belongs to the first
101 * sequence and <code>o2</code> belongs to the second sequence.
102 *
103 * @param sequence1 first sequence to be compared
104 * @param sequence2 second sequence to be compared
105 * @param equator the equator to use for testing object equality
106 */
107 public SequencesComparator(final List<T> sequence1, final List<T> sequence2, final Equator<? super T> equator) {
108 this.sequence1 = sequence1;
109 this.sequence2 = sequence2;
110 this.equator = equator;
111
112 final int size = sequence1.size() + sequence2.size() + 2;
113 vDown = new int[size];
114 vUp = new int[size];
115 }
116
117 /**
118 * Get the {@link EditScript} object.
119 * <p>
120 * It is guaranteed that the objects embedded in the {@link InsertCommand
121 * insert commands} come from the second sequence and that the objects
122 * embedded in either the {@link DeleteCommand delete commands} or
123 * {@link KeepCommand keep commands} come from the first sequence. This can
124 * be important if subclassing is used for some elements in the first
125 * sequence and the <code>equals</code> method is specialized.
126 *
127 * @return the edit script resulting from the comparison of the two
128 * sequences
129 */
130 public EditScript<T> getScript() {
131 final EditScript<T> script = new EditScript<>();
132 buildScript(0, sequence1.size(), 0, sequence2.size(), script);
133 return script;
134 }
135
136 /**
137 * Build a snake.
138 *
139 * @param start the value of the start of the snake
140 * @param diag the value of the diagonal of the snake
141 * @param end1 the value of the end of the first sequence to be compared
142 * @param end2 the value of the end of the second sequence to be compared
143 * @return the snake built
144 */
145 private Snake buildSnake(final int start, final int diag, final int end1, final int end2) {
146 int end = start;
147 while (end - diag < end2
148 && end < end1
149 && equator.equate(sequence1.get(end), sequence2.get(end - diag))) {
150 ++end;
151 }
152 return new Snake(start, end, diag);
153 }
154
155 /**
156 * Get the middle snake corresponding to two subsequences of the
157 * main sequences.
158 * <p>
159 * The snake is found using the MYERS Algorithm (this algorithms has
160 * also been implemented in the GNU diff program). This algorithm is
161 * explained in Eugene Myers article:
162 * <a href="http://www.cs.arizona.edu/people/gene/PAPERS/diff.ps">
163 * An O(ND) Difference Algorithm and Its Variations</a>.
164 *
165 * @param start1 the begin of the first sequence to be compared
166 * @param end1 the end of the first sequence to be compared
167 * @param start2 the begin of the second sequence to be compared
168 * @param end2 the end of the second sequence to be compared
169 * @return the middle snake
170 */
171 private Snake getMiddleSnake(final int start1, final int end1, final int start2, final int end2) {
172 // Myers Algorithm
173 // Initialisations
174 final int m = end1 - start1;
175 final int n = end2 - start2;
176 if (m == 0 || n == 0) {
177 return null;
178 }
179
180 final int delta = m - n;
181 final int sum = n + m;
182 final int offset = (sum % 2 == 0 ? sum : sum + 1) / 2;
183 vDown[1+offset] = start1;
184 vUp[1+offset] = end1 + 1;
185
186 for (int d = 0; d <= offset ; ++d) {
187 // Down
188 for (int k = -d; k <= d; k += 2) {
189 // First step
190
191 final int i = k + offset;
192 if (k == -d || k != d && vDown[i-1] < vDown[i+1]) {
193 vDown[i] = vDown[i+1];
194 } else {
195 vDown[i] = vDown[i-1] + 1;
196 }
197
198 int x = vDown[i];
199 int y = x - start1 + start2 - k;
200
201 while (x < end1 && y < end2 && equator.equate(sequence1.get(x), sequence2.get(y))) {
202 vDown[i] = ++x;
203 ++y;
204 }
205 // Second step
206 if (delta % 2 != 0 && delta - d <= k && k <= delta + d) {
207 if (vUp[i-delta] <= vDown[i]) { // NOPMD
208 return buildSnake(vUp[i-delta], k + start1 - start2, end1, end2);
209 }
210 }
211 }
212
213 // Up
214 for (int k = delta - d; k <= delta + d; k += 2) {
215 // First step
216 final int i = k + offset - delta;
217 if (k == delta - d
218 || k != delta + d && vUp[i+1] <= vUp[i-1]) {
219 vUp[i] = vUp[i+1] - 1;
220 } else {
221 vUp[i] = vUp[i-1];
222 }
223
224 int x = vUp[i] - 1;
225 int y = x - start1 + start2 - k;
226 while (x >= start1 && y >= start2
227 && equator.equate(sequence1.get(x), sequence2.get(y))) {
228 vUp[i] = x--;
229 y--;
230 }
231 // Second step
232 if (delta % 2 == 0 && -d <= k && k <= d ) {
233 if (vUp[i] <= vDown[i + delta]) { // NOPMD
234 return buildSnake(vUp[i], k + start1 - start2, end1, end2);
235 }
236 }
237 }
238 }
239
240 // this should not happen
241 throw new RuntimeException("Internal Error");
242 }
243
244
245 /**
246 * Build an edit script.
247 *
248 * @param start1 the begin of the first sequence to be compared
249 * @param end1 the end of the first sequence to be compared
250 * @param start2 the begin of the second sequence to be compared
251 * @param end2 the end of the second sequence to be compared
252 * @param script the edited script
253 */
254 private void buildScript(final int start1, final int end1, final int start2, final int end2,
255 final EditScript<T> script) {
256
257 final Snake middle = getMiddleSnake(start1, end1, start2, end2);
258
259 if (middle == null
260 || middle.getStart() == end1 && middle.getDiag() == end1 - end2
261 || middle.getEnd() == start1 && middle.getDiag() == start1 - start2) {
262
263 int i = start1;
264 int j = start2;
265 while (i < end1 || j < end2) {
266 if (i < end1 && j < end2 && equator.equate(sequence1.get(i), sequence2.get(j))) {
267 script.append(new KeepCommand<>(sequence1.get(i)));
268 ++i;
269 ++j;
270 } else {
271 if (end1 - start1 > end2 - start2) {
272 script.append(new DeleteCommand<>(sequence1.get(i)));
273 ++i;
274 } else {
275 script.append(new InsertCommand<>(sequence2.get(j)));
276 ++j;
277 }
278 }
279 }
280
281 } else {
282
283 buildScript(start1, middle.getStart(),
284 start2, middle.getStart() - middle.getDiag(),
285 script);
286 for (int i = middle.getStart(); i < middle.getEnd(); ++i) {
287 script.append(new KeepCommand<>(sequence1.get(i)));
288 }
289 buildScript(middle.getEnd(), end1,
290 middle.getEnd() - middle.getDiag(), end2,
291 script);
292 }
293 }
294
295 /**
296 * This class is a simple placeholder to hold the end part of a path
297 * under construction in a {@link SequencesComparator SequencesComparator}.
298 */
299 private static class Snake {
300
301 /** Start index. */
302 private final int start;
303
304 /** End index. */
305 private final int end;
306
307 /** Diagonal number. */
308 private final int diag;
309
310 /**
311 * Simple constructor. Creates a new instance of Snake with specified indices.
312 *
313 * @param start start index of the snake
314 * @param end end index of the snake
315 * @param diag diagonal number
316 */
317 public Snake(final int start, final int end, final int diag) {
318 this.start = start;
319 this.end = end;
320 this.diag = diag;
321 }
322
323 /**
324 * Get the start index of the snake.
325 *
326 * @return start index of the snake
327 */
328 public int getStart() {
329 return start;
330 }
331
332 /**
333 * Get the end index of the snake.
334 *
335 * @return end index of the snake
336 */
337 public int getEnd() {
338 return end;
339 }
340
341 /**
342 * Get the diagonal number of the snake.
343 *
344 * @return diagonal number of the snake
345 */
346 public int getDiag() {
347 return diag;
348 }
349 }
350 }