001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017package org.apache.commons.collections4.sequence; 018 019import java.util.List; 020 021import org.apache.commons.collections4.Equator; 022import org.apache.commons.collections4.functors.DefaultEquator; 023 024/** 025 * This class allows to compare two objects sequences. 026 * <p> 027 * The two sequences can hold any object type, as only the <code>equals</code> 028 * method is used to compare the elements of the sequences. It is guaranteed 029 * that the comparisons will always be done as <code>o1.equals(o2)</code> where 030 * <code>o1</code> belongs to the first sequence and <code>o2</code> belongs to 031 * the second sequence. This can be important if subclassing is used for some 032 * elements in the first sequence and the <code>equals</code> method is 033 * specialized. 034 * <p> 035 * Comparison can be seen from two points of view: either as giving the smallest 036 * modification allowing to transform the first sequence into the second one, or 037 * as giving the longest sequence which is a subsequence of both initial 038 * sequences. The <code>equals</code> method is used to compare objects, so any 039 * object can be put into sequences. Modifications include deleting, inserting 040 * or keeping one object, starting from the beginning of the first sequence. 041 * <p> 042 * This class implements the comparison algorithm, which is the very efficient 043 * algorithm from Eugene W. Myers 044 * <a href="http://www.cis.upenn.edu/~bcpierce/courses/dd/papers/diff.ps"> 045 * An O(ND) Difference Algorithm and Its Variations</a>. This algorithm produces 046 * the shortest possible 047 * {@link EditScript edit script} 048 * containing all the 049 * {@link EditCommand commands} 050 * needed to transform the first sequence into the second one. 051 * 052 * @see EditScript 053 * @see EditCommand 054 * @see CommandVisitor 055 * 056 * @since 4.0 057 * @version $Id: SequencesComparator.html 972421 2015-11-14 20:00:04Z tn $ 058 */ 059public class SequencesComparator<T> { 060 061 /** First sequence. */ 062 private final List<T> sequence1; 063 064 /** Second sequence. */ 065 private final List<T> sequence2; 066 067 /** The equator used for testing object equality. */ 068 private final Equator<? super T> equator; 069 070 /** Temporary variables. */ 071 private final int[] vDown; 072 private final int[] vUp; 073 074 /** 075 * Simple constructor. 076 * <p> 077 * Creates a new instance of SequencesComparator using a {@link DefaultEquator}. 078 * <p> 079 * It is <em>guaranteed</em> that the comparisons will always be done as 080 * <code>o1.equals(o2)</code> where <code>o1</code> belongs to the first 081 * sequence and <code>o2</code> belongs to the second sequence. This can be 082 * important if subclassing is used for some elements in the first sequence 083 * and the <code>equals</code> method is specialized. 084 * 085 * @param sequence1 first sequence to be compared 086 * @param sequence2 second sequence to be compared 087 */ 088 public SequencesComparator(final List<T> sequence1, final List<T> sequence2) { 089 this(sequence1, sequence2, DefaultEquator.defaultEquator()); 090 } 091 092 /** 093 * Simple constructor. 094 * <p> 095 * Creates a new instance of SequencesComparator with a custom {@link Equator}. 096 * <p> 097 * It is <em>guaranteed</em> that the comparisons will always be done as 098 * <code>Equator.equate(o1, o2)</code> where <code>o1</code> belongs to the first 099 * sequence and <code>o2</code> belongs to the second sequence. 100 * 101 * @param sequence1 first sequence to be compared 102 * @param sequence2 second sequence to be compared 103 * @param equator the equator to use for testing object equality 104 */ 105 public SequencesComparator(final List<T> sequence1, final List<T> sequence2, final Equator<? super T> equator) { 106 this.sequence1 = sequence1; 107 this.sequence2 = sequence2; 108 this.equator = equator; 109 110 final int size = sequence1.size() + sequence2.size() + 2; 111 vDown = new int[size]; 112 vUp = new int[size]; 113 } 114 115 /** 116 * Get the {@link EditScript} object. 117 * <p> 118 * It is guaranteed that the objects embedded in the {@link InsertCommand 119 * insert commands} come from the second sequence and that the objects 120 * embedded in either the {@link DeleteCommand delete commands} or 121 * {@link KeepCommand keep commands} come from the first sequence. This can 122 * be important if subclassing is used for some elements in the first 123 * sequence and the <code>equals</code> method is specialized. 124 * 125 * @return the edit script resulting from the comparison of the two 126 * sequences 127 */ 128 public EditScript<T> getScript() { 129 final EditScript<T> script = new EditScript<T>(); 130 buildScript(0, sequence1.size(), 0, sequence2.size(), script); 131 return script; 132 } 133 134 /** 135 * Build a snake. 136 * 137 * @param start the value of the start of the snake 138 * @param diag the value of the diagonal of the snake 139 * @param end1 the value of the end of the first sequence to be compared 140 * @param end2 the value of the end of the second sequence to be compared 141 * @return the snake built 142 */ 143 private Snake buildSnake(final int start, final int diag, final int end1, final int end2) { 144 int end = start; 145 while (end - diag < end2 146 && end < end1 147 && equator.equate(sequence1.get(end), sequence2.get(end - diag))) { 148 ++end; 149 } 150 return new Snake(start, end, diag); 151 } 152 153 /** 154 * Get the middle snake corresponding to two subsequences of the 155 * main sequences. 156 * <p> 157 * The snake is found using the MYERS Algorithm (this algorithms has 158 * also been implemented in the GNU diff program). This algorithm is 159 * explained in Eugene Myers article: 160 * <a href="http://www.cs.arizona.edu/people/gene/PAPERS/diff.ps"> 161 * An O(ND) Difference Algorithm and Its Variations</a>. 162 * 163 * @param start1 the begin of the first sequence to be compared 164 * @param end1 the end of the first sequence to be compared 165 * @param start2 the begin of the second sequence to be compared 166 * @param end2 the end of the second sequence to be compared 167 * @return the middle snake 168 */ 169 private Snake getMiddleSnake(final int start1, final int end1, final int start2, final int end2) { 170 // Myers Algorithm 171 // Initialisations 172 final int m = end1 - start1; 173 final int n = end2 - start2; 174 if (m == 0 || n == 0) { 175 return null; 176 } 177 178 final int delta = m - n; 179 final int sum = n + m; 180 final int offset = (sum % 2 == 0 ? sum : sum + 1) / 2; 181 vDown[1+offset] = start1; 182 vUp[1+offset] = end1 + 1; 183 184 for (int d = 0; d <= offset ; ++d) { 185 // Down 186 for (int k = -d; k <= d; k += 2) { 187 // First step 188 189 final int i = k + offset; 190 if (k == -d || k != d && vDown[i-1] < vDown[i+1]) { 191 vDown[i] = vDown[i+1]; 192 } else { 193 vDown[i] = vDown[i-1] + 1; 194 } 195 196 int x = vDown[i]; 197 int y = x - start1 + start2 - k; 198 199 while (x < end1 && y < end2 && equator.equate(sequence1.get(x), sequence2.get(y))) { 200 vDown[i] = ++x; 201 ++y; 202 } 203 // Second step 204 if (delta % 2 != 0 && delta - d <= k && k <= delta + d) { 205 if (vUp[i-delta] <= vDown[i]) { 206 return buildSnake(vUp[i-delta], k + start1 - start2, end1, end2); 207 } 208 } 209 } 210 211 // Up 212 for (int k = delta - d; k <= delta + d; k += 2) { 213 // First step 214 final int i = k + offset - delta; 215 if (k == delta - d 216 || k != delta + d && vUp[i+1] <= vUp[i-1]) { 217 vUp[i] = vUp[i+1] - 1; 218 } else { 219 vUp[i] = vUp[i-1]; 220 } 221 222 int x = vUp[i] - 1; 223 int y = x - start1 + start2 - k; 224 while (x >= start1 && y >= start2 225 && equator.equate(sequence1.get(x), sequence2.get(y))) { 226 vUp[i] = x--; 227 y--; 228 } 229 // Second step 230 if (delta % 2 == 0 && -d <= k && k <= d ) { 231 if (vUp[i] <= vDown[i + delta]) { 232 return buildSnake(vUp[i], k + start1 - start2, end1, end2); 233 } 234 } 235 } 236 } 237 238 // this should not happen 239 throw new RuntimeException("Internal Error"); 240 } 241 242 243 /** 244 * Build an edit script. 245 * 246 * @param start1 the begin of the first sequence to be compared 247 * @param end1 the end of the first sequence to be compared 248 * @param start2 the begin of the second sequence to be compared 249 * @param end2 the end of the second sequence to be compared 250 * @param script the edited script 251 */ 252 private void buildScript(final int start1, final int end1, final int start2, final int end2, 253 final EditScript<T> script) { 254 255 final Snake middle = getMiddleSnake(start1, end1, start2, end2); 256 257 if (middle == null 258 || middle.getStart() == end1 && middle.getDiag() == end1 - end2 259 || middle.getEnd() == start1 && middle.getDiag() == start1 - start2) { 260 261 int i = start1; 262 int j = start2; 263 while (i < end1 || j < end2) { 264 if (i < end1 && j < end2 && equator.equate(sequence1.get(i), sequence2.get(j))) { 265 script.append(new KeepCommand<T>(sequence1.get(i))); 266 ++i; 267 ++j; 268 } else { 269 if (end1 - start1 > end2 - start2) { 270 script.append(new DeleteCommand<T>(sequence1.get(i))); 271 ++i; 272 } else { 273 script.append(new InsertCommand<T>(sequence2.get(j))); 274 ++j; 275 } 276 } 277 } 278 279 } else { 280 281 buildScript(start1, middle.getStart(), 282 start2, middle.getStart() - middle.getDiag(), 283 script); 284 for (int i = middle.getStart(); i < middle.getEnd(); ++i) { 285 script.append(new KeepCommand<T>(sequence1.get(i))); 286 } 287 buildScript(middle.getEnd(), end1, 288 middle.getEnd() - middle.getDiag(), end2, 289 script); 290 } 291 } 292 293 /** 294 * This class is a simple placeholder to hold the end part of a path 295 * under construction in a {@link SequencesComparator SequencesComparator}. 296 */ 297 private static class Snake { 298 299 /** Start index. */ 300 private final int start; 301 302 /** End index. */ 303 private final int end; 304 305 /** Diagonal number. */ 306 private final int diag; 307 308 /** 309 * Simple constructor. Creates a new instance of Snake with specified indices. 310 * 311 * @param start start index of the snake 312 * @param end end index of the snake 313 * @param diag diagonal number 314 */ 315 public Snake(final int start, final int end, final int diag) { 316 this.start = start; 317 this.end = end; 318 this.diag = diag; 319 } 320 321 /** 322 * Get the start index of the snake. 323 * 324 * @return start index of the snake 325 */ 326 public int getStart() { 327 return start; 328 } 329 330 /** 331 * Get the end index of the snake. 332 * 333 * @return end index of the snake 334 */ 335 public int getEnd() { 336 return end; 337 } 338 339 /** 340 * Get the diagonal number of the snake. 341 * 342 * @return diagonal number of the snake 343 */ 344 public int getDiag() { 345 return diag; 346 } 347 } 348}