001 /* $Id: ExtendedBaseRules.java 992060 2010-09-02 19:09:47Z simonetripodi $ 002 * 003 * Licensed to the Apache Software Foundation (ASF) under one or more 004 * contributor license agreements. See the NOTICE file distributed with 005 * this work for additional information regarding copyright ownership. 006 * The ASF licenses this file to You under the Apache License, Version 2.0 007 * (the "License"); you may not use this file except in compliance with 008 * the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018 019 020 package org.apache.commons.digester; 021 022 023 import java.util.ArrayList; 024 import java.util.Collections; 025 import java.util.Comparator; 026 import java.util.HashMap; 027 import java.util.Iterator; 028 import java.util.List; 029 import java.util.Map; 030 031 032 /** 033 * <p>Extension of {@link RulesBase} for complex schema.</p> 034 * 035 * <p>This is an extension of the basic pattern matching scheme 036 * intended to improve support for mapping complex xml-schema. 037 * It is intended to be a minimal extension of the standard rules 038 * big enough to support complex schema but without the full generality 039 * offered by more exotic matching pattern rules.</p> 040 * 041 * <h4>When should you use this rather than the original?</h4> 042 * 043 * <p> 044 * This pattern-matching engine is complex and slower than the basic 045 * default RulesBase class, but offers more functionality: 046 * <ul> 047 * <li>Universal patterns allow patterns to be specified which will match 048 * regardless of whether there are "better matching" patterns available.</li> 049 * <li>Parent-match patterns (eg "a/b/?") allow matching for all direct 050 * children of a specified element.</li> 051 * <li>Ancestor-match patterns (eg "a/b/*") allow matching all elements 052 * nested within a specified element to any nesting depth.</li> 053 * <li>Completely-wild patterns ("*" or "!*") allow matching all elements.</li> 054 * </ul> 055 * </p> 056 * 057 * <h4>Universal Match Patterns</h4> 058 * 059 * <p>The default RulesBase pattern-matching engine always attempts to find 060 * the "best matching pattern", and will ignore rules associated with other 061 * patterns that match but are not "as good". As an example, if the pattern 062 * "a/b/c" is associated with rules 1 and 2, and "*/c" is associated with 063 * rules 3 and 4 then element "a/b/c" will cause only rules 1 and 2 to execute. 064 * Rules 3 and 4 do have matching patterns, but because the patterns are shorter 065 * and include wildcard characters they are regarded as being "not as good" as 066 * a direct match. In general, exact patterns are better than wildcard patterns, 067 * and among multiple patterns with wildcards, the longest is preferred. 068 * See the RulesBase class for more information.</p> 069 * 070 * <p>This feature of preferring "better" patterns can be a powerful tool. 071 * However it also means that patterns can interact in unexpected ways.</p> 072 * 073 * <p>When using the ExtendedBaseRules, any pattern prefixed with '!' bypasses 074 * the "best match" feature. Even if there is an exact match or a longer 075 * wildcard match, patterns prefixed by '!' will still be tested to see if 076 * they match, and if so their associated Rule objects will be included in 077 * the set of rules to be executed in the normal manner.</p> 078 * 079 * <ul> 080 * <li>Pattern <code>"!*/a/b"</code> matches whenever an 'b' element 081 * is inside an 'a'.</li> 082 * <li>Pattern <code>"!a/b/?"</code> matches any child of a parent 083 * matching <code>"a/b"</code> (see "Parent Match Patterns").</li> 084 * <li>Pattern <code>"!*/a/b/?"</code> matches any child of a parent 085 * matching <code>"!*/a/b"</code> (see "Parent Match Patterns").</li> 086 * <li>Pattern <code>"!a/b/*"</code> matches any element whose path 087 * starts with "a" then "b" (see "Ancestor Match Patterns").</li> 088 * <li>Pattern <code>"!*/a/b/*"</code> matches any elements whose path 089 * contains 'a/b' (see "Ancestor Match Patterns").</li> 090 * </ul> 091 * 092 * <h4>Parent Match Patterns</h4> 093 * 094 * <p> 095 * These will match direct child elements of a particular parent element. 096 * <ul> 097 * <li> 098 * <code>"a/b/c/?"</code> matches any child whose parent matches 099 * <code>"a/b/c"</code>. Exact parent rules take precedence over Ancestor 100 * Match patterns. 101 * </li> 102 * <li> 103 * <code>"*/a/b/c/?"</code> matches any child whose parent matches 104 * <code>"*/a/b/c"</code>. The longest matching still applies to parent 105 * matches but the length excludes the '?', which effectively means 106 * that standard wildcard matches with the same level of depth are 107 * chosen in preference. 108 * </li> 109 * </ul> 110 * </p> 111 * 112 * <h4>Ancestor Match Patterns</h4> 113 * 114 * <p> 115 * These will match elements whose parentage includes a particular sequence 116 * of elements. 117 * <ul> 118 * <li> 119 * <code>"a/b/*"</code> matches any element whose path starts with 120 * 'a' then 'b'. Exact parent and parent match rules take precedence. 121 * The longest ancestor match will take precedence. 122 * </li> 123 * <li> 124 * <code>"*/a/b/*"</code> matches any elements whose path contains 125 * an element 'a' followed by an element 'b'. The longest matching still 126 * applies but the length excludes the '*' at the end. 127 * </li> 128 * </ul> 129 * </p> 130 * 131 * <h4>Completely Wild Patterns</h4> 132 * 133 * <p>Pattern <code>"*"</code> matches every pattern that isn't matched by 134 * any other basic rule.</p> 135 * 136 * <p>Pattern <code>"!*"</code> matches every pattern.</p> 137 * 138 * <h4>Using The Extended Rules</h4> 139 * 140 * <p>By default, a Digester instance uses a {@link RulesBase} instance as 141 * its pattern matching engine. To use an ExtendedBaseRules instance, call 142 * the Digester.setRules method before adding any Rule objects to the digester 143 * instance: 144 * <pre> 145 * Digester digester = new Digester(); 146 * digester.setRules( new ExtendedBaseRules() ); 147 * </pre></p> 148 * 149 * <p>The most important thing to remember when using the extended rules is 150 * that universal and non-universal patterns are completely independent. 151 * Universal patterns are never affected by the addition of new patterns 152 * or the removal of existing ones. Non-universal patterns are never affected 153 * by the addition of new <em>universal</em> patterns or the removal of 154 * existing <em>universal</em> patterns. As in the basic matching rules, 155 * non-universal (basic) patterns <strong>can</strong> be affected by the 156 * addition of new <em>non-universal</em> patterns or the removal of existing 157 * <em>non-universal</em> patterns, because only rules associated with the 158 * "best matching" pattern for each xml element are executed. 159 * 160 * <p> This means that you can use universal patterns to build up the simple 161 * parts of your structure - for example defining universal creation and 162 * property setting rules. More sophisticated and complex mapping will require 163 * non-universal patterns and this might mean that some of the universal rules 164 * will need to be replaced by a series of special cases using non-universal 165 * rules. But by using universal rules as your backbone, these additions 166 * should not break your existing rules.</p> 167 */ 168 169 170 public class ExtendedBaseRules extends RulesBase { 171 172 173 // ----------------------------------------------------- Instance Variables 174 175 /** 176 * Counts the entry number for the rules. 177 */ 178 private int counter = 0; 179 180 181 /** 182 * The decision algorithm used (unfortunately) doesn't preserve the entry 183 * order. 184 * This map is used by a comparator which orders the list of matches 185 * before it's returned. 186 * This map stores the entry number keyed by the rule. 187 */ 188 private Map<Rule, Integer> order = new HashMap<Rule, Integer>(); 189 190 191 // --------------------------------------------------------- Public Methods 192 193 194 /** 195 * Register a new Rule instance matching the specified pattern. 196 * 197 * @param pattern Nesting pattern to be matched for this Rule 198 * @param rule Rule instance to be registered 199 */ 200 @Override 201 public void add(String pattern, Rule rule) { 202 super.add(pattern, rule); 203 counter++; 204 order.put(rule, counter); 205 } 206 207 208 /** 209 * Return a List of all registered Rule instances that match the specified 210 * nesting pattern, or a zero-length List if there are no matches. If more 211 * than one Rule instance matches, they <strong>must</strong> be returned 212 * in the order originally registered through the <code>add()</code> 213 * method. 214 * 215 * @param pattern Nesting pattern to be matched 216 */ 217 @Override 218 public List<Rule> match(String namespace, String pattern) { 219 // calculate the pattern of the parent 220 // (if the element has one) 221 String parentPattern = ""; 222 int lastIndex = pattern.lastIndexOf('/'); 223 224 boolean hasParent = true; 225 if (lastIndex == -1) { 226 // element has no parent 227 hasParent = false; 228 229 } else { 230 // calculate the pattern of the parent 231 parentPattern = pattern.substring(0, lastIndex); 232 233 } 234 235 236 // we keep the list of universal matches separate 237 List<Rule> universalList = new ArrayList<Rule>(counter); 238 239 // Universal all wildards ('!*') 240 // These are always matched so always add them 241 List<Rule> tempList = this.cache.get("!*"); 242 if (tempList != null) { 243 universalList.addAll(tempList); 244 } 245 246 // Universal exact parent match 247 // need to get this now since only wildcards are considered later 248 tempList = this.cache.get("!" + parentPattern + "/?"); 249 if (tempList != null) { 250 universalList.addAll(tempList); 251 } 252 253 254 // base behaviour means that if we certain matches, we don't continue 255 // but we just have a single combined loop and so we have to set 256 // a variable 257 boolean ignoreBasicMatches = false; 258 259 260 // see if we have an exact basic pattern match 261 List<Rule> rulesList = this.cache.get(pattern); 262 if (rulesList != null) { 263 // we have a match! 264 // so ignore all basic matches from now on 265 ignoreBasicMatches = true; 266 267 } else { 268 269 // see if we have an exact child match 270 if (hasParent) { 271 // matching children takes preference 272 rulesList = this.cache.get(parentPattern + "/?"); 273 if (rulesList != null) { 274 // we have a match! 275 // so ignore all basic matches from now on 276 ignoreBasicMatches = true; 277 278 } else { 279 // we don't have a match yet - so try exact ancester 280 // 281 rulesList = findExactAncesterMatch(pattern); 282 if (rulesList != null) { 283 // we have a match! 284 // so ignore all basic matches from now on 285 ignoreBasicMatches = true; 286 } 287 } 288 } 289 } 290 291 292 // OK - we're ready for the big loop! 293 // Unlike the basic rules case, 294 // we have to go through for all those universal rules in all cases. 295 296 // Find the longest key, ie more discriminant 297 String longKey = ""; 298 int longKeyLength = 0; 299 300 for (String key : this.cache.keySet()) { 301 302 // find out if it's a univeral pattern 303 // set a flag 304 boolean isUniversal = key.startsWith("!"); 305 if (isUniversal) { 306 // and find the underlying key 307 key = key.substring(1, key.length()); 308 } 309 310 311 // don't need to check exact matches 312 boolean wildcardMatchStart = key.startsWith("*/"); 313 boolean wildcardMatchEnd = key.endsWith("/*"); 314 if (wildcardMatchStart || (isUniversal && wildcardMatchEnd)) { 315 316 boolean parentMatched = false; 317 boolean basicMatched = false; 318 boolean ancesterMatched = false; 319 320 boolean parentMatchEnd = key.endsWith("/?"); 321 if (parentMatchEnd) { 322 // try for a parent match 323 parentMatched = parentMatch(key, pattern, parentPattern); 324 325 } else if (wildcardMatchEnd) { 326 // check for ancester match 327 if (wildcardMatchStart) { 328 String patternBody = key.substring(2, key.length() - 2); 329 if (pattern.endsWith(patternBody)) { 330 ancesterMatched = true; 331 } else { 332 ancesterMatched = (pattern.indexOf(patternBody + "/") > -1); 333 } 334 } else { 335 String bodyPattern = key.substring(0, key.length() - 2); 336 if (pattern.startsWith(bodyPattern)) 337 { 338 if (pattern.length() == bodyPattern.length()) { 339 // exact match 340 ancesterMatched = true; 341 } else { 342 ancesterMatched = ( pattern.charAt(bodyPattern.length()) == '/' ); 343 } 344 } else { 345 ancesterMatched = false; 346 } 347 } 348 } else { 349 // try for a base match 350 basicMatched = basicMatch(key, pattern); 351 } 352 353 if (parentMatched || basicMatched || ancesterMatched) { 354 if (isUniversal) { 355 // universal rules go straight in 356 // (no longest matching rule) 357 tempList = this.cache.get("!" + key); 358 if (tempList != null) { 359 universalList.addAll(tempList); 360 } 361 362 } else { 363 if (!ignoreBasicMatches) { 364 // ensure that all parent matches are SHORTER 365 // than rules with same level of matching. 366 // 367 // the calculations below don't work for universal 368 // matching, but we don't care because in that case 369 // this if-stmt is not entered. 370 int keyLength = key.length(); 371 if (wildcardMatchStart) { 372 --keyLength; 373 } 374 if (wildcardMatchEnd) { 375 --keyLength; 376 } else if (parentMatchEnd) { 377 --keyLength; 378 } 379 380 if (keyLength > longKeyLength) { 381 rulesList = this.cache.get(key); 382 longKey = key; 383 longKeyLength = keyLength; 384 } 385 } 386 } 387 } 388 } 389 } 390 391 392 // '*' works in practice as a default matching 393 // (this is because anything is a deeper match!) 394 if (rulesList == null) { 395 rulesList = this.cache.get("*"); 396 } 397 398 // if we've matched a basic pattern, then add to the universal list 399 if (rulesList != null) { 400 universalList.addAll(rulesList); 401 } 402 403 404 // don't filter if namespace is null 405 if (namespace != null) { 406 // remove invalid namespaces 407 Iterator<Rule> it = universalList.iterator(); 408 while (it.hasNext()) { 409 Rule rule = it.next(); 410 String ns_uri = rule.getNamespaceURI(); 411 if (ns_uri != null && !ns_uri.equals(namespace)) { 412 it.remove(); 413 } 414 } 415 } 416 417 418 // need to make sure that the collection is sort in the order 419 // of addition. We use a custom comparator for this 420 Collections.sort( 421 universalList, 422 new Comparator<Rule>() { 423 424 public int compare(Rule r1, Rule r2) throws ClassCastException { 425 // Get the entry order from the map 426 Integer i1 = order.get(r1); 427 Integer i2 = order.get(r2); 428 429 // and use that to perform the comparison 430 if (i1 == null) { 431 if (i2 == null) { 432 433 return 0; 434 435 } else { 436 437 return -1; 438 439 } 440 } else if (i2 == null) { 441 return 1; 442 } 443 444 return (i1.intValue() - i2.intValue()); 445 } 446 }); 447 448 return universalList; 449 } 450 451 /** 452 * Matching parent. 453 */ 454 private boolean parentMatch(String key, String pattern, String parentPattern) { 455 return parentPattern.endsWith(key.substring(1, key.length() - 2)); 456 } 457 458 /** 459 * Standard match. 460 * Matches the end of the pattern to the key. 461 */ 462 private boolean basicMatch(String key, String pattern) { 463 return (pattern.equals(key.substring(2)) || 464 pattern.endsWith(key.substring(1))); 465 } 466 467 /** 468 * Finds an exact ancester match for given pattern 469 */ 470 private List<Rule> findExactAncesterMatch(String parentPattern) { 471 List<Rule> matchingRules = null; 472 int lastIndex = parentPattern.length(); 473 while (lastIndex-- > 0) { 474 lastIndex = parentPattern.lastIndexOf('/', lastIndex); 475 if (lastIndex > 0) { 476 matchingRules = this.cache.get(parentPattern.substring(0, lastIndex) + "/*"); 477 if (matchingRules != null) { 478 return matchingRules; 479 } 480 } 481 } 482 return null; 483 } 484 }