001    /* $Id: ExtendedBaseRules.java 471661 2006-11-06 08:09:25Z skitching $
002     *
003     * Licensed to the Apache Software Foundation (ASF) under one or more
004     * contributor license agreements.  See the NOTICE file distributed with
005     * this work for additional information regarding copyright ownership.
006     * The ASF licenses this file to You under the Apache License, Version 2.0
007     * (the "License"); you may not use this file except in compliance with
008     * the License.  You may obtain a copy of the License at
009     * 
010     *      http://www.apache.org/licenses/LICENSE-2.0
011     * 
012     * Unless required by applicable law or agreed to in writing, software
013     * distributed under the License is distributed on an "AS IS" BASIS,
014     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015     * See the License for the specific language governing permissions and
016     * limitations under the License.
017     */ 
018    
019    
020    package org.apache.commons.digester;
021    
022    
023    import java.util.ArrayList;
024    import java.util.Collections;
025    import java.util.Comparator;
026    import java.util.HashMap;
027    import java.util.Iterator;
028    import java.util.List;
029    import java.util.Map;
030    
031    
032    /**
033     * <p>Extension of {@link RulesBase} for complex schema.</p>
034     *
035     * <p>This is an extension of the basic pattern matching scheme
036     * intended to improve support for mapping complex xml-schema.
037     * It is intended to be a minimal extension of the standard rules
038     * big enough to support complex schema but without the full generality
039     * offered by more exotic matching pattern rules.</p>
040     *
041     * <h4>When should you use this rather than the original?</h4>
042     *
043     * <p>
044     * This pattern-matching engine is complex and slower than the basic
045     * default RulesBase class, but offers more functionality:
046     * <ul>
047     * <li>Universal patterns allow patterns to be specified which will match
048     *  regardless of whether there are "better matching" patterns available.</li>
049     * <li>Parent-match patterns (eg "a/b/?") allow matching for all direct
050     *  children of a specified element.</li>
051     * <li>Ancestor-match patterns (eg "a/b/*") allow matching all elements
052     *  nested within a specified element to any nesting depth.</li>
053     * <li>Completely-wild patterns ("*" or "!*") allow matching all elements.</li>
054     * </ul>
055     * </p>
056     *
057     * <h4>Universal Match Patterns</h4>
058     * 
059     * <p>The default RulesBase pattern-matching engine always attempts to find
060     * the "best matching pattern", and will ignore rules associated with other
061     * patterns that match but are not "as good". As an example, if the pattern
062     * "a/b/c" is associated with rules 1 and 2, and "*&#47;c" is associated with
063     * rules 3 and 4 then element "a/b/c" will cause only rules 1 and 2 to execute.
064     * Rules 3 and 4 do have matching patterns, but because the patterns are shorter
065     * and include wildcard characters they are regarded as being "not as good" as
066     * a direct match. In general, exact patterns are better than wildcard patterns,
067     * and among multiple patterns with wildcards, the longest is preferred.
068     * See the RulesBase class for more information.</p>
069     *
070     * <p>This feature of preferring "better" patterns can be a powerful tool.
071     * However it also means that patterns can interact in unexpected ways.</p>
072     *
073     * <p>When using the ExtendedBaseRules, any pattern prefixed with '!' bypasses 
074     * the "best match" feature. Even if there is an exact match or a longer 
075     * wildcard match, patterns prefixed by '!' will still be tested to see if 
076     * they match, and if so their associated Rule objects will be included in
077     * the set of rules to be executed in the normal manner.</p>
078     *
079     * <ul>
080     * <li>Pattern <code>"!*&#47;a/b"</code> matches whenever an 'b' element
081     *     is inside an 'a'.</li>
082     * <li>Pattern <code>"!a/b/?"</code> matches any child of a parent
083     *     matching <code>"a/b"</code> (see "Parent Match Patterns").</li>
084     * <li>Pattern <code>"!*&#47;a/b/?"</code> matches any child of a parent
085     *     matching <code>"!*&#47;a/b"</code> (see "Parent Match Patterns").</li>
086     * <li>Pattern <code>"!a/b/*"</code> matches any element whose path
087     *     starts with "a" then "b" (see "Ancestor Match Patterns").</li>
088     * <li>Pattern <code>"!*&#47;a/b/*"</code> matches any elements whose path
089     *     contains 'a/b' (see "Ancestor Match Patterns").</li>
090     * </ul>
091     *
092     * <h4>Parent Match Patterns</h4>
093     *
094     * <p>
095     * These will match direct child elements of a particular parent element.
096     * <ul>
097     * <li>
098     *  <code>"a/b/c/?"</code> matches any child whose parent matches
099     *  <code>"a/b/c"</code>.  Exact parent rules take precedence over Ancestor
100     *  Match patterns.
101     * </li>
102     * <li>
103     *  <code>"*&#47;a/b/c/?"</code> matches any child whose parent matches
104     *  <code>"*&#47;a/b/c"</code>.  The longest matching still applies to parent
105     *  matches but the length excludes the '?', which effectively means
106     *  that standard wildcard matches with the same level of depth are
107     *  chosen in preference.
108     * </li>
109     * </ul>
110     * </p>
111     *
112     * <h4>Ancestor Match Patterns</h4>
113     *
114     * <p>
115     * These will match elements whose parentage includes a particular sequence
116     * of elements.
117     * <ul>
118     * <li>
119     *  <code>"a/b/*"</code> matches any element whose path starts with
120     *  'a' then 'b'. Exact parent and parent match rules take precedence. 
121     *  The longest ancestor match will take precedence.
122     * </li>
123     * <li>
124     *  <code>"*&#47;a/b/*"</code> matches any elements whose path contains
125     *  an element 'a' followed by an element 'b'. The longest matching still 
126     *  applies but the length excludes the '*' at the end.
127     * </li>
128     * </ul>
129     * </p>
130     *
131     * <h4>Completely Wild Patterns</h4>
132     *
133     * <p>Pattern <code>"*"</code> matches every pattern that isn't matched by
134     * any other basic rule.</p>
135     *
136     * <p>Pattern <code>"!*"</code> matches every pattern.</p>
137     *
138     * <h4>Using The Extended Rules</h4>
139     *
140     * <p>By default, a Digester instance uses a {@link RulesBase} instance as
141     * its pattern matching engine. To use an ExtendedBaseRules instance, call
142     * the Digester.setRules method before adding any Rule objects to the digester
143     * instance:
144     * <pre>
145     *     Digester digester = new Digester();
146     *     digester.setRules( new ExtendedBaseRules() );
147     * </pre></p>
148     *
149     * <p>The most important thing to remember when using the extended rules is 
150     * that universal and non-universal patterns are completely independent.
151     * Universal patterns are never affected by the addition of new patterns
152     * or the removal of existing ones. Non-universal patterns are never affected
153     * by the addition of new <em>universal</em> patterns or the removal of 
154     * existing <em>universal</em> patterns. As in the basic matching rules, 
155     * non-universal (basic) patterns <strong>can</strong> be affected by the 
156     * addition of new <em>non-universal</em> patterns or the removal of existing 
157     * <em>non-universal</em> patterns, because only rules associated with the
158     * "best matching" pattern for each xml element are executed.
159     *
160     * <p> This means that you can use universal patterns to build up the simple 
161     * parts of your structure - for example defining universal creation and 
162     * property setting rules. More sophisticated and complex mapping will require 
163     * non-universal patterns and this might mean that some of the universal rules 
164     * will need to be replaced by a series of special cases using non-universal 
165     * rules. But by using universal rules as your backbone, these additions 
166     * should not break your existing rules.</p>
167     */
168    
169    
170    public class ExtendedBaseRules extends RulesBase {
171    
172    
173        // ----------------------------------------------------- Instance Variables
174    
175        /**
176         * Counts the entry number for the rules.
177         */
178        private int counter = 0;
179    
180    
181        /**
182         * The decision algorithm used (unfortunately) doesn't preserve the entry
183         * order.
184         * This map is used by a comparator which orders the list of matches
185         * before it's returned.
186         * This map stores the entry number keyed by the rule.
187         */
188        private Map order = new HashMap();
189    
190    
191        // --------------------------------------------------------- Public Methods
192    
193    
194        /**
195         * Register a new Rule instance matching the specified pattern.
196         *
197         * @param pattern Nesting pattern to be matched for this Rule
198         * @param rule Rule instance to be registered
199         */
200        public void add(String pattern, Rule rule) {
201            super.add(pattern, rule);
202            counter++;
203            order.put(rule, new Integer(counter));
204        }
205    
206    
207        /**
208         * Return a List of all registered Rule instances that match the specified
209         * nesting pattern, or a zero-length List if there are no matches.  If more
210         * than one Rule instance matches, they <strong>must</strong> be returned
211         * in the order originally registered through the <code>add()</code>
212         * method.
213         *
214         * @param pattern Nesting pattern to be matched
215         */
216        public List match(String namespace, String pattern) {
217            // calculate the pattern of the parent
218            // (if the element has one)
219            String parentPattern = "";
220            int lastIndex = pattern.lastIndexOf('/');
221    
222            boolean hasParent = true;
223            if (lastIndex == -1) {
224                // element has no parent
225                hasParent = false;
226    
227            } else {
228                // calculate the pattern of the parent
229                parentPattern = pattern.substring(0, lastIndex);
230    
231            }
232    
233    
234            // we keep the list of universal matches separate
235            List universalList = new ArrayList(counter);
236    
237            // Universal all wildards ('!*')
238            // These are always matched so always add them
239            List tempList = (List) this.cache.get("!*");
240            if (tempList != null) {
241                universalList.addAll(tempList);
242            }
243    
244            // Universal exact parent match
245            // need to get this now since only wildcards are considered later
246            tempList = (List) this.cache.get("!" + parentPattern + "/?");
247            if (tempList != null) {
248                universalList.addAll(tempList);
249            }
250    
251    
252            // base behaviour means that if we certain matches, we don't continue
253            // but we just have a single combined loop and so we have to set
254            // a variable
255            boolean ignoreBasicMatches = false;
256    
257    
258            // see if we have an exact basic pattern match
259            List rulesList = (List) this.cache.get(pattern);
260            if (rulesList != null) {
261                // we have a match!
262                // so ignore all basic matches from now on
263                ignoreBasicMatches = true;
264    
265            } else {
266    
267                // see if we have an exact child match
268                if (hasParent) {
269                    // matching children takes preference
270                    rulesList = (List) this.cache.get(parentPattern + "/?");
271                    if (rulesList != null) {
272                        // we have a match!
273                        // so ignore all basic matches from now on
274                        ignoreBasicMatches = true;
275                        
276                    } else {
277                        // we don't have a match yet - so try exact ancester
278                        //
279                        rulesList = findExactAncesterMatch(pattern);
280                        if (rulesList != null) {
281                            // we have a match!
282                            // so ignore all basic matches from now on
283                            ignoreBasicMatches = true;
284                        }
285                    }
286                }
287            }
288    
289    
290            // OK - we're ready for the big loop!
291            // Unlike the basic rules case,
292            // we have to go through for all those universal rules in all cases.
293    
294            // Find the longest key, ie more discriminant
295            String longKey = "";
296            int longKeyLength = 0;
297            
298            Iterator keys = this.cache.keySet().iterator();
299            while (keys.hasNext()) {
300                String key = (String) keys.next();
301    
302                // find out if it's a univeral pattern
303                // set a flag
304                boolean isUniversal = key.startsWith("!");
305                if (isUniversal) {
306                    // and find the underlying key
307                    key = key.substring(1, key.length());
308                }
309    
310                        
311                // don't need to check exact matches
312                boolean wildcardMatchStart = key.startsWith("*/");
313                boolean wildcardMatchEnd = key.endsWith("/*");
314                if (wildcardMatchStart || (isUniversal && wildcardMatchEnd)) {
315    
316                    boolean parentMatched = false;
317                    boolean basicMatched = false;
318                    boolean ancesterMatched = false;
319                    
320                    boolean parentMatchEnd = key.endsWith("/?");
321                    if (parentMatchEnd) {
322                        // try for a parent match
323                        parentMatched = parentMatch(key, pattern, parentPattern);
324    
325                    } else if (wildcardMatchEnd) {
326                        // check for ancester match
327                        if (wildcardMatchStart) {
328                            String patternBody = key.substring(2, key.length() - 2);
329                            if (pattern.endsWith(patternBody)) {
330                                ancesterMatched = true;
331                            } else {
332                                ancesterMatched = (pattern.indexOf(patternBody + "/") > -1);
333                            }
334                        } else {
335                            String bodyPattern = key.substring(0, key.length() - 2);
336                            if (pattern.startsWith(bodyPattern))
337                            {
338                                if (pattern.length() == bodyPattern.length()) {
339                                    // exact match
340                                    ancesterMatched = true;
341                                } else {
342                                    ancesterMatched = ( pattern.charAt(bodyPattern.length()) == '/' );
343                                }
344                            } else {
345                                ancesterMatched = false;  
346                            } 
347                        }               
348                    } else {
349                        // try for a base match
350                        basicMatched = basicMatch(key, pattern);
351                    }
352    
353                    if (parentMatched || basicMatched || ancesterMatched) {
354                        if (isUniversal) {
355                            // universal rules go straight in
356                            // (no longest matching rule)
357                            tempList = (List) this.cache.get("!" + key);
358                            if (tempList != null) {
359                                universalList.addAll(tempList);
360                            }
361    
362                        } else {
363                            if (!ignoreBasicMatches) {
364                                // ensure that all parent matches are SHORTER
365                                // than rules with same level of matching.
366                                //
367                                // the calculations below don't work for universal
368                                // matching, but we don't care because in that case
369                                // this if-stmt is not entered.
370                                int keyLength = key.length();
371                                if (wildcardMatchStart) {
372                                    --keyLength;
373                                }
374                                if (wildcardMatchEnd) {
375                                    --keyLength;
376                                } else if (parentMatchEnd) {
377                                    --keyLength;
378                                }
379    
380                                if (keyLength > longKeyLength) {
381                                    rulesList = (List) this.cache.get(key);
382                                    longKey = key;
383                                    longKeyLength = keyLength;
384                                }
385                            }
386                        }
387                    }
388                }
389            }
390    
391    
392            // '*' works in practice as a default matching
393            // (this is because anything is a deeper match!)
394            if (rulesList == null) {
395                rulesList = (List) this.cache.get("*");
396            }
397    
398            // if we've matched a basic pattern, then add to the universal list
399            if (rulesList != null) {
400                universalList.addAll(rulesList);
401            }
402    
403    
404            // don't filter if namespace is null
405            if (namespace != null) {
406                // remove invalid namespaces
407                Iterator it = universalList.iterator();
408                while (it.hasNext()) {
409                    Rule rule = (Rule) it.next();
410                    String ns_uri = rule.getNamespaceURI();
411                    if (ns_uri != null && !ns_uri.equals(namespace)) {
412                        it.remove();
413                    }
414                }
415            }
416    
417    
418            // need to make sure that the collection is sort in the order
419            // of addition.  We use a custom comparator for this
420            Collections.sort(
421                    universalList,
422                    new Comparator() {
423    
424                        public int compare(Object o1, Object o2) throws ClassCastException {
425                            // Get the entry order from the map
426                            Integer i1 = (Integer) order.get(o1);
427                            Integer i2 = (Integer) order.get(o2);
428    
429                            // and use that to perform the comparison
430                            if (i1 == null) {
431                                if (i2 == null) {
432    
433                                    return 0;
434    
435                                } else {
436    
437                                    return -1;
438    
439                                }
440                            } else if (i2 == null) {
441                                return 1;
442                            }
443    
444                            return (i1.intValue() - i2.intValue());
445                        }
446                    });
447    
448            return universalList;
449        }
450    
451        /**
452         * Matching parent.
453         */
454        private boolean parentMatch(String key, String pattern, String parentPattern) {
455            return parentPattern.endsWith(key.substring(1, key.length() - 2));
456        }
457    
458        /**
459         * Standard match.
460         * Matches the end of the pattern to the key.
461         */
462        private boolean basicMatch(String key, String pattern) {
463            return (pattern.equals(key.substring(2)) ||
464                    pattern.endsWith(key.substring(1)));
465        }
466        
467        /**
468         * Finds an exact ancester match for given pattern
469         */
470        private List findExactAncesterMatch(String parentPattern) {
471            List matchingRules = null;
472            int lastIndex = parentPattern.length();
473            while (lastIndex-- > 0) {
474                lastIndex = parentPattern.lastIndexOf('/', lastIndex);
475                if (lastIndex > 0) {
476                    matchingRules = (List) this.cache.get(parentPattern.substring(0, lastIndex) + "/*");
477                    if (matchingRules != null) {
478                        return matchingRules;
479                    }
480                }
481            }
482            return null;
483        }
484    }