View Javadoc

1   package org.apache.commons.digester3;
2   
3   /*
4    * Licensed to the Apache Software Foundation (ASF) under one
5    * or more contributor license agreements.  See the NOTICE file
6    * distributed with this work for additional information
7    * regarding copyright ownership.  The ASF licenses this file
8    * to you under the Apache License, Version 2.0 (the
9    * "License"); you may not use this file except in compliance
10   * with the License.  You may obtain a copy of the License at
11   *
12   *   http://www.apache.org/licenses/LICENSE-2.0
13   *
14   * Unless required by applicable law or agreed to in writing,
15   * software distributed under the License is distributed on an
16   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17   * KIND, either express or implied.  See the License for the
18   * specific language governing permissions and limitations
19   * under the License.
20   */
21  
22  import javax.xml.parsers.DocumentBuilder;
23  import javax.xml.parsers.DocumentBuilderFactory;
24  import javax.xml.parsers.ParserConfigurationException;
25  
26  import org.w3c.dom.Attr;
27  import org.w3c.dom.DOMException;
28  import org.w3c.dom.Document;
29  import org.w3c.dom.Element;
30  import org.w3c.dom.Node;
31  import org.xml.sax.Attributes;
32  import org.xml.sax.ContentHandler;
33  import org.xml.sax.SAXException;
34  import org.xml.sax.helpers.DefaultHandler;
35  
36  /**
37   * A rule implementation that creates a DOM {@link org.w3c.dom.Node Node} containing the XML at the element that matched
38   * the rule. Two concrete types of nodes can be created by this rule:
39   * <ul>
40   * <li>the default is to create an {@link org.w3c.dom.Element Element} node. The created element will correspond to the
41   * element that matched the rule, containing all XML content underneath that element.</li>
42   * <li>alternatively, this rule can create nodes of type {@link org.w3c.dom.DocumentFragment DocumentFragment}, which
43   * will contain only the XML content under the element the rule was trigged on.</li>
44   * </ul>
45   * The created node will be normalized, meaning it will not contain text nodes that only contain white space characters.
46   * <p>
47   * The created <code>Node</code> will be pushed on Digester's object stack when done. To use it in the context of
48   * another DOM {@link org.w3c.dom.Document Document}, it must be imported first, using the Document method
49   * {@link org.w3c.dom.Document#importNode(org.w3c.dom.Node, boolean) importNode()}.
50   * </p>
51   * <p>
52   * <strong>Important Note:</strong> This is implemented by replacing the SAX {@link org.xml.sax.ContentHandler
53   * ContentHandler} in the parser used by Digester, and resetting it when the matched element is closed. As a side
54   * effect, rules that would match XML nodes under the element that matches a <code>NodeCreateRule</code> will never be
55   * triggered by Digester, which usually is the behavior one would expect.
56   * </p>
57   * <p>
58   * <strong>Note</strong> that the current implementation does not set the namespace prefixes in the exported nodes. The
59   * (usually more important) namespace URIs are set, of course.
60   * </p>
61   * 
62   * @since Digester 1.4
63   */
64  public class NodeCreateRule
65      extends Rule
66  {
67  
68      // ---------------------------------------------------------- Inner Classes
69  
70      /**
71       * The SAX content handler that does all the actual work of assembling the DOM node tree from the SAX events.
72       */
73      private class NodeBuilder
74          extends DefaultHandler
75      {
76  
77          // ------------------------------------------------------- Constructors
78  
79          /**
80           * Constructor.
81           * <p>
82           * Stores the content handler currently used by Digester so it can be reset when done, and initializes the DOM
83           * objects needed to build the node.
84           * </p>
85           * 
86           * @param doc the document to use to create nodes
87           * @param root the root node
88           * @throws ParserConfigurationException if the DocumentBuilderFactory could not be instantiated
89           * @throws SAXException if the XMLReader could not be instantiated by Digester (should not happen)
90           */
91          public NodeBuilder( Document doc, Node root )
92              throws ParserConfigurationException, SAXException
93          {
94              this.doc = doc;
95              this.root = root;
96              this.top = root;
97  
98              oldContentHandler = getDigester().getCustomContentHandler();
99          }
100 
101         // ------------------------------------------------- Instance Variables
102 
103         /**
104          * The content handler used by Digester before it was set to this content handler.
105          */
106         protected ContentHandler oldContentHandler = null;
107 
108         /**
109          * Depth of the current node, relative to the element where the content handler was put into action.
110          */
111         protected int depth = 0;
112 
113         /**
114          * A DOM Document used to create the various Node instances.
115          */
116         protected Document doc = null;
117 
118         /**
119          * The DOM node that will be pushed on Digester's stack.
120          */
121         protected Node root = null;
122 
123         /**
124          * The current top DOM mode.
125          */
126         protected Node top = null;
127 
128         /**
129          * The text content of the current top DOM node.
130          */
131         protected StringBuilder topText = new StringBuilder();
132 
133         // --------------------------------------------- Helper Methods
134 
135         /**
136          * Appends a {@link org.w3c.dom.Text Text} node to the current node if the content reported by the parser is not
137          * purely whitespace.
138          */
139         private void addTextIfPresent()
140             throws SAXException
141         {
142             if ( topText.length() > 0 )
143             {
144                 String str = topText.toString();
145                 topText.setLength( 0 );
146 
147                 if ( str.trim().length() > 0 )
148                 {
149                     // The contained text is not *pure* whitespace, so create
150                     // a text node to hold it. Note that the "untrimmed" text
151                     // is stored in the node.
152                     try
153                     {
154                         top.appendChild( doc.createTextNode( str ) );
155                     }
156                     catch ( DOMException e )
157                     {
158                         throw new SAXException( e.getMessage() );
159                     }
160                 }
161             }
162         }
163 
164         // --------------------------------------------- ContentHandler Methods
165 
166         /**
167          * Handle notification about text embedded within the current node.
168          * <p>
169          * An xml parser calls this when text is found. We need to ensure that this text gets attached to the new Node
170          * we are creating - except in the case where the only text in the node is whitespace.
171          * <p>
172          * There is a catch, however. According to the sax specification, a parser does not need to pass all of the text
173          * content of a node in one go; it can make multiple calls passing part of the data on each call. In particular,
174          * when the body of an element includes xml entity-references, at least some parsers make a separate call to
175          * this method to pass just the entity content.
176          * <p>
177          * In this method, we therefore just append the provided text to a "current text" buffer. When the element end
178          * is found, or a child element is found then we can check whether we have all-whitespace. See method
179          * addTextIfPresent.
180          * 
181          * @param ch the characters from the XML document
182          * @param start the start position in the array
183          * @param length the number of characters to read from the array
184          * @throws SAXException if the DOM implementation throws an exception
185          */
186         @Override
187         public void characters( char[] ch, int start, int length )
188             throws SAXException
189         {
190             topText.append( ch, start, length );
191         }
192 
193         /**
194          * Checks whether control needs to be returned to Digester.
195          * 
196          * @param namespaceURI the namespace URI
197          * @param localName the local name
198          * @param qName the qualified (prefixed) name
199          * @throws SAXException if the DOM implementation throws an exception
200          */
201         @Override
202         public void endElement( String namespaceURI, String localName, String qName )
203             throws SAXException
204         {
205             addTextIfPresent();
206 
207             try
208             {
209                 if ( depth == 0 )
210                 {
211                     getDigester().setCustomContentHandler( oldContentHandler );
212                     getDigester().push( root );
213                     getDigester().endElement( namespaceURI, localName, qName );
214                 }
215 
216                 top = top.getParentNode();
217                 depth--;
218             }
219             catch ( DOMException e )
220             {
221                 throw new SAXException( e.getMessage() );
222             }
223         }
224 
225         /**
226          * Adds a new {@link org.w3c.dom.ProcessingInstruction ProcessingInstruction} to the current node.
227          * 
228          * @param target the processing instruction target
229          * @param data the processing instruction data, or null if none was supplied
230          * @throws SAXException if the DOM implementation throws an exception
231          */
232         @Override
233         public void processingInstruction( String target, String data )
234             throws SAXException
235         {
236             try
237             {
238                 top.appendChild( doc.createProcessingInstruction( target, data ) );
239             }
240             catch ( DOMException e )
241             {
242                 throw new SAXException( e.getMessage() );
243             }
244         }
245 
246         /**
247          * Adds a new child {@link org.w3c.dom.Element Element} to the current node.
248          * 
249          * @param namespaceURI the namespace URI
250          * @param localName the local name
251          * @param qName the qualified (prefixed) name
252          * @param atts the list of attributes
253          * @throws SAXException if the DOM implementation throws an exception
254          */
255         @Override
256         public void startElement( String namespaceURI, String localName, String qName, Attributes atts )
257             throws SAXException
258         {
259             addTextIfPresent();
260 
261             try
262             {
263                 Node previousTop = top;
264                 if ( ( localName == null ) || ( localName.length() == 0 ) )
265                 {
266                     top = doc.createElement( qName );
267                 }
268                 else
269                 {
270                     top = doc.createElementNS( namespaceURI, localName );
271                 }
272                 for ( int i = 0; i < atts.getLength(); i++ )
273                 {
274                     Attr attr = null;
275                     if ( ( atts.getLocalName( i ) == null ) || ( atts.getLocalName( i ).length() == 0 ) )
276                     {
277                         attr = doc.createAttribute( atts.getQName( i ) );
278                         attr.setNodeValue( atts.getValue( i ) );
279                         ( (Element) top ).setAttributeNode( attr );
280                     }
281                     else
282                     {
283                         attr = doc.createAttributeNS( atts.getURI( i ), atts.getLocalName( i ) );
284                         attr.setNodeValue( atts.getValue( i ) );
285                         ( (Element) top ).setAttributeNodeNS( attr );
286                     }
287                 }
288                 previousTop.appendChild( top );
289                 depth++;
290             }
291             catch ( DOMException e )
292             {
293                 throw new SAXException( e.getMessage() );
294             }
295         }
296     }
297 
298     // ----------------------------------------------------------- Constructors
299 
300     /**
301      * Default constructor. Creates an instance of this rule that will create a DOM {@link org.w3c.dom.Element Element}.
302      *
303      * @throws ParserConfigurationException if a DocumentBuilder cannot be created which satisfies the
304      *         configuration requested.
305      * @see DocumentBuilderFactory#newDocumentBuilder()
306      */
307     public NodeCreateRule()
308         throws ParserConfigurationException
309     {
310         this( Node.ELEMENT_NODE );
311     }
312 
313     /**
314      * Constructor. Creates an instance of this rule that will create a DOM {@link org.w3c.dom.Element Element}, but
315      * lets you specify the JAXP <code>DocumentBuilder</code> that should be used when constructing the node tree.
316      * 
317      * @param documentBuilder the JAXP <code>DocumentBuilder</code> to use
318      */
319     public NodeCreateRule( DocumentBuilder documentBuilder )
320     {
321         this( Node.ELEMENT_NODE, documentBuilder );
322     }
323 
324     /**
325      * Constructor. Creates an instance of this rule that will create either a DOM {@link org.w3c.dom.Element Element}
326      * or a DOM {@link org.w3c.dom.DocumentFragment DocumentFragment}, depending on the value of the
327      * <code>nodeType</code> parameter.
328      * 
329      * @param nodeType the type of node to create, which can be either {@link org.w3c.dom.Node#ELEMENT_NODE
330      *            Node.ELEMENT_NODE} or {@link org.w3c.dom.Node#DOCUMENT_FRAGMENT_NODE Node.DOCUMENT_FRAGMENT_NODE}
331      * @throws ParserConfigurationException if a DocumentBuilder cannot be created which satisfies the
332      *         configuration requested.
333      * @see DocumentBuilderFactory#newDocumentBuilder()
334      */
335     public NodeCreateRule( int nodeType )
336         throws ParserConfigurationException
337     {
338         this( nodeType, DocumentBuilderFactory.newInstance().newDocumentBuilder() );
339     }
340 
341     /**
342      * Constructor. Creates an instance of this rule that will create either a DOM {@link org.w3c.dom.Element Element}
343      * or a DOM {@link org.w3c.dom.DocumentFragment DocumentFragment}, depending on the value of the
344      * <code>nodeType</code> parameter. This constructor lets you specify the JAXP <code>DocumentBuilder</code> that
345      * should be used when constructing the node tree.
346      * 
347      * @param nodeType the type of node to create, which can be either {@link org.w3c.dom.Node#ELEMENT_NODE
348      *            Node.ELEMENT_NODE} or {@link org.w3c.dom.Node#DOCUMENT_FRAGMENT_NODE Node.DOCUMENT_FRAGMENT_NODE}
349      * @param documentBuilder the JAXP <code>DocumentBuilder</code> to use
350      */
351     public NodeCreateRule( int nodeType, DocumentBuilder documentBuilder )
352     {
353         if ( !( ( nodeType == Node.DOCUMENT_FRAGMENT_NODE ) || ( nodeType == Node.ELEMENT_NODE ) ) )
354         {
355             throw new IllegalArgumentException( "Can only create nodes of type DocumentFragment and Element" );
356         }
357         this.nodeType = nodeType;
358         this.documentBuilder = documentBuilder;
359     }
360 
361     // ----------------------------------------------------- Instance Variables
362 
363     /**
364      * The JAXP <code>DocumentBuilder</code> to use.
365      */
366     private DocumentBuilder documentBuilder = null;
367 
368     /**
369      * The type of the node that should be created. Must be one of the constants defined in {@link org.w3c.dom.Node
370      * Node}, but currently only {@link org.w3c.dom.Node#ELEMENT_NODE Node.ELEMENT_NODE} and
371      * {@link org.w3c.dom.Node#DOCUMENT_FRAGMENT_NODE Node.DOCUMENT_FRAGMENT_NODE} are allowed values.
372      */
373     private int nodeType = Node.ELEMENT_NODE;
374 
375     // ----------------------------------------------------------- Rule Methods
376 
377     /**
378      * When this method fires, the digester is told to forward all SAX ContentHandler events to the builder object,
379      * resulting in a DOM being built instead of normal digester rule-handling occurring. When the end of the current
380      * xml element is encountered, the original content handler is restored (expected to be NULL, allowing normal
381      * Digester operations to continue).
382      * 
383      * @param namespaceURI the namespace URI of the matching element, or an empty string if the parser is not namespace
384      *            aware or the element has no namespace
385      * @param name the local name if the parser is namespace aware, or just the element name otherwise
386      * @param attributes The attribute list of this element
387      * @throws Exception indicates a JAXP configuration problem
388      */
389     @Override
390     public void begin( String namespaceURI, String name, Attributes attributes )
391         throws Exception
392     {
393         Document doc = documentBuilder.newDocument();
394         NodeBuilder builder = null;
395         if ( nodeType == Node.ELEMENT_NODE )
396         {
397             Element element = null;
398             if ( getDigester().getNamespaceAware() )
399             {
400                 element = doc.createElementNS( namespaceURI, name );
401                 for ( int i = 0; i < attributes.getLength(); i++ )
402                 {
403                     element.setAttributeNS( attributes.getURI( i ), attributes.getQName( i ),
404                                             attributes.getValue( i ) );
405                 }
406             }
407             else
408             {
409                 element = doc.createElement( name );
410                 for ( int i = 0; i < attributes.getLength(); i++ )
411                 {
412                     element.setAttribute( attributes.getQName( i ), attributes.getValue( i ) );
413                 }
414             }
415             builder = new NodeBuilder( doc, element );
416         }
417         else
418         {
419             builder = new NodeBuilder( doc, doc.createDocumentFragment() );
420         }
421         // the NodeBuilder constructor has already saved the original
422         // value of the digester's custom content handler (expected to
423         // be null, but we save it just in case). So now we just
424         // need to tell the digester to forward events to the builder.
425         getDigester().setCustomContentHandler( builder );
426     }
427 
428     /**
429      * {@inheritDoc}
430      */
431     @Override
432     public void end( String namespace, String name )
433         throws Exception
434     {
435         getDigester().pop();
436     }
437 
438 }