001    package org.apache.commons.digester3;
002    
003    /*
004     * Licensed to the Apache Software Foundation (ASF) under one
005     * or more contributor license agreements.  See the NOTICE file
006     * distributed with this work for additional information
007     * regarding copyright ownership.  The ASF licenses this file
008     * to you under the Apache License, Version 2.0 (the
009     * "License"); you may not use this file except in compliance
010     * with the License.  You may obtain a copy of the License at
011     *
012     *   http://www.apache.org/licenses/LICENSE-2.0
013     *
014     * Unless required by applicable law or agreed to in writing,
015     * software distributed under the License is distributed on an
016     * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
017     * KIND, either express or implied.  See the License for the
018     * specific language governing permissions and limitations
019     * under the License.
020     */
021    
022    import javax.xml.parsers.DocumentBuilder;
023    import javax.xml.parsers.DocumentBuilderFactory;
024    import javax.xml.parsers.ParserConfigurationException;
025    
026    import org.w3c.dom.Attr;
027    import org.w3c.dom.DOMException;
028    import org.w3c.dom.Document;
029    import org.w3c.dom.Element;
030    import org.w3c.dom.Node;
031    import org.xml.sax.Attributes;
032    import org.xml.sax.ContentHandler;
033    import org.xml.sax.SAXException;
034    import org.xml.sax.helpers.DefaultHandler;
035    
036    /**
037     * A rule implementation that creates a DOM {@link org.w3c.dom.Node Node} containing the XML at the element that matched
038     * the rule. Two concrete types of nodes can be created by this rule:
039     * <ul>
040     * <li>the default is to create an {@link org.w3c.dom.Element Element} node. The created element will correspond to the
041     * element that matched the rule, containing all XML content underneath that element.</li>
042     * <li>alternatively, this rule can create nodes of type {@link org.w3c.dom.DocumentFragment DocumentFragment}, which
043     * will contain only the XML content under the element the rule was trigged on.</li>
044     * </ul>
045     * The created node will be normalized, meaning it will not contain text nodes that only contain white space characters.
046     * <p>
047     * The created <code>Node</code> will be pushed on Digester's object stack when done. To use it in the context of
048     * another DOM {@link org.w3c.dom.Document Document}, it must be imported first, using the Document method
049     * {@link org.w3c.dom.Document#importNode(org.w3c.dom.Node, boolean) importNode()}.
050     * </p>
051     * <p>
052     * <strong>Important Note:</strong> This is implemented by replacing the SAX {@link org.xml.sax.ContentHandler
053     * ContentHandler} in the parser used by Digester, and resetting it when the matched element is closed. As a side
054     * effect, rules that would match XML nodes under the element that matches a <code>NodeCreateRule</code> will never be
055     * triggered by Digester, which usually is the behavior one would expect.
056     * </p>
057     * <p>
058     * <strong>Note</strong> that the current implementation does not set the namespace prefixes in the exported nodes. The
059     * (usually more important) namespace URIs are set, of course.
060     * </p>
061     * 
062     * @since Digester 1.4
063     */
064    public class NodeCreateRule
065        extends Rule
066    {
067    
068        // ---------------------------------------------------------- Inner Classes
069    
070        /**
071         * The SAX content handler that does all the actual work of assembling the DOM node tree from the SAX events.
072         */
073        private class NodeBuilder
074            extends DefaultHandler
075        {
076    
077            // ------------------------------------------------------- Constructors
078    
079            /**
080             * Constructor.
081             * <p>
082             * Stores the content handler currently used by Digester so it can be reset when done, and initializes the DOM
083             * objects needed to build the node.
084             * </p>
085             * 
086             * @param doc the document to use to create nodes
087             * @param root the root node
088             * @throws ParserConfigurationException if the DocumentBuilderFactory could not be instantiated
089             * @throws SAXException if the XMLReader could not be instantiated by Digester (should not happen)
090             */
091            public NodeBuilder( Document doc, Node root )
092                throws ParserConfigurationException, SAXException
093            {
094                this.doc = doc;
095                this.root = root;
096                this.top = root;
097    
098                oldContentHandler = getDigester().getCustomContentHandler();
099            }
100    
101            // ------------------------------------------------- Instance Variables
102    
103            /**
104             * The content handler used by Digester before it was set to this content handler.
105             */
106            protected ContentHandler oldContentHandler = null;
107    
108            /**
109             * Depth of the current node, relative to the element where the content handler was put into action.
110             */
111            protected int depth = 0;
112    
113            /**
114             * A DOM Document used to create the various Node instances.
115             */
116            protected Document doc = null;
117    
118            /**
119             * The DOM node that will be pushed on Digester's stack.
120             */
121            protected Node root = null;
122    
123            /**
124             * The current top DOM mode.
125             */
126            protected Node top = null;
127    
128            /**
129             * The text content of the current top DOM node.
130             */
131            protected StringBuilder topText = new StringBuilder();
132    
133            // --------------------------------------------- Helper Methods
134    
135            /**
136             * Appends a {@link org.w3c.dom.Text Text} node to the current node if the content reported by the parser is not
137             * purely whitespace.
138             */
139            private void addTextIfPresent()
140                throws SAXException
141            {
142                if ( topText.length() > 0 )
143                {
144                    String str = topText.toString();
145                    topText.setLength( 0 );
146    
147                    if ( str.trim().length() > 0 )
148                    {
149                        // The contained text is not *pure* whitespace, so create
150                        // a text node to hold it. Note that the "untrimmed" text
151                        // is stored in the node.
152                        try
153                        {
154                            top.appendChild( doc.createTextNode( str ) );
155                        }
156                        catch ( DOMException e )
157                        {
158                            throw new SAXException( e.getMessage() );
159                        }
160                    }
161                }
162            }
163    
164            // --------------------------------------------- ContentHandler Methods
165    
166            /**
167             * Handle notification about text embedded within the current node.
168             * <p>
169             * An xml parser calls this when text is found. We need to ensure that this text gets attached to the new Node
170             * we are creating - except in the case where the only text in the node is whitespace.
171             * <p>
172             * There is a catch, however. According to the sax specification, a parser does not need to pass all of the text
173             * content of a node in one go; it can make multiple calls passing part of the data on each call. In particular,
174             * when the body of an element includes xml entity-references, at least some parsers make a separate call to
175             * this method to pass just the entity content.
176             * <p>
177             * In this method, we therefore just append the provided text to a "current text" buffer. When the element end
178             * is found, or a child element is found then we can check whether we have all-whitespace. See method
179             * addTextIfPresent.
180             * 
181             * @param ch the characters from the XML document
182             * @param start the start position in the array
183             * @param length the number of characters to read from the array
184             * @throws SAXException if the DOM implementation throws an exception
185             */
186            @Override
187            public void characters( char[] ch, int start, int length )
188                throws SAXException
189            {
190                topText.append( ch, start, length );
191            }
192    
193            /**
194             * Checks whether control needs to be returned to Digester.
195             * 
196             * @param namespaceURI the namespace URI
197             * @param localName the local name
198             * @param qName the qualified (prefixed) name
199             * @throws SAXException if the DOM implementation throws an exception
200             */
201            @Override
202            public void endElement( String namespaceURI, String localName, String qName )
203                throws SAXException
204            {
205                addTextIfPresent();
206    
207                try
208                {
209                    if ( depth == 0 )
210                    {
211                        getDigester().setCustomContentHandler( oldContentHandler );
212                        getDigester().push( root );
213                        getDigester().endElement( namespaceURI, localName, qName );
214                    }
215    
216                    top = top.getParentNode();
217                    depth--;
218                }
219                catch ( DOMException e )
220                {
221                    throw new SAXException( e.getMessage() );
222                }
223            }
224    
225            /**
226             * Adds a new {@link org.w3c.dom.ProcessingInstruction ProcessingInstruction} to the current node.
227             * 
228             * @param target the processing instruction target
229             * @param data the processing instruction data, or null if none was supplied
230             * @throws SAXException if the DOM implementation throws an exception
231             */
232            @Override
233            public void processingInstruction( String target, String data )
234                throws SAXException
235            {
236                try
237                {
238                    top.appendChild( doc.createProcessingInstruction( target, data ) );
239                }
240                catch ( DOMException e )
241                {
242                    throw new SAXException( e.getMessage() );
243                }
244            }
245    
246            /**
247             * Adds a new child {@link org.w3c.dom.Element Element} to the current node.
248             * 
249             * @param namespaceURI the namespace URI
250             * @param localName the local name
251             * @param qName the qualified (prefixed) name
252             * @param atts the list of attributes
253             * @throws SAXException if the DOM implementation throws an exception
254             */
255            @Override
256            public void startElement( String namespaceURI, String localName, String qName, Attributes atts )
257                throws SAXException
258            {
259                addTextIfPresent();
260    
261                try
262                {
263                    Node previousTop = top;
264                    if ( ( localName == null ) || ( localName.length() == 0 ) )
265                    {
266                        top = doc.createElement( qName );
267                    }
268                    else
269                    {
270                        top = doc.createElementNS( namespaceURI, localName );
271                    }
272                    for ( int i = 0; i < atts.getLength(); i++ )
273                    {
274                        Attr attr = null;
275                        if ( ( atts.getLocalName( i ) == null ) || ( atts.getLocalName( i ).length() == 0 ) )
276                        {
277                            attr = doc.createAttribute( atts.getQName( i ) );
278                            attr.setNodeValue( atts.getValue( i ) );
279                            ( (Element) top ).setAttributeNode( attr );
280                        }
281                        else
282                        {
283                            attr = doc.createAttributeNS( atts.getURI( i ), atts.getLocalName( i ) );
284                            attr.setNodeValue( atts.getValue( i ) );
285                            ( (Element) top ).setAttributeNodeNS( attr );
286                        }
287                    }
288                    previousTop.appendChild( top );
289                    depth++;
290                }
291                catch ( DOMException e )
292                {
293                    throw new SAXException( e.getMessage() );
294                }
295            }
296        }
297    
298        // ----------------------------------------------------------- Constructors
299    
300        /**
301         * Default constructor. Creates an instance of this rule that will create a DOM {@link org.w3c.dom.Element Element}.
302         *
303         * @throws ParserConfigurationException if a DocumentBuilder cannot be created which satisfies the
304         *         configuration requested.
305         * @see DocumentBuilderFactory#newDocumentBuilder()
306         */
307        public NodeCreateRule()
308            throws ParserConfigurationException
309        {
310            this( Node.ELEMENT_NODE );
311        }
312    
313        /**
314         * Constructor. Creates an instance of this rule that will create a DOM {@link org.w3c.dom.Element Element}, but
315         * lets you specify the JAXP <code>DocumentBuilder</code> that should be used when constructing the node tree.
316         * 
317         * @param documentBuilder the JAXP <code>DocumentBuilder</code> to use
318         */
319        public NodeCreateRule( DocumentBuilder documentBuilder )
320        {
321            this( Node.ELEMENT_NODE, documentBuilder );
322        }
323    
324        /**
325         * Constructor. Creates an instance of this rule that will create either a DOM {@link org.w3c.dom.Element Element}
326         * or a DOM {@link org.w3c.dom.DocumentFragment DocumentFragment}, depending on the value of the
327         * <code>nodeType</code> parameter.
328         * 
329         * @param nodeType the type of node to create, which can be either {@link org.w3c.dom.Node#ELEMENT_NODE
330         *            Node.ELEMENT_NODE} or {@link org.w3c.dom.Node#DOCUMENT_FRAGMENT_NODE Node.DOCUMENT_FRAGMENT_NODE}
331         * @throws ParserConfigurationException if a DocumentBuilder cannot be created which satisfies the
332         *         configuration requested.
333         * @see DocumentBuilderFactory#newDocumentBuilder()
334         */
335        public NodeCreateRule( int nodeType )
336            throws ParserConfigurationException
337        {
338            this( nodeType, DocumentBuilderFactory.newInstance().newDocumentBuilder() );
339        }
340    
341        /**
342         * Constructor. Creates an instance of this rule that will create either a DOM {@link org.w3c.dom.Element Element}
343         * or a DOM {@link org.w3c.dom.DocumentFragment DocumentFragment}, depending on the value of the
344         * <code>nodeType</code> parameter. This constructor lets you specify the JAXP <code>DocumentBuilder</code> that
345         * should be used when constructing the node tree.
346         * 
347         * @param nodeType the type of node to create, which can be either {@link org.w3c.dom.Node#ELEMENT_NODE
348         *            Node.ELEMENT_NODE} or {@link org.w3c.dom.Node#DOCUMENT_FRAGMENT_NODE Node.DOCUMENT_FRAGMENT_NODE}
349         * @param documentBuilder the JAXP <code>DocumentBuilder</code> to use
350         */
351        public NodeCreateRule( int nodeType, DocumentBuilder documentBuilder )
352        {
353            if ( !( ( nodeType == Node.DOCUMENT_FRAGMENT_NODE ) || ( nodeType == Node.ELEMENT_NODE ) ) )
354            {
355                throw new IllegalArgumentException( "Can only create nodes of type DocumentFragment and Element" );
356            }
357            this.nodeType = nodeType;
358            this.documentBuilder = documentBuilder;
359        }
360    
361        // ----------------------------------------------------- Instance Variables
362    
363        /**
364         * The JAXP <code>DocumentBuilder</code> to use.
365         */
366        private DocumentBuilder documentBuilder = null;
367    
368        /**
369         * The type of the node that should be created. Must be one of the constants defined in {@link org.w3c.dom.Node
370         * Node}, but currently only {@link org.w3c.dom.Node#ELEMENT_NODE Node.ELEMENT_NODE} and
371         * {@link org.w3c.dom.Node#DOCUMENT_FRAGMENT_NODE Node.DOCUMENT_FRAGMENT_NODE} are allowed values.
372         */
373        private int nodeType = Node.ELEMENT_NODE;
374    
375        // ----------------------------------------------------------- Rule Methods
376    
377        /**
378         * When this method fires, the digester is told to forward all SAX ContentHandler events to the builder object,
379         * resulting in a DOM being built instead of normal digester rule-handling occurring. When the end of the current
380         * xml element is encountered, the original content handler is restored (expected to be NULL, allowing normal
381         * Digester operations to continue).
382         * 
383         * @param namespaceURI the namespace URI of the matching element, or an empty string if the parser is not namespace
384         *            aware or the element has no namespace
385         * @param name the local name if the parser is namespace aware, or just the element name otherwise
386         * @param attributes The attribute list of this element
387         * @throws Exception indicates a JAXP configuration problem
388         */
389        @Override
390        public void begin( String namespaceURI, String name, Attributes attributes )
391            throws Exception
392        {
393            Document doc = documentBuilder.newDocument();
394            NodeBuilder builder = null;
395            if ( nodeType == Node.ELEMENT_NODE )
396            {
397                Element element = null;
398                if ( getDigester().getNamespaceAware() )
399                {
400                    element = doc.createElementNS( namespaceURI, name );
401                    for ( int i = 0; i < attributes.getLength(); i++ )
402                    {
403                        element.setAttributeNS( attributes.getURI( i ), attributes.getQName( i ),
404                                                attributes.getValue( i ) );
405                    }
406                }
407                else
408                {
409                    element = doc.createElement( name );
410                    for ( int i = 0; i < attributes.getLength(); i++ )
411                    {
412                        element.setAttribute( attributes.getQName( i ), attributes.getValue( i ) );
413                    }
414                }
415                builder = new NodeBuilder( doc, element );
416            }
417            else
418            {
419                builder = new NodeBuilder( doc, doc.createDocumentFragment() );
420            }
421            // the NodeBuilder constructor has already saved the original
422            // value of the digester's custom content handler (expected to
423            // be null, but we save it just in case). So now we just
424            // need to tell the digester to forward events to the builder.
425            getDigester().setCustomContentHandler( builder );
426        }
427    
428        /**
429         * {@inheritDoc}
430         */
431        @Override
432        public void end( String namespace, String name )
433            throws Exception
434        {
435            getDigester().pop();
436        }
437    
438    }