001package org.apache.commons.digester3;
002
003/*
004 * Licensed to the Apache Software Foundation (ASF) under one
005 * or more contributor license agreements.  See the NOTICE file
006 * distributed with this work for additional information
007 * regarding copyright ownership.  The ASF licenses this file
008 * to you under the Apache License, Version 2.0 (the
009 * "License"); you may not use this file except in compliance
010 * with the License.  You may obtain a copy of the License at
011 *
012 *   http://www.apache.org/licenses/LICENSE-2.0
013 *
014 * Unless required by applicable law or agreed to in writing,
015 * software distributed under the License is distributed on an
016 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
017 * KIND, either express or implied.  See the License for the
018 * specific language governing permissions and limitations
019 * under the License.
020 */
021
022import javax.xml.parsers.DocumentBuilder;
023import javax.xml.parsers.DocumentBuilderFactory;
024import javax.xml.parsers.ParserConfigurationException;
025
026import org.w3c.dom.Attr;
027import org.w3c.dom.DOMException;
028import org.w3c.dom.Document;
029import org.w3c.dom.Element;
030import org.w3c.dom.Node;
031import org.xml.sax.Attributes;
032import org.xml.sax.ContentHandler;
033import org.xml.sax.SAXException;
034import org.xml.sax.helpers.DefaultHandler;
035
036/**
037 * A rule implementation that creates a DOM {@link org.w3c.dom.Node Node} containing the XML at the element that matched
038 * the rule. Two concrete types of nodes can be created by this rule:
039 * <ul>
040 * <li>the default is to create an {@link org.w3c.dom.Element Element} node. The created element will correspond to the
041 * element that matched the rule, containing all XML content underneath that element.</li>
042 * <li>alternatively, this rule can create nodes of type {@link org.w3c.dom.DocumentFragment DocumentFragment}, which
043 * will contain only the XML content under the element the rule was trigged on.</li>
044 * </ul>
045 * The created node will be normalized, meaning it will not contain text nodes that only contain white space characters.
046 * <p>
047 * The created <code>Node</code> will be pushed on Digester's object stack when done. To use it in the context of
048 * another DOM {@link org.w3c.dom.Document Document}, it must be imported first, using the Document method
049 * {@link org.w3c.dom.Document#importNode(org.w3c.dom.Node, boolean) importNode()}.
050 * </p>
051 * <p>
052 * <strong>Important Note:</strong> This is implemented by replacing the SAX {@link org.xml.sax.ContentHandler
053 * ContentHandler} in the parser used by Digester, and resetting it when the matched element is closed. As a side
054 * effect, rules that would match XML nodes under the element that matches a <code>NodeCreateRule</code> will never be
055 * triggered by Digester, which usually is the behavior one would expect.
056 * </p>
057 * <p>
058 * <strong>Note</strong> that the current implementation does not set the namespace prefixes in the exported nodes. The
059 * (usually more important) namespace URIs are set, of course.
060 * </p>
061 * 
062 * @since Digester 1.4
063 */
064public class NodeCreateRule
065    extends Rule
066{
067
068    // ---------------------------------------------------------- Inner Classes
069
070    /**
071     * The SAX content handler that does all the actual work of assembling the DOM node tree from the SAX events.
072     */
073    private class NodeBuilder
074        extends DefaultHandler
075    {
076
077        // ------------------------------------------------------- Constructors
078
079        /**
080         * Constructor.
081         * <p>
082         * Stores the content handler currently used by Digester so it can be reset when done, and initializes the DOM
083         * objects needed to build the node.
084         * </p>
085         * 
086         * @param doc the document to use to create nodes
087         * @param root the root node
088         * @throws ParserConfigurationException if the DocumentBuilderFactory could not be instantiated
089         * @throws SAXException if the XMLReader could not be instantiated by Digester (should not happen)
090         */
091        public NodeBuilder( Document doc, Node root )
092            throws ParserConfigurationException, SAXException
093        {
094            this.doc = doc;
095            this.root = root;
096            this.top = root;
097
098            oldContentHandler = getDigester().getCustomContentHandler();
099        }
100
101        // ------------------------------------------------- Instance Variables
102
103        /**
104         * The content handler used by Digester before it was set to this content handler.
105         */
106        protected ContentHandler oldContentHandler = null;
107
108        /**
109         * Depth of the current node, relative to the element where the content handler was put into action.
110         */
111        protected int depth = 0;
112
113        /**
114         * A DOM Document used to create the various Node instances.
115         */
116        protected Document doc = null;
117
118        /**
119         * The DOM node that will be pushed on Digester's stack.
120         */
121        protected Node root = null;
122
123        /**
124         * The current top DOM mode.
125         */
126        protected Node top = null;
127
128        /**
129         * The text content of the current top DOM node.
130         */
131        protected StringBuilder topText = new StringBuilder();
132
133        // --------------------------------------------- Helper Methods
134
135        /**
136         * Appends a {@link org.w3c.dom.Text Text} node to the current node if the content reported by the parser is not
137         * purely whitespace.
138         */
139        private void addTextIfPresent()
140            throws SAXException
141        {
142            if ( topText.length() > 0 )
143            {
144                String str = topText.toString();
145                topText.setLength( 0 );
146
147                if ( str.trim().length() > 0 )
148                {
149                    // The contained text is not *pure* whitespace, so create
150                    // a text node to hold it. Note that the "untrimmed" text
151                    // is stored in the node.
152                    try
153                    {
154                        top.appendChild( doc.createTextNode( str ) );
155                    }
156                    catch ( DOMException e )
157                    {
158                        throw new SAXException( e.getMessage() );
159                    }
160                }
161            }
162        }
163
164        // --------------------------------------------- ContentHandler Methods
165
166        /**
167         * Handle notification about text embedded within the current node.
168         * <p>
169         * An xml parser calls this when text is found. We need to ensure that this text gets attached to the new Node
170         * we are creating - except in the case where the only text in the node is whitespace.
171         * <p>
172         * There is a catch, however. According to the sax specification, a parser does not need to pass all of the text
173         * content of a node in one go; it can make multiple calls passing part of the data on each call. In particular,
174         * when the body of an element includes xml entity-references, at least some parsers make a separate call to
175         * this method to pass just the entity content.
176         * <p>
177         * In this method, we therefore just append the provided text to a "current text" buffer. When the element end
178         * is found, or a child element is found then we can check whether we have all-whitespace. See method
179         * addTextIfPresent.
180         * 
181         * @param ch the characters from the XML document
182         * @param start the start position in the array
183         * @param length the number of characters to read from the array
184         * @throws SAXException if the DOM implementation throws an exception
185         */
186        @Override
187        public void characters( char[] ch, int start, int length )
188            throws SAXException
189        {
190            topText.append( ch, start, length );
191        }
192
193        /**
194         * Checks whether control needs to be returned to Digester.
195         * 
196         * @param namespaceURI the namespace URI
197         * @param localName the local name
198         * @param qName the qualified (prefixed) name
199         * @throws SAXException if the DOM implementation throws an exception
200         */
201        @Override
202        public void endElement( String namespaceURI, String localName, String qName )
203            throws SAXException
204        {
205            addTextIfPresent();
206
207            try
208            {
209                if ( depth == 0 )
210                {
211                    getDigester().setCustomContentHandler( oldContentHandler );
212                    getDigester().push( root );
213                    getDigester().endElement( namespaceURI, localName, qName );
214                }
215
216                top = top.getParentNode();
217                depth--;
218            }
219            catch ( DOMException e )
220            {
221                throw new SAXException( e.getMessage() );
222            }
223        }
224
225        /**
226         * Adds a new {@link org.w3c.dom.ProcessingInstruction ProcessingInstruction} to the current node.
227         * 
228         * @param target the processing instruction target
229         * @param data the processing instruction data, or null if none was supplied
230         * @throws SAXException if the DOM implementation throws an exception
231         */
232        @Override
233        public void processingInstruction( String target, String data )
234            throws SAXException
235        {
236            try
237            {
238                top.appendChild( doc.createProcessingInstruction( target, data ) );
239            }
240            catch ( DOMException e )
241            {
242                throw new SAXException( e.getMessage() );
243            }
244        }
245
246        /**
247         * Adds a new child {@link org.w3c.dom.Element Element} to the current node.
248         * 
249         * @param namespaceURI the namespace URI
250         * @param localName the local name
251         * @param qName the qualified (prefixed) name
252         * @param atts the list of attributes
253         * @throws SAXException if the DOM implementation throws an exception
254         */
255        @Override
256        public void startElement( String namespaceURI, String localName, String qName, Attributes atts )
257            throws SAXException
258        {
259            addTextIfPresent();
260
261            try
262            {
263                Node previousTop = top;
264                if ( ( localName == null ) || ( localName.length() == 0 ) )
265                {
266                    top = doc.createElement( qName );
267                }
268                else
269                {
270                    top = doc.createElementNS( namespaceURI, localName );
271                }
272                for ( int i = 0; i < atts.getLength(); i++ )
273                {
274                    Attr attr = null;
275                    if ( ( atts.getLocalName( i ) == null ) || ( atts.getLocalName( i ).length() == 0 ) )
276                    {
277                        attr = doc.createAttribute( atts.getQName( i ) );
278                        attr.setNodeValue( atts.getValue( i ) );
279                        ( (Element) top ).setAttributeNode( attr );
280                    }
281                    else
282                    {
283                        attr = doc.createAttributeNS( atts.getURI( i ), atts.getLocalName( i ) );
284                        attr.setNodeValue( atts.getValue( i ) );
285                        ( (Element) top ).setAttributeNodeNS( attr );
286                    }
287                }
288                previousTop.appendChild( top );
289                depth++;
290            }
291            catch ( DOMException e )
292            {
293                throw new SAXException( e.getMessage() );
294            }
295        }
296    }
297
298    // ----------------------------------------------------------- Constructors
299
300    /**
301     * Default constructor. Creates an instance of this rule that will create a DOM {@link org.w3c.dom.Element Element}.
302     *
303     * @throws ParserConfigurationException if a DocumentBuilder cannot be created which satisfies the
304     *         configuration requested.
305     * @see DocumentBuilderFactory#newDocumentBuilder()
306     */
307    public NodeCreateRule()
308        throws ParserConfigurationException
309    {
310        this( Node.ELEMENT_NODE );
311    }
312
313    /**
314     * Constructor. Creates an instance of this rule that will create a DOM {@link org.w3c.dom.Element Element}, but
315     * lets you specify the JAXP <code>DocumentBuilder</code> that should be used when constructing the node tree.
316     * 
317     * @param documentBuilder the JAXP <code>DocumentBuilder</code> to use
318     */
319    public NodeCreateRule( DocumentBuilder documentBuilder )
320    {
321        this( Node.ELEMENT_NODE, documentBuilder );
322    }
323
324    /**
325     * Constructor. Creates an instance of this rule that will create either a DOM {@link org.w3c.dom.Element Element}
326     * or a DOM {@link org.w3c.dom.DocumentFragment DocumentFragment}, depending on the value of the
327     * <code>nodeType</code> parameter.
328     * 
329     * @param nodeType the type of node to create, which can be either {@link org.w3c.dom.Node#ELEMENT_NODE
330     *            Node.ELEMENT_NODE} or {@link org.w3c.dom.Node#DOCUMENT_FRAGMENT_NODE Node.DOCUMENT_FRAGMENT_NODE}
331     * @throws ParserConfigurationException if a DocumentBuilder cannot be created which satisfies the
332     *         configuration requested.
333     * @see DocumentBuilderFactory#newDocumentBuilder()
334     */
335    public NodeCreateRule( int nodeType )
336        throws ParserConfigurationException
337    {
338        this( nodeType, DocumentBuilderFactory.newInstance().newDocumentBuilder() );
339    }
340
341    /**
342     * Constructor. Creates an instance of this rule that will create either a DOM {@link org.w3c.dom.Element Element}
343     * or a DOM {@link org.w3c.dom.DocumentFragment DocumentFragment}, depending on the value of the
344     * <code>nodeType</code> parameter. This constructor lets you specify the JAXP <code>DocumentBuilder</code> that
345     * should be used when constructing the node tree.
346     * 
347     * @param nodeType the type of node to create, which can be either {@link org.w3c.dom.Node#ELEMENT_NODE
348     *            Node.ELEMENT_NODE} or {@link org.w3c.dom.Node#DOCUMENT_FRAGMENT_NODE Node.DOCUMENT_FRAGMENT_NODE}
349     * @param documentBuilder the JAXP <code>DocumentBuilder</code> to use
350     */
351    public NodeCreateRule( int nodeType, DocumentBuilder documentBuilder )
352    {
353        if ( !( ( nodeType == Node.DOCUMENT_FRAGMENT_NODE ) || ( nodeType == Node.ELEMENT_NODE ) ) )
354        {
355            throw new IllegalArgumentException( "Can only create nodes of type DocumentFragment and Element" );
356        }
357        this.nodeType = nodeType;
358        this.documentBuilder = documentBuilder;
359    }
360
361    // ----------------------------------------------------- Instance Variables
362
363    /**
364     * The JAXP <code>DocumentBuilder</code> to use.
365     */
366    private DocumentBuilder documentBuilder = null;
367
368    /**
369     * The type of the node that should be created. Must be one of the constants defined in {@link org.w3c.dom.Node
370     * Node}, but currently only {@link org.w3c.dom.Node#ELEMENT_NODE Node.ELEMENT_NODE} and
371     * {@link org.w3c.dom.Node#DOCUMENT_FRAGMENT_NODE Node.DOCUMENT_FRAGMENT_NODE} are allowed values.
372     */
373    private int nodeType = Node.ELEMENT_NODE;
374
375    // ----------------------------------------------------------- Rule Methods
376
377    /**
378     * When this method fires, the digester is told to forward all SAX ContentHandler events to the builder object,
379     * resulting in a DOM being built instead of normal digester rule-handling occurring. When the end of the current
380     * xml element is encountered, the original content handler is restored (expected to be NULL, allowing normal
381     * Digester operations to continue).
382     * 
383     * @param namespaceURI the namespace URI of the matching element, or an empty string if the parser is not namespace
384     *            aware or the element has no namespace
385     * @param name the local name if the parser is namespace aware, or just the element name otherwise
386     * @param attributes The attribute list of this element
387     * @throws Exception indicates a JAXP configuration problem
388     */
389    @Override
390    public void begin( String namespaceURI, String name, Attributes attributes )
391        throws Exception
392    {
393        Document doc = documentBuilder.newDocument();
394        NodeBuilder builder = null;
395        if ( nodeType == Node.ELEMENT_NODE )
396        {
397            Element element = null;
398            if ( getDigester().getNamespaceAware() )
399            {
400                element = doc.createElementNS( namespaceURI, name );
401                for ( int i = 0; i < attributes.getLength(); i++ )
402                {
403                    element.setAttributeNS( attributes.getURI( i ), attributes.getQName( i ),
404                                            attributes.getValue( i ) );
405                }
406            }
407            else
408            {
409                element = doc.createElement( name );
410                for ( int i = 0; i < attributes.getLength(); i++ )
411                {
412                    element.setAttribute( attributes.getQName( i ), attributes.getValue( i ) );
413                }
414            }
415            builder = new NodeBuilder( doc, element );
416        }
417        else
418        {
419            builder = new NodeBuilder( doc, doc.createDocumentFragment() );
420        }
421        // the NodeBuilder constructor has already saved the original
422        // value of the digester's custom content handler (expected to
423        // be null, but we save it just in case). So now we just
424        // need to tell the digester to forward events to the builder.
425        getDigester().setCustomContentHandler( builder );
426    }
427
428    /**
429     * {@inheritDoc}
430     */
431    @Override
432    public void end( String namespace, String name )
433        throws Exception
434    {
435        getDigester().pop();
436    }
437
438}