// NodeCreateRule

package org.apache.commons.digester3;

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;

import org.w3c.dom.Attr;
import org.w3c.dom.DOMException;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.xml.sax.Attributes;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;

/**
 * A rule implementation that creates a DOM {@link org.w3c.dom.Node Node} containing the XML at the element that matched
 * the rule. Two concrete types of nodes can be created by this rule:
 * <ul>
 * <li>the default is to create an {@link org.w3c.dom.Element Element} node. The created element will correspond to the
 * element that matched the rule, containing all XML content underneath that element.</li>
 * <li>alternatively, this rule can create nodes of type {@link org.w3c.dom.DocumentFragment DocumentFragment}, which
 * will contain only the XML content under the element the rule was triggered on.</li>
 * </ul>
 * The created node will be normalized, meaning it will not contain text nodes that only contain white space characters.
 * <p>
 * The created <code>Node</code> will be pushed on Digester's object stack when done. To use it in the context of
 * another DOM {@link org.w3c.dom.Document Document}, it must be imported first, using the Document method
 * {@link org.w3c.dom.Document#importNode(org.w3c.dom.Node, boolean) importNode()}.
 * </p>
 * <p>
 * <strong>Important Note:</strong> This is implemented by replacing the SAX {@link org.xml.sax.ContentHandler
 * ContentHandler} in the parser used by Digester, and resetting it when the matched element is closed. As a side
 * effect, rules that would match XML nodes under the element that matches a <code>NodeCreateRule</code> will never be
 * triggered by Digester, which usually is the behavior one would expect.
 * </p>
 * <p>
 * <strong>Note</strong> that the current implementation does not set the namespace prefixes in the exported nodes. The
 * (usually more important) namespace URIs are set, of course.
 * </p>
 *
 * @since Digester 1.4
 */
public class NodeCreateRule
    extends Rule
{

    // ---------------------------------------------------------- Inner Classes

    /**
     * The SAX content handler that does all the actual work of assembling the DOM node tree from the SAX events.
     */
    private class NodeBuilder
        extends DefaultHandler
    {

        // ------------------------------------------------------- Constructors

        /**
         * Constructor.
         * <p>
         * Stores the custom content handler currently registered on the Digester so it can be restored when done, and
         * remembers the DOM objects needed to build the node.
         * </p>
         *
         * @param doc the document to use to create nodes
         * @param root the root node
         */
        public NodeBuilder( Document doc, Node root )
        {
            this.doc = doc;
            this.root = root;
            this.top = root;

            oldContentHandler = getDigester().getCustomContentHandler();
        }

        // ------------------------------------------------- Instance Variables

        /**
         * The content handler used by Digester before it was set to this content handler.
         */
        private final ContentHandler oldContentHandler;

        /**
         * Depth of the current node, relative to the element where the content handler was put into action.
         */
        private int depth = 0;

        /**
         * A DOM Document used to create the various Node instances.
         */
        private final Document doc;

        /**
         * The DOM node that will be pushed on Digester's stack.
         */
        private final Node root;

        /**
         * The current top DOM node.
         */
        private Node top;

        /**
         * The text content of the current top DOM node.
         */
        private final StringBuilder topText = new StringBuilder();

        // --------------------------------------------- Helper Methods

        /**
         * Appends a {@link org.w3c.dom.Text Text} node to the current node if the content reported by the parser is not
         * purely whitespace. The text buffer is emptied in either case.
         *
         * @throws SAXException if the DOM implementation throws an exception; the original exception is kept as cause
         */
        private void addTextIfPresent()
            throws SAXException
        {
            if ( topText.length() > 0 )
            {
                String str = topText.toString();
                topText.setLength( 0 );

                if ( str.trim().length() > 0 )
                {
                    // The contained text is not *pure* whitespace, so create
                    // a text node to hold it. Note that the "untrimmed" text
                    // is stored in the node.
                    try
                    {
                        top.appendChild( doc.createTextNode( str ) );
                    }
                    catch ( DOMException e )
                    {
                        // keep the DOM failure as cause so its stack trace is not lost
                        throw new SAXException( e.getMessage(), e );
                    }
                }
            }
        }

        // --------------------------------------------- ContentHandler Methods

        /**
         * Handle notification about text embedded within the current node.
         * <p>
         * An xml parser calls this when text is found. We need to ensure that this text gets attached to the new Node
         * we are creating - except in the case where the only text in the node is whitespace.
         * <p>
         * There is a catch, however. According to the sax specification, a parser does not need to pass all of the text
         * content of a node in one go; it can make multiple calls passing part of the data on each call. In particular,
         * when the body of an element includes xml entity-references, at least some parsers make a separate call to
         * this method to pass just the entity content.
         * <p>
         * In this method, we therefore just append the provided text to a "current text" buffer. When the element end
         * is found, or a child element or processing instruction is found, then we can check whether we have
         * all-whitespace. See method addTextIfPresent.
         *
         * @param ch the characters from the XML document
         * @param start the start position in the array
         * @param length the number of characters to read from the array
         * @throws SAXException if the DOM implementation throws an exception
         */
        @Override
        public void characters( char[] ch, int start, int length )
            throws SAXException
        {
            topText.append( ch, start, length );
        }

        /**
         * Checks whether control needs to be returned to Digester.
         * <p>
         * When the element that activated this handler is closed (depth zero), the previous custom content handler is
         * restored, the built node is pushed on Digester's stack and the end-element event is forwarded to Digester.
         * </p>
         *
         * @param namespaceURI the namespace URI
         * @param localName the local name
         * @param qName the qualified (prefixed) name
         * @throws SAXException if the DOM implementation throws an exception
         */
        @Override
        public void endElement( String namespaceURI, String localName, String qName )
            throws SAXException
        {
            addTextIfPresent();

            try
            {
                if ( depth == 0 )
                {
                    getDigester().setCustomContentHandler( oldContentHandler );
                    getDigester().push( root );
                    getDigester().endElement( namespaceURI, localName, qName );
                }

                top = top.getParentNode();
                depth--;
            }
            catch ( DOMException e )
            {
                throw new SAXException( e.getMessage(), e );
            }
        }

        /**
         * Adds a new {@link org.w3c.dom.ProcessingInstruction ProcessingInstruction} to the current node.
         *
         * @param target the processing instruction target
         * @param data the processing instruction data, or null if none was supplied
         * @throws SAXException if the DOM implementation throws an exception
         */
        @Override
        public void processingInstruction( String target, String data )
            throws SAXException
        {
            // Flush text buffered so far; otherwise text preceding the instruction
            // would end up after it in the DOM, merged with the text that follows.
            addTextIfPresent();

            try
            {
                top.appendChild( doc.createProcessingInstruction( target, data ) );
            }
            catch ( DOMException e )
            {
                throw new SAXException( e.getMessage(), e );
            }
        }

        /**
         * Adds a new child {@link org.w3c.dom.Element Element} to the current node.
         * <p>
         * Namespace-aware DOM factory methods are used whenever the parser reports a non-empty local name; otherwise
         * the qualified name is used.
         * </p>
         *
         * @param namespaceURI the namespace URI
         * @param localName the local name
         * @param qName the qualified (prefixed) name
         * @param atts the list of attributes
         * @throws SAXException if the DOM implementation throws an exception
         */
        @Override
        public void startElement( String namespaceURI, String localName, String qName, Attributes atts )
            throws SAXException
        {
            addTextIfPresent();

            try
            {
                Element element;
                if ( ( localName == null ) || ( localName.length() == 0 ) )
                {
                    element = doc.createElement( qName );
                }
                else
                {
                    element = doc.createElementNS( namespaceURI, localName );
                }

                for ( int i = 0; i < atts.getLength(); i++ )
                {
                    String attrLocalName = atts.getLocalName( i );
                    if ( ( attrLocalName == null ) || ( attrLocalName.length() == 0 ) )
                    {
                        Attr attr = doc.createAttribute( atts.getQName( i ) );
                        attr.setNodeValue( atts.getValue( i ) );
                        element.setAttributeNode( attr );
                    }
                    else
                    {
                        Attr attr = doc.createAttributeNS( atts.getURI( i ), attrLocalName );
                        attr.setNodeValue( atts.getValue( i ) );
                        element.setAttributeNodeNS( attr );
                    }
                }

                // attach to the current node, then descend into the new element
                top.appendChild( element );
                top = element;
                depth++;
            }
            catch ( DOMException e )
            {
                throw new SAXException( e.getMessage(), e );
            }
        }
    }

    // ----------------------------------------------------------- Constructors

    /**
     * Default constructor. Creates an instance of this rule that will create a DOM {@link org.w3c.dom.Element Element}.
     *
     * @throws ParserConfigurationException if a DocumentBuilder cannot be created which satisfies the
     *         configuration requested.
     * @see DocumentBuilderFactory#newDocumentBuilder()
     */
    public NodeCreateRule()
        throws ParserConfigurationException
    {
        this( Node.ELEMENT_NODE );
    }

    /**
     * Constructor. Creates an instance of this rule that will create a DOM {@link org.w3c.dom.Element Element}, but
     * lets you specify the JAXP <code>DocumentBuilder</code> that should be used when constructing the node tree.
     *
     * @param documentBuilder the JAXP <code>DocumentBuilder</code> to use
     */
    public NodeCreateRule( DocumentBuilder documentBuilder )
    {
        this( Node.ELEMENT_NODE, documentBuilder );
    }

    /**
     * Constructor. Creates an instance of this rule that will create either a DOM {@link org.w3c.dom.Element Element}
     * or a DOM {@link org.w3c.dom.DocumentFragment DocumentFragment}, depending on the value of the
     * <code>nodeType</code> parameter.
     *
     * @param nodeType the type of node to create, which can be either {@link org.w3c.dom.Node#ELEMENT_NODE
     *            Node.ELEMENT_NODE} or {@link org.w3c.dom.Node#DOCUMENT_FRAGMENT_NODE Node.DOCUMENT_FRAGMENT_NODE}
     * @throws ParserConfigurationException if a DocumentBuilder cannot be created which satisfies the
     *         configuration requested.
     * @throws IllegalArgumentException if <code>nodeType</code> is neither of the two supported node types
     * @see DocumentBuilderFactory#newDocumentBuilder()
     */
    public NodeCreateRule( int nodeType )
        throws ParserConfigurationException
    {
        this( nodeType, DocumentBuilderFactory.newInstance().newDocumentBuilder() );
    }

    /**
     * Constructor. Creates an instance of this rule that will create either a DOM {@link org.w3c.dom.Element Element}
     * or a DOM {@link org.w3c.dom.DocumentFragment DocumentFragment}, depending on the value of the
     * <code>nodeType</code> parameter. This constructor lets you specify the JAXP <code>DocumentBuilder</code> that
     * should be used when constructing the node tree.
     *
     * @param nodeType the type of node to create, which can be either {@link org.w3c.dom.Node#ELEMENT_NODE
     *            Node.ELEMENT_NODE} or {@link org.w3c.dom.Node#DOCUMENT_FRAGMENT_NODE Node.DOCUMENT_FRAGMENT_NODE}
     * @param documentBuilder the JAXP <code>DocumentBuilder</code> to use
     * @throws IllegalArgumentException if <code>nodeType</code> is neither of the two supported node types
     */
    public NodeCreateRule( int nodeType, DocumentBuilder documentBuilder )
    {
        if ( !( ( nodeType == Node.DOCUMENT_FRAGMENT_NODE ) || ( nodeType == Node.ELEMENT_NODE ) ) )
        {
            throw new IllegalArgumentException( "Can only create nodes of type DocumentFragment and Element" );
        }
        this.nodeType = nodeType;
        this.documentBuilder = documentBuilder;
    }

    // ----------------------------------------------------- Instance Variables

    /**
     * The JAXP <code>DocumentBuilder</code> to use.
     */
    private final DocumentBuilder documentBuilder;

    /**
     * The type of the node that should be created. Must be one of the constants defined in {@link org.w3c.dom.Node
     * Node}, but currently only {@link org.w3c.dom.Node#ELEMENT_NODE Node.ELEMENT_NODE} and
     * {@link org.w3c.dom.Node#DOCUMENT_FRAGMENT_NODE Node.DOCUMENT_FRAGMENT_NODE} are allowed values.
     */
    private final int nodeType;

    // ----------------------------------------------------------- Rule Methods

    /**
     * When this method fires, the digester is told to forward all SAX ContentHandler events to the builder object,
     * resulting in a DOM being built instead of normal digester rule-handling occurring. When the end of the current
     * xml element is encountered, the original content handler is restored (expected to be NULL, allowing normal
     * Digester operations to continue).
     *
     * @param namespaceURI the namespace URI of the matching element, or an empty string if the parser is not namespace
     *            aware or the element has no namespace
     * @param name the local name if the parser is namespace aware, or just the element name otherwise
     * @param attributes The attribute list of this element
     * @throws Exception indicates a JAXP configuration problem
     */
    @Override
    public void begin( String namespaceURI, String name, Attributes attributes )
        throws Exception
    {
        Document doc = documentBuilder.newDocument();

        Node root;
        if ( nodeType == Node.ELEMENT_NODE )
        {
            // Mirror the matched element, including its attributes, as the root of the new tree.
            Element element;
            if ( getDigester().getNamespaceAware() )
            {
                element = doc.createElementNS( namespaceURI, name );
                for ( int i = 0; i < attributes.getLength(); i++ )
                {
                    element.setAttributeNS( attributes.getURI( i ), attributes.getQName( i ),
                                            attributes.getValue( i ) );
                }
            }
            else
            {
                element = doc.createElement( name );
                for ( int i = 0; i < attributes.getLength(); i++ )
                {
                    element.setAttribute( attributes.getQName( i ), attributes.getValue( i ) );
                }
            }
            root = element;
        }
        else
        {
            root = doc.createDocumentFragment();
        }

        // The NodeBuilder constructor saves the original value of the
        // digester's custom content handler (expected to be null, but we
        // save it just in case). So now we just need to tell the digester
        // to forward events to the builder.
        NodeBuilder builder = new NodeBuilder( doc, root );
        getDigester().setCustomContentHandler( builder );
    }

    /**
     * Pops the node that the builder pushed on Digester's object stack when the matched element was closed.
     *
     * @param namespace the namespace URI of the matching element
     * @param name the name of the matching element
     * @throws Exception if the Digester fails to pop the object stack
     */
    @Override
    public void end( String namespace, String name )
        throws Exception
    {
        getDigester().pop();
    }

}