001    /* $Id: NodeCreateRule.java 992060 2010-09-02 19:09:47Z simonetripodi $
002     *
003     * Licensed to the Apache Software Foundation (ASF) under one or more
004     * contributor license agreements.  See the NOTICE file distributed with
005     * this work for additional information regarding copyright ownership.
006     * The ASF licenses this file to You under the Apache License, Version 2.0
007     * (the "License"); you may not use this file except in compliance with
008     * the License.  You may obtain a copy of the License at
009     *
010     *      http://www.apache.org/licenses/LICENSE-2.0
011     *
012     * Unless required by applicable law or agreed to in writing, software
013     * distributed under the License is distributed on an "AS IS" BASIS,
014     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015     * See the License for the specific language governing permissions and
016     * limitations under the License.
017     */
018    
019    
020    package org.apache.commons.digester;
021    
022    
023    import javax.xml.parsers.DocumentBuilder;
024    import javax.xml.parsers.DocumentBuilderFactory;
025    import javax.xml.parsers.ParserConfigurationException;
026    
027    import org.w3c.dom.Attr;
028    import org.w3c.dom.DOMException;
029    import org.w3c.dom.Document;
030    import org.w3c.dom.Element;
031    import org.w3c.dom.Node;
032    import org.xml.sax.Attributes;
033    import org.xml.sax.ContentHandler;
034    import org.xml.sax.SAXException;
035    import org.xml.sax.helpers.DefaultHandler;
036    
037    
038    /**
039     * A rule implementation that creates a DOM
040     * {@link org.w3c.dom.Node Node} containing the XML at the element that matched
041     * the rule. Two concrete types of nodes can be created by this rule:
042     * <ul>
043     *   <li>the default is to create an {@link org.w3c.dom.Element Element} node.
044     *   The created element will correspond to the element that matched the rule,
045     *   containing all XML content underneath that element.</li>
046     *   <li>alternatively, this rule can create nodes of type
047     *   {@link org.w3c.dom.DocumentFragment DocumentFragment}, which will contain
048     *   only the XML content under the element the rule was trigged on.</li>
049     * </ul>
050     * The created node will be normalized, meaning it will not contain text nodes 
051     * that only contain white space characters.
052     * 
053    
054     * 
055     * <p>The created <code>Node</code> will be pushed on Digester's object stack
056     * when done. To use it in the context of another DOM
057     * {@link org.w3c.dom.Document Document}, it must be imported first, using the
058     * Document method
059     * {@link org.w3c.dom.Document#importNode(org.w3c.dom.Node, boolean) importNode()}.
060     * </p>
061     *
062     * <p><strong>Important Note:</strong> This is implemented by replacing the SAX
063     * {@link org.xml.sax.ContentHandler ContentHandler} in the parser used by 
064     * Digester, and resetting it when the matched element is closed. As a side 
065     * effect, rules that would match XML nodes under the element that matches 
066     * a <code>NodeCreateRule</code> will never be triggered by Digester, which 
067     * usually is the behavior one would expect.</p>
068     * 
069     * <p><strong>Note</strong> that the current implementation does not set the namespace prefixes
070     * in the exported nodes. The (usually more important) namespace URIs are set,
071     * of course.</p>
072     *
073     * @since Digester 1.4
074     */
075    
076    public class NodeCreateRule extends Rule {
077    
078    
079        // ---------------------------------------------------------- Inner Classes
080    
081    
082        /**
083         * The SAX content handler that does all the actual work of assembling the 
084         * DOM node tree from the SAX events.
085         */
086        private class NodeBuilder
087            extends DefaultHandler {
088    
089    
090            // ------------------------------------------------------- Constructors
091    
092    
093            /**
094             * Constructor.
095             * 
096             * <p>Stores the content handler currently used by Digester so it can 
097             * be reset when done, and initializes the DOM objects needed to 
098             * build the node.</p>
099             * 
100             * @param doc the document to use to create nodes
101             * @param root the root node
102             * @throws ParserConfigurationException if the DocumentBuilderFactory 
103             *   could not be instantiated
104             * @throws SAXException if the XMLReader could not be instantiated by 
105             *   Digester (should not happen)
106             */
107            public NodeBuilder(Document doc, Node root)
108                throws ParserConfigurationException, SAXException {
109    
110                this.doc = doc;
111                this.root = root;
112                this.top = root;
113                
114                oldContentHandler = digester.getCustomContentHandler();
115    
116            }
117    
118    
119            // ------------------------------------------------- Instance Variables
120    
121    
122            /**
123             * The content handler used by Digester before it was set to this 
124             * content handler.
125             */
126            protected ContentHandler oldContentHandler = null;
127    
128    
129            /**
130             * Depth of the current node, relative to the element where the content
131             * handler was put into action.
132             */
133            protected int depth = 0;
134    
135    
136            /**
137             * A DOM Document used to create the various Node instances.
138             */
139            protected Document doc = null;
140    
141    
142            /**
143             * The DOM node that will be pushed on Digester's stack.
144             */
145            protected Node root = null;
146    
147    
148            /**
149             * The current top DOM mode.
150             */
151            protected Node top = null;
152    
153            /**
154             * The text content of the current top DOM node.
155             */
156            protected StringBuffer topText = new StringBuffer();
157    
158    
159            // --------------------------------------------- Helper Methods
160    
161            /**
162             * Appends a {@link org.w3c.dom.Text Text} node to the current node
163             * if the content reported by the parser is not purely whitespace.
164             */
165            private void addTextIfPresent() throws SAXException {
166                if (topText.length() > 0) {
167                    String str = topText.toString();
168                    topText.setLength(0);
169    
170                    if (str.trim().length() > 0) {
171                        // The contained text is not *pure* whitespace, so create
172                        // a text node to hold it. Note that the "untrimmed" text
173                        // is stored in the node.
174                        try {
175                            top.appendChild(doc.createTextNode(str));
176                        } catch (DOMException e) {
177                            throw new SAXException(e.getMessage());
178                        }
179                    }
180                }
181            }
182    
183            // --------------------------------------------- ContentHandler Methods
184    
185    
186            /**
187             * Handle notification about text embedded within the current node.
188             * <p>
189             * An xml parser calls this when text is found. We need to ensure that this
190             * text gets attached to the new Node we are creating - except in the case
191             * where the only text in the node is whitespace.
192             * <p>
193             * There is a catch, however. According to the sax specification, a parser
194             * does not need to pass all of the text content of a node in one go; it can
195             * make multiple calls passing part of the data on each call. In particular,
196             * when the body of an element includes xml entity-references, at least some
197             * parsers make a separate call to this method to pass just the entity content.
198             * <p>
199             * In this method, we therefore just append the provided text to a
200             * "current text" buffer. When the element end is found, or a child element
201             * is found then we can check whether we have all-whitespace. See method
202             * addTextIfPresent. 
203             * 
204             * @param ch the characters from the XML document
205             * @param start the start position in the array
206             * @param length the number of characters to read from the array
207             * @throws SAXException if the DOM implementation throws an exception
208             */
209            @Override
210            public void characters(char[] ch, int start, int length)
211                throws SAXException {
212    
213                topText.append(ch, start, length);
214            }
215    
216    
217            /**
218             * Checks whether control needs to be returned to Digester.
219             * 
220             * @param namespaceURI the namespace URI
221             * @param localName the local name
222             * @param qName the qualified (prefixed) name
223             * @throws SAXException if the DOM implementation throws an exception
224             */
225            @Override
226            public void endElement(String namespaceURI, String localName,
227                                   String qName)
228                throws SAXException {
229                
230                addTextIfPresent();
231    
232                try {
233                    if (depth == 0) {
234                        getDigester().setCustomContentHandler(oldContentHandler);
235                        getDigester().push(root);
236                        getDigester().endElement(namespaceURI, localName, qName);
237                    }
238        
239                    top = top.getParentNode();
240                    depth--;
241                } catch (DOMException e) {
242                    throw new SAXException(e.getMessage());
243                }
244    
245            }
246    
247    
248            /**
249             * Adds a new
250             * {@link org.w3c.dom.ProcessingInstruction ProcessingInstruction} to 
251             * the current node.
252             * 
253             * @param target the processing instruction target
254             * @param data the processing instruction data, or null if none was 
255             *   supplied
256             * @throws SAXException if the DOM implementation throws an exception
257             */
258            @Override
259            public void processingInstruction(String target, String data)
260                throws SAXException {
261                
262                try {
263                    top.appendChild(doc.createProcessingInstruction(target, data));
264                } catch (DOMException e) {
265                    throw new SAXException(e.getMessage());
266                }
267    
268            }
269    
270    
271            /**
272             * Adds a new child {@link org.w3c.dom.Element Element} to the current
273             * node.
274             * 
275             * @param namespaceURI the namespace URI
276             * @param localName the local name
277             * @param qName the qualified (prefixed) name
278             * @param atts the list of attributes
279             * @throws SAXException if the DOM implementation throws an exception
280             */
281            @Override
282            public void startElement(String namespaceURI, String localName,
283                                     String qName, Attributes atts)
284                throws SAXException {
285    
286                addTextIfPresent();
287    
288                try {
289                    Node previousTop = top;
290                    if ((localName == null) || (localName.length() == 0)) { 
291                        top = doc.createElement(qName);
292                    } else {
293                        top = doc.createElementNS(namespaceURI, localName);
294                    }
295                    for (int i = 0; i < atts.getLength(); i++) {
296                        Attr attr = null;
297                        if ((atts.getLocalName(i) == null) ||
298                            (atts.getLocalName(i).length() == 0)) {
299                            attr = doc.createAttribute(atts.getQName(i));
300                            attr.setNodeValue(atts.getValue(i));
301                            ((Element)top).setAttributeNode(attr);
302                        } else {
303                            attr = doc.createAttributeNS(atts.getURI(i),
304                                                         atts.getLocalName(i));
305                            attr.setNodeValue(atts.getValue(i));
306                            ((Element)top).setAttributeNodeNS(attr);
307                        }
308                    }
309                    previousTop.appendChild(top);
310                    depth++;
311                } catch (DOMException e) {
312                    throw new SAXException(e.getMessage());
313                }
314    
315            }
316    
317        }
318    
319    
320        // ----------------------------------------------------------- Constructors
321    
322    
323        /**
324         * Default constructor. Creates an instance of this rule that will create a
325         * DOM {@link org.w3c.dom.Element Element}.
326         */
327        public NodeCreateRule() throws ParserConfigurationException {
328    
329            this(Node.ELEMENT_NODE);
330    
331        }
332    
333    
334        /**
335         * Constructor. Creates an instance of this rule that will create a DOM
336         * {@link org.w3c.dom.Element Element}, but lets you specify the JAXP 
337         * <code>DocumentBuilder</code> that should be used when constructing the
338         * node tree.
339         * 
340         * @param documentBuilder the JAXP <code>DocumentBuilder</code> to use
341         */
342        public NodeCreateRule(DocumentBuilder documentBuilder) {
343    
344            this(Node.ELEMENT_NODE, documentBuilder);
345    
346        }
347    
348    
349        /**
350         * Constructor. Creates an instance of this rule that will create either a 
351         * DOM {@link org.w3c.dom.Element Element} or a DOM 
352         * {@link org.w3c.dom.DocumentFragment DocumentFragment}, depending on the
353         * value of the <code>nodeType</code> parameter.
354         * 
355         * @param nodeType the type of node to create, which can be either
356         *   {@link org.w3c.dom.Node#ELEMENT_NODE Node.ELEMENT_NODE} or 
357         *   {@link org.w3c.dom.Node#DOCUMENT_FRAGMENT_NODE Node.DOCUMENT_FRAGMENT_NODE}
358         * @throws IllegalArgumentException if the node type is not supported
359         */
360        public NodeCreateRule(int nodeType) throws ParserConfigurationException {
361    
362            this(nodeType,
363                 DocumentBuilderFactory.newInstance().newDocumentBuilder());
364    
365        }
366    
367    
368        /**
369         * Constructor. Creates an instance of this rule that will create either a 
370         * DOM {@link org.w3c.dom.Element Element} or a DOM 
371         * {@link org.w3c.dom.DocumentFragment DocumentFragment}, depending on the
372         * value of the <code>nodeType</code> parameter. This constructor lets you
373         * specify the JAXP <code>DocumentBuilder</code> that should be used when
374         * constructing the node tree.
375         * 
376         * @param nodeType the type of node to create, which can be either
377         *   {@link org.w3c.dom.Node#ELEMENT_NODE Node.ELEMENT_NODE} or 
378         *   {@link org.w3c.dom.Node#DOCUMENT_FRAGMENT_NODE Node.DOCUMENT_FRAGMENT_NODE}
379         * @param documentBuilder the JAXP <code>DocumentBuilder</code> to use
380         * @throws IllegalArgumentException if the node type is not supported
381         */
382        public NodeCreateRule(int nodeType, DocumentBuilder documentBuilder) {
383    
384            if (!((nodeType == Node.DOCUMENT_FRAGMENT_NODE) ||
385                  (nodeType == Node.ELEMENT_NODE))) {
386                throw new IllegalArgumentException(
387                    "Can only create nodes of type DocumentFragment and Element");
388            }
389            this.nodeType = nodeType;
390            this.documentBuilder = documentBuilder;
391    
392        }
393    
394    
395        // ----------------------------------------------------- Instance Variables
396    
397    
398        /**
399         * The JAXP <code>DocumentBuilder</code> to use.
400         */
401        private DocumentBuilder documentBuilder = null;
402    
403    
404        /**
405         * The type of the node that should be created. Must be one of the
406         * constants defined in {@link org.w3c.dom.Node Node}, but currently only
407         * {@link org.w3c.dom.Node#ELEMENT_NODE Node.ELEMENT_NODE} and 
408         * {@link org.w3c.dom.Node#DOCUMENT_FRAGMENT_NODE Node.DOCUMENT_FRAGMENT_NODE}
409         * are allowed values.
410         */
411        private int nodeType = Node.ELEMENT_NODE;
412    
413    
414        // ----------------------------------------------------------- Rule Methods
415    
416    
417        /**
418         * When this method fires, the digester is told to forward all SAX
419         * ContentHandler events to the builder object, resulting in a DOM being
420         * built instead of normal digester rule-handling occurring. When the
421         * end of the current xml element is encountered, the original content 
422         * handler is restored (expected to be NULL, allowing normal Digester
423         * operations to continue).
424         * 
425         * @param namespaceURI the namespace URI of the matching element, or an 
426         *   empty string if the parser is not namespace aware or the element has
427         *   no namespace
428         * @param name the local name if the parser is namespace aware, or just 
429         *   the element name otherwise
430         * @param attributes The attribute list of this element
431         * @throws Exception indicates a JAXP configuration problem
432         */
433        @Override
434        public void begin(String namespaceURI, String name, Attributes attributes)
435            throws Exception {
436    
437            Document doc = documentBuilder.newDocument();
438            NodeBuilder builder = null;
439            if (nodeType == Node.ELEMENT_NODE) {
440                Element element = null;
441                if (getDigester().getNamespaceAware()) {
442                    element =
443                        doc.createElementNS(namespaceURI, name);
444                    for (int i = 0; i < attributes.getLength(); i++) {
445                        element.setAttributeNS(attributes.getURI(i),
446                                               attributes.getQName(i),
447                                               attributes.getValue(i));
448                    }
449                } else {
450                    element = doc.createElement(name);
451                    for (int i = 0; i < attributes.getLength(); i++) {
452                        element.setAttribute(attributes.getQName(i),
453                                             attributes.getValue(i));
454                    }
455                }
456                builder = new NodeBuilder(doc, element);
457            } else {
458                builder = new NodeBuilder(doc, doc.createDocumentFragment());
459            }
460            // the NodeBuilder constructor has already saved the original
461            // value of the digester's custom content handler (expected to
462            // be null, but we save it just in case). So now we just
463            // need to tell the digester to forward events to the builder.
464            getDigester().setCustomContentHandler(builder);
465        }
466    
467    
468        /**
469         * Pop the Node off the top of the stack.
470         */
471        @Override
472        public void end() throws Exception {
473    
474            digester.pop();
475    
476        }
477    
478    
479    }