1 package org.apache.commons.digester3;
2
3 /*
4 * Licensed to the Apache Software Foundation (ASF) under one
5 * or more contributor license agreements. See the NOTICE file
6 * distributed with this work for additional information
7 * regarding copyright ownership. The ASF licenses this file
8 * to you under the Apache License, Version 2.0 (the
9 * "License"); you may not use this file except in compliance
10 * with the License. You may obtain a copy of the License at
11 *
12 * http://www.apache.org/licenses/LICENSE-2.0
13 *
14 * Unless required by applicable law or agreed to in writing,
15 * software distributed under the License is distributed on an
16 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17 * KIND, either express or implied. See the License for the
18 * specific language governing permissions and limitations
19 * under the License.
20 */
21
22 import javax.xml.parsers.DocumentBuilder;
23 import javax.xml.parsers.DocumentBuilderFactory;
24 import javax.xml.parsers.ParserConfigurationException;
25
26 import org.w3c.dom.Attr;
27 import org.w3c.dom.DOMException;
28 import org.w3c.dom.Document;
29 import org.w3c.dom.Element;
30 import org.w3c.dom.Node;
31 import org.xml.sax.Attributes;
32 import org.xml.sax.ContentHandler;
33 import org.xml.sax.SAXException;
34 import org.xml.sax.helpers.DefaultHandler;
35
36 /**
37 * A rule implementation that creates a DOM {@link org.w3c.dom.Node Node} containing the XML at the element that matched
38 * the rule. Two concrete types of nodes can be created by this rule:
39 * <ul>
40 * <li>the default is to create an {@link org.w3c.dom.Element Element} node. The created element will correspond to the
41 * element that matched the rule, containing all XML content underneath that element.</li>
42 * <li>alternatively, this rule can create nodes of type {@link org.w3c.dom.DocumentFragment DocumentFragment}, which
43 * will contain only the XML content under the element the rule was trigged on.</li>
44 * </ul>
45 * The created node will be normalized, meaning it will not contain text nodes that only contain white space characters.
46 * <p>
47 * The created <code>Node</code> will be pushed on Digester's object stack when done. To use it in the context of
48 * another DOM {@link org.w3c.dom.Document Document}, it must be imported first, using the Document method
49 * {@link org.w3c.dom.Document#importNode(org.w3c.dom.Node, boolean) importNode()}.
50 * </p>
51 * <p>
52 * <strong>Important Note:</strong> This is implemented by replacing the SAX {@link org.xml.sax.ContentHandler
53 * ContentHandler} in the parser used by Digester, and resetting it when the matched element is closed. As a side
54 * effect, rules that would match XML nodes under the element that matches a <code>NodeCreateRule</code> will never be
55 * triggered by Digester, which usually is the behavior one would expect.
56 * </p>
57 * <p>
58 * <strong>Note</strong> that the current implementation does not set the namespace prefixes in the exported nodes. The
59 * (usually more important) namespace URIs are set, of course.
60 * </p>
61 *
62 * @since Digester 1.4
63 */
64 public class NodeCreateRule
65 extends Rule
66 {
67
68 // ---------------------------------------------------------- Inner Classes
69
70 /**
71 * The SAX content handler that does all the actual work of assembling the DOM node tree from the SAX events.
72 */
73 private class NodeBuilder
74 extends DefaultHandler
75 {
76
77 // ------------------------------------------------------- Constructors
78
79 /**
80 * Constructor.
81 * <p>
82 * Stores the content handler currently used by Digester so it can be reset when done, and initializes the DOM
83 * objects needed to build the node.
84 * </p>
85 *
86 * @param doc the document to use to create nodes
87 * @param root the root node
88 * @throws ParserConfigurationException if the DocumentBuilderFactory could not be instantiated
89 * @throws SAXException if the XMLReader could not be instantiated by Digester (should not happen)
90 */
91 public NodeBuilder( Document doc, Node root )
92 throws ParserConfigurationException, SAXException
93 {
94 this.doc = doc;
95 this.root = root;
96 this.top = root;
97
98 oldContentHandler = getDigester().getCustomContentHandler();
99 }
100
101 // ------------------------------------------------- Instance Variables
102
103 /**
104 * The content handler used by Digester before it was set to this content handler.
105 */
106 protected ContentHandler oldContentHandler = null;
107
108 /**
109 * Depth of the current node, relative to the element where the content handler was put into action.
110 */
111 protected int depth = 0;
112
113 /**
114 * A DOM Document used to create the various Node instances.
115 */
116 protected Document doc = null;
117
118 /**
119 * The DOM node that will be pushed on Digester's stack.
120 */
121 protected Node root = null;
122
123 /**
124 * The current top DOM mode.
125 */
126 protected Node top = null;
127
128 /**
129 * The text content of the current top DOM node.
130 */
131 protected StringBuilder topText = new StringBuilder();
132
133 // --------------------------------------------- Helper Methods
134
135 /**
136 * Appends a {@link org.w3c.dom.Text Text} node to the current node if the content reported by the parser is not
137 * purely whitespace.
138 */
139 private void addTextIfPresent()
140 throws SAXException
141 {
142 if ( topText.length() > 0 )
143 {
144 String str = topText.toString();
145 topText.setLength( 0 );
146
147 if ( str.trim().length() > 0 )
148 {
149 // The contained text is not *pure* whitespace, so create
150 // a text node to hold it. Note that the "untrimmed" text
151 // is stored in the node.
152 try
153 {
154 top.appendChild( doc.createTextNode( str ) );
155 }
156 catch ( DOMException e )
157 {
158 throw new SAXException( e.getMessage() );
159 }
160 }
161 }
162 }
163
164 // --------------------------------------------- ContentHandler Methods
165
166 /**
167 * Handle notification about text embedded within the current node.
168 * <p>
169 * An xml parser calls this when text is found. We need to ensure that this text gets attached to the new Node
170 * we are creating - except in the case where the only text in the node is whitespace.
171 * <p>
172 * There is a catch, however. According to the sax specification, a parser does not need to pass all of the text
173 * content of a node in one go; it can make multiple calls passing part of the data on each call. In particular,
174 * when the body of an element includes xml entity-references, at least some parsers make a separate call to
175 * this method to pass just the entity content.
176 * <p>
177 * In this method, we therefore just append the provided text to a "current text" buffer. When the element end
178 * is found, or a child element is found then we can check whether we have all-whitespace. See method
179 * addTextIfPresent.
180 *
181 * @param ch the characters from the XML document
182 * @param start the start position in the array
183 * @param length the number of characters to read from the array
184 * @throws SAXException if the DOM implementation throws an exception
185 */
186 @Override
187 public void characters( char[] ch, int start, int length )
188 throws SAXException
189 {
190 topText.append( ch, start, length );
191 }
192
193 /**
194 * Checks whether control needs to be returned to Digester.
195 *
196 * @param namespaceURI the namespace URI
197 * @param localName the local name
198 * @param qName the qualified (prefixed) name
199 * @throws SAXException if the DOM implementation throws an exception
200 */
201 @Override
202 public void endElement( String namespaceURI, String localName, String qName )
203 throws SAXException
204 {
205 addTextIfPresent();
206
207 try
208 {
209 if ( depth == 0 )
210 {
211 getDigester().setCustomContentHandler( oldContentHandler );
212 getDigester().push( root );
213 getDigester().endElement( namespaceURI, localName, qName );
214 }
215
216 top = top.getParentNode();
217 depth--;
218 }
219 catch ( DOMException e )
220 {
221 throw new SAXException( e.getMessage() );
222 }
223 }
224
225 /**
226 * Adds a new {@link org.w3c.dom.ProcessingInstruction ProcessingInstruction} to the current node.
227 *
228 * @param target the processing instruction target
229 * @param data the processing instruction data, or null if none was supplied
230 * @throws SAXException if the DOM implementation throws an exception
231 */
232 @Override
233 public void processingInstruction( String target, String data )
234 throws SAXException
235 {
236 try
237 {
238 top.appendChild( doc.createProcessingInstruction( target, data ) );
239 }
240 catch ( DOMException e )
241 {
242 throw new SAXException( e.getMessage() );
243 }
244 }
245
246 /**
247 * Adds a new child {@link org.w3c.dom.Element Element} to the current node.
248 *
249 * @param namespaceURI the namespace URI
250 * @param localName the local name
251 * @param qName the qualified (prefixed) name
252 * @param atts the list of attributes
253 * @throws SAXException if the DOM implementation throws an exception
254 */
255 @Override
256 public void startElement( String namespaceURI, String localName, String qName, Attributes atts )
257 throws SAXException
258 {
259 addTextIfPresent();
260
261 try
262 {
263 Node previousTop = top;
264 if ( ( localName == null ) || ( localName.length() == 0 ) )
265 {
266 top = doc.createElement( qName );
267 }
268 else
269 {
270 top = doc.createElementNS( namespaceURI, localName );
271 }
272 for ( int i = 0; i < atts.getLength(); i++ )
273 {
274 Attr attr = null;
275 if ( ( atts.getLocalName( i ) == null ) || ( atts.getLocalName( i ).length() == 0 ) )
276 {
277 attr = doc.createAttribute( atts.getQName( i ) );
278 attr.setNodeValue( atts.getValue( i ) );
279 ( (Element) top ).setAttributeNode( attr );
280 }
281 else
282 {
283 attr = doc.createAttributeNS( atts.getURI( i ), atts.getLocalName( i ) );
284 attr.setNodeValue( atts.getValue( i ) );
285 ( (Element) top ).setAttributeNodeNS( attr );
286 }
287 }
288 previousTop.appendChild( top );
289 depth++;
290 }
291 catch ( DOMException e )
292 {
293 throw new SAXException( e.getMessage() );
294 }
295 }
296 }
297
298 // ----------------------------------------------------------- Constructors
299
300 /**
301 * Default constructor. Creates an instance of this rule that will create a DOM {@link org.w3c.dom.Element Element}.
302 *
303 * @throws ParserConfigurationException if a DocumentBuilder cannot be created which satisfies the
304 * configuration requested.
305 * @see DocumentBuilderFactory#newDocumentBuilder()
306 */
307 public NodeCreateRule()
308 throws ParserConfigurationException
309 {
310 this( Node.ELEMENT_NODE );
311 }
312
313 /**
314 * Constructor. Creates an instance of this rule that will create a DOM {@link org.w3c.dom.Element Element}, but
315 * lets you specify the JAXP <code>DocumentBuilder</code> that should be used when constructing the node tree.
316 *
317 * @param documentBuilder the JAXP <code>DocumentBuilder</code> to use
318 */
319 public NodeCreateRule( DocumentBuilder documentBuilder )
320 {
321 this( Node.ELEMENT_NODE, documentBuilder );
322 }
323
324 /**
325 * Constructor. Creates an instance of this rule that will create either a DOM {@link org.w3c.dom.Element Element}
326 * or a DOM {@link org.w3c.dom.DocumentFragment DocumentFragment}, depending on the value of the
327 * <code>nodeType</code> parameter.
328 *
329 * @param nodeType the type of node to create, which can be either {@link org.w3c.dom.Node#ELEMENT_NODE
330 * Node.ELEMENT_NODE} or {@link org.w3c.dom.Node#DOCUMENT_FRAGMENT_NODE Node.DOCUMENT_FRAGMENT_NODE}
331 * @throws ParserConfigurationException if a DocumentBuilder cannot be created which satisfies the
332 * configuration requested.
333 * @see DocumentBuilderFactory#newDocumentBuilder()
334 */
335 public NodeCreateRule( int nodeType )
336 throws ParserConfigurationException
337 {
338 this( nodeType, DocumentBuilderFactory.newInstance().newDocumentBuilder() );
339 }
340
341 /**
342 * Constructor. Creates an instance of this rule that will create either a DOM {@link org.w3c.dom.Element Element}
343 * or a DOM {@link org.w3c.dom.DocumentFragment DocumentFragment}, depending on the value of the
344 * <code>nodeType</code> parameter. This constructor lets you specify the JAXP <code>DocumentBuilder</code> that
345 * should be used when constructing the node tree.
346 *
347 * @param nodeType the type of node to create, which can be either {@link org.w3c.dom.Node#ELEMENT_NODE
348 * Node.ELEMENT_NODE} or {@link org.w3c.dom.Node#DOCUMENT_FRAGMENT_NODE Node.DOCUMENT_FRAGMENT_NODE}
349 * @param documentBuilder the JAXP <code>DocumentBuilder</code> to use
350 */
351 public NodeCreateRule( int nodeType, DocumentBuilder documentBuilder )
352 {
353 if ( !( ( nodeType == Node.DOCUMENT_FRAGMENT_NODE ) || ( nodeType == Node.ELEMENT_NODE ) ) )
354 {
355 throw new IllegalArgumentException( "Can only create nodes of type DocumentFragment and Element" );
356 }
357 this.nodeType = nodeType;
358 this.documentBuilder = documentBuilder;
359 }
360
361 // ----------------------------------------------------- Instance Variables
362
363 /**
364 * The JAXP <code>DocumentBuilder</code> to use.
365 */
366 private DocumentBuilder documentBuilder = null;
367
368 /**
369 * The type of the node that should be created. Must be one of the constants defined in {@link org.w3c.dom.Node
370 * Node}, but currently only {@link org.w3c.dom.Node#ELEMENT_NODE Node.ELEMENT_NODE} and
371 * {@link org.w3c.dom.Node#DOCUMENT_FRAGMENT_NODE Node.DOCUMENT_FRAGMENT_NODE} are allowed values.
372 */
373 private int nodeType = Node.ELEMENT_NODE;
374
375 // ----------------------------------------------------------- Rule Methods
376
377 /**
378 * When this method fires, the digester is told to forward all SAX ContentHandler events to the builder object,
379 * resulting in a DOM being built instead of normal digester rule-handling occurring. When the end of the current
380 * xml element is encountered, the original content handler is restored (expected to be NULL, allowing normal
381 * Digester operations to continue).
382 *
383 * @param namespaceURI the namespace URI of the matching element, or an empty string if the parser is not namespace
384 * aware or the element has no namespace
385 * @param name the local name if the parser is namespace aware, or just the element name otherwise
386 * @param attributes The attribute list of this element
387 * @throws Exception indicates a JAXP configuration problem
388 */
389 @Override
390 public void begin( String namespaceURI, String name, Attributes attributes )
391 throws Exception
392 {
393 Document doc = documentBuilder.newDocument();
394 NodeBuilder builder = null;
395 if ( nodeType == Node.ELEMENT_NODE )
396 {
397 Element element = null;
398 if ( getDigester().getNamespaceAware() )
399 {
400 element = doc.createElementNS( namespaceURI, name );
401 for ( int i = 0; i < attributes.getLength(); i++ )
402 {
403 element.setAttributeNS( attributes.getURI( i ), attributes.getQName( i ),
404 attributes.getValue( i ) );
405 }
406 }
407 else
408 {
409 element = doc.createElement( name );
410 for ( int i = 0; i < attributes.getLength(); i++ )
411 {
412 element.setAttribute( attributes.getQName( i ), attributes.getValue( i ) );
413 }
414 }
415 builder = new NodeBuilder( doc, element );
416 }
417 else
418 {
419 builder = new NodeBuilder( doc, doc.createDocumentFragment() );
420 }
421 // the NodeBuilder constructor has already saved the original
422 // value of the digester's custom content handler (expected to
423 // be null, but we save it just in case). So now we just
424 // need to tell the digester to forward events to the builder.
425 getDigester().setCustomContentHandler( builder );
426 }
427
428 /**
429 * {@inheritDoc}
430 */
431 @Override
432 public void end( String namespace, String name )
433 throws Exception
434 {
435 getDigester().pop();
436 }
437
438 }