package org.apache.commons.digester3;

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;

import org.w3c.dom.Attr;
import org.w3c.dom.DOMException;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.xml.sax.Attributes;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;

/**
 * A rule implementation that creates a DOM {@link org.w3c.dom.Node Node} containing the XML at the element that matched
 * the rule. Two concrete types of nodes can be created by this rule:
 * <ul>
 * <li>the default is to create an {@link org.w3c.dom.Element Element} node. The created element will correspond to the
 * element that matched the rule, containing all XML content underneath that element.</li>
 * <li>alternatively, this rule can create nodes of type {@link org.w3c.dom.DocumentFragment DocumentFragment}, which
 * will contain only the XML content under the element the rule was triggered on.</li>
 * </ul>
 * The created node will be normalized, meaning it will not contain text nodes that only contain white space characters.
 * <p>
 * The created <code>Node</code> will be pushed on Digester's object stack when done. To use it in the context of
 * another DOM {@link org.w3c.dom.Document Document}, it must be imported first, using the Document method
 * {@link org.w3c.dom.Document#importNode(org.w3c.dom.Node, boolean) importNode()}.
 * </p>
 * <p>
 * <strong>Important Note:</strong> This is implemented by replacing the SAX {@link org.xml.sax.ContentHandler
 * ContentHandler} in the parser used by Digester, and resetting it when the matched element is closed. As a side
 * effect, rules that would match XML nodes under the element that matches a <code>NodeCreateRule</code> will never be
 * triggered by Digester, which usually is the behavior one would expect.
 * </p>
 * <p>
 * <strong>Note</strong> that the current implementation does not set the namespace prefixes in the exported nodes. The
 * (usually more important) namespace URIs are set, of course.
 * </p>
 *
 * @since Digester 1.4
 */
public class NodeCreateRule
    extends Rule
{

    // ---------------------------------------------------------- Inner Classes

    /**
     * The SAX content handler that does all the actual work of assembling the DOM node tree from the SAX events.
     */
    private class NodeBuilder
        extends DefaultHandler
    {

        // ------------------------------------------------------- Constructors

        /**
         * Constructor.
         * <p>
         * Stores the content handler currently used by Digester so it can be reset when done, and remembers the DOM
         * objects needed to build the node.
         * </p>
         *
         * @param doc the document to use to create nodes
         * @param root the root node
         */
        public NodeBuilder( Document doc, Node root )
        {
            this.doc = doc;
            this.root = root;
            this.top = root;

            oldContentHandler = getDigester().getCustomContentHandler();
        }

        // ------------------------------------------------- Instance Variables

        /**
         * The content handler used by Digester before it was set to this content handler.
         */
        private final ContentHandler oldContentHandler;

        /**
         * Depth of the current node, relative to the element where the content handler was put into action.
         */
        private int depth = 0;

        /**
         * A DOM Document used to create the various Node instances.
         */
        private final Document doc;

        /**
         * The DOM node that will be pushed on Digester's stack.
         */
        private final Node root;

        /**
         * The current top DOM node.
         */
        private Node top;

        /**
         * The text content of the current top DOM node.
         */
        private final StringBuilder topText = new StringBuilder();

        // --------------------------------------------- Helper Methods

        /**
         * Appends a {@link org.w3c.dom.Text Text} node to the current node if the content reported by the parser is not
         * purely whitespace. The text buffer is always emptied, whether or not a node was created.
         *
         * @throws SAXException if the DOM implementation throws an exception
         */
        private void addTextIfPresent()
            throws SAXException
        {
            if ( topText.length() > 0 )
            {
                String str = topText.toString();
                topText.setLength( 0 );

                if ( str.trim().length() > 0 )
                {
                    // The contained text is not *pure* whitespace, so create
                    // a text node to hold it. Note that the "untrimmed" text
                    // is stored in the node.
                    try
                    {
                        top.appendChild( doc.createTextNode( str ) );
                    }
                    catch ( DOMException e )
                    {
                        // keep the DOMException as the cause so its stack trace is not lost
                        throw new SAXException( e.getMessage(), e );
                    }
                }
            }
        }

        // --------------------------------------------- ContentHandler Methods

        /**
         * Handle notification about text embedded within the current node.
         * <p>
         * An xml parser calls this when text is found. We need to ensure that this text gets attached to the new Node
         * we are creating - except in the case where the only text in the node is whitespace.
         * <p>
         * There is a catch, however. According to the sax specification, a parser does not need to pass all of the text
         * content of a node in one go; it can make multiple calls passing part of the data on each call. In particular,
         * when the body of an element includes xml entity-references, at least some parsers make a separate call to
         * this method to pass just the entity content.
         * <p>
         * In this method, we therefore just append the provided text to a "current text" buffer. When the element end
         * is found, or a child element or processing instruction is found, then we can check whether we have
         * all-whitespace. See method addTextIfPresent.
         *
         * @param ch the characters from the XML document
         * @param start the start position in the array
         * @param length the number of characters to read from the array
         * @throws SAXException if the DOM implementation throws an exception
         */
        @Override
        public void characters( char[] ch, int start, int length )
            throws SAXException
        {
            topText.append( ch, start, length );
        }

        /**
         * Checks whether control needs to be returned to Digester.
         * <p>
         * When the element that activated this handler is closed (depth is zero), the previous content handler is
         * restored, the built root node is pushed on Digester's stack and the end-element event is forwarded to
         * Digester.
         * </p>
         *
         * @param namespaceURI the namespace URI
         * @param localName the local name
         * @param qName the qualified (prefixed) name
         * @throws SAXException if the DOM implementation throws an exception
         */
        @Override
        public void endElement( String namespaceURI, String localName, String qName )
            throws SAXException
        {
            addTextIfPresent();

            try
            {
                if ( depth == 0 )
                {
                    getDigester().setCustomContentHandler( oldContentHandler );
                    getDigester().push( root );
                    getDigester().endElement( namespaceURI, localName, qName );
                }

                top = top.getParentNode();
                depth--;
            }
            catch ( DOMException e )
            {
                throw new SAXException( e.getMessage(), e );
            }
        }

        /**
         * Adds a new {@link org.w3c.dom.ProcessingInstruction ProcessingInstruction} to the current node.
         *
         * @param target the processing instruction target
         * @param data the processing instruction data, or null if none was supplied
         * @throws SAXException if the DOM implementation throws an exception
         */
        @Override
        public void processingInstruction( String target, String data )
            throws SAXException
        {
            // Flush buffered character data first, exactly as startElement and endElement do;
            // otherwise text that precedes the processing instruction in the document would be
            // appended after it.
            addTextIfPresent();

            try
            {
                top.appendChild( doc.createProcessingInstruction( target, data ) );
            }
            catch ( DOMException e )
            {
                throw new SAXException( e.getMessage(), e );
            }
        }

        /**
         * Adds a new child {@link org.w3c.dom.Element Element} to the current node.
         *
         * @param namespaceURI the namespace URI
         * @param localName the local name
         * @param qName the qualified (prefixed) name
         * @param atts the list of attributes
         * @throws SAXException if the DOM implementation throws an exception
         */
        @Override
        public void startElement( String namespaceURI, String localName, String qName, Attributes atts )
            throws SAXException
        {
            addTextIfPresent();

            try
            {
                Node previousTop = top;
                // fall back to the qualified name when no local name was reported
                if ( ( localName == null ) || ( localName.length() == 0 ) )
                {
                    top = doc.createElement( qName );
                }
                else
                {
                    top = doc.createElementNS( namespaceURI, localName );
                }
                for ( int i = 0; i < atts.getLength(); i++ )
                {
                    Attr attr = null;
                    if ( ( atts.getLocalName( i ) == null ) || ( atts.getLocalName( i ).length() == 0 ) )
                    {
                        attr = doc.createAttribute( atts.getQName( i ) );
                        attr.setNodeValue( atts.getValue( i ) );
                        ( (Element) top ).setAttributeNode( attr );
                    }
                    else
                    {
                        attr = doc.createAttributeNS( atts.getURI( i ), atts.getLocalName( i ) );
                        attr.setNodeValue( atts.getValue( i ) );
                        ( (Element) top ).setAttributeNodeNS( attr );
                    }
                }
                previousTop.appendChild( top );
                depth++;
            }
            catch ( DOMException e )
            {
                throw new SAXException( e.getMessage(), e );
            }
        }

    }

    // ----------------------------------------------------------- Constructors

    /**
     * Default constructor. Creates an instance of this rule that will create a DOM {@link org.w3c.dom.Element Element}.
     *
     * @throws ParserConfigurationException if a DocumentBuilder cannot be created which satisfies the
     *         configuration requested.
     * @see DocumentBuilderFactory#newDocumentBuilder()
     */
    public NodeCreateRule()
        throws ParserConfigurationException
    {
        this( Node.ELEMENT_NODE );
    }

    /**
     * Constructor. Creates an instance of this rule that will create a DOM {@link org.w3c.dom.Element Element}, but
     * lets you specify the JAXP <code>DocumentBuilder</code> that should be used when constructing the node tree.
     *
     * @param documentBuilder the JAXP <code>DocumentBuilder</code> to use
     */
    public NodeCreateRule( DocumentBuilder documentBuilder )
    {
        this( Node.ELEMENT_NODE, documentBuilder );
    }

    /**
     * Constructor. Creates an instance of this rule that will create either a DOM {@link org.w3c.dom.Element Element}
     * or a DOM {@link org.w3c.dom.DocumentFragment DocumentFragment}, depending on the value of the
     * <code>nodeType</code> parameter.
     *
     * @param nodeType the type of node to create, which can be either {@link org.w3c.dom.Node#ELEMENT_NODE
     *        Node.ELEMENT_NODE} or {@link org.w3c.dom.Node#DOCUMENT_FRAGMENT_NODE Node.DOCUMENT_FRAGMENT_NODE}
     * @throws ParserConfigurationException if a DocumentBuilder cannot be created which satisfies the
     *         configuration requested.
     * @see DocumentBuilderFactory#newDocumentBuilder()
     */
    public NodeCreateRule( int nodeType )
        throws ParserConfigurationException
    {
        this( nodeType, DocumentBuilderFactory.newInstance().newDocumentBuilder() );
    }

    /**
     * Constructor. Creates an instance of this rule that will create either a DOM {@link org.w3c.dom.Element Element}
     * or a DOM {@link org.w3c.dom.DocumentFragment DocumentFragment}, depending on the value of the
     * <code>nodeType</code> parameter. This constructor lets you specify the JAXP <code>DocumentBuilder</code> that
     * should be used when constructing the node tree.
     *
     * @param nodeType the type of node to create, which can be either {@link org.w3c.dom.Node#ELEMENT_NODE
     *        Node.ELEMENT_NODE} or {@link org.w3c.dom.Node#DOCUMENT_FRAGMENT_NODE Node.DOCUMENT_FRAGMENT_NODE}
     * @param documentBuilder the JAXP <code>DocumentBuilder</code> to use
     * @throws IllegalArgumentException if <code>nodeType</code> is neither of the two supported constants
     */
    public NodeCreateRule( int nodeType, DocumentBuilder documentBuilder )
    {
        if ( !( ( nodeType == Node.DOCUMENT_FRAGMENT_NODE ) || ( nodeType == Node.ELEMENT_NODE ) ) )
        {
            throw new IllegalArgumentException( "Can only create nodes of type DocumentFragment and Element" );
        }
        this.nodeType = nodeType;
        this.documentBuilder = documentBuilder;
    }

    // ----------------------------------------------------- Instance Variables

    /**
     * The JAXP <code>DocumentBuilder</code> to use.
     */
    private final DocumentBuilder documentBuilder;

    /**
     * The type of the node that should be created. Must be one of the constants defined in {@link org.w3c.dom.Node
     * Node}, but currently only {@link org.w3c.dom.Node#ELEMENT_NODE Node.ELEMENT_NODE} and
     * {@link org.w3c.dom.Node#DOCUMENT_FRAGMENT_NODE Node.DOCUMENT_FRAGMENT_NODE} are allowed values.
     */
    private final int nodeType;

    // ----------------------------------------------------------- Rule Methods

    /**
     * When this method fires, the digester is told to forward all SAX ContentHandler events to the builder object,
     * resulting in a DOM being built instead of normal digester rule-handling occurring. When the end of the current
     * xml element is encountered, the original content handler is restored (expected to be NULL, allowing normal
     * Digester operations to continue).
     *
     * @param namespaceURI the namespace URI of the matching element, or an empty string if the parser is not namespace
     *        aware or the element has no namespace
     * @param name the local name if the parser is namespace aware, or just the element name otherwise
     * @param attributes The attribute list of this element
     * @throws Exception indicates a JAXP configuration problem
     */
    @Override
    public void begin( String namespaceURI, String name, Attributes attributes )
        throws Exception
    {
        Document doc = documentBuilder.newDocument();
        NodeBuilder builder = null;
        if ( nodeType == Node.ELEMENT_NODE )
        {
            Element element = null;
            if ( getDigester().getNamespaceAware() )
            {
                element = doc.createElementNS( namespaceURI, name );
                for ( int i = 0; i < attributes.getLength(); i++ )
                {
                    element.setAttributeNS( attributes.getURI( i ), attributes.getQName( i ),
                                            attributes.getValue( i ) );
                }
            }
            else
            {
                element = doc.createElement( name );
                for ( int i = 0; i < attributes.getLength(); i++ )
                {
                    element.setAttribute( attributes.getQName( i ), attributes.getValue( i ) );
                }
            }
            builder = new NodeBuilder( doc, element );
        }
        else
        {
            builder = new NodeBuilder( doc, doc.createDocumentFragment() );
        }
        // the NodeBuilder constructor has already saved the original
        // value of the digester's custom content handler (expected to
        // be null, but we save it just in case). So now we just
        // need to tell the digester to forward events to the builder.
        getDigester().setCustomContentHandler( builder );
    }

    /**
     * Pops the node that the builder pushed on Digester's object stack when the matched element was closed.
     *
     * @param namespace the namespace URI of the matching element
     * @param name the name of the matching element
     * @throws Exception if the digester fails to pop the stack
     */
    @Override
    public void end( String namespace, String name )
        throws Exception
    {
        getDigester().pop();
    }

}