001 /* $Id: NodeCreateRule.java 992060 2010-09-02 19:09:47Z simonetripodi $ 002 * 003 * Licensed to the Apache Software Foundation (ASF) under one or more 004 * contributor license agreements. See the NOTICE file distributed with 005 * this work for additional information regarding copyright ownership. 006 * The ASF licenses this file to You under the Apache License, Version 2.0 007 * (the "License"); you may not use this file except in compliance with 008 * the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018 019 020 package org.apache.commons.digester; 021 022 023 import javax.xml.parsers.DocumentBuilder; 024 import javax.xml.parsers.DocumentBuilderFactory; 025 import javax.xml.parsers.ParserConfigurationException; 026 027 import org.w3c.dom.Attr; 028 import org.w3c.dom.DOMException; 029 import org.w3c.dom.Document; 030 import org.w3c.dom.Element; 031 import org.w3c.dom.Node; 032 import org.xml.sax.Attributes; 033 import org.xml.sax.ContentHandler; 034 import org.xml.sax.SAXException; 035 import org.xml.sax.helpers.DefaultHandler; 036 037 038 /** 039 * A rule implementation that creates a DOM 040 * {@link org.w3c.dom.Node Node} containing the XML at the element that matched 041 * the rule. Two concrete types of nodes can be created by this rule: 042 * <ul> 043 * <li>the default is to create an {@link org.w3c.dom.Element Element} node. 044 * The created element will correspond to the element that matched the rule, 045 * containing all XML content underneath that element.</li> 046 * <li>alternatively, this rule can create nodes of type 047 * {@link org.w3c.dom.DocumentFragment DocumentFragment}, which will contain 048 * only the XML content under the element the rule was trigged on.</li> 049 * </ul> 050 * The created node will be normalized, meaning it will not contain text nodes 051 * that only contain white space characters. 052 * 053 054 * 055 * <p>The created <code>Node</code> will be pushed on Digester's object stack 056 * when done. To use it in the context of another DOM 057 * {@link org.w3c.dom.Document Document}, it must be imported first, using the 058 * Document method 059 * {@link org.w3c.dom.Document#importNode(org.w3c.dom.Node, boolean) importNode()}. 060 * </p> 061 * 062 * <p><strong>Important Note:</strong> This is implemented by replacing the SAX 063 * {@link org.xml.sax.ContentHandler ContentHandler} in the parser used by 064 * Digester, and resetting it when the matched element is closed. As a side 065 * effect, rules that would match XML nodes under the element that matches 066 * a <code>NodeCreateRule</code> will never be triggered by Digester, which 067 * usually is the behavior one would expect.</p> 068 * 069 * <p><strong>Note</strong> that the current implementation does not set the namespace prefixes 070 * in the exported nodes. The (usually more important) namespace URIs are set, 071 * of course.</p> 072 * 073 * @since Digester 1.4 074 */ 075 076 public class NodeCreateRule extends Rule { 077 078 079 // ---------------------------------------------------------- Inner Classes 080 081 082 /** 083 * The SAX content handler that does all the actual work of assembling the 084 * DOM node tree from the SAX events. 085 */ 086 private class NodeBuilder 087 extends DefaultHandler { 088 089 090 // ------------------------------------------------------- Constructors 091 092 093 /** 094 * Constructor. 095 * 096 * <p>Stores the content handler currently used by Digester so it can 097 * be reset when done, and initializes the DOM objects needed to 098 * build the node.</p> 099 * 100 * @param doc the document to use to create nodes 101 * @param root the root node 102 * @throws ParserConfigurationException if the DocumentBuilderFactory 103 * could not be instantiated 104 * @throws SAXException if the XMLReader could not be instantiated by 105 * Digester (should not happen) 106 */ 107 public NodeBuilder(Document doc, Node root) 108 throws ParserConfigurationException, SAXException { 109 110 this.doc = doc; 111 this.root = root; 112 this.top = root; 113 114 oldContentHandler = digester.getCustomContentHandler(); 115 116 } 117 118 119 // ------------------------------------------------- Instance Variables 120 121 122 /** 123 * The content handler used by Digester before it was set to this 124 * content handler. 125 */ 126 protected ContentHandler oldContentHandler = null; 127 128 129 /** 130 * Depth of the current node, relative to the element where the content 131 * handler was put into action. 132 */ 133 protected int depth = 0; 134 135 136 /** 137 * A DOM Document used to create the various Node instances. 138 */ 139 protected Document doc = null; 140 141 142 /** 143 * The DOM node that will be pushed on Digester's stack. 144 */ 145 protected Node root = null; 146 147 148 /** 149 * The current top DOM mode. 150 */ 151 protected Node top = null; 152 153 /** 154 * The text content of the current top DOM node. 155 */ 156 protected StringBuffer topText = new StringBuffer(); 157 158 159 // --------------------------------------------- Helper Methods 160 161 /** 162 * Appends a {@link org.w3c.dom.Text Text} node to the current node 163 * if the content reported by the parser is not purely whitespace. 164 */ 165 private void addTextIfPresent() throws SAXException { 166 if (topText.length() > 0) { 167 String str = topText.toString(); 168 topText.setLength(0); 169 170 if (str.trim().length() > 0) { 171 // The contained text is not *pure* whitespace, so create 172 // a text node to hold it. Note that the "untrimmed" text 173 // is stored in the node. 174 try { 175 top.appendChild(doc.createTextNode(str)); 176 } catch (DOMException e) { 177 throw new SAXException(e.getMessage()); 178 } 179 } 180 } 181 } 182 183 // --------------------------------------------- ContentHandler Methods 184 185 186 /** 187 * Handle notification about text embedded within the current node. 188 * <p> 189 * An xml parser calls this when text is found. We need to ensure that this 190 * text gets attached to the new Node we are creating - except in the case 191 * where the only text in the node is whitespace. 192 * <p> 193 * There is a catch, however. According to the sax specification, a parser 194 * does not need to pass all of the text content of a node in one go; it can 195 * make multiple calls passing part of the data on each call. In particular, 196 * when the body of an element includes xml entity-references, at least some 197 * parsers make a separate call to this method to pass just the entity content. 198 * <p> 199 * In this method, we therefore just append the provided text to a 200 * "current text" buffer. When the element end is found, or a child element 201 * is found then we can check whether we have all-whitespace. See method 202 * addTextIfPresent. 203 * 204 * @param ch the characters from the XML document 205 * @param start the start position in the array 206 * @param length the number of characters to read from the array 207 * @throws SAXException if the DOM implementation throws an exception 208 */ 209 @Override 210 public void characters(char[] ch, int start, int length) 211 throws SAXException { 212 213 topText.append(ch, start, length); 214 } 215 216 217 /** 218 * Checks whether control needs to be returned to Digester. 219 * 220 * @param namespaceURI the namespace URI 221 * @param localName the local name 222 * @param qName the qualified (prefixed) name 223 * @throws SAXException if the DOM implementation throws an exception 224 */ 225 @Override 226 public void endElement(String namespaceURI, String localName, 227 String qName) 228 throws SAXException { 229 230 addTextIfPresent(); 231 232 try { 233 if (depth == 0) { 234 getDigester().setCustomContentHandler(oldContentHandler); 235 getDigester().push(root); 236 getDigester().endElement(namespaceURI, localName, qName); 237 } 238 239 top = top.getParentNode(); 240 depth--; 241 } catch (DOMException e) { 242 throw new SAXException(e.getMessage()); 243 } 244 245 } 246 247 248 /** 249 * Adds a new 250 * {@link org.w3c.dom.ProcessingInstruction ProcessingInstruction} to 251 * the current node. 252 * 253 * @param target the processing instruction target 254 * @param data the processing instruction data, or null if none was 255 * supplied 256 * @throws SAXException if the DOM implementation throws an exception 257 */ 258 @Override 259 public void processingInstruction(String target, String data) 260 throws SAXException { 261 262 try { 263 top.appendChild(doc.createProcessingInstruction(target, data)); 264 } catch (DOMException e) { 265 throw new SAXException(e.getMessage()); 266 } 267 268 } 269 270 271 /** 272 * Adds a new child {@link org.w3c.dom.Element Element} to the current 273 * node. 274 * 275 * @param namespaceURI the namespace URI 276 * @param localName the local name 277 * @param qName the qualified (prefixed) name 278 * @param atts the list of attributes 279 * @throws SAXException if the DOM implementation throws an exception 280 */ 281 @Override 282 public void startElement(String namespaceURI, String localName, 283 String qName, Attributes atts) 284 throws SAXException { 285 286 addTextIfPresent(); 287 288 try { 289 Node previousTop = top; 290 if ((localName == null) || (localName.length() == 0)) { 291 top = doc.createElement(qName); 292 } else { 293 top = doc.createElementNS(namespaceURI, localName); 294 } 295 for (int i = 0; i < atts.getLength(); i++) { 296 Attr attr = null; 297 if ((atts.getLocalName(i) == null) || 298 (atts.getLocalName(i).length() == 0)) { 299 attr = doc.createAttribute(atts.getQName(i)); 300 attr.setNodeValue(atts.getValue(i)); 301 ((Element)top).setAttributeNode(attr); 302 } else { 303 attr = doc.createAttributeNS(atts.getURI(i), 304 atts.getLocalName(i)); 305 attr.setNodeValue(atts.getValue(i)); 306 ((Element)top).setAttributeNodeNS(attr); 307 } 308 } 309 previousTop.appendChild(top); 310 depth++; 311 } catch (DOMException e) { 312 throw new SAXException(e.getMessage()); 313 } 314 315 } 316 317 } 318 319 320 // ----------------------------------------------------------- Constructors 321 322 323 /** 324 * Default constructor. Creates an instance of this rule that will create a 325 * DOM {@link org.w3c.dom.Element Element}. 326 */ 327 public NodeCreateRule() throws ParserConfigurationException { 328 329 this(Node.ELEMENT_NODE); 330 331 } 332 333 334 /** 335 * Constructor. Creates an instance of this rule that will create a DOM 336 * {@link org.w3c.dom.Element Element}, but lets you specify the JAXP 337 * <code>DocumentBuilder</code> that should be used when constructing the 338 * node tree. 339 * 340 * @param documentBuilder the JAXP <code>DocumentBuilder</code> to use 341 */ 342 public NodeCreateRule(DocumentBuilder documentBuilder) { 343 344 this(Node.ELEMENT_NODE, documentBuilder); 345 346 } 347 348 349 /** 350 * Constructor. Creates an instance of this rule that will create either a 351 * DOM {@link org.w3c.dom.Element Element} or a DOM 352 * {@link org.w3c.dom.DocumentFragment DocumentFragment}, depending on the 353 * value of the <code>nodeType</code> parameter. 354 * 355 * @param nodeType the type of node to create, which can be either 356 * {@link org.w3c.dom.Node#ELEMENT_NODE Node.ELEMENT_NODE} or 357 * {@link org.w3c.dom.Node#DOCUMENT_FRAGMENT_NODE Node.DOCUMENT_FRAGMENT_NODE} 358 * @throws IllegalArgumentException if the node type is not supported 359 */ 360 public NodeCreateRule(int nodeType) throws ParserConfigurationException { 361 362 this(nodeType, 363 DocumentBuilderFactory.newInstance().newDocumentBuilder()); 364 365 } 366 367 368 /** 369 * Constructor. Creates an instance of this rule that will create either a 370 * DOM {@link org.w3c.dom.Element Element} or a DOM 371 * {@link org.w3c.dom.DocumentFragment DocumentFragment}, depending on the 372 * value of the <code>nodeType</code> parameter. This constructor lets you 373 * specify the JAXP <code>DocumentBuilder</code> that should be used when 374 * constructing the node tree. 375 * 376 * @param nodeType the type of node to create, which can be either 377 * {@link org.w3c.dom.Node#ELEMENT_NODE Node.ELEMENT_NODE} or 378 * {@link org.w3c.dom.Node#DOCUMENT_FRAGMENT_NODE Node.DOCUMENT_FRAGMENT_NODE} 379 * @param documentBuilder the JAXP <code>DocumentBuilder</code> to use 380 * @throws IllegalArgumentException if the node type is not supported 381 */ 382 public NodeCreateRule(int nodeType, DocumentBuilder documentBuilder) { 383 384 if (!((nodeType == Node.DOCUMENT_FRAGMENT_NODE) || 385 (nodeType == Node.ELEMENT_NODE))) { 386 throw new IllegalArgumentException( 387 "Can only create nodes of type DocumentFragment and Element"); 388 } 389 this.nodeType = nodeType; 390 this.documentBuilder = documentBuilder; 391 392 } 393 394 395 // ----------------------------------------------------- Instance Variables 396 397 398 /** 399 * The JAXP <code>DocumentBuilder</code> to use. 400 */ 401 private DocumentBuilder documentBuilder = null; 402 403 404 /** 405 * The type of the node that should be created. Must be one of the 406 * constants defined in {@link org.w3c.dom.Node Node}, but currently only 407 * {@link org.w3c.dom.Node#ELEMENT_NODE Node.ELEMENT_NODE} and 408 * {@link org.w3c.dom.Node#DOCUMENT_FRAGMENT_NODE Node.DOCUMENT_FRAGMENT_NODE} 409 * are allowed values. 410 */ 411 private int nodeType = Node.ELEMENT_NODE; 412 413 414 // ----------------------------------------------------------- Rule Methods 415 416 417 /** 418 * When this method fires, the digester is told to forward all SAX 419 * ContentHandler events to the builder object, resulting in a DOM being 420 * built instead of normal digester rule-handling occurring. When the 421 * end of the current xml element is encountered, the original content 422 * handler is restored (expected to be NULL, allowing normal Digester 423 * operations to continue). 424 * 425 * @param namespaceURI the namespace URI of the matching element, or an 426 * empty string if the parser is not namespace aware or the element has 427 * no namespace 428 * @param name the local name if the parser is namespace aware, or just 429 * the element name otherwise 430 * @param attributes The attribute list of this element 431 * @throws Exception indicates a JAXP configuration problem 432 */ 433 @Override 434 public void begin(String namespaceURI, String name, Attributes attributes) 435 throws Exception { 436 437 Document doc = documentBuilder.newDocument(); 438 NodeBuilder builder = null; 439 if (nodeType == Node.ELEMENT_NODE) { 440 Element element = null; 441 if (getDigester().getNamespaceAware()) { 442 element = 443 doc.createElementNS(namespaceURI, name); 444 for (int i = 0; i < attributes.getLength(); i++) { 445 element.setAttributeNS(attributes.getURI(i), 446 attributes.getQName(i), 447 attributes.getValue(i)); 448 } 449 } else { 450 element = doc.createElement(name); 451 for (int i = 0; i < attributes.getLength(); i++) { 452 element.setAttribute(attributes.getQName(i), 453 attributes.getValue(i)); 454 } 455 } 456 builder = new NodeBuilder(doc, element); 457 } else { 458 builder = new NodeBuilder(doc, doc.createDocumentFragment()); 459 } 460 // the NodeBuilder constructor has already saved the original 461 // value of the digester's custom content handler (expected to 462 // be null, but we save it just in case). So now we just 463 // need to tell the digester to forward events to the builder. 464 getDigester().setCustomContentHandler(builder); 465 } 466 467 468 /** 469 * Pop the Node off the top of the stack. 470 */ 471 @Override 472 public void end() throws Exception { 473 474 digester.pop(); 475 476 } 477 478 479 }