001package org.apache.commons.digester3.examples.api.documentmarkup; 002 003/* 004 * Licensed to the Apache Software Foundation (ASF) under one or more 005 * contributor license agreements. See the NOTICE file distributed with 006 * this work for additional information regarding copyright ownership. 007 * The ASF licenses this file to You under the Apache License, Version 2.0 008 * (the "License"); you may not use this file except in compliance with 009 * the License. You may obtain a copy of the License at 010 * 011 * http://www.apache.org/licenses/LICENSE-2.0 012 * 013 * Unless required by applicable law or agreed to in writing, software 014 * distributed under the License is distributed on an "AS IS" BASIS, 015 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 016 * See the License for the specific language governing permissions and 017 * limitations under the License. 018 */ 019 020import org.apache.commons.digester3.Digester; 021import org.apache.commons.digester3.Rule; 022 023import java.util.List; 024import javax.xml.parsers.SAXParser; 025import org.xml.sax.XMLReader; 026import org.xml.sax.SAXException; 027import org.xml.sax.Attributes; 028 029/** 030 * This is a subclass of digester which supports rules which implement 031 * the TextSegmentHandler interface, causing the "textSegment" method 032 * on each matching rule (of the appropriate type) to be invoked when 033 * an element contains a segment of text followed by a child element. 034 * <p> 035 * See the readme file included with this example for more information. 036 */ 037public class MarkupDigester 038 extends Digester 039{ 040 041 /** See equivalent constructor in Digester class. */ 042 public MarkupDigester() 043 { 044 } 045 046 /** See equivalent constructor in Digester class. */ 047 public MarkupDigester( SAXParser parser ) 048 { 049 super( parser ); 050 } 051 052 /** See equivalent constructor in Digester class. */ 053 public MarkupDigester( XMLReader reader ) 054 { 055 super( reader ); 056 } 057 058 //=================================================================== 059 060 /** 061 * The text found in the current element since the last child element. 062 */ 063 protected StringBuilder currTextSegment = new StringBuilder(); 064 065 /** 066 * Process notification of character data received from the body of 067 * an XML element. 068 * 069 * @param buffer The characters from the XML document 070 * @param start Starting offset into the buffer 071 * @param length Number of characters from the buffer 072 * 073 * @exception SAXException if a parsing error is to be reported 074 */ 075 @Override 076 public void characters( char buffer[], int start, int length ) 077 throws SAXException 078 { 079 super.characters( buffer, start, length ); 080 currTextSegment.append( buffer, start, length ); 081 } 082 083 /** 084 * Process notification of the start of an XML element being reached. 085 * 086 * @param namespaceURI The Namespace URI, or the empty string if the element 087 * has no Namespace URI or if Namespace processing is not being performed. 088 * @param localName The local name (without prefix), or the empty 089 * string if Namespace processing is not being performed. 090 * @param qName The qualified name (with prefix), or the empty 091 * string if qualified names are not available. 092 * @param list The attributes attached to the element. If there are 093 * no attributes, it shall be an empty Attributes object. 094 * @exception SAXException if a parsing error is to be reported 095 */ 096 @Override 097 public void startElement( String namespaceURI, String localName, String qName, Attributes list ) 098 throws SAXException 099 { 100 handleTextSegments(); 101 102 // Unlike bodyText, which accumulates despite intervening child 103 // elements, currTextSegment gets cleared here. This means that 104 // we don't need to save it on a stack either. 105 currTextSegment.setLength( 0 ); 106 107 super.startElement( namespaceURI, localName, qName, list ); 108 } 109 110 /** 111 * Process notification of the end of an XML element being reached. 112 * 113 * @param namespaceURI - The Namespace URI, or the empty string if the 114 * element has no Namespace URI or if Namespace processing is not 115 * being performed. 116 * @param localName - The local name (without prefix), or the empty 117 * string if Namespace processing is not being performed. 118 * @param qName - The qualified XML 1.0 name (with prefix), or the 119 * empty string if qualified names are not available. 120 * @exception SAXException if a parsing error is to be reported 121 */ 122 @Override 123 public void endElement( String namespaceURI, String localName, String qName ) 124 throws SAXException 125 { 126 handleTextSegments(); 127 currTextSegment.setLength( 0 ); 128 super.endElement( namespaceURI, localName, qName ); 129 } 130 131 /** 132 * Iterate over the list of rules most recently matched, and 133 * if any of them implement the TextSegmentHandler interface then 134 * invoke that rule's textSegment method passing the current 135 * segment of text from the xml element body. 136 */ 137 private void handleTextSegments() 138 throws SAXException 139 { 140 if ( currTextSegment.length() > 0 ) 141 { 142 String segment = currTextSegment.toString(); 143 List<Rule> parentMatches = getMatches().peek(); 144 int len = parentMatches.size(); 145 for ( int i = 0; i < len; ++i ) 146 { 147 Rule r = parentMatches.get( i ); 148 if ( r instanceof TextSegmentHandler ) 149 { 150 TextSegmentHandler h = (TextSegmentHandler) r; 151 try 152 { 153 h.textSegment( segment ); 154 } 155 catch ( Exception e ) 156 { 157 throw createSAXException( e ); 158 } 159 } 160 } 161 } 162 } 163 164}