001 /* 002 * Copyright 1999,2004 The Apache Software Foundation. 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); 005 * you may not use this file except in compliance with the License. 006 * You may obtain a copy of the License at 007 * 008 * http://www.apache.org/licenses/LICENSE-2.0 009 * 010 * Unless required by applicable law or agreed to in writing, software 011 * distributed under the License is distributed on an "AS IS" BASIS, 012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 013 * See the License for the specific language governing permissions and 014 * limitations under the License. 015 */ 016 017 package org.apache.commons.feedparser; 018 019 import java.util.Iterator; 020 import java.util.List; 021 022 import org.apache.commons.feedparser.locate.EntityDecoder; 023 import org.jaxen.jdom.JDOMXPath; 024 import org.jdom.Attribute; 025 import org.jdom.CDATA; 026 import org.jdom.Comment; 027 import org.jdom.Element; 028 import org.jdom.Text; 029 import org.jdom.output.XMLOutputter; 030 031 /** 032 * http://www.intertwingly.net/wiki/pie/FrontPage 033 * 034 * http://www.ietf.org/internet-drafts/draft-ietf-atompub-format-05.txt 035 * 036 * http://www.ietf.org/internet-drafts/draft-ietf-atompub-format-04.txt 037 * 038 * http://www.mnot.net/drafts/draft-nottingham-atom-format-02.html 039 * 040 * http://www.ietf.org/html.charters/atompub-charter.html 041 * 042 * http://www.ietf.org/internet-drafts/draft-ietf-atompub-format-01.txt 043 * 044 * @author <a href="mailto:burton@apache.org">Kevin A. Burton (burtonator)</a> 045 * @version $Id: AtomFeedParser.java 373614 2006-01-30 22:31:21Z mvdb $ 046 */ 047 public class AtomFeedParser extends BaseParser { 048 049 /** 050 * Parse this feed. 051 * 052 * 053 */ 054 public static void parse( FeedParserListener listener, 055 org.jdom.Document doc ) throws Exception { 056 057 FeedParserState state = new FeedParserState( listener ); 058 059 FeedVersion v = new FeedVersion(); 060 v.isAtom = true; 061 listener.onFeedVersion( v ); 062 063 listener.init(); 064 065 Element root = doc.getRootElement(); 066 067 doLocale( state, listener, root ); 068 069 doChannel( state, listener, doc ); 070 doEntry( state, listener, doc ); 071 072 doLocaleEnd( state, listener, root ); 073 074 listener.finished(); 075 076 } 077 078 private static void doChannel( FeedParserState state, 079 FeedParserListener listener, 080 org.jdom.Document doc ) throws Exception { 081 082 Element root = doc.getRootElement(); 083 084 //perform onChannel method... (title, link, description) 085 String title = selectText( "/atom:feed/atom:title", root ); 086 087 //xpath = new XPath( "/atom:feed/atom:link[atom:rel='alternate']" ); 088 089 //perform onChannel method... (title, link, description) 090 String link = selectSingleAttribute( "/atom:feed/atom:link[@rel='alternate'][@type='text/html']/@href", root ); 091 092 //String description = selectText( "/atom:feed/atom:summary[@rel='text/plain']", doc ); 093 094 String tagline = selectText( "/atom:feed/atom:tagline", root ); 095 096 //state.current = title; 097 listener.onChannel( state, title, link, tagline ); 098 099 listener.onChannelEnd(); 100 101 } 102 103 private static void doEntry( FeedParserState state, 104 FeedParserListener listener, 105 org.jdom.Document doc ) throws Exception { 106 107 JDOMXPath xpath = new JDOMXPath( "/atom:feed/atom:entry" ); 108 xpath.setNamespaceContext( NS.context ); 109 110 List items = xpath.selectNodes( doc ); 111 112 Iterator i = items.iterator(); 113 114 //update items. 115 while ( i.hasNext() ) { 116 117 Element child = (Element)i.next(); 118 119 doLocale( state, listener, child ); 120 121 String title = selectText( "atom:title", child ); 122 123 // The "atom:link" element is a Link construct that conveys a URI 124 // associated with the entry. The nature of the relationship as well 125 // as the link itself is determined by the element's content. 126 127 // atom:entry elements MUST contain at least one atom:link element 128 // with a rel attribute value of "alternate". 129 130 // atom:entry elements MUST NOT contain more than one atom:link 131 // element with a rel attribute value of "alternate" that has the 132 // same type attribute value. 133 134 // atom:entry elements MAY contain additional atom:link elements 135 // beyond those described above. 136 137 String link = selectSingleAttribute( "atom:link[@rel='alternate'][@type='text/html']/@href", 138 child ); 139 140 // The "atom:summary" element is a Content construct that conveys a 141 // short summary, abstract or excerpt of the entry. atom:entry 142 // elements MAY contain an atom:created element, but MUST NOT 143 // contain more than one. 144 145 //FIXME: what if there is no type attribute specified? Whats the default? 146 147 // Content constructs MAY have a "type" attribute, whose value 148 // indicates the media type of the content. When present, this 149 // attribute's value MUST be a media type [RFC2045]. If this 150 // attribute is not present, processors MUST behave as if it were 151 // present with a value of "text/ plain". 152 153 String description = null; 154 155 Element summary = child.getChild( "summary", NS.ATOM ); 156 157 if ( summary != null ) { 158 159 String type = summary.getAttributeValue( "type", NS.ATOM ); 160 161 if ( type == null || "text/plain".equals( type ) ) 162 description = summary.getText(); 163 164 } 165 166 state.current = child; 167 168 listener.onItem( state, title, link, description, link ); 169 170 doLink( state, listener, child ); 171 172 doMeta( state, listener, child ); 173 174 doContent( state, listener, child ); 175 176 MetaFeedParser.parse( listener, state ); 177 TagFeedParser.parse( listener, state ); 178 179 listener.onItemEnd(); 180 doLocale( state, listener, child ); 181 182 } 183 184 } 185 186 private static void doLink( FeedParserState state, 187 FeedParserListener listener, 188 Element current ) throws Exception { 189 190 if ( listener instanceof LinkFeedParserListener == false ) 191 return; 192 193 LinkFeedParserListener lfpl = (LinkFeedParserListener)listener; 194 195 JDOMXPath xpath = new JDOMXPath( "atom:link" ); 196 xpath.setNamespaceContext( NS.context ); 197 198 List items = xpath.selectNodes( current ); 199 200 Iterator it = items.iterator(); 201 202 //update items. 203 while ( it.hasNext() ) { 204 205 Element link = (Element)it.next(); 206 207 String href = link.getAttributeValue( "href" ); 208 String rel = link.getAttributeValue( "rel" ); 209 String type = link.getAttributeValue( "type" ); 210 211 String title = null; 212 long length = -1; 213 214 lfpl.onLink( state, rel, type, href, title, length ); 215 216 } 217 218 } 219 220 private static void doContent( FeedParserState state, 221 FeedParserListener listener, 222 Element current ) throws Exception { 223 224 if ( ! (listener instanceof ContentFeedParserListener) ) 225 return; 226 227 ContentFeedParserListener clistener = (ContentFeedParserListener)listener; 228 229 JDOMXPath xpath = new JDOMXPath( "atom:content" ); 230 xpath.setNamespaceContext( NS.context ); 231 232 List items = xpath.selectNodes( current ); 233 234 Iterator i = items.iterator(); 235 236 //update items. 237 while ( i.hasNext() ) { 238 239 Element content = (Element)i.next(); 240 241 doLocale( state, listener, content ); 242 243 String type = content.getAttributeValue( "type", "text/plain" ); 244 String mode = content.getAttributeValue( "mode" ); 245 246 String format = null; 247 String encoding = null; 248 249 String value = null; 250 251 // 252 if ( "xml".equals( mode ) ) { 253 value = content.getText(); 254 } else if ( "escaped".equals( mode ) ) { 255 256 //need to decode the content here < -> < etc. 257 value = getXMLOfContent( content.getContent() ); 258 value = EntityDecoder.decode( value ); 259 } else { 260 mode = "xml"; 261 value = getXMLOfContent( content.getContent() ); 262 } 263 264 boolean isSummary = false; 265 266 clistener.onContent( state, type, format, encoding, mode, value, isSummary ); 267 268 doLocaleEnd( state, listener, content ); 269 270 } 271 272 xpath = new JDOMXPath( "atom:summary[@type='application/xhtml+xml']" ); 273 xpath.setNamespaceContext( NS.context ); 274 Element e = (Element)xpath.selectSingleNode( current ); 275 276 if ( e != null ) { 277 278 String type = "text/html"; 279 String format = "application/xhtml+xml"; 280 String encoding = null; 281 String mode = "xml"; 282 283 //FIXME: get xml:base to expand the URIs. 284 285 String value = getXMLOfContent( e ); 286 boolean isSummary = true; 287 288 clistener.onContent( state, type, format, encoding, mode, value, isSummary ); 289 290 } 291 292 } 293 294 private static String getXMLOfContent( Element element ) { 295 return getXMLOfContent( element.getContent() ); 296 } 297 298 /** 299 * Get the content of the given element. 300 * 301 * 302 */ 303 private static String getXMLOfContent( List content ) { 304 305 //NOTE: Fri Mar 04 2005 03:59 PM (burton1@rojo.com): in my profiling I 306 //found that this is a BIG memory allocater. FIXME: We SHOULD be able 307 //to do the same thing we do for xhtml:body RIGHT? 308 309 StringBuffer buff = new StringBuffer( 10000 ); 310 311 // NOTE: Changed this constructor to use the default Format. Since the 312 // constructor used no longer exists in jdom 1.0. 313 XMLOutputter outputter = new XMLOutputter(); 314 315 Iterator it = content.iterator(); 316 317 while ( it.hasNext() ) { 318 319 Object next = it.next(); 320 321 if ( next instanceof String ) { 322 buff.append( (String)next ); 323 } else if ( next instanceof Element ) { 324 buff.append( outputter.outputString( (Element)next ) ); 325 } else if ( next instanceof CDATA ) { 326 buff.append( ((CDATA)next).getText() ); 327 } else if ( next instanceof Comment ) { 328 buff.append( outputter.outputString( (Comment)next ) ); 329 } else if ( next instanceof Text ) { 330 buff.append( outputter.outputString( (Text)next ) ); 331 } 332 333 } 334 335 return buff.toString(); 336 337 } 338 339 private static void doMeta( FeedParserState state, 340 FeedParserListener listener, 341 Element element ) throws Exception { 342 343 //FIXME: move this code to MetaFeedParser... 344 345 if ( ! (listener instanceof MetaFeedParserListener) ) 346 return; 347 348 MetaFeedParserListener mlistener = (MetaFeedParserListener)listener; 349 350 //handle issued, created, and then dublin core.. 351 String subject = selectText( "dc:subject", element); 352 353 if ( subject != null ) { 354 mlistener.onSubject( state, subject ); 355 mlistener.onSubjectEnd(); 356 } 357 358 } 359 360 private static Element selectSingleElement( String query, org.jdom.Document doc ) throws Exception { 361 362 JDOMXPath xpath = new JDOMXPath( query ); 363 xpath.setNamespaceContext( NS.context ); 364 365 //perform onChannel method... (title, link, description) 366 return (Element)xpath.selectSingleNode( doc ); 367 368 } 369 370 private static String selectSingleAttribute( String query, Element element ) throws Exception { 371 372 JDOMXPath xpath = new JDOMXPath( query ); 373 xpath.setNamespaceContext( NS.context ); 374 375 //perform onChannel method... (title, link, description) 376 Attribute a = (Attribute)xpath.selectSingleNode( element ); 377 if ( a == null ) 378 return null; 379 380 return a.getValue(); 381 382 } 383 384 } 385