/*
 * Copyright 1999,2004 The Apache Software Foundation.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.commons.feedparser;

import java.util.Iterator;
import java.util.List;

import org.jaxen.jdom.JDOMXPath;
import org.jdom.Attribute;
import org.jdom.Element;

/**
 * Handles parsing RSS.
 *
 * @author <a href="mailto:burton@apache.org">Kevin A. Burton (burtonator)</a>
 * @version $Id: RSSFeedParser.java 373614 2006-01-30 22:31:21Z mvdb $
 */
public class RSSFeedParser extends BaseParser {

    /**
     * Parse the given document as an RSS document.
     *
     * <p>Events are reported to the listener in this order: the feed version,
     * <code>init()</code>, the first <code>channel</code> element, every
     * <code>image</code> element, every <code>item</code> element, and
     * finally <code>finished()</code>.  Elements are matched by local name,
     * regardless of namespace.
     *
     * @param listener receives the parse events
     * @param doc the JDOM document to walk
     * @throws Exception propagated unchanged from the XPath queries, the
     *         parser helpers or the listener callbacks
     */
    public static void parse( FeedParserListener listener,
                              org.jdom.Document doc ) throws Exception {

        FeedParserState state = new FeedParserState( listener );

        FeedVersion v = new FeedVersion();

        v.isRSS = true;
        v.version = doc.getRootElement().getAttributeValue( "version" );

        listener.onFeedVersion( v );

        listener.init();

        //*** now process the channel. ***
        JDOMXPath xpath = new JDOMXPath( "/descendant::*[local-name() = 'channel']" );
        Element channel = (Element)xpath.selectSingleNode( doc );

        //NOTE(review): channel is null when the document has no channel
        //element; confirm the helpers below tolerate a null state.current.
        state.current = channel;

        doLocale( state, listener, channel );
        doChannel( listener, state );
        doLocaleEnd( state, listener, channel );

        //*** now process the images. ***
        xpath = new JDOMXPath( "/descendant::*[local-name() = 'image']" );
        List images = xpath.selectNodes( doc );
        Iterator i = images.iterator();

        while ( i.hasNext() ) {

            Element child = (Element)i.next();
            state.current = child;
            doParseImage( listener, state );

        }

        //*** now process all items. ***
        xpath = new JDOMXPath( "/descendant::*[local-name() = 'item']" );

        List items = xpath.selectNodes( doc );

        i = items.iterator();

        while ( i.hasNext() ) {

            Element item = (Element)i.next();

            state.current = item;

            doLocale( state, listener, item );
            doItem( listener, state );
            doLocaleEnd( state, listener, item );

        }

        listener.finished();

    }

    /**
     * Report the channel held in <code>state.current</code>: its title, its
     * link (trimmed when present) and its description, followed by the
     * channel-end event.
     *
     * @param listener receives <code>onChannel</code> / <code>onChannelEnd</code>
     * @param state parser state whose <code>current</code> element is the channel
     * @throws Exception propagated from the helpers or the listener
     */
    private static void doChannel( FeedParserListener listener,
                                   FeedParserState state ) throws Exception {

        String link = getChildElementTextByName( state, "link" );

        if ( link != null ) {
            link = link.trim();
        }

        listener.onChannel( state,
                            getChildElementTextByName( state, "title" ),
                            link,
                            getChildElementTextByName( state, "description" ) );

        listener.onChannelEnd();

    }

    /**
     * Report the image held in <code>state.current</code>.  The image is
     * only reported when a <code>url</code> value is found; title and link
     * are passed through as found (possibly null).
     *
     * @param listener receives <code>onImage</code> / <code>onImageEnd</code>
     * @param state parser state whose <code>current</code> element is the image
     * @throws Exception propagated from the helpers or the listener
     */
    private static void doParseImage( FeedParserListener listener,
                                      FeedParserState state ) throws Exception {

        String title = getChildElementTextByName( state, "title" );
        String link = getChildElementTextByName( state, "link" );
        String url = getChildElementTextByName( state, "url" );

        if ( url != null ) {
            listener.onImage( state, title, link, url );
            listener.onImageEnd();
        }

    }

    /**
     * Report the item held in <code>state.current</code>.
     *
     * <p>An item for which no resource can be determined is skipped
     * entirely (no events are fired).  Otherwise the events are, in order:
     * <code>onItem</code>, mod_content events, the XHTML body event, the
     * meta and tag sub-parsers, the enclosure link, and <code>onItemEnd</code>.
     *
     * @param listener receives the item events
     * @param state parser state whose <code>current</code> element is the item
     * @throws Exception propagated from the helpers, sub-parsers or listener
     */
    private static void doItem( FeedParserListener listener,
                                FeedParserState state ) throws Exception {

        String resource = findItemResource( state );

        if ( resource == null ) {
            return;
        }

        //title, link, description

        listener.onItem( state,
                         getChildElementTextByName( state, "title" ),
                         getChildElementTextByName( state, "link" ),
                         getChildElementTextByName( state, "description" ),
                         resource );

        doModContent( listener, state );
        doXHTMLBody( listener, state );

        MetaFeedParser.parse( listener, state );
        TagFeedParser.parse( listener, state );

        doEnclosures( listener, state );

        listener.onItemEnd();

    }

    /**
     * Determine the resource (permalink) of the item in
     * <code>state.current</code>.
     *
     * <p>The first node matching an <code>rdf:resource</code> attribute, a
     * <code>guid</code> child, or any descendant whose local name is
     * <code>link</code> is used.  A <code>guid</code> that is not explicitly
     * marked <code>isPermaLink="true"</code> is replaced by the text of the
     * item's <code>link</code> child when there is one.
     *
     * @param state parser state whose <code>current</code> element is the item
     * @return the resource, or null when no candidate node was found
     * @throws Exception if the XPath query fails
     */
    private static String findItemResource( FeedParserState state ) throws Exception {

        JDOMXPath xpath = new JDOMXPath( "@rdf:resource|guid|descendant::*[local-name() = 'link']" );
        xpath.addNamespace( NS.RDF.getPrefix(), NS.RDF.getURI() );
        Object node = xpath.selectSingleNode( state.current );

        if ( node instanceof Attribute ) {
            return ((Attribute)node).getValue();
        }

        if ( node instanceof Element == false ) {
            return null;
        }

        Element element = (Element)node;
        String resource = element.getText();

        if ( "guid".equals( element.getName() ) ) {

            //NOTE(review): RSS 2.0 says isPermaLink defaults to true when
            //the attribute is absent; this code treats absent as false and
            //prefers the link.  Behavior is kept as-is; confirm intent.
            boolean isPermaLink =
                "true".equals( element.getAttributeValue( "isPermaLink" ) );

            if ( ! isPermaLink ) {

                //resort to the 'link'
                Element link = state.current.getChild( "link" );

                if ( link != null ) {
                    resource = link.getText();
                }

            }

        }

        return resource;

    }

    /**
     * Report mod_content for the item in <code>state.current</code> when the
     * listener is a <code>ModContentFeedParserListener</code>.
     *
     * <p><code>content:encoded</code> takes precedence.  Otherwise the
     * <code>content:items / rdf:Bag / rdf:li / content:item / rdf:value</code>
     * form is looked up; if any element along that path is missing, nothing
     * is reported.
     *
     * @param listener the listener, used only if it supports mod_content
     * @param state parser state whose <code>current</code> element is the item
     * @throws Exception propagated from the listener
     */
    private static void doModContent( FeedParserListener listener,
                                      FeedParserState state ) throws Exception {

        if ( listener instanceof ModContentFeedParserListener == false ) {
            return;
        }

        ModContentFeedParserListener mcpl = (ModContentFeedParserListener)listener;

        Element encoded = state.current.getChild( "encoded", NS.CONTENT );

        if ( encoded != null ) {

            //FIXME: move to the onContent API defined within the
            //AtomFeedParser and deprecate this body handling.

            mcpl.onContentEncoded( new FeedParserState( encoded ),
                                   encoded.getText() );

            mcpl.onContentEncodedEnd();
            return;

        }

        Element items = state.current.getChild( "items", NS.CONTENT );

        if ( items == null ) {
            return;
        }

        //Walk the path one step at a time: getChild() returns null for a
        //missing child, so chaining the calls would throw a
        //NullPointerException on incomplete markup.
        Element bag = items.getChild( "Bag", NS.RDF );
        Element li = bag == null ? null : bag.getChild( "li", NS.RDF );
        Element item = li == null ? null : li.getChild( "item", NS.CONTENT );
        Element value = item == null ? null : item.getChild( "value", NS.RDF );

        if ( value == null ) {
            return;
        }

        //FIXME: move to the onContent API defined within the
        //AtomFeedParser and deprecate this body handling.

        mcpl.onContentItem( new FeedParserState( value ),
                            null,
                            null,
                            value );

        mcpl.onContentItemEnd();

    }

    /**
     * Report the <code>xhtml:body</code> child of the item in
     * <code>state.current</code> when the listener is an
     * <code>XHTMLFeedParserListener</code> and such a child exists.
     *
     * @param listener the listener, used only if it supports XHTML bodies
     * @param state parser state whose <code>current</code> element is the item
     * @throws Exception propagated from the listener
     */
    private static void doXHTMLBody( FeedParserListener listener,
                                     FeedParserState state ) throws Exception {

        if ( listener instanceof XHTMLFeedParserListener == false ) {
            return;
        }

        XHTMLFeedParserListener xfp = (XHTMLFeedParserListener)listener;

        Element body = state.current.getChild( "body", NS.XHTML );

        //FIXME: move to the onContent API defined within the AtomFeedParser
        //and deprecate this body handling.

        if ( body != null ) {
            xfp.onXHTMLBody( new FeedParserState( body ),
                             body );
            xfp.onXHTMLBodyEnd();
        }

    }

    /**
     * Report the first <code>enclosure</code> child of the item in
     * <code>state.current</code> as a link, when the listener is a
     * <code>LinkFeedParserListener</code>.
     *
     * <p>The link's <code>rel</code> and <code>title</code> are always null;
     * <code>type</code> and <code>href</code> come from the enclosure's
     * <code>type</code> and <code>url</code> attributes.  The length is 0
     * when the <code>length</code> attribute is absent or not a valid number.
     *
     * @param listener the listener, used only if it supports links
     * @param state parser state whose <code>current</code> element is the item
     * @throws Exception propagated from the listener
     */
    private static void doEnclosures( FeedParserListener listener,
                                      FeedParserState state ) throws Exception {

        if ( listener instanceof LinkFeedParserListener == false ) {
            return;
        }

        Element element = state.current.getChild( "enclosure" );

        if ( element == null ) {
            return;
        }

        LinkFeedParserListener linkFeedParserListener = (LinkFeedParserListener)listener;

        String rel = null;
        String type = element.getAttributeValue( "type" );
        String href = element.getAttributeValue( "url" );
        String title = null;
        long length = parseEnclosureLength( element.getAttributeValue( "length" ) );

        linkFeedParserListener.onLink( state,
                                       rel,
                                       type,
                                       href,
                                       title,
                                       length );

    }

    /**
     * Parse an enclosure <code>length</code> attribute as a long, so that
     * sizes beyond the int range (2 GiB and up) are accepted.
     *
     * @param raw the attribute value, possibly null
     * @return the parsed length, or 0 when the value is null or unparsable
     */
    private static long parseEnclosureLength( String raw ) {

        if ( raw == null ) {
            return 0;
        }

        try {
            return Long.parseLong( raw.trim() );
        } catch ( NumberFormatException ignored ) {
            //A malformed length is treated like a missing one so that a
            //single bad attribute does not abort parsing of the whole feed.
            return 0;
        }

    }

}