/*
 * Copyright 1999,2004 The Apache Software Foundation.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.commons.feedparser.sax;

import java.util.HashMap;
import java.util.HashSet;

import org.apache.commons.feedparser.FeedParserException;
import org.apache.commons.feedparser.FeedParserListener;
import org.apache.commons.feedparser.FeedParserState;
import org.apache.commons.feedparser.FeedVersion;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;

/**
 * SAX entry point for RSS feeds. It is the head of a chain of
 * {@link BaseDefaultHandler} templates: every SAX event is handed down the
 * chain, each template collects the character data of the single element it
 * matches into {@link #properties}, and some templates notify the
 * {@link #listener} when their element closes.
 *
 * @author <a href="mailto:burton@apache.org">Kevin A. Burton (burtonator)</a>
 * @version $Id: RSSFeedParser.java 373622 2006-01-30 22:53:00Z mvdb $
 */
public class RSSFeedParser extends BaseDefaultHandler {

    /**
     * Receiver of the feed events. It is dereferenced without a null check in
     * {@link #startDocument()}, so it must be assigned before parsing starts.
     */
    public FeedParserListener listener = null;

    boolean onItem = false;

    /**
     * Element local name -> text collected for the most recently closed
     * element with that name. Shared by every template in the chain.
     */
    HashMap properties = new HashMap();

    FeedParserState state = new FeedParserState();

    static HashSet RSS_NAMESPACES = new HashSet();

    static HashSet RDF_NAMESPACES = new HashSet();

    static HashSet MOD_CONTENT_NAMESPACES = new HashSet();

    static {
        RSS_NAMESPACES.add( "http://purl.org/rss/1.0/" );
        RDF_NAMESPACES.add( "http://www.w3.org/1999/02/22-rdf-syntax-ns#" );
        MOD_CONTENT_NAMESPACES.add( "http://purl.org/rss/1.0/modules/content/" );
    }

    /**
     * Create a new <code>RSSFeedParser</code> instance and wire up the
     * template chain, starting with the channel template.
     */
    public RSSFeedParser() {

        //the head of the chain is not meant to match a real element; "FIXME"
        //is the placeholder local name the original author left here.
        super( "FIXME" );

        this.parser = this;

        setNext( new ChannelTemplate( this ) );

    }

    /**
     * Announce the feed version (flagged as RSS) and then initialise the
     * listener.
     *
     * @throws SAXException wrapping any FeedParserException the listener throws
     */
    public void startDocument() throws SAXException {

        try {

            FeedVersion version = new FeedVersion();
            version.isRSS = true;

            listener.onFeedVersion( version );
            listener.init();

        } catch ( FeedParserException f ) {
            throw new SAXException( f );
        }

    }

    /**
     * Tell the listener that the document is complete.
     *
     * @throws SAXException wrapping any FeedParserException the listener throws
     */
    public void endDocument() throws SAXException {

        try {
            listener.finished();
        } catch ( FeedParserException f ) {
            throw new SAXException( f );
        }

    }

    /**
     * Match rss:channel
     */
    class ChannelTemplate extends BaseDefaultHandler {

        public ChannelTemplate( RSSFeedParser parser ) {

            super( "channel", RSSFeedParser.RSS_NAMESPACES, parser );

            setNext( new URLTemplate( parser ) );

        }

        public void beginFeedElement() throws FeedParserException {

            parser.listener.onChannel( parser.state,
                                       getProperty( "title" ),
                                       getProperty( "link" ),
                                       getProperty( "description" ) );

        }

        public void endFeedElement() throws FeedParserException {
            parser.listener.onChannelEnd();
        }

    }

    /**
     * Match rss:url for images/etc
     */
    class URLTemplate extends BaseDefaultHandler {

        public URLTemplate( RSSFeedParser parser ) {

            super( "url", RSSFeedParser.RSS_NAMESPACES, parser );

            setNext( new ModContentTemplate( parser ) );

        }

    }

    /**
     * Match the <code>items</code> element of the mod_content namespace.
     */
    class ModContentTemplate extends BaseDefaultHandler {

        public ModContentTemplate( RSSFeedParser parser ) {

            super( "items", RSSFeedParser.MOD_CONTENT_NAMESPACES, parser );

            this.setNext( new RDFValueTemplate( parser ) );

        }

    }

    /**
     * Match the rdf:value for mod_content. The raw XML markup nested inside
     * the element is kept (see {@link BaseDefaultHandler#setIncludeContent}).
     */
    class RDFValueTemplate extends BaseDefaultHandler {

        public RDFValueTemplate( RSSFeedParser parser ) {

            super( "value", RSSFeedParser.RDF_NAMESPACES, parser );

            this.setIncludeContent( true );
            this.setNext( new RSSImageFeedParser( parser ) );

        }

        public void endFeedElement() throws FeedParserException {
            //intentionally empty: the collected value is not forwarded to the
            //listener yet.
        }

    }

}

/**
 * Match rss:image and report it through onImage / onImageEnd.
 */
class RSSImageFeedParser extends BaseDefaultHandler {

    public RSSImageFeedParser( RSSFeedParser parser ) {

        super( "image", RSSFeedParser.RSS_NAMESPACES, parser );

        setNext( new RSSItemFeedParser( parser ) );

    }

    //NOTE(review): unlike RSSItemFeedParser this template does not discard
    //stale title/link/url values when the element opens.  RSS 1.0 channels
    //carry an empty <image rdf:resource="..."/> reference which this template
    //presumably matches as well; clearing there would wipe the channel's
    //title/link before onChannel fires.  Confirm before changing.
    public void beginFeedElement() throws FeedParserException {

        parser.listener.onImage( parser.state,
                                 getProperty( "title" ),
                                 getProperty( "link" ),
                                 getProperty( "url" ) );

    }

    public void endFeedElement() throws FeedParserException {
        parser.listener.onImageEnd();
    }

}

/**
 * Match rss:item and report it through onItem / onItemEnd.
 */
class RSSItemFeedParser extends BaseDefaultHandler {

    public RSSItemFeedParser( RSSFeedParser parser ) {

        super( "item", RSSFeedParser.RSS_NAMESPACES, parser );

        setNext( new RSSTitleFeedParser( parser ) );

    }

    /**
     * Forget the title/link/description left behind by whatever element was
     * parsed before this item, then continue with the normal processing.
     *
     * The property map is shared by the whole chain and is never cleared
     * elsewhere, so without this an item that omits one of its members would
     * be reported with the value of the previous item, image or channel. A
     * missing member is reported as <code>null</code> rather than treated as
     * a fatal error, which keeps parsing tolerant of sparse items.
     */
    public void startElement( String namespace,
                              String local,
                              String qName,
                              Attributes attributes ) throws SAXException {

        if ( isLocal( namespace, local ) ) {
            parser.properties.remove( "title" );
            parser.properties.remove( "link" );
            parser.properties.remove( "description" );
        }

        super.startElement( namespace, local, qName, attributes );

    }

    public void beginFeedElement() throws FeedParserException {

        parser.listener.onItem( parser.state,
                                getProperty( "title" ),
                                getProperty( "link" ),
                                getProperty( "description" ),
                                null );

    }

    public void endFeedElement() throws FeedParserException {
        parser.listener.onItemEnd();
    }

}

/**
 * Collect the text of any <code>title</code> element (no namespace filter).
 */
class RSSTitleFeedParser extends BaseDefaultHandler {

    public RSSTitleFeedParser( RSSFeedParser parser ) {

        super( "title", parser );

        setNext( new RSSLinkFeedParser( parser ) );

    }

}

/**
 * Collect the text of any <code>link</code> element (no namespace filter).
 */
class RSSLinkFeedParser extends BaseDefaultHandler {

    public RSSLinkFeedParser( RSSFeedParser parser ) {

        super( "link", parser );

        setNext( new RSSDescriptionFeedParser( parser ) );

    }

}

/**
 * Collect the text of any <code>description</code> element (no namespace
 * filter). This is the last template of the chain.
 */
class RSSDescriptionFeedParser extends BaseDefaultHandler {

    public RSSDescriptionFeedParser( RSSFeedParser parser ) {
        super( "description", parser );
    }

}

/**
 * dc:subject support. Not linked into the chain by any code in this file and
 * both callbacks are still empty.
 */
class RSSDcSubjectFeedParser extends BaseDefaultHandler {

    public RSSDcSubjectFeedParser( RSSFeedParser parser ) {
        super( "subject", parser );
    }

    public void beginFeedElement() {

        //only if it's dc:subject
        //listener.onSubject( parser.state, parser.getProperty( "subject" ) );

    }

    public void endFeedElement() {

    }

}

/**
 * One link of the template chain. A handler matches a single element (by
 * local name and, optionally, a set of namespaces), buffers the character
 * data seen while that element is open, stores it in the parser's property
 * map when the element closes, and forwards every SAX event to the next
 * handler in the chain.
 */
class BaseDefaultHandler extends DefaultHandler {

    public static int STRING_BUFFER_CAPACITY = 100000;

    //BUG: this will break on nested code:
    //
    // <foo>
    //     <foo>
    //
    //     </foo>
    //
    // </foo>
    //
    // won't be smart enough to realize it's nested

    /**
     * The local name of the element
     */
    private String local = null;

    //FIXME: move to a FastStringBuffer that's not synchronized.
    private StringBuffer buffer = null;

    /** True between the start and the end of the matched element. */
    private boolean onElement = false;

    private boolean includeContent = false;

    BaseDefaultHandler next = null;

    FeedParserListener listener = null;

    RSSFeedParser parser = null;

    /** Namespace URI -> prefix, shared by every handler instance. */
    static HashMap nsPrefixMapping = new HashMap();

    /**
     * The namespaces the matched element may live in; <code>null</code>
     * disables the namespace check.
     */
    HashSet namespaces = null;

    public BaseDefaultHandler( String local ) {
        this.local = local;
    }

    public BaseDefaultHandler( String local, RSSFeedParser parser ) {

        this.local = local;
        this.parser = parser;

    }

    public BaseDefaultHandler( String local,
                               HashSet namespaces,
                               RSSFeedParser parser ) {

        this.local = local;
        this.namespaces = namespaces;
        this.parser = parser;

    }

    /**
     * If true we include the RAW XML content from the parser.
     */
    public void setIncludeContent( boolean includeContent ) {
        this.includeContent = includeContent;
    }

    /**
     * Set the next template to process in this chain.
     */
    public void setNext( BaseDefaultHandler next ) {
        this.next = next;
    }

    /**
     * Return the value of character data for the element.
     *
     * @return the buffered text, or <code>null</code> when nothing has been
     * buffered yet or the buffer is empty
     */
    public String toString() {

        if ( buffer == null || buffer.length() == 0 )
            return null;

        return buffer.toString();

    }

    /**
     * Return true if the namespace is valid and this class is handling the
     * given element name
     */
    boolean isLocal( String namespace, String local ) {

        //the namespace is only tested when both sides are known
        boolean filtered = namespace != null && namespaces != null;

        if ( filtered && ! namespaces.contains( namespace ) )
            return false;

        return this.local.equals( local );

    }

    /**
     * Get the value of a string property we found while parsing
     *
     * @return the stored value, or <code>null</code> when there is none
     */
    public String getProperty( String name ) {
        return (String)parser.properties.get( name );
    }

    /**
     * @return true only when the named property is exactly "true"
     */
    public boolean getBoolean( String name ) {
        return "true".equals( getProperty( name ) );
    }

    /**
     * Method to call when we're finished processing this element but BEFORE
     * processing of the next element in the chain.
     */
    public void beginFeedElement() throws FeedParserException {}

    /**
     * Method to call when we're finished processing this element but AFTER
     * processing of the next element in the chain.
     */
    public void endFeedElement() throws FeedParserException {}

    /**
     * Append "prefix:" to the buffer when a prefix has been recorded for the
     * namespace.
     *
     * @return true when a prefix was written
     */
    private boolean includeContentPrefix( String namespace ) {

        if ( namespace == null )
            return false;

        String prefix = (String)nsPrefixMapping.get( namespace );

        if ( prefix == null )
            return false;

        buffer.append( prefix );
        buffer.append( ":" );

        return true;

    }

    // **** SAX DefaultHandler **************************************************

    /**
     * Keep track of namespaces. Mappings with a null or empty prefix are not
     * recorded.
     */
    public void startPrefixMapping( String prefix,
                                    String namespace ) throws SAXException {

        if ( prefix == null || "".equals( prefix ) )
            return;

        nsPrefixMapping.put( namespace, prefix );

    }

    /**
     * Start buffering when the matched element opens, forward the event down
     * the chain and, for raw-content handlers, re-serialise the start tag
     * into the buffer.
     */
    public void startElement( String namespace,
                              String local,
                              String qName,
                              Attributes attributes ) throws SAXException {

        if ( isLocal( namespace, local ) ) {

            //FIXME: is there a more efficient way to clear a buffer than this?

            //FIXME: also only do this if it's necessary and content has
            //actually been added.  This will save some performance.

            if ( buffer == null ) {
                buffer = new StringBuffer( 1000 );
            } else {
                buffer.setLength( 0 );
            }

            onElement = true;

        }

        if ( next != null )
            next.startElement( namespace, local, qName, attributes );

        if ( ! ( includeContent && onElement ) )
            return;

        buffer.append( "<" );

        boolean hasPrefix = includeContentPrefix( namespace );

        buffer.append( local );

        //without a known prefix the namespace is declared inline
        if ( ! hasPrefix && namespace != null ) {
            buffer.append( " xmlns=\"" );
            buffer.append( namespace );
            buffer.append( "\"" );
        }

        //now include attributes

        int count = attributes.getLength();

        for ( int i = 0; i < count; ++i ) {

            buffer.append( " " );
            buffer.append( attributes.getQName( i ) );
            buffer.append( "=" );
            buffer.append( "\"" );
            buffer.append( attributes.getValue( i ) );
            buffer.append( "\"" );

        }

        buffer.append( ">" );

    }

    /**
     * Buffer the text while the matched element is open and forward the event
     * down the chain.
     */
    public void characters( char[] ch,
                            int start,
                            int length ) throws SAXException {

        if ( onElement )
            buffer.append( ch, start, length );

        if ( next != null )
            next.characters( ch, start, length );

    }

    /**
     * When the matched element closes: publish the buffered text under the
     * element's local name, call {@link #beginFeedElement()}, let the rest of
     * the chain see the event and finally call {@link #endFeedElement()}.
     * For raw-content handlers the end tags of nested elements are
     * re-serialised into the buffer.
     *
     * @throws SAXException wrapping any FeedParserException raised by the
     * callbacks
     */
    public void endElement( String namespace,
                            String local,
                            String qName ) throws SAXException {

        boolean matched = isLocal( namespace, local );

        try {

            if ( matched ) {

                onElement = false;
                parser.properties.put( local, toString() );

                beginFeedElement();

            }

            if ( next != null )
                next.endElement( namespace, local, qName );

            if ( matched )
                endFeedElement();

            //onElement is already false for the matched element itself, so
            //only the end tags of nested elements are written here.
            if ( includeContent && onElement ) {

                buffer.append( "</" );

                includeContentPrefix( namespace );

                buffer.append( local );
                buffer.append( ">" );

            }

        } catch ( FeedParserException fpe ) {

            throw new SAXException( fpe );

        }

    }

}