001 /* 002 * Copyright 1999,2004 The Apache Software Foundation. 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); 005 * you may not use this file except in compliance with the License. 006 * You may obtain a copy of the License at 007 * 008 * http://www.apache.org/licenses/LICENSE-2.0 009 * 010 * Unless required by applicable law or agreed to in writing, software 011 * distributed under the License is distributed on an "AS IS" BASIS, 012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 013 * See the License for the specific language governing permissions and 014 * limitations under the License. 015 */ 016 017 package org.apache.commons.feedparser; 018 019 import java.util.Iterator; 020 import java.util.List; 021 022 import org.jaxen.jdom.JDOMXPath; 023 import org.jdom.Element; 024 025 /** 026 * Handles parsing RSS metadata including dates 027 * 028 * @author <a href="mailto:burton@apache.org">Kevin A. Burton (burtonator)</a> 029 * @version $Id: TagFeedParser.java 373614 2006-01-30 22:31:21Z mvdb $ 030 */ 031 public class TagFeedParser { 032 033 /** 034 * 035 */ 036 public static void parse( FeedParserListener listener, 037 FeedParserState state ) throws Exception { 038 039 if ( listener instanceof TagFeedParserListener == false ) 040 return; 041 042 TagFeedParserListener tagFeedParserListener 043 = (TagFeedParserListener)listener; 044 045 if ( doParseModTaxonomy( tagFeedParserListener, state ) ) 046 return; 047 048 if ( doParseCategory( tagFeedParserListener, state ) ) 049 return; 050 051 doDcSubject( tagFeedParserListener, state ); 052 053 } 054 055 /** 056 * 057 * Parse out dc:subject tags 058 * 059 * Its not very clear how this is supposed to work. 060 * 061 * "Comment: Typically, a Subject will be expressed as keywords, key phrases 062 * or classification codes that describe a topic of the resource. 063 * Recommended best practice is to select a value from a controlled 064 * vocabulary or formal classification scheme." 065 * 066 * But this leaves it open to whether its a space separated list or that 067 * this needs to be per element. It seems that the real infringer is 068 * Delicious but I can fix this by processing dc:subject last. 069 * 070 * 071 */ 072 public static void doDcSubject( TagFeedParserListener listener, 073 FeedParserState state ) throws Exception { 074 075 JDOMXPath xpath = new JDOMXPath( "dc:subject" ); 076 xpath.addNamespace( NS.DC.getPrefix(), NS.DC.getURI() ); 077 078 List list = xpath.selectNodes( state.current ); 079 080 Iterator it = list.iterator(); 081 082 while ( it.hasNext() ) { 083 084 Element element = (Element)it.next(); 085 086 String tag = element.getText(); 087 String tagspace = null; 088 089 listener.onTag( state, tag, tagspace ); 090 listener.onTagEnd(); 091 092 } 093 094 } 095 096 /** 097 * Parse out atom:category and RSS 2.0/0.91 category 098 * 099 * 100 */ 101 public static boolean doParseCategory( TagFeedParserListener listener, 102 FeedParserState state ) throws Exception { 103 104 //XPath xpath = new XPath( "local-name() = 'category'" ); 105 106 JDOMXPath xpath = new JDOMXPath( "descendant::*[local-name() = 'category']" ); 107 108 //NOTE: this only works for elements without namespaces 109 //XPath xpath = new XPath( "category" ); 110 111 List list = xpath.selectNodes( state.current ); 112 113 Iterator it = list.iterator(); 114 115 boolean found = false; 116 117 while ( it.hasNext() ) { 118 119 Element element = (Element)it.next(); 120 121 String tag = element.getText(); 122 String tagspace = null; 123 124 listener.onTag( state, tag, tagspace ); 125 listener.onTagEnd(); 126 127 found = true; 128 129 } 130 131 return found; 132 133 } 134 135 public static boolean doParseModTaxonomy( TagFeedParserListener listener, 136 FeedParserState state ) throws Exception { 137 138 // <taxo:topics> 139 // <rdf:Bag> 140 // <rdf:li resource="http://del.icio.us/tag/hacking" /> 141 // <rdf:li resource="http://del.icio.us/tag/howto" /> 142 // <rdf:li resource="http://del.icio.us/tag/programming" /> 143 // <rdf:li resource="http://del.icio.us/tag/software" /> 144 // <rdf:li resource="http://del.icio.us/tag/tech" /> 145 // <rdf:li resource="http://del.icio.us/tag/technology" /> 146 // <rdf:li resource="http://del.icio.us/tag/tools" /> 147 // <rdf:li resource="http://del.icio.us/tag/tivo" /> 148 // </rdf:Bag> 149 // </taxo:topics> 150 151 JDOMXPath xpath = new JDOMXPath( "taxo:topics/rdf:Bag/rdf:li" ); 152 xpath.addNamespace( NS.RDF.getPrefix(), NS.RDF.getURI() ); 153 xpath.addNamespace( NS.TAXO.getPrefix(), NS.TAXO.getURI() ); 154 155 List list = xpath.selectNodes( state.current ); 156 157 Iterator it = list.iterator(); 158 159 boolean found = false; 160 161 while ( it.hasNext() ) { 162 163 Element element = (Element)it.next(); 164 165 String resource = element.getAttributeValue( "resource" ); 166 167 if ( resource != "" && resource != null ) { 168 169 String tag = resource; 170 String tagspace = resource; 171 172 int begin = resource.lastIndexOf( "/" ); 173 174 if ( begin != -1 ) { 175 ++begin; 176 tag = resource.substring( begin, resource.length() ); 177 } 178 179 listener.onTag( state, tag, tagspace ); 180 listener.onTagEnd(); 181 182 found = true; 183 } 184 185 } 186 187 return found; 188 189 } 190 191 }