001 /*
002 * Copyright 1999,2004 The Apache Software Foundation.
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License");
005 * you may not use this file except in compliance with the License.
006 * You may obtain a copy of the License at
007 *
008 * http://www.apache.org/licenses/LICENSE-2.0
009 *
010 * Unless required by applicable law or agreed to in writing, software
011 * distributed under the License is distributed on an "AS IS" BASIS,
012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 * See the License for the specific language governing permissions and
014 * limitations under the License.
015 */
016
017 package org.apache.commons.feedparser;
018
019 import java.util.Iterator;
020 import java.util.List;
021
022 import org.jaxen.jdom.JDOMXPath;
023 import org.jdom.Element;
024
025 /**
026 * Handles parsing RSS metadata including dates
027 *
028 * @author <a href="mailto:burton@apache.org">Kevin A. Burton (burtonator)</a>
029 * @version $Id: TagFeedParser.java 373614 2006-01-30 22:31:21Z mvdb $
030 */
031 public class TagFeedParser {
032
033 /**
034 *
035 */
036 public static void parse( FeedParserListener listener,
037 FeedParserState state ) throws Exception {
038
039 if ( listener instanceof TagFeedParserListener == false )
040 return;
041
042 TagFeedParserListener tagFeedParserListener
043 = (TagFeedParserListener)listener;
044
045 if ( doParseModTaxonomy( tagFeedParserListener, state ) )
046 return;
047
048 if ( doParseCategory( tagFeedParserListener, state ) )
049 return;
050
051 doDcSubject( tagFeedParserListener, state );
052
053 }
054
055 /**
056 *
057 * Parse out dc:subject tags
058 *
059 * Its not very clear how this is supposed to work.
060 *
061 * "Comment: Typically, a Subject will be expressed as keywords, key phrases
062 * or classification codes that describe a topic of the resource.
063 * Recommended best practice is to select a value from a controlled
064 * vocabulary or formal classification scheme."
065 *
066 * But this leaves it open to whether its a space separated list or that
067 * this needs to be per element. It seems that the real infringer is
068 * Delicious but I can fix this by processing dc:subject last.
069 *
070 *
071 */
072 public static void doDcSubject( TagFeedParserListener listener,
073 FeedParserState state ) throws Exception {
074
075 JDOMXPath xpath = new JDOMXPath( "dc:subject" );
076 xpath.addNamespace( NS.DC.getPrefix(), NS.DC.getURI() );
077
078 List list = xpath.selectNodes( state.current );
079
080 Iterator it = list.iterator();
081
082 while ( it.hasNext() ) {
083
084 Element element = (Element)it.next();
085
086 String tag = element.getText();
087 String tagspace = null;
088
089 listener.onTag( state, tag, tagspace );
090 listener.onTagEnd();
091
092 }
093
094 }
095
096 /**
097 * Parse out atom:category and RSS 2.0/0.91 category
098 *
099 *
100 */
101 public static boolean doParseCategory( TagFeedParserListener listener,
102 FeedParserState state ) throws Exception {
103
104 //XPath xpath = new XPath( "local-name() = 'category'" );
105
106 JDOMXPath xpath = new JDOMXPath( "descendant::*[local-name() = 'category']" );
107
108 //NOTE: this only works for elements without namespaces
109 //XPath xpath = new XPath( "category" );
110
111 List list = xpath.selectNodes( state.current );
112
113 Iterator it = list.iterator();
114
115 boolean found = false;
116
117 while ( it.hasNext() ) {
118
119 Element element = (Element)it.next();
120
121 String tag = element.getText();
122 String tagspace = null;
123
124 listener.onTag( state, tag, tagspace );
125 listener.onTagEnd();
126
127 found = true;
128
129 }
130
131 return found;
132
133 }
134
135 public static boolean doParseModTaxonomy( TagFeedParserListener listener,
136 FeedParserState state ) throws Exception {
137
138 // <taxo:topics>
139 // <rdf:Bag>
140 // <rdf:li resource="http://del.icio.us/tag/hacking" />
141 // <rdf:li resource="http://del.icio.us/tag/howto" />
142 // <rdf:li resource="http://del.icio.us/tag/programming" />
143 // <rdf:li resource="http://del.icio.us/tag/software" />
144 // <rdf:li resource="http://del.icio.us/tag/tech" />
145 // <rdf:li resource="http://del.icio.us/tag/technology" />
146 // <rdf:li resource="http://del.icio.us/tag/tools" />
147 // <rdf:li resource="http://del.icio.us/tag/tivo" />
148 // </rdf:Bag>
149 // </taxo:topics>
150
151 JDOMXPath xpath = new JDOMXPath( "taxo:topics/rdf:Bag/rdf:li" );
152 xpath.addNamespace( NS.RDF.getPrefix(), NS.RDF.getURI() );
153 xpath.addNamespace( NS.TAXO.getPrefix(), NS.TAXO.getURI() );
154
155 List list = xpath.selectNodes( state.current );
156
157 Iterator it = list.iterator();
158
159 boolean found = false;
160
161 while ( it.hasNext() ) {
162
163 Element element = (Element)it.next();
164
165 String resource = element.getAttributeValue( "resource" );
166
167 if ( resource != "" && resource != null ) {
168
169 String tag = resource;
170 String tagspace = resource;
171
172 int begin = resource.lastIndexOf( "/" );
173
174 if ( begin != -1 ) {
175 ++begin;
176 tag = resource.substring( begin, resource.length() );
177 }
178
179 listener.onTag( state, tag, tagspace );
180 listener.onTagEnd();
181
182 found = true;
183 }
184
185 }
186
187 return found;
188
189 }
190
191 }