001    /*
002     * Copyright 1999,2004 The Apache Software Foundation.
003     * 
004     * Licensed under the Apache License, Version 2.0 (the "License");
005     * you may not use this file except in compliance with the License.
006     * You may obtain a copy of the License at
007     * 
008     *      http://www.apache.org/licenses/LICENSE-2.0
009     * 
010     * Unless required by applicable law or agreed to in writing, software
011     * distributed under the License is distributed on an "AS IS" BASIS,
012     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013     * See the License for the specific language governing permissions and
014     * limitations under the License.
015     */
016    
017    package org.apache.commons.feedparser;
018    
019    import java.util.Iterator;
020    import java.util.List;
021    
022    import org.jaxen.jdom.JDOMXPath;
023    import org.jdom.Element;
024    
025    /**
026     * Handles parsing RSS metadata including dates
027     *
028     * @author <a href="mailto:burton@apache.org">Kevin A. Burton (burtonator)</a>
029     * @version $Id: TagFeedParser.java 373614 2006-01-30 22:31:21Z mvdb $
030     */
031    public class TagFeedParser {
032    
033        /**
034         * 
035         */
036        public static void parse( FeedParserListener listener,
037                                  FeedParserState state ) throws Exception {
038    
039            if ( listener instanceof TagFeedParserListener == false )
040                return;
041    
042            TagFeedParserListener tagFeedParserListener
043                = (TagFeedParserListener)listener;
044    
045            if ( doParseModTaxonomy( tagFeedParserListener, state ) )
046                return;
047    
048            if ( doParseCategory( tagFeedParserListener, state ) )
049                return;
050    
051            doDcSubject( tagFeedParserListener, state );
052    
053        }
054    
055        /**
056         * 
057         * Parse out dc:subject tags
058         *
059         * Its not very clear how this is supposed to work.
060         * 
061         * "Comment: Typically, a Subject will be expressed as keywords, key phrases
062         * or classification codes that describe a topic of the resource.
063         * Recommended best practice is to select a value from a controlled
064         * vocabulary or formal classification scheme."
065         * 
066         * But this leaves it open to whether its a space separated list or that
067         * this needs to be per element.  It seems that the real infringer is
068         * Delicious but I can fix this by processing dc:subject last.
069         *
070         * 
071         */
072        public static void doDcSubject( TagFeedParserListener listener, 
073                                        FeedParserState state ) throws Exception {
074    
075            JDOMXPath xpath = new JDOMXPath( "dc:subject" );
076            xpath.addNamespace( NS.DC.getPrefix(), NS.DC.getURI() );
077            
078            List list = xpath.selectNodes( state.current );
079    
080            Iterator it = list.iterator();
081    
082            while ( it.hasNext() ) {
083    
084                Element element = (Element)it.next();
085    
086                String tag = element.getText();
087                String tagspace = null;
088                
089                listener.onTag( state, tag, tagspace );
090                listener.onTagEnd();
091    
092            }
093            
094        }
095    
096        /**
097         * Parse out atom:category and RSS 2.0/0.91 category
098         *
099         * 
100         */
101        public static boolean doParseCategory( TagFeedParserListener listener, 
102                                               FeedParserState state ) throws Exception {
103    
104            //XPath xpath = new XPath( "local-name() = 'category'" );
105    
106            JDOMXPath xpath = new JDOMXPath( "descendant::*[local-name() = 'category']" );
107    
108            //NOTE: this only works for elements without namespaces
109            //XPath xpath = new XPath( "category" );
110    
111            List list = xpath.selectNodes( state.current );
112    
113            Iterator it = list.iterator();
114    
115            boolean found = false;
116            
117            while ( it.hasNext() ) {
118    
119                Element element = (Element)it.next();
120    
121                String tag = element.getText();
122                String tagspace = null;
123                
124                listener.onTag( state, tag, tagspace );
125                listener.onTagEnd();
126    
127                found = true;
128    
129            }
130    
131            return found;
132            
133        }
134    
135        public static boolean doParseModTaxonomy( TagFeedParserListener listener, 
136                                                  FeedParserState state ) throws Exception {
137    
138            // <taxo:topics>
139            //      <rdf:Bag>
140            //          <rdf:li resource="http://del.icio.us/tag/hacking" />
141            //          <rdf:li resource="http://del.icio.us/tag/howto" />
142            //          <rdf:li resource="http://del.icio.us/tag/programming" />
143            //          <rdf:li resource="http://del.icio.us/tag/software" />
144            //          <rdf:li resource="http://del.icio.us/tag/tech" />
145            //          <rdf:li resource="http://del.icio.us/tag/technology" />
146            //          <rdf:li resource="http://del.icio.us/tag/tools" />
147            //          <rdf:li resource="http://del.icio.us/tag/tivo" />
148            //      </rdf:Bag>
149            //  </taxo:topics>
150     
151            JDOMXPath xpath = new JDOMXPath( "taxo:topics/rdf:Bag/rdf:li" );
152            xpath.addNamespace( NS.RDF.getPrefix(), NS.RDF.getURI() );
153            xpath.addNamespace( NS.TAXO.getPrefix(), NS.TAXO.getURI() );
154            
155            List list = xpath.selectNodes( state.current );
156    
157            Iterator it = list.iterator();
158    
159            boolean found = false;
160            
161            while ( it.hasNext() ) {
162    
163                Element element = (Element)it.next();
164    
165                String resource = element.getAttributeValue( "resource" );
166    
167                if ( resource != "" && resource != null ) {
168    
169                    String tag = resource;
170                    String tagspace = resource;
171                    
172                    int begin = resource.lastIndexOf( "/" );                    
173    
174                    if ( begin != -1 ) {
175                        ++begin;
176                        tag = resource.substring( begin, resource.length() );
177                    }
178    
179                    listener.onTag( state, tag, tagspace );
180                    listener.onTagEnd();
181    
182                    found = true;
183                }
184                
185            } 
186    
187            return found;
188            
189        }
190    
191    }