View Javadoc

1   /*
2    * Copyright 1999,2004 The Apache Software Foundation.
3    * 
4    * Licensed under the Apache License, Version 2.0 (the "License");
5    * you may not use this file except in compliance with the License.
6    * You may obtain a copy of the License at
7    * 
8    *      http://www.apache.org/licenses/LICENSE-2.0
9    * 
10   * Unless required by applicable law or agreed to in writing, software
11   * distributed under the License is distributed on an "AS IS" BASIS,
12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13   * See the License for the specific language governing permissions and
14   * limitations under the License.
15   */
16  
17  package org.apache.commons.feedparser;
18  
19  import org.apache.commons.feedparser.tools.*;
20  
21  import java.io.*;
22  import java.net.*;
23  import java.util.*;
24  
25  import org.jdom.*;
26  
27  import org.jaxen.jdom.*;
28  
29  /***
30   * Handles parsing RSS metadata including dates
31   *
32   * @author <a href="mailto:burton@apache.org">Kevin A. Burton (burtonator)</a>
33   * @version $Id: TagFeedParser.java 159213 2005-03-27 23:32:01Z burton $
34   */
35  public class TagFeedParser {
36  
37      /***
38       * 
39       */
40      public static void parse( FeedParserListener listener,
41                                FeedParserState state ) throws Exception {
42  
43          if ( listener instanceof TagFeedParserListener == false )
44              return;
45  
46          TagFeedParserListener tagFeedParserListener
47              = (TagFeedParserListener)listener;
48  
49          if ( doParseModTaxonomy( tagFeedParserListener, state ) )
50              return;
51  
52          if ( doParseCategory( tagFeedParserListener, state ) )
53              return;
54  
55          doDcSubject( tagFeedParserListener, state );
56  
57      }
58  
59      /***
60       * 
61       * Parse out dc:subject tags
62       *
63       * Its not very clear how this is supposed to work.
64       * 
65       * "Comment: Typically, a Subject will be expressed as keywords, key phrases
66       * or classification codes that describe a topic of the resource.
67       * Recommended best practice is to select a value from a controlled
68       * vocabulary or formal classification scheme."
69       * 
70       * But this leaves it open to whether its a space separated list or that
71       * this needs to be per element.  It seems that the real infringer is
72       * Delicious but I can fix this by processing dc:subject last.
73       *
74       * 
75       */
76      public static void doDcSubject( TagFeedParserListener listener, 
77                                      FeedParserState state ) throws Exception {
78  
79          XPath xpath = new XPath( "dc:subject" );
80          xpath.addNamespace( NS.DC.getPrefix(), NS.DC.getURI() );
81          
82          List list = xpath.selectNodes( state.current );
83  
84          Iterator it = list.iterator();
85  
86          while ( it.hasNext() ) {
87  
88              Element element = (Element)it.next();
89  
90              String tag = element.getText();
91              String tagspace = null;
92              
93              listener.onTag( state, tag, tagspace );
94              listener.onTagEnd();
95  
96          }
97          
98      }
99  
100     /***
101      * Parse out atom:category and RSS 2.0/0.91 category
102      *
103      * 
104      */
105     public static boolean doParseCategory( TagFeedParserListener listener, 
106                                            FeedParserState state ) throws Exception {
107 
108         //XPath xpath = new XPath( "local-name() = 'category'" );
109 
110         XPath xpath = new XPath( "descendant::*[local-name() = 'category']" );
111 
112         //NOTE: this only works for elements without namespaces
113         //XPath xpath = new XPath( "category" );
114 
115         List list = xpath.selectNodes( state.current );
116 
117         Iterator it = list.iterator();
118 
119         boolean found = false;
120         
121         while ( it.hasNext() ) {
122 
123             Element element = (Element)it.next();
124 
125             String tag = element.getText();
126             String tagspace = null;
127             
128             listener.onTag( state, tag, tagspace );
129             listener.onTagEnd();
130 
131             found = true;
132 
133         }
134 
135         return found;
136         
137     }
138 
139     public static boolean doParseModTaxonomy( TagFeedParserListener listener, 
140                                               FeedParserState state ) throws Exception {
141 
142         // <taxo:topics>
143         //      <rdf:Bag>
144         //          <rdf:li resource="http://del.icio.us/tag/hacking" />
145         //          <rdf:li resource="http://del.icio.us/tag/howto" />
146         //          <rdf:li resource="http://del.icio.us/tag/programming" />
147         //          <rdf:li resource="http://del.icio.us/tag/software" />
148         //          <rdf:li resource="http://del.icio.us/tag/tech" />
149         //          <rdf:li resource="http://del.icio.us/tag/technology" />
150         //          <rdf:li resource="http://del.icio.us/tag/tools" />
151         //          <rdf:li resource="http://del.icio.us/tag/tivo" />
152         //      </rdf:Bag>
153         //  </taxo:topics>
154  
155         XPath xpath = new XPath( "taxo:topics/rdf:Bag/rdf:li" );
156         xpath.addNamespace( NS.RDF.getPrefix(), NS.RDF.getURI() );
157         xpath.addNamespace( NS.TAXO.getPrefix(), NS.TAXO.getURI() );
158         
159         List list = xpath.selectNodes( state.current );
160 
161         Iterator it = list.iterator();
162 
163         boolean found = false;
164         
165         while ( it.hasNext() ) {
166 
167             Element element = (Element)it.next();
168 
169             String resource = element.getAttributeValue( "resource" );
170 
171             if ( resource != "" && resource != null ) {
172 
173                 String tag = resource;
174                 String tagspace = resource;
175                 
176                 int begin = resource.lastIndexOf( "/" );                    
177 
178                 if ( begin != -1 ) {
179                     ++begin;
180                     tag = resource.substring( begin, resource.length() );
181                 }
182 
183                 listener.onTag( state, tag, tagspace );
184                 listener.onTagEnd();
185 
186                 found = true;
187             }
188             
189         } 
190 
191         return found;
192         
193     }
194 
195 }