1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17 package org.apache.commons.feedparser;
18
19 import org.apache.commons.feedparser.tools.*;
20
21 import java.io.*;
22 import java.net.*;
23 import java.util.*;
24
25 import org.jdom.*;
26
27 import org.jaxen.jdom.*;
28
/**
 * Handles parsing tag metadata from feed entries: mod_taxonomy topics,
 * category elements and dc:subject elements.
 *
 * @author <a href="mailto:burton@apache.org">Kevin A. Burton (burtonator)</a>
 * @version $Id: TagFeedParser.java 159213 2005-03-27 23:32:01Z burton $
 */
35 public class TagFeedParser {
36
37 /***
38 *
39 */
40 public static void parse( FeedParserListener listener,
41 FeedParserState state ) throws Exception {
42
43 if ( listener instanceof TagFeedParserListener == false )
44 return;
45
46 TagFeedParserListener tagFeedParserListener
47 = (TagFeedParserListener)listener;
48
49 if ( doParseModTaxonomy( tagFeedParserListener, state ) )
50 return;
51
52 if ( doParseCategory( tagFeedParserListener, state ) )
53 return;
54
55 doDcSubject( tagFeedParserListener, state );
56
57 }
58
59 /***
60 *
61 * Parse out dc:subject tags
62 *
63 * Its not very clear how this is supposed to work.
64 *
65 * "Comment: Typically, a Subject will be expressed as keywords, key phrases
66 * or classification codes that describe a topic of the resource.
67 * Recommended best practice is to select a value from a controlled
68 * vocabulary or formal classification scheme."
69 *
70 * But this leaves it open to whether its a space separated list or that
71 * this needs to be per element. It seems that the real infringer is
72 * Delicious but I can fix this by processing dc:subject last.
73 *
74 *
75 */
76 public static void doDcSubject( TagFeedParserListener listener,
77 FeedParserState state ) throws Exception {
78
79 XPath xpath = new XPath( "dc:subject" );
80 xpath.addNamespace( NS.DC.getPrefix(), NS.DC.getURI() );
81
82 List list = xpath.selectNodes( state.current );
83
84 Iterator it = list.iterator();
85
86 while ( it.hasNext() ) {
87
88 Element element = (Element)it.next();
89
90 String tag = element.getText();
91 String tagspace = null;
92
93 listener.onTag( state, tag, tagspace );
94 listener.onTagEnd();
95
96 }
97
98 }
99
100 /***
101 * Parse out atom:category and RSS 2.0/0.91 category
102 *
103 *
104 */
105 public static boolean doParseCategory( TagFeedParserListener listener,
106 FeedParserState state ) throws Exception {
107
108
109
110 XPath xpath = new XPath( "descendant::*[local-name() = 'category']" );
111
112
113
114
115 List list = xpath.selectNodes( state.current );
116
117 Iterator it = list.iterator();
118
119 boolean found = false;
120
121 while ( it.hasNext() ) {
122
123 Element element = (Element)it.next();
124
125 String tag = element.getText();
126 String tagspace = null;
127
128 listener.onTag( state, tag, tagspace );
129 listener.onTagEnd();
130
131 found = true;
132
133 }
134
135 return found;
136
137 }
138
139 public static boolean doParseModTaxonomy( TagFeedParserListener listener,
140 FeedParserState state ) throws Exception {
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155 XPath xpath = new XPath( "taxo:topics/rdf:Bag/rdf:li" );
156 xpath.addNamespace( NS.RDF.getPrefix(), NS.RDF.getURI() );
157 xpath.addNamespace( NS.TAXO.getPrefix(), NS.TAXO.getURI() );
158
159 List list = xpath.selectNodes( state.current );
160
161 Iterator it = list.iterator();
162
163 boolean found = false;
164
165 while ( it.hasNext() ) {
166
167 Element element = (Element)it.next();
168
169 String resource = element.getAttributeValue( "resource" );
170
171 if ( resource != "" && resource != null ) {
172
173 String tag = resource;
174 String tagspace = resource;
175
176 int begin = resource.lastIndexOf( "/" );
177
178 if ( begin != -1 ) {
179 ++begin;
180 tag = resource.substring( begin, resource.length() );
181 }
182
183 listener.onTag( state, tag, tagspace );
184 listener.onTagEnd();
185
186 found = true;
187 }
188
189 }
190
191 return found;
192
193 }
194
195 }