1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17 package org.apache.commons.feedparser.locate;
18
19 import java.net.MalformedURLException;
20 import java.net.URL;
21 import java.util.HashSet;
22 import java.util.List;
23
24 import org.apache.commons.feedparser.FeedList;
25
26
27
28
29
30
31
32 public class LinkLocator {
33
34
35
36
37
38
39 public static final List locate( String resource,
40 String content,
41 final FeedList list )
42 throws Exception {
43
44
45
46
47
48 final boolean hasExplicitRSSFeed = list.getAdRSSFeed() != null;
49 final boolean hasExplicitAtomFeed = list.getAdRSSFeed() != null;
50
51 AnchorParserListener listener = new AnchorParserListener() {
52
53 String resource = null;
54
55 String site = null;
56
57 HashSet seen = new HashSet();
58
59 boolean hasFoundRSSFeed = false;
60 boolean hasFoundAtomFeed = false;
61
62 public void setContext( Object context ) {
63
64 resource = (String)context;
65
66
67 site = getSite( resource );
68
69 }
70
71 public Object getResult() {
72 return list;
73 }
74
75 public boolean onAnchor( String href, String rel, String title ) {
76 String current = ResourceExpander.expand( resource, href );
77 if ( current == null )
78 return true;
79
80
81
82
83
84
85
86
87
88
89
90
91
92 if ( ! site.equals( getSite( current ) ) ) {
93 return true;
94 }
95
96
97
98
99
100 if ( current.startsWith( "http://127" ) )
101 return true;
102
103 if ( seen.contains( current ) ) {
104 return true;
105 }
106
107 seen.add( current );
108
109
110
111
112 boolean isRSSLink = current.endsWith( ".rss" );
113
114
115
116 if ( isRSSLink == false ) {
117
118 isRSSLink =
119 title != null &&
120 title.equalsIgnoreCase( "rss" ) &&
121 href.indexOf( "rss" ) != -1;
122
123 }
124
125 if ( isRSSLink ) {
126
127
128 FeedReference ref = new FeedReference( current,
129 FeedReference.RSS_MEDIA_TYPE );
130
131
132
133
134 if (list.contains(ref))
135 return true;
136
137
138 if ( ! hasExplicitRSSFeed )
139 list.setAdRSSFeed( ref );
140
141 list.add( ref );
142
143 hasFoundRSSFeed = true;
144
145 }
146
147 if ( current.endsWith( ".atom" ) ) {
148
149 FeedReference ref = new FeedReference( current,
150 FeedReference.RSS_MEDIA_TYPE );
151
152
153
154 if (list.contains(ref))
155 return true;
156
157
158 if ( ! hasExplicitAtomFeed )
159 list.setAdAtomFeed( ref );
160
161 list.add( ref );
162
163 hasFoundAtomFeed = true;
164
165 }
166
167 if ( current.endsWith( ".xml" ) ||
168 current.endsWith( ".rdf" ) ) {
169
170
171
172
173
174
175
176
177
178 FeedReference ref = new FeedReference( current,
179 FeedReference.RSS_MEDIA_TYPE );
180
181
182
183 if (list.contains(ref))
184 return true;
185
186
187
188 if ( ! hasExplicitRSSFeed && ! hasFoundRSSFeed ) {
189
190
191
192
193
194 if ( list.getAdRSSFeed() == null ||
195 list.getAdRSSFeed().resource.endsWith( ".rdf" ) == false ) {
196
197 list.setAdRSSFeed( ref );
198
199 }
200
201 }
202
203
204 list.add( ref );
205 return true;
206
207 }
208
209
210
211 if ( current.endsWith( "/node/feed" ) )
212 list.add( current );
213
214 return true;
215
216 }
217
218 };
219
220 listener.setContext( resource );
221 AnchorParser.parseAnchors( content, listener );
222
223 return list;
224
225 }
226
227 public static String getSite( String resource ) {
228
229 try {
230
231 String site = new URL( resource ).getHost();
232 return site.replaceAll( "http://www", "http://" );
233
234 } catch ( MalformedURLException e ) {
235 return null;
236 }
237
238 }
239
240 }