1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17 package org.apache.commons.feedparser.network;
18
19 import java.io.*;
20 import java.net.*;
21 import java.util.*;
22 import java.util.zip.*;
23
24 import org.apache.log4j.*;
25
26 import sun.net.www.protocol.http.HttpURLConnection;
27
28 /***
29 * ResourceRequest implementation that uses java.net.URL as the backend.
30 *
31 * Differences from other ResourceRequests.
32 *
33 * setRequestMethod() - Allows us to change the request type (HEAD, etc).
34 *
35 * getContentLength() - Returns the length/size of the content represented by
36 * this resource. Can be used by clients with setRequestMethod( "HEAD" ) to
37 * find the size of a remote resource without doing a full fetch.
38 *
39 * @author <a href="mailto:burton@openprivacy.org">Kevin A. Burton</a>
40 * @version $Id: URLResourceRequest.java 159213 2005-03-27 23:32:01Z burton $
41 */
42 public class URLResourceRequest extends BaseResourceRequest implements ResourceRequest {
43
44 private static Logger log = Logger.getLogger( URLResourceRequest.class.getName() );
45
46 public static final String ACCEPT_ENCODING_HEADER = "Accept-Encoding";
47 public static final String IF_NONE_MATCH_HEADER = "If-None-Match";
48 public static final String GZIP_ENCODING = "gzip";
49 public static final String USER_AGENT_HEADER = "User-Agent";
50
51 /***
52 *
53 * Enable RFC 3228 HTTP Delta for feeds.
54 *
55 * http://bobwyman.pubsub.com/main/2004/09/using_rfc3229_w.html
56 *
57 * http://bobwyman.pubsub.com/main/2004/09/implementations.html
58 *
59 */
60 public static boolean ENABLE_HTTP_DELTA_FEED_IM = false;
61
62 public static String USER_AGENT
63 = "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.2.1; aggregator:FeedParser; http://commons.apache.org/feedparser/) Gecko/20021130";
64
65 public static String USER_AGENT_MOZILLA
66 = "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.2.1) Gecko/20021130";
67
68 /***
69 * Not used anymore. Provided for historical reasons.
70 */
71 public static final String REFERER
72 = "http://commons.apache.org/feedparser/?isAggregator=true";
73
74 public static final int MAX_CONTENT_LENGTH = 1000000;
75
76 private URL _url = null;
77
78 private URLConnection _urlConnection = null;
79
80 private InputStream inputStream = null;
81
82 private boolean initConnection = false;
83
84 /***
85 *
86 *
87 */
88 public void init() throws IOException {
89
90 String resource = this.getResource();
91
92
93 if ( ResourceRequestFactory.isOffline() ) { return; }
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110 _url = new URL( this.getResource() );
111 _urlConnection = _url.openConnection();
112
113 }
114
115 /***
116 * Init the actual connection. Should be called AFTER init() but before
117 * getInputStream() so that we can set any runtime params requestMethod,
118 * etc. If getInputStream() is called without an initConnection() we do
119 * this automatically. initConnection() might not want to be called when
120 * doing a HEAD request.
121 *
122 *
123 */
124 public void initConnection() throws NetworkException {
125
126 long before = System.currentTimeMillis();
127
128 initConnection = true;
129
130 this.fireInit();
131
132
133
134
135
136 String resource = this.getResource();
137
138
139 if ( getRequestHeaderField( USER_AGENT_HEADER ) == null ) {
140 _urlConnection.setRequestProperty( USER_AGENT_HEADER, USER_AGENT );
141 }
142
143 _urlConnection.setRequestProperty( ACCEPT_ENCODING_HEADER, GZIP_ENCODING );
144
145
146
147 Iterator it = getRequestHeaderFields();
148
149 while ( it.hasNext() ) {
150
151 String key = (String)it.next();
152
153 _urlConnection.setRequestProperty( key, getRequestHeaderField( key ) );
154
155 }
156
157 if ( _urlConnection instanceof HttpURLConnection ) {
158
159 HttpURLConnection httpURLConn = (HttpURLConnection)_urlConnection;
160
161 httpURLConn.setFollowRedirects( getFollowRedirects() );
162 httpURLConn.setInstanceFollowRedirects( getFollowRedirects() );
163
164 if ( this.getIfModifiedSince() != -1 )
165 httpURLConn.setIfModifiedSince( this.getIfModifiedSince() );
166
167 if ( getEtag() != null ) {
168 httpURLConn.setRequestProperty( IF_NONE_MATCH_HEADER, getEtag() );
169
170
171
172
173 if ( ENABLE_HTTP_DELTA_FEED_IM ) {
174
175
176
177
178 httpURLConn.setRequestProperty( "A-IM", "feed, gzip" );
179
180 }
181
182 }
183
184 try {
185
186 httpURLConn.connect();
187
188
189
190 this.setResponseCode( httpURLConn.getResponseCode() );
191
192 } catch ( IOException e ) {
193 throw new NetworkException( e );
194 }
195
196 }
197
198 int contentLength = _urlConnection.getContentLength();
199
200
201
202 if ( contentLength > MAX_CONTENT_LENGTH &&
203 this.getResource().startsWith( "file:" ) == false ) {
204
205
206
207
208
209 throw new NetworkException( "Content is too large - " + contentLength + " - " + getResource() );
210
211 }
212
213 long after = System.currentTimeMillis();
214
215 log.debug( getResource() + " - init duration: " + (after-before) );
216
217 }
218
219 java.lang.reflect.Field FIELD_HTTP_URL_CONNECTION_HTTP = null;
220 java.lang.reflect.Field FIELD_HTTP_CLIENT_URL = null;
221
222 /***
223 * This method used Reflection to pull out the redirected URL in
224 * java.net.URL. Internally sun.net.www.protocol.http.HttpURLConnection
225 * stores a reference to sun.net.www.http.HttpClient which then in turn does
226 * all the redirection and stores the redirect java.net.URL. We just use
227 * reflection to FETCH this URL and then call toString to get the correct
228 * value.
229 *
230 * Java needs the concept of readonly private variables.
231 *
232 *
233 */
234 public String getResourceFromRedirect() {
235
236 try {
237
238 if ( FIELD_HTTP_URL_CONNECTION_HTTP == null ) {
239
240
241 FIELD_HTTP_URL_CONNECTION_HTTP = _urlConnection.getClass().getDeclaredField( "http" );
242 FIELD_HTTP_URL_CONNECTION_HTTP.setAccessible( true );
243
244 }
245
246 Object http = FIELD_HTTP_URL_CONNECTION_HTTP.get( _urlConnection );
247
248
249
250 if ( http == null )
251 return getResource();
252
253 if ( FIELD_HTTP_CLIENT_URL == null ) {
254
255 FIELD_HTTP_CLIENT_URL = http.getClass().getDeclaredField( "url" );
256 FIELD_HTTP_CLIENT_URL.setAccessible( true );
257
258 }
259
260 Object url = FIELD_HTTP_CLIENT_URL.get( http );
261
262
263
264 return url.toString();
265
266 } catch ( Throwable t ) {
267
268 return getResource();
269 }
270
271 }
272
273 public InputStream getInputStream() throws NetworkException {
274
275 try {
276 return _getInputStream();
277
278 } catch ( IOException e ) {
279
280 String message = null;
281
282
283
284 if ( e.getCause() instanceof FileNotFoundException ) {
285 message = "File not found: " + e.getCause().getMessage();
286 } else {
287 message = e.getMessage();
288 }
289
290 throw new NetworkException( message, e, this, _url, _urlConnection );
291 }
292
293 }
294
295 /***
296 *
297 *
298 *
299 */
300 public InputStream _getInputStream() throws IOException {
301
302 if ( ! initConnection ) { initConnection(); }
303
304 String resource = this.getResource();
305
306
307
308 if ( ResourceRequestFactory.isOffline() ) {
309
310
311
312
313
314
315
316 throw new IOException( "ResourceRequestFactory is offline and content was not in cache - " +
317 resource );
318
319 }
320
321
322 if ( this.inputStream == null ) {
323
324 this.inputStream = _urlConnection.getInputStream();
325 this.inputStream = new AdvancedInputStream( this.inputStream, this );
326
327
328 if ( GZIP_ENCODING.equals( _urlConnection.getContentEncoding() ) ) {
329
330
331
332
333
334 this.inputStream = new GZIPInputStream( this.inputStream );
335
336 }
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354 }
355
356 setResource( getResourceFromRedirect() );
357
358
359
360 return inputStream;
361
362 }
363
364 /***
365 * Set the RequestMethod of this URLConnection.
366 *
367 *
368 */
369 public void setRequestMethod( String method ) throws NetworkException {
370
371 try {
372
373 if ( _urlConnection instanceof HttpURLConnection ) {
374
375 ((HttpURLConnection)_urlConnection).setRequestMethod( method );
376
377 }
378
379 } catch ( ProtocolException pe ) {
380
381 NetworkException ne = new NetworkException( pe.getMessage() );
382 ne.initCause( pe );
383 throw ne;
384
385 }
386
387 }
388
389 /***
390 *
391 *
392 *
393 */
394 public int getContentLength() throws IOException {
395
396 if ( ! initConnection ) { initConnection(); }
397
398
399
400 return _urlConnection.getContentLength();
401
402 }
403
404 public String getHeaderField( String name ) {
405 return _urlConnection.getHeaderField( name );
406 }
407
408 }