1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17 package org.apache.commons.feedparser.network;
18
19 import java.io.FileNotFoundException;
20 import java.io.IOException;
21 import java.io.InputStream;
22 import java.net.ProtocolException;
23 import java.net.URL;
24 import java.net.URLConnection;
25 import java.util.Iterator;
26 import java.util.zip.GZIPInputStream;
27
28 import org.apache.log4j.Logger;
29
30 import sun.net.www.protocol.http.HttpURLConnection;
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46 public class URLResourceRequest extends BaseResourceRequest implements ResourceRequest {
47
48 private static Logger log = Logger.getLogger( URLResourceRequest.class.getName() );
49
50 public static final String ACCEPT_ENCODING_HEADER = "Accept-Encoding";
51 public static final String IF_NONE_MATCH_HEADER = "If-None-Match";
52 public static final String GZIP_ENCODING = "gzip";
53 public static final String USER_AGENT_HEADER = "User-Agent";
54
55
56
57
58
59
60
61
62
63
64 public static boolean ENABLE_HTTP_DELTA_FEED_IM = false;
65
66 public static String USER_AGENT
67 = "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.2.1; aggregator:FeedParser; http://commons.apache.org/feedparser/) Gecko/20021130";
68
69 public static String USER_AGENT_MOZILLA
70 = "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.2.1) Gecko/20021130";
71
72
73
74
75 public static final String REFERER
76 = "http://commons.apache.org/feedparser/?isAggregator=true";
77
78 public static final int MAX_CONTENT_LENGTH = 1000000;
79
80 private URL _url = null;
81
82 private URLConnection _urlConnection = null;
83
84 private InputStream inputStream = null;
85
86 private boolean initConnection = false;
87
88
89
90
91
92 public void init() throws IOException {
93
94 String resource = this.getResource();
95
96
97 if ( ResourceRequestFactory.isOffline() ) { return; }
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114 _url = new URL( this.getResource() );
115 _urlConnection = _url.openConnection();
116
117 }
118
119
120
121
122
123
124
125
126
127
128 public void initConnection() throws NetworkException {
129
130 long before = System.currentTimeMillis();
131
132 initConnection = true;
133
134 this.fireInit();
135
136
137
138
139
140 String resource = this.getResource();
141
142
143 if ( getRequestHeaderField( USER_AGENT_HEADER ) == null ) {
144 _urlConnection.setRequestProperty( USER_AGENT_HEADER, USER_AGENT );
145 }
146
147 _urlConnection.setRequestProperty( ACCEPT_ENCODING_HEADER, GZIP_ENCODING );
148
149
150
151 Iterator it = getRequestHeaderFields();
152
153 while ( it.hasNext() ) {
154
155 String key = (String)it.next();
156
157 _urlConnection.setRequestProperty( key, getRequestHeaderField( key ) );
158
159 }
160
161 if ( _urlConnection instanceof HttpURLConnection ) {
162
163 HttpURLConnection httpURLConn = (HttpURLConnection)_urlConnection;
164
165 httpURLConn.setFollowRedirects( getFollowRedirects() );
166 httpURLConn.setInstanceFollowRedirects( getFollowRedirects() );
167
168 if ( this.getIfModifiedSince() != -1 )
169 httpURLConn.setIfModifiedSince( this.getIfModifiedSince() );
170
171 if ( getEtag() != null ) {
172 httpURLConn.setRequestProperty( IF_NONE_MATCH_HEADER, getEtag() );
173
174
175
176
177 if ( ENABLE_HTTP_DELTA_FEED_IM ) {
178
179
180
181
182 httpURLConn.setRequestProperty( "A-IM", "feed, gzip" );
183
184 }
185
186 }
187
188 try {
189
190 httpURLConn.connect();
191
192
193
194 this.setResponseCode( httpURLConn.getResponseCode() );
195
196 } catch ( IOException e ) {
197 throw new NetworkException( e );
198 }
199
200 }
201
202 int contentLength = _urlConnection.getContentLength();
203
204
205
206 if ( contentLength > MAX_CONTENT_LENGTH &&
207 this.getResource().startsWith( "file:" ) == false ) {
208
209
210
211
212
213 throw new NetworkException( "Content is too large - " + contentLength + " - " + getResource() );
214
215 }
216
217 long after = System.currentTimeMillis();
218
219 log.debug( getResource() + " - init duration: " + (after-before) );
220
221 }
222
223 java.lang.reflect.Field FIELD_HTTP_URL_CONNECTION_HTTP = null;
224 java.lang.reflect.Field FIELD_HTTP_CLIENT_URL = null;
225
226
227
228
229
230
231
232
233
234
235
236
237
238 public String getResourceFromRedirect() {
239
240 try {
241
242 if ( FIELD_HTTP_URL_CONNECTION_HTTP == null ) {
243
244
245 FIELD_HTTP_URL_CONNECTION_HTTP = _urlConnection.getClass().getDeclaredField( "http" );
246 FIELD_HTTP_URL_CONNECTION_HTTP.setAccessible( true );
247
248 }
249
250 Object http = FIELD_HTTP_URL_CONNECTION_HTTP.get( _urlConnection );
251
252
253
254 if ( http == null )
255 return getResource();
256
257 if ( FIELD_HTTP_CLIENT_URL == null ) {
258
259 FIELD_HTTP_CLIENT_URL = http.getClass().getDeclaredField( "url" );
260 FIELD_HTTP_CLIENT_URL.setAccessible( true );
261
262 }
263
264 Object url = FIELD_HTTP_CLIENT_URL.get( http );
265
266
267
268 return url.toString();
269
270 } catch ( Throwable t ) {
271
272 return getResource();
273 }
274
275 }
276
277 public InputStream getInputStream() throws NetworkException {
278
279 try {
280 return _getInputStream();
281
282 } catch ( IOException e ) {
283
284 String message = null;
285
286
287
288 if ( e.getCause() instanceof FileNotFoundException ) {
289 message = "File not found: " + e.getCause().getMessage();
290 } else {
291 message = e.getMessage();
292 }
293
294 throw new NetworkException( message, e, this, _url, _urlConnection );
295 }
296
297 }
298
299
300
301
302
303
304 public InputStream _getInputStream() throws IOException {
305
306 if ( ! initConnection ) { initConnection(); }
307
308 String resource = this.getResource();
309
310
311
312 if ( ResourceRequestFactory.isOffline() ) {
313
314
315
316
317
318
319
320 throw new IOException( "ResourceRequestFactory is offline and content was not in cache - " +
321 resource );
322
323 }
324
325
326 if ( this.inputStream == null ) {
327
328 this.inputStream = _urlConnection.getInputStream();
329 this.inputStream = new AdvancedInputStream( this.inputStream, this );
330
331
332 if ( GZIP_ENCODING.equals( _urlConnection.getContentEncoding() ) ) {
333
334
335
336
337
338 this.inputStream = new GZIPInputStream( this.inputStream );
339
340 }
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358 }
359
360 setResource( getResourceFromRedirect() );
361
362
363
364 return inputStream;
365
366 }
367
368
369
370
371
372
373 public void setRequestMethod( String method ) throws NetworkException {
374
375 try {
376
377 if ( _urlConnection instanceof HttpURLConnection ) {
378
379 ((HttpURLConnection)_urlConnection).setRequestMethod( method );
380
381 }
382
383 } catch ( ProtocolException pe ) {
384
385 NetworkException ne = new NetworkException( pe.getMessage() );
386 ne.initCause( pe );
387 throw ne;
388
389 }
390
391 }
392
393
394
395
396
397
398 public int getContentLength() throws IOException {
399
400 if ( ! initConnection ) { initConnection(); }
401
402
403
404 return _urlConnection.getContentLength();
405
406 }
407
408 public String getHeaderField( String name ) {
409 return _urlConnection.getHeaderField( name );
410 }
411
412 }