1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17 package org.apache.commons.feedparser;
18
19
20
21
22
23
24
25
26
27 public class ContentDetector {
28
29
30
31
32
33
34
35
36 public static ContentDetectorResult detect( String content ) throws Exception {
37
38 ContentDetectorResult result = new ContentDetectorResult();
39
40 result.isHTML = isHTMLContent( content );
41 result.isRSS = ( isRSS_1_0_Content( content ) ||
42 isRSS_2_0_Content( content ) ||
43 isRSS_0_9_0_Content( content ) ||
44 isRSS_0_9_1_Content( content ) ||
45 isRSS_0_9_2_Content( content ) );
46
47 result.isAtom = isAtomContent( content );
48
49 result.isFeed = result.isRSS || result.isAtom;
50
51 return result;
52
53 }
54
55
56
57
58
59
60 public static boolean isRSS_1_0_Content( String content ) throws Exception {
61
62
63
64
65 return content.indexOf( "http://purl.org/rss/1.0/" ) != -1;
66
67 }
68
69
70
71
72
73
74 public static boolean isRSS_0_9_1_Content( String content ) throws Exception {
75
76
77 return content.indexOf( "<rss" ) != -1;
78
79 }
80
81
82
83
84
85
86 public static boolean isRSS_0_9_2_Content( String content ) throws Exception {
87
88
89 return isRSS_0_9_1_Content( content );
90
91 }
92
93
94
95
96
97
98 public static boolean isRSS_2_0_Content( String content ) throws Exception {
99
100 return isRSS_0_9_1_Content( content );
101
102 }
103
104
105
106
107
108
109 public static boolean isRSS_0_9_0_Content( String content ) throws Exception {
110
111
112 return content.indexOf( "http://my.netscape.com/rdf/simple/0.9/" ) != -1;
113
114 }
115
116 public static boolean isAtomContent( String content ) throws Exception {
117
118 return content.indexOf( "http://purl.org/atom/ns#" ) != -1;
119
120 }
121
122
123
124
125
126
127 public static boolean isHTMLContent( String content ) throws Exception {
128
129
130 return content.indexOf( "<html" ) != -1;
131
132 }
133
134 public static void main( String[] args ) {
135
136 try {
137
138
139
140 } catch ( Throwable t ) {
141
142 t.printStackTrace();
143
144 }
145
146 }
147
148 }