1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17 package org.apache.commons.betwixt;
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
/**
 * Static helpers for producing XML text: escaping of body text, attribute
 * values and CDATA content, plus validation of XML names.
 *
 * <p>Name validation is driven by a lookup table covering the code points
 * <code>0x0000</code> to <code>0xFFFF</code>. The table is filled once, when
 * the class is initialized, from the range and single-character lists in the
 * static initializer. Those lists appear to follow the character classes of
 * the XML 1.0 specification (Letter, Digit, CombiningChar, Extender).</p>
 */
public class XMLUtils {

    /** Entity written in place of a <code>&lt;</code> character. */
    public static final String LESS_THAN_ENTITY = "&lt;";

    /** Entity written in place of a <code>&gt;</code> character. */
    public static final String GREATER_THAN_ENTITY = "&gt;";

    /** Entity written in place of a <code>&amp;</code> character. */
    public static final String AMPERSAND_ENTITY = "&amp;";

    /** Entity written in place of an apostrophe (attribute values only). */
    public static final String APOSTROPHE_ENTITY = "&apos;";

    /** Entity written in place of a double quote (attribute values only). */
    public static final String QUOTE_ENTITY = "&quot;";

    /** Bit set in {@link #CHARS} for code points that may start a name. */
    private static final int MASK_NAME_START = 0x01;

    /** Bit set in {@link #CHARS} for code points that may appear in a name. */
    private static final int MASK_NAME = 0x02;

    /** Per-code-point flags, indexed by code point (0x0000 - 0xFFFF). */
    private static final byte[] CHARS = new byte[1 << 16];

    static {

        // Single characters allowed inside a name but not as its first character.
        int[] nameChar = {
            0x002D, 0x002E,
        };

        // Single characters allowed anywhere in a name, including the start.
        int[] nameStartChar = {
            0x003A, 0x005F,
        };

        // Inclusive [first, last] pairs of letters.
        int[] letterRange = {

            0x0041, 0x005A, 0x0061, 0x007A, 0x00C0, 0x00D6, 0x00D8, 0x00F6,
            0x00F8, 0x0131, 0x0134, 0x013E, 0x0141, 0x0148, 0x014A, 0x017E,
            0x0180, 0x01C3, 0x01CD, 0x01F0, 0x01F4, 0x01F5, 0x01FA, 0x0217,
            0x0250, 0x02A8, 0x02BB, 0x02C1, 0x0388, 0x038A, 0x038E, 0x03A1,
            0x03A3, 0x03CE, 0x03D0, 0x03D6, 0x03E2, 0x03F3, 0x0401, 0x040C,
            0x040E, 0x044F, 0x0451, 0x045C, 0x045E, 0x0481, 0x0490, 0x04C4,
            0x04C7, 0x04C8, 0x04CB, 0x04CC, 0x04D0, 0x04EB, 0x04EE, 0x04F5,
            0x04F8, 0x04F9, 0x0531, 0x0556, 0x0561, 0x0586, 0x05D0, 0x05EA,
            0x05F0, 0x05F2, 0x0621, 0x063A, 0x0641, 0x064A, 0x0671, 0x06B7,
            0x06BA, 0x06BE, 0x06C0, 0x06CE, 0x06D0, 0x06D3, 0x06E5, 0x06E6,
            0x0905, 0x0939, 0x0958, 0x0961, 0x0985, 0x098C, 0x098F, 0x0990,
            0x0993, 0x09A8, 0x09AA, 0x09B0, 0x09B6, 0x09B9, 0x09DC, 0x09DD,
            0x09DF, 0x09E1, 0x09F0, 0x09F1, 0x0A05, 0x0A0A, 0x0A0F, 0x0A10,
            0x0A13, 0x0A28, 0x0A2A, 0x0A30, 0x0A32, 0x0A33, 0x0A35, 0x0A36,
            0x0A38, 0x0A39, 0x0A59, 0x0A5C, 0x0A72, 0x0A74, 0x0A85, 0x0A8B,
            0x0A8F, 0x0A91, 0x0A93, 0x0AA8, 0x0AAA, 0x0AB0, 0x0AB2, 0x0AB3,
            0x0AB5, 0x0AB9, 0x0B05, 0x0B0C, 0x0B0F, 0x0B10, 0x0B13, 0x0B28,
            0x0B2A, 0x0B30, 0x0B32, 0x0B33, 0x0B36, 0x0B39, 0x0B5C, 0x0B5D,
            0x0B5F, 0x0B61, 0x0B85, 0x0B8A, 0x0B8E, 0x0B90, 0x0B92, 0x0B95,
            0x0B99, 0x0B9A, 0x0B9E, 0x0B9F, 0x0BA3, 0x0BA4, 0x0BA8, 0x0BAA,
            0x0BAE, 0x0BB5, 0x0BB7, 0x0BB9, 0x0C05, 0x0C0C, 0x0C0E, 0x0C10,
            0x0C12, 0x0C28, 0x0C2A, 0x0C33, 0x0C35, 0x0C39, 0x0C60, 0x0C61,
            0x0C85, 0x0C8C, 0x0C8E, 0x0C90, 0x0C92, 0x0CA8, 0x0CAA, 0x0CB3,
            0x0CB5, 0x0CB9, 0x0CE0, 0x0CE1, 0x0D05, 0x0D0C, 0x0D0E, 0x0D10,
            0x0D12, 0x0D28, 0x0D2A, 0x0D39, 0x0D60, 0x0D61, 0x0E01, 0x0E2E,
            0x0E32, 0x0E33, 0x0E40, 0x0E45, 0x0E81, 0x0E82, 0x0E87, 0x0E88,
            0x0E94, 0x0E97, 0x0E99, 0x0E9F, 0x0EA1, 0x0EA3, 0x0EAA, 0x0EAB,
            0x0EAD, 0x0EAE, 0x0EB2, 0x0EB3, 0x0EC0, 0x0EC4, 0x0F40, 0x0F47,
            0x0F49, 0x0F69, 0x10A0, 0x10C5, 0x10D0, 0x10F6, 0x1102, 0x1103,
            0x1105, 0x1107, 0x110B, 0x110C, 0x110E, 0x1112, 0x1154, 0x1155,
            0x115F, 0x1161, 0x116D, 0x116E, 0x1172, 0x1173, 0x11AE, 0x11AF,
            0x11B7, 0x11B8, 0x11BC, 0x11C2, 0x1E00, 0x1E9B, 0x1EA0, 0x1EF9,
            0x1F00, 0x1F15, 0x1F18, 0x1F1D, 0x1F20, 0x1F45, 0x1F48, 0x1F4D,
            0x1F50, 0x1F57, 0x1F5F, 0x1F7D, 0x1F80, 0x1FB4, 0x1FB6, 0x1FBC,
            0x1FC2, 0x1FC4, 0x1FC6, 0x1FCC, 0x1FD0, 0x1FD3, 0x1FD6, 0x1FDB,
            0x1FE0, 0x1FEC, 0x1FF2, 0x1FF4, 0x1FF6, 0x1FFC, 0x212A, 0x212B,
            0x2180, 0x2182, 0x3041, 0x3094, 0x30A1, 0x30FA, 0x3105, 0x312C,
            0xAC00, 0xD7A3,

            0x3021, 0x3029, 0x4E00, 0x9FA5,
        };

        // Individual letters that are not part of any range above.
        int[] letterChar = {

            0x0386, 0x038C, 0x03DA, 0x03DC, 0x03DE, 0x03E0, 0x0559, 0x06D5,
            0x093D, 0x09B2, 0x0A5E, 0x0A8D, 0x0ABD, 0x0AE0, 0x0B3D, 0x0B9C,
            0x0CDE, 0x0E30, 0x0E84, 0x0E8A, 0x0E8D, 0x0EA5, 0x0EA7, 0x0EB0,
            0x0EBD, 0x1100, 0x1109, 0x113C, 0x113E, 0x1140, 0x114C, 0x114E,
            0x1150, 0x1159, 0x1163, 0x1165, 0x1167, 0x1169, 0x1175, 0x119E,
            0x11A8, 0x11AB, 0x11BA, 0x11EB, 0x11F0, 0x11F9, 0x1F59, 0x1F5B,
            0x1F5D, 0x1FBE, 0x2126, 0x212E,

            0x3007,
        };

        // Inclusive [first, last] pairs of combining characters.
        int[] combiningCharRange = {
            0x0300, 0x0345, 0x0360, 0x0361, 0x0483, 0x0486, 0x0591, 0x05A1,
            0x05A3, 0x05B9, 0x05BB, 0x05BD, 0x05C1, 0x05C2, 0x064B, 0x0652,
            0x06D6, 0x06DC, 0x06DD, 0x06DF, 0x06E0, 0x06E4, 0x06E7, 0x06E8,
            0x06EA, 0x06ED, 0x0901, 0x0903, 0x093E, 0x094C, 0x0951, 0x0954,
            0x0962, 0x0963, 0x0981, 0x0983, 0x09C0, 0x09C4, 0x09C7, 0x09C8,
            0x09CB, 0x09CD, 0x09E2, 0x09E3, 0x0A40, 0x0A42, 0x0A47, 0x0A48,
            0x0A4B, 0x0A4D, 0x0A70, 0x0A71, 0x0A81, 0x0A83, 0x0ABE, 0x0AC5,
            0x0AC7, 0x0AC9, 0x0ACB, 0x0ACD, 0x0B01, 0x0B03, 0x0B3E, 0x0B43,
            0x0B47, 0x0B48, 0x0B4B, 0x0B4D, 0x0B56, 0x0B57, 0x0B82, 0x0B83,
            0x0BBE, 0x0BC2, 0x0BC6, 0x0BC8, 0x0BCA, 0x0BCD, 0x0C01, 0x0C03,
            0x0C3E, 0x0C44, 0x0C46, 0x0C48, 0x0C4A, 0x0C4D, 0x0C55, 0x0C56,
            0x0C82, 0x0C83, 0x0CBE, 0x0CC4, 0x0CC6, 0x0CC8, 0x0CCA, 0x0CCD,
            0x0CD5, 0x0CD6, 0x0D02, 0x0D03, 0x0D3E, 0x0D43, 0x0D46, 0x0D48,
            0x0D4A, 0x0D4D, 0x0E34, 0x0E3A, 0x0E47, 0x0E4E, 0x0EB4, 0x0EB9,
            0x0EBB, 0x0EBC, 0x0EC8, 0x0ECD, 0x0F18, 0x0F19, 0x0F71, 0x0F84,
            0x0F86, 0x0F8B, 0x0F90, 0x0F95, 0x0F99, 0x0FAD, 0x0FB1, 0x0FB7,
            0x20D0, 0x20DC, 0x302A, 0x302F,
        };

        // Individual combining characters.
        int[] combiningCharChar = {
            0x05BF, 0x05C4, 0x0670, 0x093C, 0x094D, 0x09BC, 0x09BE, 0x09BF,
            0x09D7, 0x0A02, 0x0A3C, 0x0A3E, 0x0A3F, 0x0ABC, 0x0B3C, 0x0BD7,
            0x0D57, 0x0E31, 0x0EB1, 0x0F35, 0x0F37, 0x0F39, 0x0F3E, 0x0F3F,
            0x0F97, 0x0FB9, 0x20E1, 0x3099, 0x309A,
        };

        // Inclusive [first, last] pairs of digits.
        int[] digitRange = {
            0x0030, 0x0039, 0x0660, 0x0669, 0x06F0, 0x06F9, 0x0966, 0x096F,
            0x09E6, 0x09EF, 0x0A66, 0x0A6F, 0x0AE6, 0x0AEF, 0x0B66, 0x0B6F,
            0x0BE7, 0x0BEF, 0x0C66, 0x0C6F, 0x0CE6, 0x0CEF, 0x0D66, 0x0D6F,
            0x0E50, 0x0E59, 0x0ED0, 0x0ED9, 0x0F20, 0x0F29,
        };

        // Inclusive [first, last] pairs of extenders.
        int[] extenderRange = {
            0x3031, 0x3035, 0x309D, 0x309E, 0x30FC, 0x30FE,
        };

        // Individual extenders.
        int[] extenderChar = {
            0x00B7, 0x02D0, 0x02D1, 0x0387, 0x0640, 0x0E46, 0x0EC6, 0x3005,
        };

        // Characters that may start a name are also valid later in the name,
        // so they receive both flags.
        final int startAndName = MASK_NAME_START | MASK_NAME;
        flagChars(nameStartChar, startAndName);
        flagRanges(letterRange, startAndName);
        flagChars(letterChar, startAndName);

        // Everything else is only valid after the first character.
        flagChars(nameChar, MASK_NAME);
        flagRanges(digitRange, MASK_NAME);
        flagRanges(combiningCharRange, MASK_NAME);
        flagChars(combiningCharChar, MASK_NAME);
        flagRanges(extenderRange, MASK_NAME);
        flagChars(extenderChar, MASK_NAME);
    }

    /**
     * Public no-argument constructor. Every operation of this class is
     * static; the constructor is kept so that existing code which
     * instantiates the class continues to compile.
     */
    public XMLUtils() {}

    /**
     * Escapes a value for use as XML element body text. The characters
     * <code>&lt;</code>, <code>&gt;</code> and <code>&amp;</code> are replaced
     * by {@link #LESS_THAN_ENTITY}, {@link #GREATER_THAN_ENTITY} and
     * {@link #AMPERSAND_ENTITY}; all other characters are copied unchanged.
     *
     * @param value object whose <code>toString()</code> result is escaped
     * @return the escaped text
     * @throws NullPointerException if <code>value</code> or its string form is null
     */
    public static final String escapeBodyValue(Object value) {
        return escape(value.toString(), false);
    }

    /**
     * Escapes a value for use inside an XML attribute. In addition to the
     * replacements made by {@link #escapeBodyValue(Object)}, apostrophes and
     * double quotes are replaced by {@link #APOSTROPHE_ENTITY} and
     * {@link #QUOTE_ENTITY}.
     *
     * @param value object whose <code>toString()</code> result is escaped
     * @return the escaped text
     * @throws NullPointerException if <code>value</code> or its string form is null
     */
    public static final String escapeAttributeValue(Object value) {
        return escape(value.toString(), true);
    }

    /**
     * Escapes content destined for a CDATA section by rewriting every
     * <code>]]&gt;</code> sequence as <code>]]&amp;gt;</code>.
     *
     * @param content text to escape
     * @return the escaped text
     * @throws NullPointerException if <code>content</code> is null
     */
    public static final String escapeCDATAContent(String content) {
        StringBuffer buffer = new StringBuffer(content);
        escapeCDATAContent(buffer);
        return buffer.toString();
    }

    /**
     * In-place variant of {@link #escapeCDATAContent(String)}: every
     * <code>&gt;</code> that directly follows <code>]]</code> in the given
     * buffer is replaced by {@link #GREATER_THAN_ENTITY}.
     *
     * @param bufferedContent buffer that is modified in place
     */
    public static final void escapeCDATAContent(StringBuffer bufferedContent) {
        // Number of characters the buffer grows by for each replacement.
        final int growth = GREATER_THAN_ENTITY.length() - 1;
        // Start at index 2: a match needs two preceding ']' characters.
        for (int i = 2, size = bufferedContent.length(); i < size; i++) {
            boolean closesSection = bufferedContent.charAt(i) == '>'
                    && bufferedContent.charAt(i - 1) == ']'
                    && bufferedContent.charAt(i - 2) == ']';
            if (closesSection) {
                bufferedContent.replace(i, i + 1, GREATER_THAN_ENTITY);
                // Skip over the inserted entity and extend the scan limit.
                size += growth;
                i += growth;
            }
        }
    }

    /**
     * Tests whether a string is usable as an XML name according to the
     * lookup table of this class: the first character must satisfy
     * {@link #isNameStartChar(int)} and every following character must
     * satisfy {@link #isNameChar(int)}.
     *
     * @param name candidate name, may be null
     * @return <code>true</code> if the name is non-null, non-empty and made
     *         up of permitted characters only
     */
    public static boolean isWellFormedXMLName( String name ) {
        if ( name == null || name.length() == 0 ) {
            return false;
        }
        if ( !isNameStartChar( name.charAt(0) ) ) {
            return false;
        }
        for (int i = 1, length = name.length(); i < length; i++ ) {
            if ( !isNameChar( name.charAt(i) ) ) {
                return false;
            }
        }
        return true;
    }

    /**
     * Tests whether a code point is flagged as valid inside a name.
     *
     * @param c code point to test
     * @return <code>true</code> if <code>c</code> lies in the range
     *         0x0000 - 0xFFFF and carries the name flag; <code>false</code>
     *         otherwise, including for negative values
     */
    public static boolean isNameChar(int c) {
        return c >= 0 && c < CHARS.length && (CHARS[c] & MASK_NAME) != 0;
    }

    /**
     * Tests whether a code point is flagged as valid at the start of a name.
     *
     * @param c code point to test
     * @return <code>true</code> if <code>c</code> lies in the range
     *         0x0000 - 0xFFFF and carries the name-start flag;
     *         <code>false</code> otherwise, including for negative values
     */
    public static boolean isNameStartChar(int c) {
        return c >= 0 && c < CHARS.length && (CHARS[c] & MASK_NAME_START) != 0;
    }

    /** Sets the given flag bits for every code point listed in <code>codePoints</code>. */
    private static void flagChars(int[] codePoints, int mask) {
        for (int i = 0; i < codePoints.length; i++) {
            CHARS[codePoints[i]] |= mask;
        }
    }

    /** Sets the given flag bits for every code point in each inclusive [first, last] pair. */
    private static void flagRanges(int[] bounds, int mask) {
        for (int i = 0; i < bounds.length; i += 2) {
            for (int c = bounds[i]; c <= bounds[i + 1]; c++) {
                CHARS[c] |= mask;
            }
        }
    }

    /** Copies <code>text</code>, substituting an entity for each character that needs one. */
    private static String escape(String text, boolean inAttribute) {
        final int length = text.length();
        StringBuffer buffer = new StringBuffer(length + 16);
        for (int i = 0; i < length; i++) {
            char ch = text.charAt(i);
            String entity = entityFor(ch, inAttribute);
            if (entity == null) {
                buffer.append(ch);
            } else {
                buffer.append(entity);
            }
        }
        return buffer.toString();
    }

    /** Returns the entity replacing <code>ch</code>, or null if it is written as-is. */
    private static String entityFor(char ch, boolean inAttribute) {
        switch (ch) {
            case '<':
                return LESS_THAN_ENTITY;
            case '>':
                return GREATER_THAN_ENTITY;
            case '&':
                return AMPERSAND_ENTITY;
            case '\'':
                return inAttribute ? APOSTROPHE_ENTITY : null;
            case '\"':
                return inAttribute ? QUOTE_ENTITY : null;
            default:
                return null;
        }
    }
}