1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24 package org.apache.commons.xmlio.in;
25
26 import java.io.*;
27 import java.util.*;
28 import java.net.*;
29
30 import org.xml.sax.*;
31 import org.xml.sax.helpers.*;
32 import javax.xml.parsers.*;
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71 public class SimpleImporter {
72
73
74 private boolean trimContent = true;
75 private boolean makeCopy = false;
76 private boolean zeroLengthIsNull = true;
77 private boolean includeLeadingCDataIntoStartElementCallback = true;
78 private boolean fullDebug = false;
79 private boolean useQName = true;
80 private boolean buildComplexPath = false;
81
82 protected SAXParserFactory factory;
83
84 protected List callbackHandlerList = new ArrayList();
85
86
87 protected StringBuffer currentMixedPCData = null;
88 protected boolean foundMixedPCData = false;
89
90
91 protected StringBuffer firstPCData = null;
92 protected boolean isFirstPCData = true;
93
94
95 protected ParseElement currentElement = null;
96
97 protected PathStack parseStack = new PathStack();
98
99 protected String debugBuffer = null;
100
101
102
103
104 public SimpleImporter() {
105 factory = SAXParserFactory.newInstance();
106 }
107
108
109 public boolean getFoundMixedPCData() {
110 return foundMixedPCData;
111 }
112
113
114
115
116 public boolean getUseQName() {
117 return useQName;
118 }
119
120
121
122
123 public void setUseQName(boolean useQName) {
124 this.useQName = useQName;
125 }
126
127
128
129
130 public boolean getBuildComplexPath() {
131 return buildComplexPath;
132 }
133
134
135
136
137 public void setBuildComplexPath(boolean buildComplexPath) {
138 this.buildComplexPath = buildComplexPath;
139 }
140
141
142
143
144
145 public void setFullDebugMode(boolean fullDebug) {
146 this.fullDebug = fullDebug;
147 }
148
149
150
151
152 public boolean getFullDebugMode() {
153 return fullDebug;
154 }
155
156
157
158
159
160 public String getParsedStreamForDebug() {
161 if (!getFullDebugMode()) {
162 return null;
163 } else {
164 return debugBuffer;
165 }
166 }
167
168
169
170
171
172
173
174
175
176
177
178 public boolean getIncludeLeadingCDataIntoStartElementCallback() {
179 return includeLeadingCDataIntoStartElementCallback;
180 }
181
182
183
184
185 public void setIncludeLeadingCDataIntoStartElementCallback(boolean includeLeadingCDataIntoStartElementCallback) {
186 this.includeLeadingCDataIntoStartElementCallback = includeLeadingCDataIntoStartElementCallback;
187 }
188
189
190
191
192 public boolean getTrimContent() {
193 return trimContent;
194 }
195
196
197
198
199
200 public void setTrimContent(boolean trimContent) {
201 this.trimContent = trimContent;
202 }
203
204
205
206
207 public boolean getZeroLengthIsNull() {
208 return zeroLengthIsNull;
209 }
210
211
212
213
214 public void setZeroLengthIsNull(boolean zeroLengthIsNull) {
215 this.zeroLengthIsNull = zeroLengthIsNull;
216 }
217
218
219
220
221
222
223 public boolean getMakeCopy() {
224 return makeCopy;
225 }
226
227
228 public void setMakeCopy(boolean makeCopy) {
229 this.makeCopy = makeCopy;
230 }
231
232
233
234
235
236 public void addSimpleImportHandler(SimpleImportHandler callbackHandler) {
237 synchronized (callbackHandlerList) {
238 if (!callbackHandlerList.contains(callbackHandler)) {
239 callbackHandlerList.add(callbackHandler);
240 }
241 }
242 }
243
244
245
246
247
248 public void removeSimpleImportHandler(SimpleImportHandler callbackHandler) {
249 synchronized (callbackHandlerList) {
250 callbackHandlerList.remove(callbackHandler);
251 }
252 }
253
254
255
256
257
258
259
260
261 public synchronized void parseUrlOrFile(String urlOrFileName)
262 throws ParserConfigurationException, SAXException, IOException, SimpleImporterException {
263 Throwable urlException = null;
264 Throwable fileException = null;
265 InputStream in = null;
266 try {
267 URL url = new URL(urlOrFileName);
268 URLConnection urlConnection = url.openConnection();
269 in = urlConnection.getInputStream();
270 } catch (MalformedURLException mue) {
271 urlException = mue;
272 } catch (IOException ioe) {
273 urlException = ioe;
274 }
275
276 try {
277 in = new FileInputStream(urlOrFileName);
278 } catch (IOException ioe) {
279 fileException = ioe;
280 }
281
282 if (in != null) {
283 parse(new InputSource(new BufferedInputStream(in)));
284 } else {
285 throw new SimpleImporterException(
286 "Could not parse "
287 + urlOrFileName
288 + ", is neither URL ("
289 + urlException.getMessage()
290 + ") nor file ("
291 + fileException.getMessage()
292 + ").");
293 }
294 }
295
296
297
298
299
300
301 public synchronized void parse(InputSource is) throws ParserConfigurationException, SAXException, IOException {
302 firstPCData = null;
303 currentElement = null;
304 factory.setNamespaceAware(!useQName || buildComplexPath);
305 SAXParser parser = factory.newSAXParser();
306 if (getFullDebugMode()) {
307 InputSource preReadIn = bufferParserStream(is);
308 parser.parse(preReadIn, new SAXHandler());
309 } else {
310 parser.parse(is, new SAXHandler());
311 }
312 }
313
314 private InputSource bufferParserStream(InputSource is) throws IOException {
315 StringBuffer buf = new StringBuffer();
316 Reader reader;
317 BufferedReader bufferedReader;
318 if (is.getCharacterStream() != null) {
319 reader = is.getCharacterStream();
320 } else {
321 String encoding = is.getEncoding();
322 if (encoding != null) {
323 reader = new InputStreamReader(is.getByteStream(), encoding);
324 } else {
325 reader = new InputStreamReader(is.getByteStream());
326 }
327 }
328 if (reader instanceof BufferedReader) {
329 bufferedReader = (BufferedReader) reader;
330 } else {
331 bufferedReader = new BufferedReader(reader);
332 }
333
334 while (true) {
335 String line = bufferedReader.readLine();
336 if (line == null) {
337 break;
338 } else {
339 buf.append(line).append('\n');
340 }
341 }
342 debugBuffer = buf.toString();
343 return new InputSource(new StringReader(debugBuffer));
344 }
345
346
347 private void callBackStartElementWhenReady() {
348 if (currentElement != null) {
349 String content = getFirstPCData();
350 SimplePath path;
351 if (buildComplexPath) {
352 path =
353 new SimplePath(
354 currentElement.path,
355 (Item[]) currentElement.pathList.toArray(new Item[currentElement.pathList.size()]));
356 } else {
357 path = new SimplePath(currentElement.path);
358
359 }
360
361 synchronized (callbackHandlerList) {
362 for (Iterator it = callbackHandlerList.iterator(); it.hasNext();) {
363 SimpleImportHandler callbackHandler = (SimpleImportHandler) it.next();
364 if (getMakeCopy()) {
365
366 callbackHandler.startElement(
367 new SimplePath(path),
368 currentElement.name,
369 new AttributesImpl(currentElement.attributes),
370 content);
371 } else {
372 callbackHandler.startElement(path, currentElement.name, currentElement.attributes, content);
373 }
374 }
375 }
376
377 firstPCData = null;
378 currentElement = null;
379 }
380 }
381
382 private void sendCharacters(String text) {
383 if (text == null)
384 return;
385
386 if (isFirstPCData) {
387 if (includeLeadingCDataIntoStartElementCallback) {
388 addToFirstPCData(text);
389 } else {
390 sendCData(text);
391 }
392 } else {
393 foundMixedPCData = true;
394 sendCData(text);
395 }
396 }
397
398 private void callBackCDATAWhenReady() {
399 callBackStartElementWhenReady();
400 if (currentMixedPCData == null) {
401 return;
402 }
403 String text = currentMixedPCData.toString();
404 text = trimPCData(text);
405 if (text == null) {
406 return;
407 }
408
409 SimplePath path;
410 if (buildComplexPath) {
411 path =
412 new SimplePath(
413 parseStack.getPath(),
414 (Item[]) parseStack.getPathList().toArray(new Item[parseStack.getPathList().size()]));
415 } else {
416 path = new SimplePath(parseStack.getPath());
417
418 }
419
420 synchronized (callbackHandlerList) {
421 for (Iterator it = callbackHandlerList.iterator(); it.hasNext();) {
422 SimpleImportHandler callbackHandler = (SimpleImportHandler) it.next();
423 if (getMakeCopy()) {
424
425 callbackHandler.cData(new SimplePath(path), text);
426 } else {
427 callbackHandler.cData(path, text);
428 }
429 }
430 }
431 currentMixedPCData = null;
432 }
433
434
435 private void sendCData(String text) {
436
437
438 if (currentMixedPCData == null) {
439 currentMixedPCData = new StringBuffer(text.length());
440 }
441 currentMixedPCData.append(text);
442 }
443
444 private void addToFirstPCData(String text) {
445 if (firstPCData == null) {
446 firstPCData = new StringBuffer(text.length());
447 }
448 firstPCData.append(text);
449 }
450
451 private String getFirstPCData() {
452 if (firstPCData == null) {
453 return null;
454 } else {
455 String text = firstPCData.toString();
456 return trimPCData(text);
457 }
458 }
459
460
461 private String trimPCData(String pcData) {
462 if (pcData == null) {
463 return null;
464 } else {
465 if (getTrimContent()) {
466 pcData = pcData.trim();
467 }
468 if (pcData.length() == 0 && getZeroLengthIsNull()) {
469 return null;
470 } else {
471 return pcData;
472 }
473 }
474 }
475
476
477 private final static class ParseElement {
478 public String name, path;
479 public List pathList;
480 public AttributesImpl attributes;
481
482 public ParseElement(String name, String path, List pathList, AttributesImpl attributes) {
483 this.name = name;
484 this.path = path;
485 this.attributes = attributes;
486 this.pathList = pathList;
487 }
488 }
489
490 private final class SAXHandler extends DefaultHandler {
491 public void startDocument() {
492 synchronized (callbackHandlerList) {
493 for (Iterator it = callbackHandlerList.iterator(); it.hasNext();) {
494 SimpleImportHandler callbackHandler = (SimpleImportHandler) it.next();
495 callbackHandler.startDocument();
496 }
497 }
498 }
499
500 public void endDocument() {
501
502 callBackStartElementWhenReady();
503 callBackCDATAWhenReady();
504 synchronized (callbackHandlerList) {
505 for (Iterator it = callbackHandlerList.iterator(); it.hasNext();) {
506 SimpleImportHandler callbackHandler = (SimpleImportHandler) it.next();
507 callbackHandler.endDocument();
508 }
509 }
510 }
511
512 public void characters(char ch[], int start, int length) {
513 if (length < 1)
514 return;
515 String text = new String(ch, start, length);
516 sendCharacters(text);
517 }
518
519 public void endElement(String namespaceURI, String localName, String qName) {
520
521
522 callBackStartElementWhenReady();
523 callBackCDATAWhenReady();
524 String name;
525 if (!useQName || qName == null || qName.length() == 0) {
526 name = localName;
527 } else {
528 name = qName;
529 }
530
531 SimplePath path;
532 if (buildComplexPath) {
533 path =
534 new SimplePath(
535 parseStack.getPath(),
536 (Item[]) parseStack.getPathList().toArray(new Item[parseStack.getPathList().size()]));
537 } else {
538 path = new SimplePath(parseStack.getPath());
539
540 }
541
542 synchronized (callbackHandlerList) {
543 for (Iterator it = callbackHandlerList.iterator(); it.hasNext();) {
544 SimpleImportHandler callbackHandler = (SimpleImportHandler) it.next();
545 if (getMakeCopy()) {
546
547 callbackHandler.endElement(new SimplePath(path), name);
548 } else {
549 callbackHandler.endElement(path, name);
550 }
551 }
552 }
553
554
555 if (parseStack.empty()) {
556 throw new SimpleImporterException("Umatchted end tag: " + name);
557 } else {
558 Object top = parseStack.peek();
559 String topName;
560 if (buildComplexPath) {
561 topName = ((Item)top).getName();
562 } else {
563 topName = (String)top;
564 }
565 if (!name.equals(topName)) {
566 throw new SimpleImporterException(
567 "End tag " + name + " does not match start tag " + top);
568 } else {
569 parseStack.pop();
570 }
571 }
572
573 isFirstPCData = false;
574 }
575
576 public void startElement(String namespaceURI, String localName, String qName, Attributes atts) {
577
578
579 callBackStartElementWhenReady();
580 callBackCDATAWhenReady();
581 String name;
582 if (!useQName || qName == null || qName.length() == 0) {
583 name = localName;
584 } else {
585 name = qName;
586 }
587 parseStack.push(namespaceURI, name);
588
589
590 AttributesImpl attributesCopy = new AttributesImpl(atts);
591 currentElement = new ParseElement(name, parseStack.getPath(), parseStack.getPathList(), attributesCopy);
592
593
594
595
596 isFirstPCData = true;
597 }
598 }
599
600
601 private final class PathStack {
602
603 private List pathStack;
604
605 public PathStack(int initialCapacity) {
606 pathStack = new ArrayList(initialCapacity);
607 }
608
609 public PathStack() {
610 pathStack = new ArrayList();
611 }
612
613 public String getPath() {
614 StringBuffer path = new StringBuffer(100);
615
616 path.append('/');
617 for (Iterator it = pathStack.iterator(); it.hasNext();) {
618 Object element = it.next();
619 String pathElement;
620 if (buildComplexPath) {
621 pathElement = ((Item) element).getName();
622 } else {
623 pathElement = (String) element;
624 }
625 path.append(pathElement).append('/');
626 }
627 return path.toString();
628 }
629
630 public List getPathList() {
631 return pathStack;
632 }
633
634 public String toString() {
635 return getPath();
636 }
637
638 public void push(String namespaceURI, String name) {
639 if (buildComplexPath) {
640 pathStack.add(new Item(name, namespaceURI));
641 } else {
642 pathStack.add(name);
643 }
644 }
645
646 public int size() {
647 return pathStack.size();
648 }
649
650 public boolean empty() {
651 return (pathStack.size() <= 0);
652 }
653
654 public Object peek() {
655 int size = pathStack.size();
656 if (size > 0) {
657 return pathStack.get(size - 1);
658 } else {
659 return null;
660 }
661 }
662
663 public Object pop() {
664 int size = pathStack.size();
665 if (size > 0) {
666 Object o = pathStack.get(size - 1);
667 pathStack.remove(size - 1);
668 return o;
669 } else {
670 return null;
671 }
672 }
673
674 }
675 }