View Javadoc

1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   * http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing,
13   * software distributed under the License is distributed on an
14   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15   * KIND, either express or implied.  See the License for the
16   * specific language governing permissions and limitations
17   * under the License.
18   */
19  package org.apache.commons.csv.writer;
20  
21  import java.io.BufferedReader;
22  import java.io.InputStream;
23  import java.io.InputStreamReader;
24  
25  /**
26   * Tries to guess a config based on an InputStream.
27   *
28   * @author Martin van den Bemt
29   * @version $Id: $
30   */
31  public class CSVConfigGuesser {
32  
33      /** The stream to read */
34      private InputStream in;
35      /** 
36       * if the file has a field header (need this info, to be able to guess better)
37       * Defaults to false
38       */
39      private boolean hasFieldHeader = false;
40      /** The found config */
41      protected CSVConfig config;
42      
43      /**
44       * 
45       */
46      public CSVConfigGuesser() {
47          this.config = new CSVConfig();
48      }
49      
50      /**
51       * @param in the inputstream to guess from
52       */
53      public CSVConfigGuesser(InputStream in) {
54          this();
55          setInputStream(in);
56      }
57      
58      public void setInputStream(InputStream in) {
59          this.in = in;
60      }
61      
62      /**
63       * Allow override.
64       * @return the inputstream that was set.
65       */
66      protected InputStream getInputStream() {
67          return in;
68      }
69      
70      /**
71       * Guess the config based on the first 10 (or less when less available) 
72       * records of a CSV file.
73       * 
74       * @return the guessed config.
75       */
76      public CSVConfig guess() {
77          try {
78              // tralalal
79              BufferedReader bIn = new BufferedReader(new InputStreamReader((getInputStream())));
80              String[] lines = new String[10];
81              String line = null;
82              int counter = 0;
83              while ( (line = bIn.readLine()) != null && counter <= 10) {
84                  lines[counter] = line;
85                  counter++;
86              }
87              if (counter < 10) {
88                  // remove nulls from the array, so we can skip the null checking.
89                  String[] newLines = new String[counter];
90                  System.arraycopy(lines, 0, newLines, 0, counter);
91                  lines = newLines;
92              }
93              analyseLines(lines);
94          } catch(Exception e) {
95              e.printStackTrace();
96          } finally {
97              if (in != null) {
98                  try {
99                      in.close();
100                 } catch(Exception e) {
101                     // ignore exception.
102                 }
103             }
104         }
105         CSVConfig conf = config;
106         // cleanup the config.
107         config = null;
108         return conf;
109     }
110     
111     protected void analyseLines(String[] lines) {
112         guessFixedWidth(lines);
113         guessFieldSeperator(lines);
114     }
115     
116     /**
117      * Guess if this file is fixedwidth.
118      * Just basing the fact on all lines being of the same length
119      * @param lines
120      */
121     protected void guessFixedWidth(String[] lines) {
122         int lastLength = 0;
123         // assume fixedlength.
124         config.setFixedWidth(true);
125         for (int i = 0; i < lines.length; i++) {
126             if (i == 0) {
127                 lastLength = lines[i].length();
128             } else {
129                 if (lastLength != lines[i].length()) {
130                     config.setFixedWidth(false);
131                 }
132             }
133         }
134     }
135         
136 
137     protected void guessFieldSeperator(String[] lines) {
138         if (config.isFixedWidth()) {
139             guessFixedWidthSeperator(lines);
140             return;
141         }
142         for (int i = 0; i < lines.length; i++) {
143         }
144     }
145     
146     protected void guessFixedWidthSeperator(String[] lines) {
147         // keep track of the fieldlength
148         int previousMatch = -1;
149         for (int i = 0; i < lines[0].length(); i++) {
150             char last = ' ';
151             boolean charMatches = true;
152             for (int j = 0; j < lines.length; j++) {
153                 if (j == 0) {
154                     last = lines[j].charAt(i);
155                 }
156                 if (last != lines[j].charAt(i)) {
157                     charMatches = false;
158                     break;
159                 } 
160             }
161             if (charMatches) {
162                 if (previousMatch == -1) {
163                     previousMatch = 0;
164                 }
165                 CSVField field = new CSVField();
166                 field.setName("field"+config.getFields().length+1);
167                 field.setSize((i-previousMatch));
168                 config.addField(field);
169             }
170         }
171     }
172     /**
173      * 
174      * @return if the field uses a field header. Defaults to false.
175      */
176     public boolean hasFieldHeader() {
177         return hasFieldHeader;
178     }
179 
180     /**
181      * Specify if the CSV file has a field header
182      * @param hasFieldHeader true or false
183      */
184     public void setHasFieldHeader(boolean hasFieldHeader) {
185         this.hasFieldHeader = hasFieldHeader;
186     }
187     
188  
189 }