View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.commons.geometry.io.euclidean.threed.txt;
18  
19  import java.io.Reader;
20  import java.util.ArrayList;
21  import java.util.Arrays;
22  import java.util.List;
23  
24  import org.apache.commons.geometry.euclidean.threed.Vector3D;
25  import org.apache.commons.geometry.io.core.internal.GeometryIOUtils;
26  import org.apache.commons.geometry.io.core.internal.SimpleTextParser;
27  import org.apache.commons.geometry.io.euclidean.threed.FacetDefinition;
28  import org.apache.commons.geometry.io.euclidean.threed.FacetDefinitionReader;
29  import org.apache.commons.geometry.io.euclidean.threed.SimpleFacetDefinition;
30  
31  /** Facet definition reader implementation that reads an extremely simple
32   * text format. The format simply consists of sequences of decimal numbers
33   * defining the vertices of each facet, with one facet defined per line.
34   * Facet vertices are defined by listing their {@code x}, {@code y}, and {@code z}
35   * components in that order. The format can be described as follows:
36   * <p>
37   * <code>
38   *      p1<sub>x</sub> p1<sub>y</sub> p1<sub>z</sub> p2<sub>x</sub> p2<sub>y</sub> p2<sub>z</sub> p3<sub>x</sub> p3<sub>y</sub> p3<sub>z</sub> ...
39   * </code>
40   * </p>
41   * <p>where the <em>p1</em> elements contain the coordinates of the first facet vertex,
42   * <em>p2</em> those of the second, and so on. At least 3 vertices are required for each
43   * facet but more can be specified as long as all {@code x, y, z} components are provided
44   * for each vertex. The facet normal is defined implicitly from the facet vertices using
45   * the right-hand rule (i.e. vertices are arranged counter-clockwise).</p>
46   *
47   * <p><strong>Delimiters</strong></p>
48   * <p>Vertex coordinate values may be separated by any character that is
49   * not a digit, alphabetic, '-' (minus), or '+' (plus). The character does
50   * not need to be consistent between (or even within) lines and does not
51   * need to be configured in the reader. This design provides configuration-free
52   * support for common formats such as CSV as well as other formats designed
53   * for human readability.</p>
54   *
55   * <p><strong>Comments</strong></p>
56   * <p>Comments are supported through use of the {@link #getCommentToken() comment token}
57   * property. Characters from the comment token through the end of the current line are
58   * discarded. Setting the comment token to null or the empty string disables comment parsing.
59   * The default comment token is {@value #DEFAULT_COMMENT_TOKEN}</p>
60   *
61   * <p><strong>Examples</strong></p>
62   * <p>The following examples demonstrate the definition of two facets,
63   * one with 3 vertices and one with 4 vertices, in different formats.</p>
64   * <p><em>CSV</em></p>
65   * <pre>
66   *  0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0
67   *  1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0
68   * </pre>
69   * <p><em>Whitespace and semicolons</em></p>
70   * <pre>
71   *  # line comment
72   *  0 0 0; 1 0 0; 1 1 0 # 3 vertices
73   *  1 0 0; 1 1 0; 1 1 1; 1 0 1 # 4 vertices
74   * </pre>
75   *
76   * @see TextFacetDefinitionWriter
77   */
78  public class TextFacetDefinitionReader implements FacetDefinitionReader {
79  
80      /** Default comment token string. */
81      public static final String DEFAULT_COMMENT_TOKEN = "#";
82  
83      /** Reader for accessing the character stream. */
84      private final Reader reader;
85  
86      /** Parser used to parse text content. */
87      private final SimpleTextParser parser;
88  
89      /** Comment token string; may be null. */
90      private String commentToken;
91  
92      /** True if the instance has a non-null, non-empty comment token. */
93      private boolean hasCommentToken;
94  
95      /** First character of the comment token. */
96      private int commentStartChar;
97  
98      /** Construct a new instance that reads characters from the argument and uses
99       * the default comment token value of {@value TextFacetDefinitionReader#DEFAULT_COMMENT_TOKEN}.
100      * @param reader reader to read characters from
101      */
102     public TextFacetDefinitionReader(final Reader reader) {
103         this(reader, DEFAULT_COMMENT_TOKEN);
104     }
105 
106     /** Construct a new instance with the given reader and comment token.
107      * @param reader reader to read characters from
108      * @param commentToken comment token string; set to null to disable comment parsing
109      * @throws IllegalArgumentException if {@code commentToken} is non-null and contains whitespace
110      */
111     public TextFacetDefinitionReader(final Reader reader, final String commentToken) {
112         this.reader = reader;
113         this.parser = new SimpleTextParser(reader);
114 
115         setCommentTokenInternal(commentToken);
116     }
117 
118     /** Get the comment token string. If not null or empty, any characters from
119      * this token to the end of the current line are discarded during parsing.
120      * @return comment token string; may be null
121      */
122     public String getCommentToken() {
123         return commentToken;
124     }
125 
126     /** Set the comment token string. If not null or empty, any characters from this
127      * token to the end of the current line are discarded during parsing. Set to null
128      * or the empty string to disable comment parsing. Comment tokens may not contain
129      * whitespace.
130      * @param commentToken token to set
131      * @throws IllegalArgumentException if the argument is non-null and contains whitespace
132      */
133     public void setCommentToken(final String commentToken) {
134         setCommentTokenInternal(commentToken);
135     }
136 
137     /** {@inheritDoc} */
138     @Override
139     public FacetDefinition readFacet() {
140         discardNonDataLines();
141         if (parser.hasMoreCharacters()) {
142             try {
143                 return readFacetInternal();
144             } finally {
145                 // advance to the next line even if parsing failed for the
146                 // current line
147                 parser.discardLine();
148             }
149         }
150         return null;
151     }
152 
153     /** {@inheritDoc} */
154     @Override
155     public void close() {
156         GeometryIOUtils.closeUnchecked(reader);
157     }
158 
159     /** Internal method to read a facet definition starting from the current parser
160      * position. Empty lines (including lines containing only comments) are discarded.
161      * @return facet definition or null if the end of input is reached
162      * @throws IllegalStateException if a data format error occurs
163      * @throws java.io.UncheckedIOException if an I/O error occurs
164      */
165     private FacetDefinition readFacetInternal() {
166         final Vector3D p1 = readVector();
167         discardNonData();
168         final Vector3D p2 = readVector();
169         discardNonData();
170         final Vector3D p3 = readVector();
171 
172         final List<Vector3D> vertices;
173 
174         discardNonData();
175         if (parser.hasMoreCharactersOnLine()) {
176             vertices = new ArrayList<>();
177             vertices.add(p1);
178             vertices.add(p2);
179             vertices.add(p3);
180 
181             do {
182                 vertices.add(readVector());
183                 discardNonData();
184             } while (parser.hasMoreCharactersOnLine());
185         } else {
186             vertices = Arrays.asList(p1, p2, p3);
187         }
188 
189         return new SimpleFacetDefinition(vertices);
190     }
191 
192     /** Read a vector starting from the current parser position.
193      * @return vector read from the parser
194      * @throws IllegalStateException if a data format error occurs
195      * @throws java.io.UncheckedIOException if an I/O error occurs
196      */
197     private Vector3D readVector() {
198         final double x = readDouble();
199         discardNonData();
200         final double y = readDouble();
201         discardNonData();
202         final double z = readDouble();
203 
204         return Vector3D.of(x, y, z);
205     }
206 
207     /** Read a double starting from the current parser position.
208      * @return double value read from the parser
209      * @throws IllegalStateException if a data format error occurs
210      * @throws java.io.UncheckedIOException if an I/O error occurs
211      */
212     private double readDouble() {
213         return parser
214                 .next(TextFacetDefinitionReader::isDataTokenPart)
215                 .getCurrentTokenAsDouble();
216     }
217 
218     /** Discard lines that do not contain any data. This includes empty lines
219      * and lines that only contain comments.
220      * @throws IllegalStateException if a data format error occurs
221      * @throws java.io.UncheckedIOException if an I/O error occurs
222      */
223     private void discardNonDataLines() {
224         parser.discardLineWhitespace();
225         while (parser.hasMoreCharacters() &&
226                 (!parser.hasMoreCharactersOnLine() ||
227                 foundComment())) {
228 
229             parser
230                 .discardLine()
231                 .discardLineWhitespace();
232         }
233     }
234 
235     /** Discard a sequence of non-data characters on the current line starting
236      * from the current parser position.
237      * @throws IllegalStateException if a data format error occurs
238      * @throws java.io.UncheckedIOException if an I/O error occurs
239      */
240     private void discardNonData() {
241         parser.discard(c ->
242             !SimpleTextParser.isNewLinePart(c) &&
243             !isDataTokenPart(c) &&
244             c != commentStartChar);
245 
246         if (foundComment()) {
247             // discard everything to the end of the line but do
248             // not read the new line sequence
249             parser.discard(SimpleTextParser::isNotNewLinePart);
250         }
251     }
252 
253     /** Return true if the parser is positioned at the start of the comment token.
254      * @return true if the parser is positioned at the start of the comment token.
255      * @throws IllegalStateException if a data format error occurs
256      * @throws java.io.UncheckedIOException if an I/O error occurs
257      */
258     private boolean foundComment() {
259         return hasCommentToken &&
260                 commentToken.equals(parser.peek(commentToken.length()));
261     }
262 
263     /** Internal method called to set the comment token state.
264      * @param commentTokenStr comment token to set
265      * @throws IllegalArgumentException if the argument is non-null and contains whitespace
266      */
267     private void setCommentTokenInternal(final String commentTokenStr) {
268         if (commentTokenStr != null && containsWhitespace(commentTokenStr)) {
269             throw new IllegalArgumentException("Comment token cannot contain whitespace; was [" +
270                     commentTokenStr + "]");
271         }
272 
273         this.commentToken = commentTokenStr;
274         this.hasCommentToken = commentTokenStr != null && commentTokenStr.length() > 0;
275         this.commentStartChar = this.hasCommentToken ?
276                 commentTokenStr.charAt(0) :
277                 -1;
278     }
279 
280     /** Return true if the given character is considered as part of a data token
281      * for this reader.
282      * @param ch character to test
283      * @return true if {@code ch} is part of a data token
284      */
285     private static boolean isDataTokenPart(final int ch) {
286         // include all alphabetic characters in the data tokens, which will help
287         // to provide better error messages in case of failure (ie, tokens will
288         // be split more naturally)
289         return Character.isAlphabetic(ch) ||
290                 SimpleTextParser.isDecimalPart(ch);
291     }
292 
293     /** Return true if the given string contains any whitespace characters.
294      * @param str string to test
295      * @return true if {@code str} contains any whitespace characters
296      */
297     private static boolean containsWhitespace(final String str) {
298         for (final char ch : str.toCharArray()) {
299             if (Character.isWhitespace(ch)) {
300                 return true;
301             }
302         }
303 
304         return false;
305     }
306 }