TextFacetDefinitionReader.java

  1. /*
  2.  * Licensed to the Apache Software Foundation (ASF) under one or more
  3.  * contributor license agreements.  See the NOTICE file distributed with
  4.  * this work for additional information regarding copyright ownership.
  5.  * The ASF licenses this file to You under the Apache License, Version 2.0
  6.  * (the "License"); you may not use this file except in compliance with
  7.  * the License.  You may obtain a copy of the License at
  8.  *
  9.  *      http://www.apache.org/licenses/LICENSE-2.0
  10.  *
  11.  * Unless required by applicable law or agreed to in writing, software
  12.  * distributed under the License is distributed on an "AS IS" BASIS,
  13.  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14.  * See the License for the specific language governing permissions and
  15.  * limitations under the License.
  16.  */
  17. package org.apache.commons.geometry.io.euclidean.threed.txt;

  18. import java.io.Reader;
  19. import java.util.ArrayList;
  20. import java.util.Arrays;
  21. import java.util.List;

  22. import org.apache.commons.geometry.euclidean.threed.Vector3D;
  23. import org.apache.commons.geometry.io.core.internal.GeometryIOUtils;
  24. import org.apache.commons.geometry.io.core.internal.SimpleTextParser;
  25. import org.apache.commons.geometry.io.euclidean.threed.FacetDefinition;
  26. import org.apache.commons.geometry.io.euclidean.threed.FacetDefinitionReader;
  27. import org.apache.commons.geometry.io.euclidean.threed.SimpleFacetDefinition;

  28. /** Facet definition reader implementation that reads an extremely simple
  29.  * text format. The format simply consists of sequences of decimal numbers
  30.  * defining the vertices of each facet, with one facet defined per line.
  31.  * Facet vertices are defined by listing their {@code x}, {@code y}, and {@code z}
  32.  * components in that order. The format can be described as follows:
  33.  * <p>
  34.  * <code>
  35.  *      p1<sub>x</sub> p1<sub>y</sub> p1<sub>z</sub> p2<sub>x</sub> p2<sub>y</sub> p2<sub>z</sub> p3<sub>x</sub> p3<sub>y</sub> p3<sub>z</sub> ...
  36.  * </code>
  37.  * </p>
  38.  * <p>where the <em>p1</em> elements contain the coordinates of the first facet vertex,
  39.  * <em>p2</em> those of the second, and so on. At least 3 vertices are required for each
  40.  * facet but more can be specified as long as all {@code x, y, z} components are provided
  41.  * for each vertex. The facet normal is defined implicitly from the facet vertices using
  42.  * the right-hand rule (i.e. vertices are arranged counter-clockwise).</p>
  43.  *
  44.  * <p><strong>Delimiters</strong></p>
  45.  * <p>Vertex coordinate values may be separated by any character that is
  46.  * not a digit, alphabetic, '-' (minus), or '+' (plus). The character does
  47.  * not need to be consistent between (or even within) lines and does not
  48.  * need to be configured in the reader. This design provides configuration-free
  49.  * support for common formats such as CSV as well as other formats designed
  50.  * for human readability.</p>
  51.  *
  52.  * <p><strong>Comments</strong></p>
  53.  * <p>Comments are supported through use of the {@link #getCommentToken() comment token}
  54.  * property. Characters from the comment token through the end of the current line are
  55.  * discarded. Setting the comment token to null or the empty string disables comment parsing.
  56.  * The default comment token is {@value #DEFAULT_COMMENT_TOKEN}</p>
  57.  *
  58.  * <p><strong>Examples</strong></p>
  59.  * <p>The following examples demonstrate the definition of two facets,
  60.  * one with 3 vertices and one with 4 vertices, in different formats.</p>
  61.  * <p><em>CSV</em></p>
  62.  * <pre>
  63.  *  0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0
  64.  *  1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0
  65.  * </pre>
  66.  * <p><em>Whitespace and semicolons</em></p>
  67.  * <pre>
  68.  *  # line comment
  69.  *  0 0 0; 1 0 0; 1 1 0 # 3 vertices
  70.  *  1 0 0; 1 1 0; 1 1 1; 1 0 1 # 4 vertices
  71.  * </pre>
  72.  *
  73.  * @see TextFacetDefinitionWriter
  74.  */
  75. public class TextFacetDefinitionReader implements FacetDefinitionReader {

  76.     /** Default comment token string. */
  77.     public static final String DEFAULT_COMMENT_TOKEN = "#";

  78.     /** Reader for accessing the character stream. */
  79.     private final Reader reader;

  80.     /** Parser used to parse text content. */
  81.     private final SimpleTextParser parser;

  82.     /** Comment token string; may be null. */
  83.     private String commentToken;

  84.     /** True if the instance has a non-null, non-empty comment token. */
  85.     private boolean hasCommentToken;

  86.     /** First character of the comment token. */
  87.     private int commentStartChar;

  88.     /** Construct a new instance that reads characters from the argument and uses
  89.      * the default comment token value of {@value TextFacetDefinitionReader#DEFAULT_COMMENT_TOKEN}.
  90.      * @param reader reader to read characters from
  91.      */
  92.     public TextFacetDefinitionReader(final Reader reader) {
  93.         this(reader, DEFAULT_COMMENT_TOKEN);
  94.     }

  95.     /** Construct a new instance with the given reader and comment token.
  96.      * @param reader reader to read characters from
  97.      * @param commentToken comment token string; set to null to disable comment parsing
  98.      * @throws IllegalArgumentException if {@code commentToken} is non-null and contains whitespace
  99.      */
  100.     public TextFacetDefinitionReader(final Reader reader, final String commentToken) {
  101.         this.reader = reader;
  102.         this.parser = new SimpleTextParser(reader);

  103.         setCommentTokenInternal(commentToken);
  104.     }

  105.     /** Get the comment token string. If not null or empty, any characters from
  106.      * this token to the end of the current line are discarded during parsing.
  107.      * @return comment token string; may be null
  108.      */
  109.     public String getCommentToken() {
  110.         return commentToken;
  111.     }

  112.     /** Set the comment token string. If not null or empty, any characters from this
  113.      * token to the end of the current line are discarded during parsing. Set to null
  114.      * or the empty string to disable comment parsing. Comment tokens may not contain
  115.      * whitespace.
  116.      * @param commentToken token to set
  117.      * @throws IllegalArgumentException if the argument is non-null and contains whitespace
  118.      */
  119.     public void setCommentToken(final String commentToken) {
  120.         setCommentTokenInternal(commentToken);
  121.     }

  122.     /** {@inheritDoc} */
  123.     @Override
  124.     public FacetDefinition readFacet() {
  125.         discardNonDataLines();
  126.         if (parser.hasMoreCharacters()) {
  127.             try {
  128.                 return readFacetInternal();
  129.             } finally {
  130.                 // advance to the next line even if parsing failed for the
  131.                 // current line
  132.                 parser.discardLine();
  133.             }
  134.         }
  135.         return null;
  136.     }

  137.     /** {@inheritDoc} */
  138.     @Override
  139.     public void close() {
  140.         GeometryIOUtils.closeUnchecked(reader);
  141.     }

  142.     /** Internal method to read a facet definition starting from the current parser
  143.      * position. Empty lines (including lines containing only comments) are discarded.
  144.      * @return facet definition or null if the end of input is reached
  145.      * @throws IllegalStateException if a data format error occurs
  146.      * @throws java.io.UncheckedIOException if an I/O error occurs
  147.      */
  148.     private FacetDefinition readFacetInternal() {
  149.         final Vector3D p1 = readVector();
  150.         discardNonData();
  151.         final Vector3D p2 = readVector();
  152.         discardNonData();
  153.         final Vector3D p3 = readVector();

  154.         final List<Vector3D> vertices;

  155.         discardNonData();
  156.         if (parser.hasMoreCharactersOnLine()) {
  157.             vertices = new ArrayList<>();
  158.             vertices.add(p1);
  159.             vertices.add(p2);
  160.             vertices.add(p3);

  161.             do {
  162.                 vertices.add(readVector());
  163.                 discardNonData();
  164.             } while (parser.hasMoreCharactersOnLine());
  165.         } else {
  166.             vertices = Arrays.asList(p1, p2, p3);
  167.         }

  168.         return new SimpleFacetDefinition(vertices);
  169.     }

  170.     /** Read a vector starting from the current parser position.
  171.      * @return vector read from the parser
  172.      * @throws IllegalStateException if a data format error occurs
  173.      * @throws java.io.UncheckedIOException if an I/O error occurs
  174.      */
  175.     private Vector3D readVector() {
  176.         final double x = readDouble();
  177.         discardNonData();
  178.         final double y = readDouble();
  179.         discardNonData();
  180.         final double z = readDouble();

  181.         return Vector3D.of(x, y, z);
  182.     }

  183.     /** Read a double starting from the current parser position.
  184.      * @return double value read from the parser
  185.      * @throws IllegalStateException if a data format error occurs
  186.      * @throws java.io.UncheckedIOException if an I/O error occurs
  187.      */
  188.     private double readDouble() {
  189.         return parser
  190.                 .next(TextFacetDefinitionReader::isDataTokenPart)
  191.                 .getCurrentTokenAsDouble();
  192.     }

  193.     /** Discard lines that do not contain any data. This includes empty lines
  194.      * and lines that only contain comments.
  195.      * @throws IllegalStateException if a data format error occurs
  196.      * @throws java.io.UncheckedIOException if an I/O error occurs
  197.      */
  198.     private void discardNonDataLines() {
  199.         parser.discardLineWhitespace();
  200.         while (parser.hasMoreCharacters() &&
  201.                 (!parser.hasMoreCharactersOnLine() ||
  202.                 foundComment())) {

  203.             parser
  204.                 .discardLine()
  205.                 .discardLineWhitespace();
  206.         }
  207.     }

  208.     /** Discard a sequence of non-data characters on the current line starting
  209.      * from the current parser position.
  210.      * @throws IllegalStateException if a data format error occurs
  211.      * @throws java.io.UncheckedIOException if an I/O error occurs
  212.      */
  213.     private void discardNonData() {
  214.         parser.discard(c ->
  215.             !SimpleTextParser.isNewLinePart(c) &&
  216.             !isDataTokenPart(c) &&
  217.             c != commentStartChar);

  218.         if (foundComment()) {
  219.             // discard everything to the end of the line but do
  220.             // not read the new line sequence
  221.             parser.discard(SimpleTextParser::isNotNewLinePart);
  222.         }
  223.     }

  224.     /** Return true if the parser is positioned at the start of the comment token.
  225.      * @return true if the parser is positioned at the start of the comment token.
  226.      * @throws IllegalStateException if a data format error occurs
  227.      * @throws java.io.UncheckedIOException if an I/O error occurs
  228.      */
  229.     private boolean foundComment() {
  230.         return hasCommentToken &&
  231.                 commentToken.equals(parser.peek(commentToken.length()));
  232.     }

  233.     /** Internal method called to set the comment token state.
  234.      * @param commentTokenStr comment token to set
  235.      * @throws IllegalArgumentException if the argument is non-null and contains whitespace
  236.      */
  237.     private void setCommentTokenInternal(final String commentTokenStr) {
  238.         if (commentTokenStr != null && containsWhitespace(commentTokenStr)) {
  239.             throw new IllegalArgumentException("Comment token cannot contain whitespace; was [" +
  240.                     commentTokenStr + "]");
  241.         }

  242.         this.commentToken = commentTokenStr;
  243.         this.hasCommentToken = commentTokenStr != null && commentTokenStr.length() > 0;
  244.         this.commentStartChar = this.hasCommentToken ?
  245.                 commentTokenStr.charAt(0) :
  246.                 -1;
  247.     }

  248.     /** Return true if the given character is considered as part of a data token
  249.      * for this reader.
  250.      * @param ch character to test
  251.      * @return true if {@code ch} is part of a data token
  252.      */
  253.     private static boolean isDataTokenPart(final int ch) {
  254.         // include all alphabetic characters in the data tokens, which will help
  255.         // to provide better error messages in case of failure (ie, tokens will
  256.         // be split more naturally)
  257.         return Character.isAlphabetic(ch) ||
  258.                 SimpleTextParser.isDecimalPart(ch);
  259.     }

  260.     /** Return true if the given string contains any whitespace characters.
  261.      * @param str string to test
  262.      * @return true if {@code str} contains any whitespace characters
  263.      */
  264.     private static boolean containsWhitespace(final String str) {
  265.         for (final char ch : str.toCharArray()) {
  266.             if (Character.isWhitespace(ch)) {
  267.                 return true;
  268.             }
  269.         }

  270.         return false;
  271.     }
  272. }