SimpleTextParser.java

  1. /*
  2.  * Licensed to the Apache Software Foundation (ASF) under one or more
  3.  * contributor license agreements.  See the NOTICE file distributed with
  4.  * this work for additional information regarding copyright ownership.
  5.  * The ASF licenses this file to You under the Apache License, Version 2.0
  6.  * (the "License"); you may not use this file except in compliance with
  7.  * the License.  You may obtain a copy of the License at
  8.  *
  9.  *      http://www.apache.org/licenses/LICENSE-2.0
  10.  *
  11.  * Unless required by applicable law or agreed to in writing, software
  12.  * distributed under the License is distributed on an "AS IS" BASIS,
  13.  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14.  * See the License for the specific language governing permissions and
  15.  * limitations under the License.
  16.  */
  17. package org.apache.commons.geometry.io.core.internal;

  18. import java.io.Reader;
  19. import java.util.Arrays;
  20. import java.util.List;
  21. import java.util.function.IntConsumer;
  22. import java.util.function.IntPredicate;

  23. /** Class providing basic text parsing capabilities. The goals of this class are to
  24.  * (1) provide a simple, flexible API for performing common text parsing operations and
  25.  * (2) provide a mechanism for creating consistent and informative parsing errors.
  26.  * This class is not intended as a replacement for grammar-based parsers and/or lexers.
  27.  */
  28. public class SimpleTextParser {

  29.     /** Constant indicating that the end of the input has been reached. */
  30.     private static final int EOF = -1;

  31.     /** Carriage return character. */
  32.     private static final char CR = '\r';

  33.     /** Line feed character. */
  34.     private static final char LF = '\n';

  35.     /** Default value for the max string length property. */
  36.     private static final int DEFAULT_MAX_STRING_LENGTH = 1024;

  37.     /** Error message used when a string exceeds the configured maximum length. */
  38.     private static final String STRING_LENGTH_ERR_MSG = "string length exceeds maximum value of ";

  39.     /** Initial token position number. */
  40.     private static final int INITIAL_TOKEN_POS = -1;

  41.     /** Int consumer that does nothing. */
  42.     private static final IntConsumer NOOP_CONSUMER = ch -> { };

  43.     /** Current line number; line numbers start counting at 1. */
  44.     private int lineNumber = 1;

  45.     /** Current character column on the current line; column numbers start at 1.*/
  46.     private int columnNumber = 1;

  47.     /** Maximum length for strings returned by this instance. */
  48.     private int maxStringLength = DEFAULT_MAX_STRING_LENGTH;

  49.     /** The current token. */
  50.     private String currentToken;

  51.     /** The line number that the current token started on. */
  52.     private int currentTokenLineNumber = INITIAL_TOKEN_POS;

  53.     /** The character number that the current token started on. */
  54.     private int currentTokenColumnNumber = INITIAL_TOKEN_POS;

  55.     /** Flag used to indicate that at least one token has been read from the stream. */
  56.     private boolean hasSetToken;

  57.     /** Character read buffer used to access the character stream. */
  58.     private final CharReadBuffer buffer;

  /** Create a parser that takes its characters from the given reader. The reader
   * is not closed by this instance.
   * @param reader source of the characters to parse
   */
  public SimpleTextParser(final Reader reader) {
    this(new CharReadBuffer(reader));
  }

  /** Create a parser that takes its characters from the given character buffer.
   * @param buffer read buffer supplying the characters to parse
   */
  public SimpleTextParser(final CharReadBuffer buffer) {
    this.buffer = buffer;
  }

  72.     /** Get the current line number. Line numbers start at 1.
  73.      * @return the current line number
  74.      */
  75.     public int getLineNumber() {
  76.         return lineNumber;
  77.     }

  78.     /** Set the current line number. This does not affect the character stream position,
  79.      * only the value returned by {@link #getLineNumber()}.
  80.      * @param lineNumber line number to set; line numbers start at 1
  81.      */
  82.     public void setLineNumber(final int lineNumber) {
  83.         this.lineNumber = lineNumber;
  84.     }

  85.     /** Get the current column number. This indicates the column position of the
  86.      * character that will returned by the next call to {@link #readChar()}. The first
  87.      * character of each line has a column number of 1.
  88.      * @return the current column number; column numbers start at 1
  89.      */
  90.     public int getColumnNumber() {
  91.         return columnNumber;
  92.     }

  93.     /** Set the current column number. This does not affect the character stream position,
  94.      * only the value returned by {@link #getColumnNumber()}.
  95.      * @param column the column number to set; column numbers start at 1
  96.      */
  97.     public void setColumnNumber(final int column) {
  98.         this.columnNumber = column;
  99.     }

  100.     /** Get the maximum length for strings returned by this instance. Operations
  101.      * that produce strings longer than this length will throw an exception.
  102.      * @return maximum length for strings returned by this instance
  103.      */
  104.     public int getMaxStringLength() {
  105.         return maxStringLength;
  106.     }

  107.     /** Set the maximum length for strings returned by this instance. Operations
  108.      * that produce strings longer than this length will throw an exception.
  109.      * @param maxStringLength maximum length for strings returned by this instance
  110.      * @throws IllegalArgumentException if the argument is less than zero
  111.      */
  112.     public void setMaxStringLength(final int maxStringLength) {
  113.         if (maxStringLength < 0) {
  114.             throw new IllegalArgumentException("Maximum string length cannot be less than zero; was " +
  115.                     maxStringLength);
  116.         }
  117.         this.maxStringLength = maxStringLength;
  118.     }

  119.     /** Get the current token. This is the most recent string read by one of the {@code nextXXX()}
  120.      * methods. This value will be null if no token has yet been read or if the end of content has
  121.      * been reached.
  122.      * @return the current token
  123.      * @see #next(int)
  124.      * @see #next(IntPredicate)
  125.      * @see #nextLine()
  126.      * @see #nextAlphanumeric()
  127.      */
  128.     public String getCurrentToken() {
  129.         return currentToken;
  130.     }

  131.     /** Return true if the current token is not null or empty.
  132.      * @return true if the current token is not null or empty
  133.      * @see #getCurrentToken()
  134.      */
  135.     public boolean hasNonEmptyToken() {
  136.         return currentToken != null && !currentToken.isEmpty();
  137.     }

  138.     /** Get the line number that the current token started on. This value will
  139.      * be -1 if no token has been read yet.
  140.      * @return current token starting line number or -1 if no token has been
  141.      *      read yet
  142.      * @see #getCurrentToken()
  143.      */
  144.     public int getCurrentTokenLineNumber() {
  145.         return currentTokenLineNumber;
  146.     }

  147.     /** Get the column position that the current token started on. This value will
  148.      * be -1 if no token has been read yet.
  149.      * @return current token column number or -1 if no oken has been read yet
  150.      * @see #getCurrentToken()
  151.      */
  152.     public int getCurrentTokenColumnNumber() {
  153.         return currentTokenColumnNumber;
  154.     }

  155.     /** Get the current token parsed as an integer.
  156.      * @return the current token parsed as an integer
  157.      * @throws IllegalStateException if no token has been read or the
  158.      *      current token cannot be parsed as an integer
  159.      */
  160.     public int getCurrentTokenAsInt() {
  161.         ensureHasSetToken();

  162.         Throwable cause = null;

  163.         if (currentToken != null) {
  164.             try {
  165.                 return Integer.parseInt(currentToken);
  166.             } catch (NumberFormatException exc) {
  167.                 cause = exc;
  168.             }
  169.         }

  170.         throw unexpectedToken("integer", cause);
  171.     }

  172.     /** Get the current token parsed as a double.
  173.      * @return the current token parsed as a double
  174.      * @throws IllegalStateException if no token has been read or the
  175.      *      current token cannot be parsed as a double
  176.      */
  177.     public double getCurrentTokenAsDouble() {
  178.         ensureHasSetToken();

  179.         Throwable cause = null;

  180.         if (currentToken != null) {
  181.             try {
  182.                 return Double.parseDouble(currentToken);
  183.             } catch (NumberFormatException exc) {
  184.                 cause = exc;
  185.             }
  186.         }

  187.         throw unexpectedToken("double", cause);
  188.     }

  189.     /** Return true if there are more characters to read from this instance.
  190.      * @return true if there are more characters to read from this instance
  191.      * @throws java.io.UncheckedIOException if an I/O error occurs
  192.      */
  193.     public boolean hasMoreCharacters() {
  194.         return buffer.hasMoreCharacters();
  195.     }

  196.     /** Return true if there are more characters to read on the current line.
  197.      * @return true if there are more characters to read on the current line
  198.      * @throws java.io.UncheckedIOException if an I/O error occurs
  199.      */
  200.     public boolean hasMoreCharactersOnLine() {
  201.         return hasMoreCharacters() && isNotNewLinePart(peekChar());
  202.     }

  203.     /** Read and return the next character in the stream and advance the parser position.
  204.      * This method updates the current line number and column number but does <strong>not</strong>
  205.      * set the {@link #getCurrentToken() current token}.
  206.      * @return the next character in the stream or -1 if the end of the stream has been
  207.      *      reached
  208.      * @throws java.io.UncheckedIOException if an I/O error occurs
  209.      * @see #peekChar()
  210.      */
  211.     public int readChar() {
  212.         final int value = buffer.read();
  213.         if (value == LF ||
  214.                 (value == CR && peekChar() != LF)) {
  215.             ++lineNumber;
  216.             columnNumber = 1;
  217.         } else if (value != EOF) {
  218.             ++columnNumber;
  219.         }

  220.         return value;
  221.     }

  222.     /** Read a string containing at most {@code len} characters from the stream and
  223.      * set it as the current token. Characters are added to the string until the string
  224.      * has the specified length or the end of the stream is reached. The characters are
  225.      * consumed from the stream. The token is set to null if no more characters are available
  226.      * from the character stream when this method is called.
  227.      * @param len the maximum length of the extracted string
  228.      * @return this instance
  229.      * @throws IllegalArgumentException if {@code len} is less than 0 or greater than the
  230.      *      configured {@link #getMaxStringLength() maximum string length}
  231.      * @throws java.io.UncheckedIOException if an I/O error occurs
  232.      * @see #getCurrentToken()
  233.      * @see #consume(int, IntConsumer)
  234.      */
  235.     public SimpleTextParser next(final int len) {
  236.         validateRequestedStringLength(len);

  237.         final int line = getLineNumber();
  238.         final int col = getColumnNumber();

  239.         String token = null;
  240.         if (hasMoreCharacters()) {
  241.             final StringBuilder sb = new StringBuilder(len);

  242.             consume(len, ch -> sb.append((char) ch));

  243.             token = sb.toString();
  244.         }

  245.         setToken(line, col, token);

  246.         return this;
  247.     }

  248.     /** Read a string containing at most {@code len} characters from the stream and
  249.      * set it as the current token. This is similar to {@link #next(int)} but with the exception
  250.      * that new line sequences beginning with {@code lineContinuationChar} are skipped.
  251.      * @param lineContinuationChar character used to indicate skipped new line sequences
  252.      * @param len the maximum length of the extracted string
  253.      * @return this instance
  254.      * @throws IllegalArgumentException if {@code len} is less than 0 or greater than the
  255.      *      configured {@link #getMaxStringLength() maximum string length}
  256.      * @throws java.io.UncheckedIOException if an I/O error occurs
  257.      * @see #getCurrentToken()
  258.      * @see #consumeWithLineContinuation(char, int, IntConsumer)
  259.      */
  260.     public SimpleTextParser nextWithLineContinuation(final char lineContinuationChar, final int len) {
  261.         validateRequestedStringLength(len);

  262.         final int line = getLineNumber();
  263.         final int col = getColumnNumber();

  264.         String token = null;
  265.         if (hasMoreCharacters()) {
  266.             final StringBuilder sb = new StringBuilder(len);

  267.             consumeWithLineContinuation(lineContinuationChar, len,
  268.                     ch -> sb.append((char) ch));

  269.             token = sb.toString();
  270.         }

  271.         setToken(line, col, token);

  272.         return this;
  273.     }

  274.     /** Read characters from the stream while the given predicate returns true and set the result
  275.      * as the current token. The next call to {@link #readChar()} will return either a character
  276.      * that fails the predicate test or -1 if the end of the stream has been reached.
  277.      * The token will be null if the end of the stream has been reached prior to the method call.
  278.      * @param pred predicate function passed characters read from the input; reading continues
  279.      *      until the predicate returns false
  280.      * @return this instance
  281.      * @throws IllegalStateException if the length of the produced string exceeds the configured
  282.      *      {@link #getMaxStringLength() maximum string length}
  283.      * @throws java.io.UncheckedIOException if an I/O error occurs
  284.      * @see #getCurrentToken()
  285.      * @see #consume(IntPredicate, IntConsumer)
  286.      */
  287.     public SimpleTextParser next(final IntPredicate pred) {
  288.         final int line = getLineNumber();
  289.         final int col = getColumnNumber();

  290.         String token = null;
  291.         if (hasMoreCharacters()) {
  292.             final StringCollector collector = new StringCollector(line, col, pred);

  293.             consume(collector, collector);

  294.             token = collector.getString();
  295.         }

  296.         setToken(line, col, token);

  297.         return this;
  298.     }

  299.     /** Read characters from the stream while the given predicate returns true and set the result
  300.      * as the current token. This is similar to {@link #next(IntPredicate)} but with the exception
  301.      * that new line sequences prefixed with {@code lineContinuationChar} are skipped.
  302.      * @param lineContinuationChar character used to indicate skipped new line sequences
  303.      * @param pred predicate function passed characters read from the input; reading continues
  304.      *      until the predicate returns false
  305.      * @return this instance
  306.      * @throws IllegalStateException if the length of the produced string exceeds the configured
  307.      *      {@link #getMaxStringLength() maximum string length}
  308.      * @throws java.io.UncheckedIOException if an I/O error occurs
  309.      * @see #getCurrentToken()
  310.      * @see #consume(IntPredicate, IntConsumer)
  311.      */
  312.     public SimpleTextParser nextWithLineContinuation(final char lineContinuationChar, final IntPredicate pred) {
  313.         final int line = getLineNumber();
  314.         final int col = getColumnNumber();

  315.         String token = null;
  316.         if (hasMoreCharacters()) {
  317.             final StringCollector collector = new StringCollector(line, col, pred);

  318.             consumeWithLineContinuation(lineContinuationChar, collector, collector);

  319.             token = collector.getString();
  320.         }

  321.         setToken(line, col, token);

  322.         return this;
  323.     }

  324.     /** Read characters from the current parser position to the next new line sequence and
  325.      * set the result as the current token . The newline character sequence
  326.      * ('\r', '\n', or '\r\n') at the end of the line is consumed but is not included in the token.
  327.      * The token will be null if the end of the stream has been reached prior to the method call.
  328.      * @return this instance
  329.      * @throws IllegalStateException if the length of the produced string exceeds the configured
  330.      *      {@link #getMaxStringLength() maximum string length}
  331.      * @throws java.io.UncheckedIOException if an I/O error occurs
  332.      * @see #getCurrentToken()
  333.      */
  334.     public SimpleTextParser nextLine() {
  335.         next(SimpleTextParser::isNotNewLinePart);

  336.         discardNewLineSequence();

  337.         return this;
  338.     }

  339.     /** Read a sequence of alphanumeric characters starting from the current parser position
  340.      * and set the result as the current token. The token will be the empty string if the next
  341.      * character in the stream is not alphanumeric and will be null if the end of the stream has
  342.      * been reached prior to the method call.
  343.      * @return this instance
  344.      * @throws IllegalStateException if the length of the produced string exceeds the configured
  345.      *      {@link #getMaxStringLength() maximum string length}
  346.      * @throws java.io.UncheckedIOException if an I/O error occurs
  347.      * @see #getCurrentToken()
  348.      */
  349.     public SimpleTextParser nextAlphanumeric() {
  350.         return next(SimpleTextParser::isAlphanumeric);
  351.     }

  352.     /** Discard {@code len} number of characters from the character stream. The
  353.      * parser position is updated but the current token is not changed.
  354.      * @param len number of characters to discard
  355.      * @return this instance
  356.      * @throws java.io.UncheckedIOException if an I/O error occurs
  357.      */
  358.     public SimpleTextParser discard(final int len) {
  359.         return consume(len, NOOP_CONSUMER);
  360.     }

  361.     /** Discard {@code len} number of characters from the character stream. The
  362.      * parser position is updated but the current token is not changed. Lines beginning
  363.      * with {@code lineContinuationChar} are skipped.
  364.      * @param lineContinuationChar character used to indicate skipped new line sequences
  365.      * @param len number of characters to discard
  366.      * @return this instance
  367.      * @throws java.io.UncheckedIOException if an I/O error occurs
  368.      */
  369.     public SimpleTextParser discardWithLineContinuation(final char lineContinuationChar,
  370.             final int len) {
  371.         return consumeWithLineContinuation(lineContinuationChar, len, NOOP_CONSUMER);
  372.     }

  373.     /** Discard characters from the stream while the given predicate returns true. The next call
  374.      * to {@link #readChar()} will return either a character that fails the predicate test or -1
  375.      * if the end of the stream has been reached. The parser position is updated but the current
  376.      * token is not changed.
  377.      * @param pred predicate test for characters to discard
  378.      * @return this instance
  379.      * @throws java.io.UncheckedIOException if an I/O error occurs
  380.      */
  381.     public SimpleTextParser discard(final IntPredicate pred) {
  382.         return consume(pred, NOOP_CONSUMER);
  383.     }

  384.     /** Discard characters from the stream while the given predicate returns true. New line sequences
  385.      * beginning with {@code lineContinuationChar} are skipped. The next call o {@link #readChar()}
  386.      * will return either a character that fails the predicate test or -1 if the end of the stream
  387.      * has been reached. The parser position is updated but the current token is not changed.
  388.      * @param lineContinuationChar character used to indicate skipped new line sequences
  389.      * @param pred predicate test for characters to discard
  390.      * @return this instance
  391.      * @throws java.io.UncheckedIOException if an I/O error occurs
  392.      */
  393.     public SimpleTextParser discardWithLineContinuation(final char lineContinuationChar,
  394.             final IntPredicate pred) {
  395.         return consumeWithLineContinuation(lineContinuationChar, pred, NOOP_CONSUMER);
  396.     }

  397.     /** Discard a sequence of whitespace characters from the character stream starting from the
  398.      * current parser position. The next call to {@link #readChar()} will return either a non-whitespace
  399.      * character or -1 if the end of the stream has been reached. The parser position is updated
  400.      * but the current token is not changed.
  401.      * @return this instance
  402.      * @throws java.io.UncheckedIOException if an I/O error occurs
  403.      */
  404.     public SimpleTextParser discardWhitespace() {
  405.         return discard(SimpleTextParser::isWhitespace);
  406.     }

  407.     /** Discard the next whitespace characters on the current line. The next call to
  408.      * {@link #readChar()} will return either a non-whitespace character on the current line,
  409.      * the newline character sequence (indicating the end of the line), or -1 (indicating the
  410.      * end of the stream). The parser position is updated but the current token is not changed.
  411.      * @return this instance
  412.      * @throws java.io.UncheckedIOException if an I/O error occurs
  413.      */
  414.     public SimpleTextParser discardLineWhitespace() {
  415.         return discard(SimpleTextParser::isLineWhitespace);
  416.     }

  417.     /** Discard the newline character sequence at the current reader position. The sequence
  418.      * is defined as one of "\r", "\n", or "\r\n". Does nothing if the reader is not positioned
  419.      * at a newline sequence. The parser position is updated but the current token is not changed.
  420.      * @return this instance
  421.      * @throws java.io.UncheckedIOException if an I/O error occurs
  422.      */
  423.     public SimpleTextParser discardNewLineSequence() {
  424.         final int value = peekChar();
  425.         if (value == LF) {
  426.             readChar();
  427.         } else if (value == CR) {
  428.             readChar();

  429.             if (peekChar() == LF) {
  430.                 readChar();
  431.             }
  432.         }

  433.         return this;
  434.     }

  435.     /** Discard all remaining characters on the current line, including the terminating
  436.      * newline character sequence. The next call to {@link #readChar()} will return either the
  437.      * first character on the next line or -1 if the end of the stream has been reached.
  438.      * The parser position is updated but the current token is not changed.
  439.      * @return this instance
  440.      * @throws java.io.UncheckedIOException if an I/O error occurs
  441.      */
  442.     public SimpleTextParser discardLine() {
  443.         discard(SimpleTextParser::isNotNewLinePart);

  444.         discardNewLineSequence();

  445.         return this;
  446.     }

  447.     /** Consume characters from the stream and pass them to {@code consumer} while the given predicate
  448.      * returns true. The operation ends when the predicate returns false or the end of the stream is
  449.      * reached.
  450.      * @param pred predicate test for characters to consume
  451.      * @param consumer object to be passed each consumed character
  452.      * @return this instance
  453.      * @throws java.io.UncheckedIOException if an I/O error occurs
  454.      */
  455.     public SimpleTextParser consume(final IntPredicate pred, final IntConsumer consumer) {
  456.         int ch;
  457.         while ((ch = peekChar()) != EOF && pred.test(ch)) {
  458.             consumer.accept(readChar());
  459.         }

  460.         return this;
  461.     }

  462.     /** Consume at most {@code len} characters from the stream, passing each to the given consumer.
  463.      * This method is similar to {@link #consume(int, IntConsumer)} with the exception that new line
  464.      * sequences prefixed with {@code lineContinuationChar} are skipped.
  465.      * @param lineContinuationChar character used to indicate skipped new line sequences
  466.      * @param len number of characters to consume
  467.      * @param consumer function to be passed each consumed character
  468.      * @return this instance
  469.      * @throws java.io.UncheckedIOException if an I/O error occurs
  470.      */
  471.     public SimpleTextParser consumeWithLineContinuation(final char lineContinuationChar,
  472.             final int len, final IntConsumer consumer) {
  473.         int i = -1;
  474.         int ch;
  475.         while (++i < len && (ch = readChar()) != EOF) {
  476.             if (ch == lineContinuationChar && isNewLinePart(peekChar())) {
  477.                 --i; // don't count the continuation char toward the total length
  478.                 discardNewLineSequence();
  479.             } else {
  480.                 consumer.accept(ch);
  481.             }
  482.         }

  483.         return this;
  484.     }

  485.     /** Consume at most {@code len} characters from the stream, passing each to the given consumer.
  486.      * The operation continues until {@code len} number of characters have been read or the end of
  487.      * the stream has been reached.
  488.      * @param len number of characters to consume
  489.      * @param consumer object to be passed each consumed character
  490.      * @return this instance
  491.      * @throws java.io.UncheckedIOException if an I/O error occurs
  492.      */
  493.     public SimpleTextParser consume(final int len, final IntConsumer consumer) {
  494.         int ch;
  495.         for (int i = 0; i < len; ++i) {
  496.             ch = readChar();
  497.             if (ch != EOF) {
  498.                 consumer.accept(ch);
  499.             } else {
  500.                 break;
  501.             }
  502.         }

  503.         return this;
  504.     }

  505.     /** Consume characters from the stream and pass them to {@code consumer} while the given predicate
  506.      * returns true. This method is similar to {@link #consume(IntPredicate, IntConsumer)} with the
  507.      * exception that new lines sequences beginning with {@code lineContinuationChar} are skipped.
  508.      * @param lineContinuationChar character used to indicate skipped new line sequences
  509.      * @param pred predicate test for characters to consume
  510.      * @param consumer object to be passed each consumed character
  511.      * @return this instance
  512.      * @throws java.io.UncheckedIOException if an I/O error occurs
  513.      */
  514.     public SimpleTextParser consumeWithLineContinuation(final char lineContinuationChar,
  515.             final IntPredicate pred, final IntConsumer consumer) {
  516.         int ch;
  517.         while ((ch = peekChar()) != EOF) {
  518.             if (ch == lineContinuationChar && isNewLinePart(buffer.charAt(1))) {
  519.                 readChar();
  520.                 discardNewLineSequence();
  521.             } else if (pred.test(ch)) {
  522.                 consumer.accept(readChar());
  523.             } else {
  524.                 break;
  525.             }
  526.         }

  527.         return this;
  528.     }

  529.     /** Return the next character in the stream but do not advance the parser position.
  530.      * @return the next character in the stream or -1 if the end of the stream has been
  531.      *      reached
  532.      * @throws java.io.UncheckedIOException if an I/O error occurs
  533.      * @see #readChar()
  534.      */
  535.     public int peekChar() {
  536.         return buffer.peek();
  537.     }

  538.     /** Return a string containing containing at most {@code len} characters from the stream but
  539.      * without changing the parser position. Characters are added to the string until the
  540.      * string has the specified length or the end of the stream is reached.
  541.      * @param len the maximum length of the returned string
  542.      * @return a string containing containing at most {@code len} characters from the stream
  543.      *      or null if the parser has already reached the end of the stream
  544.      * @throws IllegalArgumentException if {@code len} is less than 0 or greater than the
  545.      *      configured {@link #getMaxStringLength() maximum string length}
  546.      * @throws java.io.UncheckedIOException if an I/O error occurs
  547.      * @see #next(int)
  548.      */
  549.     public String peek(final int len) {
  550.         validateRequestedStringLength(len);

  551.         return buffer.peekString(len);
  552.     }

  553.     /** Read characters from the stream while the given predicate returns true but do not
  554.      * change the current token or advance the parser position.
  555.      * @param pred predicate function passed characters read from the input; reading continues
  556.      *      until the predicate returns false
  557.      * @return string containing characters matching {@code pred} or null if the parser has already
  558.      *      reached the end of the stream
  559.      * @throws IllegalStateException if the length of the produced string exceeds the configured
  560.      *      {@link #getMaxStringLength() maximum string length}
  561.      * @throws java.io.UncheckedIOException if an I/O error occurs
  562.      * @see #getCurrentToken()
  563.      */
  564.     public String peek(final IntPredicate pred) {
  565.         String token = null;

  566.         if (hasMoreCharacters()) {
  567.             final StringCollector collector = new StringCollector(lineNumber, columnNumber, pred);

  568.             int i = -1;
  569.             int ch = buffer.charAt(++i);
  570.             while (ch != EOF && collector.test(ch)) {
  571.                 collector.accept(ch);

  572.                 ch = buffer.charAt(++i);
  573.             }

  574.             token = collector.getString();
  575.         }

  576.         return token;
  577.     }

  578.     /** Compare the {@link #getCurrentToken() current token} with the argument and throw an
  579.      * exception if they are not equal. The comparison is case-sensitive.
  580.      * @param expected expected token
  581.      * @return this instance
  582.      * @throws IllegalStateException if no token has been read or {@code expected} does not exactly
  583.      *      equal the current token
  584.      */
  585.     public SimpleTextParser match(final String expected) {
  586.         matchInternal(expected, true, true);
  587.         return this;
  588.     }

  589.     /** Compare the {@link #getCurrentToken() current token} with the argument and throw an
  590.      * exception if they are not equal. The comparison is <em>not</em> case-sensitive.
  591.      * @param expected expected token
  592.      * @return this instance
  593.      * @throws IllegalStateException if no token has been read or {@code expected} does not equal
  594.      *      the current token (ignoring case)
  595.      */
  596.     public SimpleTextParser matchIgnoreCase(final String expected) {
  597.         matchInternal(expected, false, true);
  598.         return this;
  599.     }

  600.     /** Return true if the {@link #getCurrentToken() current token} is equal to the argument.
  601.      * The comparison is case-sensitive.
  602.      * @param expected expected token
  603.      * @return true if the argument exactly equals the current token
  604.      * @throws IllegalStateException if no token has been read
  605.      * @throws java.io.UncheckedIOException if an I/O error occurs
  606.      */
  607.     public boolean tryMatch(final String expected) {
  608.         return matchInternal(expected, true, false);
  609.     }

  610.     /** Return true if the {@link #getCurrentToken() current token} is equal to the argument.
  611.      * The comparison is <em>not</em> case-sensitive.
  612.      * @param expected expected token
  613.      * @return true if the argument equals the current token (ignoring case)
  614.      * @throws IllegalStateException if no token has been read
  615.      */
  616.     public boolean tryMatchIgnoreCase(final String expected) {
  617.         return matchInternal(expected, false, false);
  618.     }

  619.     /** Internal method to compare the current token with the argument.
  620.      * @param expected expected token
  621.      * @param caseSensitive if the comparison should be case-sensitive
  622.      * @param throwOnFailure if an exception should be thrown if the argument is not
  623.      *      equal to the current token
  624.      * @return true if the argument is equal to the current token
  625.      * @throws IllegalStateException if no token has been read or {@code expected} does not match the
  626.      *      current token and {@code throwOnFailure} is true
  627.      */
  628.     private boolean matchInternal(final String expected, final boolean caseSensitive,
  629.             final boolean throwOnFailure) {
  630.         ensureHasSetToken();

  631.         if (!stringsEqual(expected, currentToken, caseSensitive)) {
  632.             if (throwOnFailure) {
  633.                 throw unexpectedToken("[" + expected + "]");
  634.             }

  635.             return false;
  636.         }

  637.         return true;
  638.     }

  639.     /** Return the index of the argument that exactly matches the {@link #getCurrentToken() current token}.
  640.      * An exception is thrown if no match is found. String comparisons are case-sensitive.
  641.      * @param expected strings to compare with the current token
  642.      * @return index of the argument that exactly matches the current token
  643.      * @throws IllegalStateException if no token has been read or no match is found among the arguments
  644.      */
  645.     public int choose(final String... expected) {
  646.         return choose(Arrays.asList(expected));
  647.     }

  648.     /** Return the index of the argument that exactly matches the {@link #getCurrentToken() current token}.
  649.      * An exception is thrown if no match is found. String comparisons are case-sensitive.
  650.      * @param expected strings to compare with the current token
  651.      * @return index of the argument that exactly matches the current token
  652.      * @throws IllegalStateException if no token has been read or no match is found among the arguments
  653.      */
  654.     public int choose(final List<String> expected) {
  655.         return chooseInternal(expected, true, true);
  656.     }

  657.     /** Return the index of the argument that matches the {@link #getCurrentToken() current token},
  658.      * ignoring case. An exception is thrown if no match is found. String comparisons are <em>not</em>
  659.      * case-sensitive.
  660.      * @param expected strings to compare with the current token
  661.      * @return index of the argument that matches the current token (ignoring case)
  662.      * @throws IllegalStateException if no token has been read or no match is found among the arguments
  663.      */
  664.     public int chooseIgnoreCase(final String... expected) {
  665.         return chooseIgnoreCase(Arrays.asList(expected));
  666.     }

  667.     /** Return the index of the argument that matches the {@link #getCurrentToken() current token},
  668.      * ignoring case. An exception is thrown if no match is found. String comparisons are <em>not</em>
  669.      * case-sensitive.
  670.      * @param expected strings to compare with the current token
  671.      * @return index of the argument that matches the current token (ignoring case)
  672.      * @throws IllegalStateException if no token has been read or no match is found among the arguments
  673.      */
  674.     public int chooseIgnoreCase(final List<String> expected) {
  675.         return chooseInternal(expected, false, true);
  676.     }

  677.     /** Return the index of the argument that exactly matches the {@link #getCurrentToken() current token}
  678.      * or -1 if no match is found. String comparisons are case-sensitive.
  679.      * @param expected strings to compare with the current token
  680.      * @return index of the argument that exactly matches the current token or -1 if
  681.      *      no match is found
  682.      * @throws IllegalStateException if no token has been read
  683.      */
  684.     public int tryChoose(final String... expected) {
  685.         return tryChoose(Arrays.asList(expected));
  686.     }

  687.     /** Return the index of the argument that exactly matches the {@link #getCurrentToken() current token}
  688.      * or -1 if no match is found. String comparisons are case-sensitive.
  689.      * @param expected strings to compare with the current token
  690.      * @return index of the argument that exactly matches the current token or -1 if
  691.      *      no match is found
  692.      * @throws IllegalStateException if no token has been read
  693.      */
  694.     public int tryChoose(final List<String> expected) {
  695.         return chooseInternal(expected, true, false);
  696.     }

  697.     /** Return the index of the argument that matches the {@link #getCurrentToken() current token}
  698.      * or -1 if no match is found. String comparisons are <em>not</em> case-sensitive.
  699.      * @param expected strings to compare with the current token
  700.      * @return index of the argument that matches the current token (ignoring case) or -1 if
  701.      *      no match is found
  702.      * @throws IllegalStateException if no token has been read
  703.      */
  704.     public int tryChooseIgnoreCase(final String... expected) {
  705.         return tryChooseIgnoreCase(Arrays.asList(expected));
  706.     }

  707.     /** Return the index of the argument that matches the {@link #getCurrentToken() current token}
  708.      * or -1 if no match is found. String comparisons are <em>not</em> case-sensitive.
  709.      * @param expected strings to compare with the current token
  710.      * @return index of the argument that matches the current token (ignoring case) or -1 if
  711.      *      no match is found
  712.      * @throws IllegalStateException if no token has been read
  713.      */
  714.     public int tryChooseIgnoreCase(final List<String> expected) {
  715.         return chooseInternal(expected, false, false);
  716.     }

  717.     /** Internal method to compare the current token with a list of possible strings. The index of
  718.      * the matching argument is returned.
  719.      * @param expected strings to compare with the current token
  720.      * @param caseSensitive if the comparisons should be case-sensitive
  721.      * @param throwOnFailure if an exception should be thrown if no match is found
  722.      * @return the index of the matching argument or -1 if no match is found
  723.      * @throws IllegalStateException if no token has been read or no match is found and
  724.      *      {@code throwOnFailure} is true
  725.      */
  726.     private int chooseInternal(final List<String> expected, final boolean caseSensitive,
  727.             final boolean throwOnFailure) {
  728.         ensureHasSetToken();

  729.         int i = 0;
  730.         for (final String str : expected) {
  731.             if (stringsEqual(str, currentToken, caseSensitive)) {
  732.                 return i;
  733.             }

  734.             ++i;
  735.         }

  736.         if (throwOnFailure) {
  737.             throw unexpectedToken("one of " + expected);
  738.         }

  739.         return -1;
  740.     }

  741.     /** Get an exception indicating that the current token was unexpected. The returned
  742.      * exception contains a message with the line number and column of the current token and
  743.      * a description of its value.
  744.      * @param expected string describing what was expected
  745.      * @return exception indicating that the current token was unexpected
  746.      */
  747.     public IllegalStateException unexpectedToken(final String expected) {
  748.         return unexpectedToken(expected, null);
  749.     }

  750.     /** Get an exception indicating that the current token was unexpected. The returned
  751.      * exception contains a message with the line number and column of the current token and
  752.      * a description of its value.
  753.      * @param expected string describing what was expected
  754.      * @param cause cause of the error
  755.      * @return exception indicating that the current token was unexpected
  756.      */
  757.     public IllegalStateException unexpectedToken(final String expected, final Throwable cause) {

  758.         StringBuilder msg = new StringBuilder();
  759.         msg.append("expected ")
  760.             .append(expected)
  761.             .append(" but found ")
  762.             .append(getCurrentTokenDescription());

  763.         final int line = hasSetToken ? currentTokenLineNumber : lineNumber;
  764.         final int col = hasSetToken ? currentTokenColumnNumber : columnNumber;

  765.         return parseError(line, col, msg.toString(), cause);
  766.     }

  767.     /** Get an exception indicating an error during parsing at the current token position.
  768.      * @param msg error message
  769.      * @return an exception indicating an error during parsing at the current token position
  770.      */
  771.     public IllegalStateException tokenError(final String msg) {
  772.         return tokenError(msg, null);
  773.     }

  774.     /** Get an exception indicating an error during parsing at the current token position.
  775.      * @param msg error message
  776.      * @param cause the cause of the error; may be null
  777.      * @return an exception indicating an error during parsing at the current token position
  778.      */
  779.     public IllegalStateException tokenError(final String msg, final Throwable cause) {
  780.         final int line = hasSetToken ? currentTokenLineNumber : lineNumber;
  781.         final int col = hasSetToken ? currentTokenColumnNumber : columnNumber;

  782.         return parseError(line, col, msg, cause);
  783.     }

  784.     /** Return an exception indicating an error occurring at the current parser position.
  785.      * @param msg error message
  786.      * @return an exception indicating an error during parsing
  787.      */
  788.     public IllegalStateException parseError(final String msg) {
  789.         return parseError(msg, null);
  790.     }

  791.     /** Return an exception indicating an error occurring at the current parser position.
  792.      * @param msg error message
  793.      * @param cause the cause of the error; may be null
  794.      * @return an exception indicating an error during parsing
  795.      */
  796.     public IllegalStateException parseError(final String msg, final Throwable cause) {
  797.         return parseError(lineNumber, columnNumber, msg, cause);
  798.     }

  799.     /** Return an exception indicating an error during parsing.
  800.      * @param line line number of the error
  801.      * @param col column number of the error
  802.      * @param msg error message
  803.      * @return an exception indicating an error during parsing
  804.      */
  805.     public IllegalStateException parseError(final int line, final int col, final String msg) {
  806.         return parseError(line, col, msg, null);
  807.     }

  808.     /** Return an exception indicating an error during parsing.
  809.      * @param line line number of the error
  810.      * @param col column number of the error
  811.      * @param msg error message
  812.      * @param cause the cause of the error
  813.      * @return an exception indicating an error during parsing
  814.      */
  815.     public IllegalStateException parseError(final int line, final int col, final String msg,
  816.             final Throwable cause) {
  817.         final String fullMsg = String.format("Parsing failed at line %d, column %d: %s",
  818.                 line, col, msg);
  819.         return GeometryIOUtils.parseError(fullMsg, cause);
  820.     }

  821.     /** Set the current token string and position.
  822.      * @param line line number for the start of the token
  823.      * @param col column number for the start of the token
  824.      * @param token token to set
  825.      */
  826.     private void setToken(final int line, final int col, final String token) {
  827.         currentTokenLineNumber = line;
  828.         currentTokenColumnNumber = col;
  829.         currentToken = token;

  830.         hasSetToken = true;
  831.     }

  832.     /** Get a user-friendly description of the current token.
  833.      * @return a user-friendly description of the current token.
  834.      */
  835.     private String getCurrentTokenDescription() {
  836.         if (currentToken == null || currentToken.isEmpty()) {
  837.             // attempt to return a more helpful message about the location
  838.             // of empty tokens by checking the buffer content; if this fails
  839.             // we'll ignore the error and continue with a more generic message
  840.             try {
  841.                 if (!hasMoreCharacters()) {
  842.                     return "end of content";
  843.                 } else if (currentToken != null) {
  844.                     if (!hasMoreCharactersOnLine()) {
  845.                         return "end of line";
  846.                     }
  847.                     return "empty token followed by [" + peek(1) + "]";
  848.                 }
  849.             } catch (IllegalStateException exc) {
  850.                 // ignore
  851.             }
  852.         }

  853.         if (currentToken == null) {
  854.             return "no current token";
  855.         } else if (currentToken.isEmpty()) {
  856.             return "empty token";
  857.         }

  858.         return "[" + currentToken + "]";
  859.     }

  860.     /** Validate the requested string length.
  861.      * @param len requested string length
  862.      * @throws IllegalArgumentException if {@code len} is less than 0 or greater than {@code maxStringLength}
  863.      */
  864.     private void validateRequestedStringLength(final int len) {
  865.         if (len < 0) {
  866.             throw new IllegalArgumentException("Requested string length cannot be negative; was " + len);
  867.         } else if (len > maxStringLength) {
  868.             throw new IllegalArgumentException("Requested string length of " + len + " exceeds maximum value of " +
  869.                     maxStringLength);
  870.         }
  871.     }

  872.     /** Ensure that a token read operation has been performed, throwing an exception if not.
  873.      * @throws IllegalStateException if no token read operation has been performed
  874.      */
  875.     private void ensureHasSetToken() {
  876.         if (!hasSetToken) {
  877.             throw new IllegalStateException("No token has been read from the character stream");
  878.         }
  879.     }

  880.     /** Return true if the given character (Unicode code point) is whitespace.
  881.      * @param ch character (Unicode code point) to test
  882.      * @return true if the given character is whitespace
  883.      * @see Character#isWhitespace(int)
  884.      */
  885.     public static boolean isWhitespace(final int ch) {
  886.         return Character.isWhitespace(ch);
  887.     }

  888.     /** Return true if the given character (Unicode code point) is not whitespace.
  889.      * @param ch character (Unicode code point) to test
  890.      * @return true if the given character is not whitespace
  891.      * @see #isWhitespace(int)
  892.      */
  893.     public static boolean isNotWhitespace(final int ch) {
  894.         return !isWhitespace(ch);
  895.     }

  896.     /** Return true if the given character (Unicode code point) is whitespace
  897.      * that is not used in newline sequences (ie, not '\r' or '\n').
  898.      * @param ch character (Unicode code point) to test
  899.      * @return true if the given character is a whitespace character not used in newline
  900.      *      sequences
  901.      */
  902.     public static boolean isLineWhitespace(final int ch) {
  903.         return isWhitespace(ch) && isNotNewLinePart(ch);
  904.     }

  905.     /** Return true if the given character (Unicode code point) is used
  906.      * as part of newline sequences (ie, is either '\r' or '\n').
  907.      * @param ch character (Unicode code point) to test
  908.      * @return true if the given character is used as part of newline sequences
  909.      */
  910.     public static boolean isNewLinePart(final int ch) {
  911.         return ch == CR || ch == LF;
  912.     }

  913.     /** Return true if the given character (Unicode code point) is not used as
  914.      * part of newline sequences (ie, not '\r' or '\n').
  915.      * @param ch character (Unicode code point) to test
  916.      * @return true if the given character is not used as part of newline sequences
  917.      * @see #isNewLinePart(int)
  918.      */
  919.     public static boolean isNotNewLinePart(final int ch) {
  920.         return !isNewLinePart(ch);
  921.     }

  922.     /** Return true if the given character (Unicode code point) is alphanumeric.
  923.      * @param ch character (Unicode code point) to test
  924.      * @return true if the argument is alphanumeric
  925.      * @see Character#isAlphabetic(int)
  926.      * @see Character#isDigit(int)
  927.      */
  928.     public static boolean isAlphanumeric(final int ch) {
  929.         return Character.isAlphabetic(ch) ||
  930.                 Character.isDigit(ch);
  931.     }

  932.     /** Return true if the given character (Unicode code point) is not alphanumeric.
  933.      * @param ch character (Unicode code point) to test
  934.      * @return true if the argument is not alphanumeric
  935.      * @see #isAlphanumeric(int)
  936.      */
  937.     public static boolean isNotAlphanumeric(final int ch) {
  938.         return !isAlphanumeric(ch);
  939.     }

  940.     /** Return true if the given character (Unicode code point) can be used as part of
  941.      * the string representation of an integer. This will be true for the following types
  942.      * of characters:
  943.      * <ul>
  944.      *  <li>{@link Character#isDigit(int) digits}</li>
  945.      *  <li>the '-' (minus) character</li>
  946.      *  <li>the '+' (plus) character</li>
  947.      * </ul>
  948.      * @param ch character (Unicode code point) to test
  949.      * @return true if the given character can be used as part of an integer string
  950.      */
  951.     public static boolean isIntegerPart(final int ch) {
  952.         return Character.isDigit(ch) ||
  953.                 ch == '-' ||
  954.                 ch == '+';
  955.     }

  956.     /** Return true if the given character (Unicode code point) can be used as part of
  957.      * the string representation of a decimal number. This will be true for the following types
  958.      * of characters:
  959.      * <ul>
  960.      *  <li>{@link Character#isDigit(int) digits}</li>
  961.      *  <li>the '-' (minus) character</li>
  962.      *  <li>the '+' (plus) character</li>
  963.      *  <li>the '.' (period) character</li>
  964.      *  <li>the 'e' character</li>
  965.      *  <li>the 'E' character</li>
  966.      * </ul>
  967.      * @param ch character (Unicode code point) to test
  968.      * @return true if the given character can be used as part of a decimal number string
  969.      */
  970.     public static boolean isDecimalPart(final int ch) {
  971.         return Character.isDigit(ch) ||
  972.             ch == '-' ||
  973.             ch == '+' ||
  974.             ch == '.' ||
  975.             ch == 'e' ||
  976.             ch == 'E';
  977.     }

  978.     /** Test two strings for equality. One or both arguments may be null.
  979.      * @param a first string
  980.      * @param b second string
  981.      * @param caseSensitive comparison is case-sensitive if set to true
  982.      * @return true if the string arguments are considered equal
  983.      */
  984.     private static boolean stringsEqual(final String a, final String b, final boolean caseSensitive) {
  985.         if (a == null) {
  986.             return b == null;
  987.         }

  988.         return caseSensitive ?
  989.                 a.equals(b) :
  990.                 a.equalsIgnoreCase(b);
  991.     }

  992.     /** Internal class used to collect strings from the character stream while ensuring that the
  993.      * collected strings do not exceed the maximum configured string length.
  994.      */
  995.     private final class StringCollector implements IntPredicate, IntConsumer {

  996.         /** String builder instance. */
  997.         private final StringBuilder sb = new StringBuilder();

  998.         /** Start position line. */
  999.         private final int line;

  1000.         /** Start position column. */
  1001.         private final int col;

  1002.         /** Character predicate. */
  1003.         private final IntPredicate pred;

  1004.         /** Construct a new instance with the given start position and character predicate.
  1005.          * @param line start position line
  1006.          * @param col start position col
  1007.          * @param pred character predicate
  1008.          */
  1009.         StringCollector(final int line, final int col, final IntPredicate pred) {
  1010.             this.line = line;
  1011.             this.col = col;
  1012.             this.pred = pred;
  1013.         }

  1014.         /** {@inheritDoc} */
  1015.         @Override
  1016.         public boolean test(final int value) {
  1017.             return pred.test(value) && !hasExceededMaxStringLength();
  1018.         }

  1019.         /** {@inheritDoc} */
  1020.         @Override
  1021.         public void accept(final int value) {
  1022.             sb.append((char) value);
  1023.         }

  1024.         /** Get the string collected by this instance.
  1025.          * @return the string collected by this instance
  1026.          * @throws IllegalStateException if the string exceeds the maximum configured length
  1027.          */
  1028.         public String getString() {
  1029.             if (hasExceededMaxStringLength()) {
  1030.                 throw parseError(line, col, STRING_LENGTH_ERR_MSG + maxStringLength);
  1031.             }

  1032.             return sb.toString();
  1033.         }

  1034.         /** Return true if this collector has exceeded the maximum configured string length.
  1035.          * @return true if this collector has exceeded the maximum string length
  1036.          */
  1037.         private boolean hasExceededMaxStringLength() {
  1038.             return sb.length() > maxStringLength;
  1039.         }
  1040.     }
  1041. }