SimpleTextParser.java

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.commons.geometry.io.core.internal;

import java.io.Reader;
import java.util.Arrays;
import java.util.List;
import java.util.function.IntConsumer;
import java.util.function.IntPredicate;

/** Class providing basic text parsing capabilities. The goals of this class are to
 * (1) provide a simple, flexible API for performing common text parsing operations and
 * (2) provide a mechanism for creating consistent and informative parsing errors.
 * This class is not intended as a replacement for grammar-based parsers and/or lexers.
 */
public class SimpleTextParser {

    /** Constant indicating that the end of the input has been reached. */
    private static final int EOF = -1;

    /** Carriage return character. */
    private static final char CR = '\r';

    /** Line feed character. */
    private static final char LF = '\n';

    /** Default value for the max string length property. */
    private static final int DEFAULT_MAX_STRING_LENGTH = 1024;

    /** Error message used when a string exceeds the configured maximum length. */
    private static final String STRING_LENGTH_ERR_MSG = "string length exceeds maximum value of ";

    /** Initial token position number. */
    private static final int INITIAL_TOKEN_POS = -1;

    /** Int consumer that does nothing. */
    private static final IntConsumer NOOP_CONSUMER = ch -> { };

    /** Current line number; line numbers start counting at 1. */
    private int lineNumber = 1;

    /** Current character column on the current line; column numbers start at 1.*/
    private int columnNumber = 1;

    /** Maximum length for strings returned by this instance. */
    private int maxStringLength = DEFAULT_MAX_STRING_LENGTH;

    /** The current token. */
    private String currentToken;

    /** The line number that the current token started on. */
    private int currentTokenLineNumber = INITIAL_TOKEN_POS;

    /** The character number that the current token started on. */
    private int currentTokenColumnNumber = INITIAL_TOKEN_POS;

    /** Flag used to indicate that at least one token has been read from the stream. */
    private boolean hasSetToken;

    /** Character read buffer used to access the character stream. */
    private final CharReadBuffer buffer;

    /** Construct a new instance that reads characters from the given reader. The
     * reader will not be closed.
     * @param reader reader instance to read characters from
     */
    public SimpleTextParser(final Reader reader) {
        this(new CharReadBuffer(reader));
    }

    /** Construct a new instance that reads characters from the given character buffer.
     * @param buffer read buffer to read characters from
     */
    public SimpleTextParser(final CharReadBuffer buffer) {
        this.buffer = buffer;
    }

    /** Get the current line number. Line numbers start at 1.
     * @return the current line number
     */
    public int getLineNumber() {
        return lineNumber;
    }

    /** Set the current line number. This does not affect the character stream position,
     * only the value returned by {@link #getLineNumber()}.
     * @param lineNumber line number to set; line numbers start at 1
     */
    public void setLineNumber(final int lineNumber) {
        this.lineNumber = lineNumber;
    }

    /** Get the current column number. This indicates the column position of the
     * character that will returned by the next call to {@link #readChar()}. The first
     * character of each line has a column number of 1.
     * @return the current column number; column numbers start at 1
     */
    public int getColumnNumber() {
        return columnNumber;
    }

    /** Set the current column number. This does not affect the character stream position,
     * only the value returned by {@link #getColumnNumber()}.
     * @param column the column number to set; column numbers start at 1
     */
    public void setColumnNumber(final int column) {
        this.columnNumber = column;
    }

    /** Get the maximum length for strings returned by this instance. Operations
     * that produce strings longer than this length will throw an exception.
     * @return maximum length for strings returned by this instance
     */
    public int getMaxStringLength() {
        return maxStringLength;
    }

    /** Set the maximum length for strings returned by this instance. Operations
     * that produce strings longer than this length will throw an exception.
     * @param maxStringLength maximum length for strings returned by this instance
     * @throws IllegalArgumentException if the argument is less than zero
     */
    public void setMaxStringLength(final int maxStringLength) {
        if (maxStringLength < 0) {
            throw new IllegalArgumentException("Maximum string length cannot be less than zero; was " +
                    maxStringLength);
        }
        this.maxStringLength = maxStringLength;
    }

    /** Get the current token. This is the most recent string read by one of the {@code nextXXX()}
     * methods. This value will be null if no token has yet been read or if the end of content has
     * been reached.
     * @return the current token
     * @see #next(int)
     * @see #next(IntPredicate)
     * @see #nextLine()
     * @see #nextAlphanumeric()
     */
    public String getCurrentToken() {
        return currentToken;
    }

    /** Return true if the current token is not null or empty.
     * @return true if the current token is not null or empty
     * @see #getCurrentToken()
     */
    public boolean hasNonEmptyToken() {
        return currentToken != null && !currentToken.isEmpty();
    }

    /** Get the line number that the current token started on. This value will
     * be -1 if no token has been read yet.
     * @return current token starting line number or -1 if no token has been
     *      read yet
     * @see #getCurrentToken()
     */
    public int getCurrentTokenLineNumber() {
        return currentTokenLineNumber;
    }

    /** Get the column position that the current token started on. This value will
     * be -1 if no token has been read yet.
     * @return current token column number or -1 if no oken has been read yet
     * @see #getCurrentToken()
     */
    public int getCurrentTokenColumnNumber() {
        return currentTokenColumnNumber;
    }

    /** Get the current token parsed as an integer.
     * @return the current token parsed as an integer
     * @throws IllegalStateException if no token has been read or the
     *      current token cannot be parsed as an integer
     */
    public int getCurrentTokenAsInt() {
        ensureHasSetToken();

        Throwable cause = null;

        if (currentToken != null) {
            try {
                return Integer.parseInt(currentToken);
            } catch (NumberFormatException exc) {
                cause = exc;
            }
        }

        throw unexpectedToken("integer", cause);
    }

    /** Get the current token parsed as a double.
     * @return the current token parsed as a double
     * @throws IllegalStateException if no token has been read or the
     *      current token cannot be parsed as a double
     */
    public double getCurrentTokenAsDouble() {
        ensureHasSetToken();

        Throwable cause = null;

        if (currentToken != null) {
            try {
                return Double.parseDouble(currentToken);
            } catch (NumberFormatException exc) {
                cause = exc;
            }
        }

        throw unexpectedToken("double", cause);
    }

    /** Return true if there are more characters to read from this instance.
     * @return true if there are more characters to read from this instance
     * @throws java.io.UncheckedIOException if an I/O error occurs
     */
    public boolean hasMoreCharacters() {
        return buffer.hasMoreCharacters();
    }

    /** Return true if there are more characters to read on the current line.
     * @return true if there are more characters to read on the current line
     * @throws java.io.UncheckedIOException if an I/O error occurs
     */
    public boolean hasMoreCharactersOnLine() {
        return hasMoreCharacters() && isNotNewLinePart(peekChar());
    }

    /** Read and return the next character in the stream and advance the parser position.
     * This method updates the current line number and column number but does <strong>not</strong>
     * set the {@link #getCurrentToken() current token}.
     * @return the next character in the stream or -1 if the end of the stream has been
     *      reached
     * @throws java.io.UncheckedIOException if an I/O error occurs
     * @see #peekChar()
     */
    public int readChar() {
        final int value = buffer.read();
        if (value == LF ||
                (value == CR && peekChar() != LF)) {
            ++lineNumber;
            columnNumber = 1;
        } else if (value != EOF) {
            ++columnNumber;
        }

        return value;
    }

    /** Read a string containing at most {@code len} characters from the stream and
     * set it as the current token. Characters are added to the string until the string
     * has the specified length or the end of the stream is reached. The characters are
     * consumed from the stream. The token is set to null if no more characters are available
     * from the character stream when this method is called.
     * @param len the maximum length of the extracted string
     * @return this instance
     * @throws IllegalArgumentException if {@code len} is less than 0 or greater than the
     *      configured {@link #getMaxStringLength() maximum string length}
     * @throws java.io.UncheckedIOException if an I/O error occurs
     * @see #getCurrentToken()
     * @see #consume(int, IntConsumer)
     */
    public SimpleTextParser next(final int len) {
        validateRequestedStringLength(len);

        final int line = getLineNumber();
        final int col = getColumnNumber();

        String token = null;
        if (hasMoreCharacters()) {
            final StringBuilder sb = new StringBuilder(len);

            consume(len, ch -> sb.append((char) ch));

            token = sb.toString();
        }

        setToken(line, col, token);

        return this;
    }

    /** Read a string containing at most {@code len} characters from the stream and
     * set it as the current token. This is similar to {@link #next(int)} but with the exception
     * that new line sequences beginning with {@code lineContinuationChar} are skipped.
     * @param lineContinuationChar character used to indicate skipped new line sequences
     * @param len the maximum length of the extracted string
     * @return this instance
     * @throws IllegalArgumentException if {@code len} is less than 0 or greater than the
     *      configured {@link #getMaxStringLength() maximum string length}
     * @throws java.io.UncheckedIOException if an I/O error occurs
     * @see #getCurrentToken()
     * @see #consumeWithLineContinuation(char, int, IntConsumer)
     */
    public SimpleTextParser nextWithLineContinuation(final char lineContinuationChar, final int len) {
        validateRequestedStringLength(len);

        final int line = getLineNumber();
        final int col = getColumnNumber();

        String token = null;
        if (hasMoreCharacters()) {
            final StringBuilder sb = new StringBuilder(len);

            consumeWithLineContinuation(lineContinuationChar, len,
                    ch -> sb.append((char) ch));

            token = sb.toString();
        }

        setToken(line, col, token);

        return this;
    }

    /** Read characters from the stream while the given predicate returns true and set the result
     * as the current token. The next call to {@link #readChar()} will return either a character
     * that fails the predicate test or -1 if the end of the stream has been reached.
     * The token will be null if the end of the stream has been reached prior to the method call.
     * @param pred predicate function passed characters read from the input; reading continues
     *      until the predicate returns false
     * @return this instance
     * @throws IllegalStateException if the length of the produced string exceeds the configured
     *      {@link #getMaxStringLength() maximum string length}
     * @throws java.io.UncheckedIOException if an I/O error occurs
     * @see #getCurrentToken()
     * @see #consume(IntPredicate, IntConsumer)
     */
    public SimpleTextParser next(final IntPredicate pred) {
        final int line = getLineNumber();
        final int col = getColumnNumber();

        String token = null;
        if (hasMoreCharacters()) {
            final StringCollector collector = new StringCollector(line, col, pred);

            consume(collector, collector);

            token = collector.getString();
        }

        setToken(line, col, token);

        return this;
    }

    /** Read characters from the stream while the given predicate returns true and set the result
     * as the current token. This is similar to {@link #next(IntPredicate)} but with the exception
     * that new line sequences prefixed with {@code lineContinuationChar} are skipped.
     * @param lineContinuationChar character used to indicate skipped new line sequences
     * @param pred predicate function passed characters read from the input; reading continues
     *      until the predicate returns false
     * @return this instance
     * @throws IllegalStateException if the length of the produced string exceeds the configured
     *      {@link #getMaxStringLength() maximum string length}
     * @throws java.io.UncheckedIOException if an I/O error occurs
     * @see #getCurrentToken()
     * @see #consume(IntPredicate, IntConsumer)
     */
    public SimpleTextParser nextWithLineContinuation(final char lineContinuationChar, final IntPredicate pred) {
        final int line = getLineNumber();
        final int col = getColumnNumber();

        String token = null;
        if (hasMoreCharacters()) {
            final StringCollector collector = new StringCollector(line, col, pred);

            consumeWithLineContinuation(lineContinuationChar, collector, collector);

            token = collector.getString();
        }

        setToken(line, col, token);

        return this;
    }

    /** Read characters from the current parser position to the next new line sequence and
     * set the result as the current token . The newline character sequence
     * ('\r', '\n', or '\r\n') at the end of the line is consumed but is not included in the token.
     * The token will be null if the end of the stream has been reached prior to the method call.
     * @return this instance
     * @throws IllegalStateException if the length of the produced string exceeds the configured
     *      {@link #getMaxStringLength() maximum string length}
     * @throws java.io.UncheckedIOException if an I/O error occurs
     * @see #getCurrentToken()
     */
    public SimpleTextParser nextLine() {
        next(SimpleTextParser::isNotNewLinePart);

        discardNewLineSequence();

        return this;
    }

    /** Read a sequence of alphanumeric characters starting from the current parser position
     * and set the result as the current token. The token will be the empty string if the next
     * character in the stream is not alphanumeric and will be null if the end of the stream has
     * been reached prior to the method call.
     * @return this instance
     * @throws IllegalStateException if the length of the produced string exceeds the configured
     *      {@link #getMaxStringLength() maximum string length}
     * @throws java.io.UncheckedIOException if an I/O error occurs
     * @see #getCurrentToken()
     */
    public SimpleTextParser nextAlphanumeric() {
        return next(SimpleTextParser::isAlphanumeric);
    }

    /** Discard {@code len} number of characters from the character stream. The
     * parser position is updated but the current token is not changed.
     * @param len number of characters to discard
     * @return this instance
     * @throws java.io.UncheckedIOException if an I/O error occurs
     */
    public SimpleTextParser discard(final int len) {
        return consume(len, NOOP_CONSUMER);
    }

    /** Discard {@code len} number of characters from the character stream. The
     * parser position is updated but the current token is not changed. Lines beginning
     * with {@code lineContinuationChar} are skipped.
     * @param lineContinuationChar character used to indicate skipped new line sequences
     * @param len number of characters to discard
     * @return this instance
     * @throws java.io.UncheckedIOException if an I/O error occurs
     */
    public SimpleTextParser discardWithLineContinuation(final char lineContinuationChar,
            final int len) {
        return consumeWithLineContinuation(lineContinuationChar, len, NOOP_CONSUMER);
    }

    /** Discard characters from the stream while the given predicate returns true. The next call
     * to {@link #readChar()} will return either a character that fails the predicate test or -1
     * if the end of the stream has been reached. The parser position is updated but the current
     * token is not changed.
     * @param pred predicate test for characters to discard
     * @return this instance
     * @throws java.io.UncheckedIOException if an I/O error occurs
     */
    public SimpleTextParser discard(final IntPredicate pred) {
        return consume(pred, NOOP_CONSUMER);
    }

    /** Discard characters from the stream while the given predicate returns true. New line sequences
     * beginning with {@code lineContinuationChar} are skipped. The next call o {@link #readChar()}
     * will return either a character that fails the predicate test or -1 if the end of the stream
     * has been reached. The parser position is updated but the current token is not changed.
     * @param lineContinuationChar character used to indicate skipped new line sequences
     * @param pred predicate test for characters to discard
     * @return this instance
     * @throws java.io.UncheckedIOException if an I/O error occurs
     */
    public SimpleTextParser discardWithLineContinuation(final char lineContinuationChar,
            final IntPredicate pred) {
        return consumeWithLineContinuation(lineContinuationChar, pred, NOOP_CONSUMER);
    }

    /** Discard a sequence of whitespace characters from the character stream starting from the
     * current parser position. The next call to {@link #readChar()} will return either a non-whitespace
     * character or -1 if the end of the stream has been reached. The parser position is updated
     * but the current token is not changed.
     * @return this instance
     * @throws java.io.UncheckedIOException if an I/O error occurs
     */
    public SimpleTextParser discardWhitespace() {
        return discard(SimpleTextParser::isWhitespace);
    }

    /** Discard the next whitespace characters on the current line. The next call to
     * {@link #readChar()} will return either a non-whitespace character on the current line,
     * the newline character sequence (indicating the end of the line), or -1 (indicating the
     * end of the stream). The parser position is updated but the current token is not changed.
     * @return this instance
     * @throws java.io.UncheckedIOException if an I/O error occurs
     */
    public SimpleTextParser discardLineWhitespace() {
        return discard(SimpleTextParser::isLineWhitespace);
    }

    /** Discard the newline character sequence at the current reader position. The sequence
     * is defined as one of "\r", "\n", or "\r\n". Does nothing if the reader is not positioned
     * at a newline sequence. The parser position is updated but the current token is not changed.
     * @return this instance
     * @throws java.io.UncheckedIOException if an I/O error occurs
     */
    public SimpleTextParser discardNewLineSequence() {
        final int value = peekChar();
        if (value == LF) {
            readChar();
        } else if (value == CR) {
            readChar();

            if (peekChar() == LF) {
                readChar();
            }
        }

        return this;
    }

    /** Discard all remaining characters on the current line, including the terminating
     * newline character sequence. The next call to {@link #readChar()} will return either the
     * first character on the next line or -1 if the end of the stream has been reached.
     * The parser position is updated but the current token is not changed.
     * @return this instance
     * @throws java.io.UncheckedIOException if an I/O error occurs
     */
    public SimpleTextParser discardLine() {
        discard(SimpleTextParser::isNotNewLinePart);

        discardNewLineSequence();

        return this;
    }

    /** Consume characters from the stream and pass them to {@code consumer} while the given predicate
     * returns true. The operation ends when the predicate returns false or the end of the stream is
     * reached.
     * @param pred predicate test for characters to consume
     * @param consumer object to be passed each consumed character
     * @return this instance
     * @throws java.io.UncheckedIOException if an I/O error occurs
     */
    public SimpleTextParser consume(final IntPredicate pred, final IntConsumer consumer) {
        int ch;
        while ((ch = peekChar()) != EOF && pred.test(ch)) {
            consumer.accept(readChar());
        }

        return this;
    }

    /** Consume at most {@code len} characters from the stream, passing each to the given consumer.
     * This method is similar to {@link #consume(int, IntConsumer)} with the exception that new line
     * sequences prefixed with {@code lineContinuationChar} are skipped.
     * @param lineContinuationChar character used to indicate skipped new line sequences
     * @param len number of characters to consume
     * @param consumer function to be passed each consumed character
     * @return this instance
     * @throws java.io.UncheckedIOException if an I/O error occurs
     */
    public SimpleTextParser consumeWithLineContinuation(final char lineContinuationChar,
            final int len, final IntConsumer consumer) {
        int i = -1;
        int ch;
        while (++i < len && (ch = readChar()) != EOF) {
            if (ch == lineContinuationChar && isNewLinePart(peekChar())) {
                --i; // don't count the continuation char toward the total length
                discardNewLineSequence();
            } else {
                consumer.accept(ch);
            }
        }

        return this;
    }

    /** Consume at most {@code len} characters from the stream, passing each to the given consumer.
     * The operation continues until {@code len} number of characters have been read or the end of
     * the stream has been reached.
     * @param len number of characters to consume
     * @param consumer object to be passed each consumed character
     * @return this instance
     * @throws java.io.UncheckedIOException if an I/O error occurs
     */
    public SimpleTextParser consume(final int len, final IntConsumer consumer) {
        int ch;
        for (int i = 0; i < len; ++i) {
            ch = readChar();
            if (ch != EOF) {
                consumer.accept(ch);
            } else {
                break;
            }
        }

        return this;
    }

    /** Consume characters from the stream and pass them to {@code consumer} while the given predicate
     * returns true. This method is similar to {@link #consume(IntPredicate, IntConsumer)} with the
     * exception that new lines sequences beginning with {@code lineContinuationChar} are skipped.
     * @param lineContinuationChar character used to indicate skipped new line sequences
     * @param pred predicate test for characters to consume
     * @param consumer object to be passed each consumed character
     * @return this instance
     * @throws java.io.UncheckedIOException if an I/O error occurs
     */
    public SimpleTextParser consumeWithLineContinuation(final char lineContinuationChar,
            final IntPredicate pred, final IntConsumer consumer) {
        int ch;
        while ((ch = peekChar()) != EOF) {
            if (ch == lineContinuationChar && isNewLinePart(buffer.charAt(1))) {
                readChar();
                discardNewLineSequence();
            } else if (pred.test(ch)) {
                consumer.accept(readChar());
            } else {
                break;
            }
        }

        return this;
    }

    /** Return the next character in the stream but do not advance the parser position.
     * @return the next character in the stream or -1 if the end of the stream has been
     *      reached
     * @throws java.io.UncheckedIOException if an I/O error occurs
     * @see #readChar()
     */
    public int peekChar() {
        return buffer.peek();
    }

    /** Return a string containing containing at most {@code len} characters from the stream but
     * without changing the parser position. Characters are added to the string until the
     * string has the specified length or the end of the stream is reached.
     * @param len the maximum length of the returned string
     * @return a string containing containing at most {@code len} characters from the stream
     *      or null if the parser has already reached the end of the stream
     * @throws IllegalArgumentException if {@code len} is less than 0 or greater than the
     *      configured {@link #getMaxStringLength() maximum string length}
     * @throws java.io.UncheckedIOException if an I/O error occurs
     * @see #next(int)
     */
    public String peek(final int len) {
        validateRequestedStringLength(len);

        return buffer.peekString(len);
    }

    /** Read characters from the stream while the given predicate returns true but do not
     * change the current token or advance the parser position.
     * @param pred predicate function passed characters read from the input; reading continues
     *      until the predicate returns false
     * @return string containing characters matching {@code pred} or null if the parser has already
     *      reached the end of the stream
     * @throws IllegalStateException if the length of the produced string exceeds the configured
     *      {@link #getMaxStringLength() maximum string length}
     * @throws java.io.UncheckedIOException if an I/O error occurs
     * @see #getCurrentToken()
     */
    public String peek(final IntPredicate pred) {
        String token = null;

        if (hasMoreCharacters()) {
            final StringCollector collector = new StringCollector(lineNumber, columnNumber, pred);

            int i = -1;
            int ch = buffer.charAt(++i);
            while (ch != EOF && collector.test(ch)) {
                collector.accept(ch);

                ch = buffer.charAt(++i);
            }

            token = collector.getString();
        }

        return token;
    }

    /** Compare the {@link #getCurrentToken() current token} with the argument and throw an
     * exception if they are not equal. The comparison is case-sensitive.
     * @param expected expected token
     * @return this instance
     * @throws IllegalStateException if no token has been read or {@code expected} does not exactly
     *      equal the current token
     */
    public SimpleTextParser match(final String expected) {
        matchInternal(expected, true, true);
        return this;
    }

    /** Compare the {@link #getCurrentToken() current token} with the argument and throw an
     * exception if they are not equal. The comparison is <em>not</em> case-sensitive.
     * @param expected expected token
     * @return this instance
     * @throws IllegalStateException if no token has been read or {@code expected} does not equal
     *      the current token (ignoring case)
     */
    public SimpleTextParser matchIgnoreCase(final String expected) {
        matchInternal(expected, false, true);
        return this;
    }

    /** Return true if the {@link #getCurrentToken() current token} is equal to the argument.
     * The comparison is case-sensitive.
     * @param expected expected token
     * @return true if the argument exactly equals the current token
     * @throws IllegalStateException if no token has been read
     * @throws java.io.UncheckedIOException if an I/O error occurs
     */
    public boolean tryMatch(final String expected) {
        return matchInternal(expected, true, false);
    }

    /** Return true if the {@link #getCurrentToken() current token} is equal to the argument.
     * The comparison is <em>not</em> case-sensitive.
     * @param expected expected token
     * @return true if the argument equals the current token (ignoring case)
     * @throws IllegalStateException if no token has been read
     */
    public boolean tryMatchIgnoreCase(final String expected) {
        return matchInternal(expected, false, false);
    }

    /** Internal method to compare the current token with the argument.
     * @param expected expected token
     * @param caseSensitive if the comparison should be case-sensitive
     * @param throwOnFailure if an exception should be thrown if the argument is not
     *      equal to the current token
     * @return true if the argument is equal to the current token
     * @throws IllegalStateException if no token has been read or {@code expected} does not match the
     *      current token and {@code throwOnFailure} is true
     */
    private boolean matchInternal(final String expected, final boolean caseSensitive,
            final boolean throwOnFailure) {
        ensureHasSetToken();

        if (!stringsEqual(expected, currentToken, caseSensitive)) {
            if (throwOnFailure) {
                throw unexpectedToken("[" + expected + "]");
            }

            return false;
        }

        return true;
    }

    /** Return the index of the argument that exactly matches the {@link #getCurrentToken() current token}.
     * An exception is thrown if no match is found. String comparisons are case-sensitive.
     * @param expected strings to compare with the current token
     * @return index of the argument that exactly matches the current token
     * @throws IllegalStateException if no token has been read or no match is found among the arguments
     */
    public int choose(final String... expected) {
        return choose(Arrays.asList(expected));
    }

    /** Return the index of the argument that exactly matches the {@link #getCurrentToken() current token}.
     * An exception is thrown if no match is found. String comparisons are case-sensitive.
     * @param expected strings to compare with the current token
     * @return index of the argument that exactly matches the current token
     * @throws IllegalStateException if no token has been read or no match is found among the arguments
     */
    public int choose(final List<String> expected) {
        return chooseInternal(expected, true, true);
    }

    /** Return the index of the argument that matches the {@link #getCurrentToken() current token},
     * ignoring case. An exception is thrown if no match is found. String comparisons are <em>not</em>
     * case-sensitive.
     * @param expected strings to compare with the current token
     * @return index of the argument that matches the current token (ignoring case)
     * @throws IllegalStateException if no token has been read or no match is found among the arguments
     */
    public int chooseIgnoreCase(final String... expected) {
        return chooseIgnoreCase(Arrays.asList(expected));
    }

    /** Return the index of the argument that matches the {@link #getCurrentToken() current token},
     * ignoring case. An exception is thrown if no match is found. String comparisons are <em>not</em>
     * case-sensitive.
     * @param expected strings to compare with the current token
     * @return index of the argument that matches the current token (ignoring case)
     * @throws IllegalStateException if no token has been read or no match is found among the arguments
     */
    public int chooseIgnoreCase(final List<String> expected) {
        return chooseInternal(expected, false, true);
    }

    /** Return the index of the argument that exactly matches the {@link #getCurrentToken() current token}
     * or -1 if no match is found. String comparisons are case-sensitive.
     * @param expected strings to compare with the current token
     * @return index of the argument that exactly matches the current token or -1 if
     *      no match is found
     * @throws IllegalStateException if no token has been read
     */
    public int tryChoose(final String... expected) {
        return tryChoose(Arrays.asList(expected));
    }

    /** Return the index of the argument that exactly matches the {@link #getCurrentToken() current token}
     * or -1 if no match is found. String comparisons are case-sensitive.
     * @param expected strings to compare with the current token
     * @return index of the argument that exactly matches the current token or -1 if
     *      no match is found
     * @throws IllegalStateException if no token has been read
     */
    public int tryChoose(final List<String> expected) {
        return chooseInternal(expected, true, false);
    }

    /** Return the index of the argument that matches the {@link #getCurrentToken() current token}
     * or -1 if no match is found. String comparisons are <em>not</em> case-sensitive.
     * @param expected strings to compare with the current token
     * @return index of the argument that matches the current token (ignoring case) or -1 if
     *      no match is found
     * @throws IllegalStateException if no token has been read
     */
    public int tryChooseIgnoreCase(final String... expected) {
        return tryChooseIgnoreCase(Arrays.asList(expected));
    }

    /** Return the index of the argument that matches the {@link #getCurrentToken() current token}
     * or -1 if no match is found. String comparisons are <em>not</em> case-sensitive.
     * @param expected strings to compare with the current token
     * @return index of the argument that matches the current token (ignoring case) or -1 if
     *      no match is found
     * @throws IllegalStateException if no token has been read
     */
    public int tryChooseIgnoreCase(final List<String> expected) {
        return chooseInternal(expected, false, false);
    }

    /** Internal method to compare the current token with a list of possible strings. The index of
     * the matching argument is returned.
     * @param expected strings to compare with the current token
     * @param caseSensitive if the comparisons should be case-sensitive
     * @param throwOnFailure if an exception should be thrown if no match is found
     * @return the index of the matching argument or -1 if no match is found
     * @throws IllegalStateException if no token has been read or no match is found and
     *      {@code throwOnFailure} is true
     */
    private int chooseInternal(final List<String> expected, final boolean caseSensitive,
            final boolean throwOnFailure) {
        ensureHasSetToken();

        int i = 0;
        for (final String str : expected) {
            if (stringsEqual(str, currentToken, caseSensitive)) {
                return i;
            }

            ++i;
        }

        if (throwOnFailure) {
            throw unexpectedToken("one of " + expected);
        }

        return -1;
    }

    /** Get an exception indicating that the current token was unexpected. The returned
     * exception contains a message with the line number and column of the current token and
     * a description of its value.
     * @param expected string describing what was expected
     * @return exception indicating that the current token was unexpected
     */
    public IllegalStateException unexpectedToken(final String expected) {
        return unexpectedToken(expected, null);
    }

    /** Get an exception indicating that the current token was unexpected. The returned
     * exception contains a message with the line number and column of the current token and
     * a description of its value.
     * @param expected string describing what was expected
     * @param cause cause of the error
     * @return exception indicating that the current token was unexpected
     */
    public IllegalStateException unexpectedToken(final String expected, final Throwable cause) {

        StringBuilder msg = new StringBuilder();
        msg.append("expected ")
            .append(expected)
            .append(" but found ")
            .append(getCurrentTokenDescription());

        final int line = hasSetToken ? currentTokenLineNumber : lineNumber;
        final int col = hasSetToken ? currentTokenColumnNumber : columnNumber;

        return parseError(line, col, msg.toString(), cause);
    }

    /** Get an exception indicating an error during parsing at the current token position.
     * @param msg error message
     * @return an exception indicating an error during parsing at the current token position
     */
    public IllegalStateException tokenError(final String msg) {
        return tokenError(msg, null);
    }

    /** Get an exception indicating an error during parsing at the current token position.
     * @param msg error message
     * @param cause the cause of the error; may be null
     * @return an exception indicating an error during parsing at the current token position
     */
    public IllegalStateException tokenError(final String msg, final Throwable cause) {
        final int line = hasSetToken ? currentTokenLineNumber : lineNumber;
        final int col = hasSetToken ? currentTokenColumnNumber : columnNumber;

        return parseError(line, col, msg, cause);
    }

    /** Return an exception indicating an error occurring at the current parser position.
     * @param msg error message
     * @return an exception indicating an error during parsing
     */
    public IllegalStateException parseError(final String msg) {
        return parseError(msg, null);
    }

    /** Return an exception indicating an error occurring at the current parser position.
     * @param msg error message
     * @param cause the cause of the error; may be null
     * @return an exception indicating an error during parsing
     */
    public IllegalStateException parseError(final String msg, final Throwable cause) {
        return parseError(lineNumber, columnNumber, msg, cause);
    }

    /** Return an exception indicating an error during parsing.
     * @param line line number of the error
     * @param col column number of the error
     * @param msg error message
     * @return an exception indicating an error during parsing
     */
    public IllegalStateException parseError(final int line, final int col, final String msg) {
        return parseError(line, col, msg, null);
    }

    /** Return an exception indicating an error during parsing.
     * @param line line number of the error
     * @param col column number of the error
     * @param msg error message
     * @param cause the cause of the error
     * @return an exception indicating an error during parsing
     */
    public IllegalStateException parseError(final int line, final int col, final String msg,
            final Throwable cause) {
        final String fullMsg = String.format("Parsing failed at line %d, column %d: %s",
                line, col, msg);
        return GeometryIOUtils.parseError(fullMsg, cause);
    }

    /** Set the current token string and position.
     * @param line line number for the start of the token
     * @param col column number for the start of the token
     * @param token token to set
     */
    private void setToken(final int line, final int col, final String token) {
        currentTokenLineNumber = line;
        currentTokenColumnNumber = col;
        currentToken = token;

        hasSetToken = true;
    }

    /** Get a user-friendly description of the current token.
     * @return a user-friendly description of the current token.
     */
    private String getCurrentTokenDescription() {
        if (currentToken == null || currentToken.isEmpty()) {
            // attempt to return a more helpful message about the location
            // of empty tokens by checking the buffer content; if this fails
            // we'll ignore the error and continue with a more generic message
            try {
                if (!hasMoreCharacters()) {
                    return "end of content";
                } else if (currentToken != null) {
                    if (!hasMoreCharactersOnLine()) {
                        return "end of line";
                    }
                    return "empty token followed by [" + peek(1) + "]";
                }
            } catch (IllegalStateException exc) {
                // ignore
            }
        }

        if (currentToken == null) {
            return "no current token";
        } else if (currentToken.isEmpty()) {
            return "empty token";
        }

        return "[" + currentToken + "]";
    }

    /** Validate the requested string length.
     * @param len requested string length
     * @throws IllegalArgumentException if {@code len} is less than 0 or greater than {@code maxStringLength}
     */
    private void validateRequestedStringLength(final int len) {
        if (len < 0) {
            throw new IllegalArgumentException("Requested string length cannot be negative; was " + len);
        } else if (len > maxStringLength) {
            throw new IllegalArgumentException("Requested string length of " + len + " exceeds maximum value of " +
                    maxStringLength);
        }
    }

    /** Ensure that a token read operation has been performed, throwing an exception if not.
     * @throws IllegalStateException if no token read operation has been performed
     */
    private void ensureHasSetToken() {
        if (!hasSetToken) {
            throw new IllegalStateException("No token has been read from the character stream");
        }
    }

    /** Return true if the given character (Unicode code point) is whitespace.
     * @param ch character (Unicode code point) to test
     * @return true if the given character is whitespace
     * @see Character#isWhitespace(int)
     */
    public static boolean isWhitespace(final int ch) {
        return Character.isWhitespace(ch);
    }

    /** Return true if the given character (Unicode code point) is not whitespace.
     * @param ch character (Unicode code point) to test
     * @return true if the given character is not whitespace
     * @see #isWhitespace(int)
     */
    public static boolean isNotWhitespace(final int ch) {
        return !isWhitespace(ch);
    }

    /** Return true if the given character (Unicode code point) is whitespace
     * that is not used in newline sequences (ie, not '\r' or '\n').
     * @param ch character (Unicode code point) to test
     * @return true if the given character is a whitespace character not used in newline
     *      sequences
     */
    public static boolean isLineWhitespace(final int ch) {
        return isWhitespace(ch) && isNotNewLinePart(ch);
    }

    /** Return true if the given character (Unicode code point) is used
     * as part of newline sequences (ie, is either '\r' or '\n').
     * @param ch character (Unicode code point) to test
     * @return true if the given character is used as part of newline sequences
     */
    public static boolean isNewLinePart(final int ch) {
        return ch == CR || ch == LF;
    }

    /** Return true if the given character (Unicode code point) is not used as
     * part of newline sequences (ie, not '\r' or '\n').
     * @param ch character (Unicode code point) to test
     * @return true if the given character is not used as part of newline sequences
     * @see #isNewLinePart(int)
     */
    public static boolean isNotNewLinePart(final int ch) {
        return !isNewLinePart(ch);
    }

    /** Return true if the given character (Unicode code point) is alphanumeric.
     * @param ch character (Unicode code point) to test
     * @return true if the argument is alphanumeric
     * @see Character#isAlphabetic(int)
     * @see Character#isDigit(int)
     */
    public static boolean isAlphanumeric(final int ch) {
        return Character.isAlphabetic(ch) ||
                Character.isDigit(ch);
    }

    /** Return true if the given character (Unicode code point) is not alphanumeric.
     * @param ch character (Unicode code point) to test
     * @return true if the argument is not alphanumeric
     * @see #isAlphanumeric(int)
     */
    public static boolean isNotAlphanumeric(final int ch) {
        return !isAlphanumeric(ch);
    }

    /** Return true if the given character (Unicode code point) can be used as part of
     * the string representation of an integer. This will be true for the following types
     * of characters:
     * <ul>
     *  <li>{@link Character#isDigit(int) digits}</li>
     *  <li>the '-' (minus) character</li>
     *  <li>the '+' (plus) character</li>
     * </ul>
     * @param ch character (Unicode code point) to test
     * @return true if the given character can be used as part of an integer string
     */
    public static boolean isIntegerPart(final int ch) {
        return Character.isDigit(ch) ||
                ch == '-' ||
                ch == '+';
    }

    /** Return true if the given character (Unicode code point) can be used as part of
     * the string representation of a decimal number. This will be true for the following types
     * of characters:
     * <ul>
     *  <li>{@link Character#isDigit(int) digits}</li>
     *  <li>the '-' (minus) character</li>
     *  <li>the '+' (plus) character</li>
     *  <li>the '.' (period) character</li>
     *  <li>the 'e' character</li>
     *  <li>the 'E' character</li>
     * </ul>
     * @param ch character (Unicode code point) to test
     * @return true if the given character can be used as part of a decimal number string
     */
    public static boolean isDecimalPart(final int ch) {
        return Character.isDigit(ch) ||
            ch == '-' ||
            ch == '+' ||
            ch == '.' ||
            ch == 'e' ||
            ch == 'E';
    }

    /** Test two strings for equality. One or both arguments may be null.
     * @param a first string
     * @param b second string
     * @param caseSensitive comparison is case-sensitive if set to true
     * @return true if the string arguments are considered equal
     */
    private static boolean stringsEqual(final String a, final String b, final boolean caseSensitive) {
        if (a == null) {
            return b == null;
        }

        return caseSensitive ?
                a.equals(b) :
                a.equalsIgnoreCase(b);
    }

    /** Internal class used to collect strings from the character stream while ensuring that the
     * collected strings do not exceed the maximum configured string length.
     */
    private final class StringCollector implements IntPredicate, IntConsumer {

        /** String builder instance. */
        private final StringBuilder sb = new StringBuilder();

        /** Start position line. */
        private final int line;

        /** Start position column. */
        private final int col;

        /** Character predicate. */
        private final IntPredicate pred;

        /** Construct a new instance with the given start position and character predicate.
         * @param line start position line
         * @param col start position col
         * @param pred character predicate
         */
        StringCollector(final int line, final int col, final IntPredicate pred) {
            this.line = line;
            this.col = col;
            this.pred = pred;
        }

        /** {@inheritDoc} */
        @Override
        public boolean test(final int value) {
            return pred.test(value) && !hasExceededMaxStringLength();
        }

        /** {@inheritDoc} */
        @Override
        public void accept(final int value) {
            sb.append((char) value);
        }

        /** Get the string collected by this instance.
         * @return the string collected by this instance
         * @throws IllegalStateException if the string exceeds the maximum configured length
         */
        public String getString() {
            if (hasExceededMaxStringLength()) {
                throw parseError(line, col, STRING_LENGTH_ERR_MSG + maxStringLength);
            }

            return sb.toString();
        }

        /** Return true if this collector has exceeded the maximum configured string length.
         * @return true if this collector has exceeded the maximum string length
         */
        private boolean hasExceededMaxStringLength() {
            return sb.length() > maxStringLength;
        }
    }
}