SimpleTextParser.java
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.geometry.io.core.internal;
import java.io.Reader;
import java.util.Arrays;
import java.util.List;
import java.util.function.IntConsumer;
import java.util.function.IntPredicate;
/** Class providing basic text parsing capabilities. The goals of this class are to
* (1) provide a simple, flexible API for performing common text parsing operations and
* (2) provide a mechanism for creating consistent and informative parsing errors.
* This class is not intended as a replacement for grammar-based parsers and/or lexers.
*/
public class SimpleTextParser {
/** Constant indicating that the end of the input has been reached; character reads are
* compared against this value to detect the end of the stream.
*/
private static final int EOF = -1;
/** Carriage return character. */
private static final char CR = '\r';
/** Line feed character. */
private static final char LF = '\n';
/** Default value for the max string length property. */
private static final int DEFAULT_MAX_STRING_LENGTH = 1024;
/** Error message used when a string exceeds the configured maximum length. */
private static final String STRING_LENGTH_ERR_MSG = "string length exceeds maximum value of ";
/** Initial token position number; used as the starting value of the current token
* line and column numbers.
*/
private static final int INITIAL_TOKEN_POS = -1;
/** Int consumer that does nothing; passed to the consume methods when characters
* are to be discarded.
*/
private static final IntConsumer NOOP_CONSUMER = ch -> { };
/** Current line number; line numbers start counting at 1. */
private int lineNumber = 1;
/** Current character column on the current line; column numbers start at 1. */
private int columnNumber = 1;
/** Maximum length for strings returned by this instance. */
private int maxStringLength = DEFAULT_MAX_STRING_LENGTH;
/** The current token; may be null. */
private String currentToken;
/** The line number that the current token started on. */
private int currentTokenLineNumber = INITIAL_TOKEN_POS;
/** The column number that the current token started on. */
private int currentTokenColumnNumber = INITIAL_TOKEN_POS;
/** Flag used to indicate that at least one token has been read from the stream. When false,
* error positions are reported using the parser position instead of the token position.
*/
private boolean hasSetToken;
/** Character read buffer used to access the character stream. */
private final CharReadBuffer buffer;
/** Construct a new instance that reads characters from the given reader. The reader
* is wrapped in a new {@link CharReadBuffer} and will not be closed by this instance.
* @param reader reader instance to read characters from
*/
public SimpleTextParser(final Reader reader) {
this(new CharReadBuffer(reader));
}
/** Construct a new instance that reads characters from the given character buffer.
* The buffer is used directly; no copy is made.
* @param buffer read buffer to read characters from
*/
public SimpleTextParser(final CharReadBuffer buffer) {
this.buffer = buffer;
}
/** Get the current line number. Line numbers start at 1 and are advanced as new line
* sequences are read from the stream.
* @return the current line number
* @see #setLineNumber(int)
*/
public int getLineNumber() {
return lineNumber;
}
/** Set the current line number. This does not affect the character stream position,
* only the value returned by {@link #getLineNumber()}. The argument is stored as given;
* no validation is performed.
* @param lineNumber line number to set; line numbers start at 1
*/
public void setLineNumber(final int lineNumber) {
this.lineNumber = lineNumber;
}
/** Get the current column number. This indicates the column position of the
* character that will be returned by the next call to {@link #readChar()}. The first
* character of each line has a column number of 1.
* @return the current column number; column numbers start at 1
* @see #setColumnNumber(int)
*/
public int getColumnNumber() {
return columnNumber;
}
/** Set the current column number. This does not affect the character stream position,
* only the value returned by {@link #getColumnNumber()}. The argument is stored as given;
* no validation is performed.
* @param column the column number to set; column numbers start at 1
*/
public void setColumnNumber(final int column) {
this.columnNumber = column;
}
/** Get the maximum length for strings returned by this instance. Operations
* that produce strings longer than this length will throw an exception.
* @return maximum length for strings returned by this instance
* @see #setMaxStringLength(int)
*/
public int getMaxStringLength() {
return maxStringLength;
}
/** Configure the upper bound on the length of strings produced by this parser. Any
 * operation that would yield a longer string throws an exception instead.
 * @param maxStringLength maximum length for strings returned by this instance
 * @throws IllegalArgumentException if the argument is less than zero
 * @see #getMaxStringLength()
 */
public void setMaxStringLength(final int maxStringLength) {
    final boolean negative = maxStringLength < 0;
    if (negative) {
        final String msg = "Maximum string length cannot be less than zero; was " + maxStringLength;
        throw new IllegalArgumentException(msg);
    }

    this.maxStringLength = maxStringLength;
}
/** Get the current token. This is the most recent string read by one of the {@code nextXXX()}
* methods. This value will be null if no token has yet been read or if the end of content has
* been reached. Methods that only discard, consume, or peek characters do not change it.
* @return the current token; may be null
* @see #next(int)
* @see #next(IntPredicate)
* @see #nextLine()
* @see #nextAlphanumeric()
*/
public String getCurrentToken() {
return currentToken;
}
/** Determine whether the current token contains at least one character.
 * @return true if the current token is neither null nor the empty string
 * @see #getCurrentToken()
 */
public boolean hasNonEmptyToken() {
    if (currentToken == null) {
        return false;
    }
    return !currentToken.isEmpty();
}
/** Get the line number that the current token started on. This value will
* be -1 if no token has been read yet.
* @return current token starting line number or -1 if no token has been
* read yet
* @see #getCurrentToken()
* @see #getCurrentTokenColumnNumber()
*/
public int getCurrentTokenLineNumber() {
return currentTokenLineNumber;
}
/** Get the column position that the current token started on. This value will
* be -1 if no token has been read yet.
* @return current token column number or -1 if no token has been read yet
* @see #getCurrentToken()
* @see #getCurrentTokenLineNumber()
*/
public int getCurrentTokenColumnNumber() {
return currentTokenColumnNumber;
}
/** Interpret the current token as an integer using {@link Integer#parseInt(String)}.
 * A null token (e.g. after the end of the stream was reached) is reported in the same
 * way as an unparsable one, but without a cause.
 * @return the current token parsed as an integer
 * @throws IllegalStateException if no token has been read or the
 *      current token cannot be parsed as an integer
 */
public int getCurrentTokenAsInt() {
    ensureHasSetToken();

    if (currentToken == null) {
        // nothing to parse; report the failure without an underlying cause
        throw unexpectedToken("integer", null);
    }

    try {
        return Integer.parseInt(currentToken);
    } catch (NumberFormatException exc) {
        // keep the parse failure as the cause of the reported error
        throw unexpectedToken("integer", exc);
    }
}
/** Interpret the current token as a double using {@link Double#parseDouble(String)}.
 * A null token (e.g. after the end of the stream was reached) is reported in the same
 * way as an unparsable one, but without a cause.
 * @return the current token parsed as a double
 * @throws IllegalStateException if no token has been read or the
 *      current token cannot be parsed as a double
 */
public double getCurrentTokenAsDouble() {
    ensureHasSetToken();

    if (currentToken == null) {
        // nothing to parse; report the failure without an underlying cause
        throw unexpectedToken("double", null);
    }

    try {
        return Double.parseDouble(currentToken);
    } catch (NumberFormatException exc) {
        // keep the parse failure as the cause of the reported error
        throw unexpectedToken("double", exc);
    }
}
/** Return true if there are more characters to read from this instance. The check is
* delegated to the underlying character read buffer.
* @return true if there are more characters to read from this instance
* @throws java.io.UncheckedIOException if an I/O error occurs
*/
public boolean hasMoreCharacters() {
return buffer.hasMoreCharacters();
}
/** Determine whether the current line still has unread characters, i.e. the stream is not
 * exhausted and the next character is not part of a new line sequence.
 * @return true if there are more characters to read on the current line
 * @throws java.io.UncheckedIOException if an I/O error occurs
 */
public boolean hasMoreCharactersOnLine() {
    if (!hasMoreCharacters()) {
        return false;
    }
    return isNotNewLinePart(peekChar());
}
/** Consume a single character from the stream and advance the parser position. The line and
 * column numbers are updated but the {@link #getCurrentToken() current token} is
 * <strong>not</strong> modified. A carriage return immediately followed by a line feed
 * advances the column only; the line number is advanced when the line feed itself is read.
 * @return the next character in the stream or -1 if the end of the stream has been
 *      reached
 * @throws java.io.UncheckedIOException if an I/O error occurs
 * @see #peekChar()
 */
public int readChar() {
    final int ch = buffer.read();
    if (ch == EOF) {
        // nothing was consumed; the position is unchanged
        return ch;
    }

    // a CR only terminates the line when it is not the first half of a CRLF pair
    final boolean endsLine = ch == LF || (ch == CR && peekChar() != LF);
    if (endsLine) {
        ++lineNumber;
        columnNumber = 1;
    } else {
        ++columnNumber;
    }

    return ch;
}
/** Extract up to {@code len} characters from the stream and make the result the current
 * token. Reading stops once {@code len} characters have been collected or the end of the
 * stream is reached; the characters read are consumed. If no characters are available when
 * the method is called, the token is set to null. The token position is the parser position
 * at the time of the call.
 * @param len the maximum length of the extracted string
 * @return this instance
 * @throws IllegalArgumentException if {@code len} is less than 0 or greater than the
 *      configured {@link #getMaxStringLength() maximum string length}
 * @throws java.io.UncheckedIOException if an I/O error occurs
 * @see #getCurrentToken()
 * @see #consume(int, IntConsumer)
 */
public SimpleTextParser next(final int len) {
    validateRequestedStringLength(len);

    // capture the starting position before any characters are consumed
    final int startLine = getLineNumber();
    final int startCol = getColumnNumber();

    final String token;
    if (hasMoreCharacters()) {
        final StringBuilder chars = new StringBuilder(len);
        consume(len, ch -> chars.append((char) ch));
        token = chars.toString();
    } else {
        token = null;
    }

    setToken(startLine, startCol, token);

    return this;
}
/** Extract up to {@code len} characters from the stream and make the result the current
 * token. This behaves like {@link #next(int)} except that new line sequences prefixed with
 * {@code lineContinuationChar} are skipped, along with the continuation character itself.
 * If no characters are available when the method is called, the token is set to null.
 * @param lineContinuationChar character used to indicate skipped new line sequences
 * @param len the maximum length of the extracted string
 * @return this instance
 * @throws IllegalArgumentException if {@code len} is less than 0 or greater than the
 *      configured {@link #getMaxStringLength() maximum string length}
 * @throws java.io.UncheckedIOException if an I/O error occurs
 * @see #getCurrentToken()
 * @see #consumeWithLineContinuation(char, int, IntConsumer)
 */
public SimpleTextParser nextWithLineContinuation(final char lineContinuationChar, final int len) {
    validateRequestedStringLength(len);

    // capture the starting position before any characters are consumed
    final int startLine = getLineNumber();
    final int startCol = getColumnNumber();

    final String token;
    if (hasMoreCharacters()) {
        final StringBuilder chars = new StringBuilder(len);
        consumeWithLineContinuation(lineContinuationChar, len, ch -> chars.append((char) ch));
        token = chars.toString();
    } else {
        token = null;
    }

    setToken(startLine, startCol, token);

    return this;
}
/** Collect characters from the stream for as long as {@code pred} accepts them and make the
 * result the current token. After this call, {@link #readChar()} returns either a character
 * rejected by the predicate or -1 if the end of the stream has been reached. If the end of
 * the stream had already been reached before the call, the token is set to null.
 * @param pred predicate function passed characters read from the input; reading continues
 *      until the predicate returns false
 * @return this instance
 * @throws IllegalStateException if the length of the produced string exceeds the configured
 *      {@link #getMaxStringLength() maximum string length}
 * @throws java.io.UncheckedIOException if an I/O error occurs
 * @see #getCurrentToken()
 * @see #consume(IntPredicate, IntConsumer)
 */
public SimpleTextParser next(final IntPredicate pred) {
    // capture the starting position before any characters are consumed
    final int startLine = getLineNumber();
    final int startCol = getColumnNumber();

    final String token;
    if (hasMoreCharacters()) {
        // the collector acts as both the character test and the character sink
        final StringCollector collector = new StringCollector(startLine, startCol, pred);
        consume(collector, collector);
        token = collector.getString();
    } else {
        token = null;
    }

    setToken(startLine, startCol, token);

    return this;
}
/** Collect characters from the stream for as long as {@code pred} accepts them and make the
 * result the current token. This behaves like {@link #next(IntPredicate)} except that new
 * line sequences prefixed with {@code lineContinuationChar} are skipped, along with the
 * continuation character itself. If the end of the stream had already been reached before
 * the call, the token is set to null.
 * @param lineContinuationChar character used to indicate skipped new line sequences
 * @param pred predicate function passed characters read from the input; reading continues
 *      until the predicate returns false
 * @return this instance
 * @throws IllegalStateException if the length of the produced string exceeds the configured
 *      {@link #getMaxStringLength() maximum string length}
 * @throws java.io.UncheckedIOException if an I/O error occurs
 * @see #getCurrentToken()
 * @see #consumeWithLineContinuation(char, IntPredicate, IntConsumer)
 */
public SimpleTextParser nextWithLineContinuation(final char lineContinuationChar, final IntPredicate pred) {
    // capture the starting position before any characters are consumed
    final int startLine = getLineNumber();
    final int startCol = getColumnNumber();

    final String token;
    if (hasMoreCharacters()) {
        // the collector acts as both the character test and the character sink
        final StringCollector collector = new StringCollector(startLine, startCol, pred);
        consumeWithLineContinuation(lineContinuationChar, collector, collector);
        token = collector.getString();
    } else {
        token = null;
    }

    setToken(startLine, startCol, token);

    return this;
}
/** Read from the current parser position up to the next new line sequence and make the
 * result the current token. The terminating new line sequence ('\r', '\n', or '\r\n')
 * is consumed but is not part of the token. If the end of the stream had already been
 * reached before the call, the token is set to null.
 * @return this instance
 * @throws IllegalStateException if the length of the produced string exceeds the configured
 *      {@link #getMaxStringLength() maximum string length}
 * @throws java.io.UncheckedIOException if an I/O error occurs
 * @see #getCurrentToken()
 */
public SimpleTextParser nextLine() {
    final IntPredicate withinLine = SimpleTextParser::isNotNewLinePart;

    next(withinLine);
    discardNewLineSequence();

    return this;
}
/** Read a sequence of alphanumeric characters starting from the current parser position
* and set the result as the current token. The token will be the empty string if the next
* character in the stream is not alphanumeric and will be null if the end of the stream has
* been reached prior to the method call.
* @return this instance
* @throws IllegalStateException if the length of the produced string exceeds the configured
* {@link #getMaxStringLength() maximum string length}
* @throws java.io.UncheckedIOException if an I/O error occurs
* @see #getCurrentToken()
* @see #next(IntPredicate)
*/
public SimpleTextParser nextAlphanumeric() {
return next(SimpleTextParser::isAlphanumeric);
}
/** Discard up to {@code len} characters from the character stream, stopping early if the
* end of the stream is reached. The parser position is updated but the current token is
* not changed.
* @param len number of characters to discard
* @return this instance
* @throws java.io.UncheckedIOException if an I/O error occurs
* @see #consume(int, IntConsumer)
*/
public SimpleTextParser discard(final int len) {
return consume(len, NOOP_CONSUMER);
}
/** Discard up to {@code len} characters from the character stream, stopping early if the
* end of the stream is reached. The parser position is updated but the current token is
* not changed. New line sequences prefixed with {@code lineContinuationChar} are skipped
* and do not count toward {@code len}.
* @param lineContinuationChar character used to indicate skipped new line sequences
* @param len number of characters to discard
* @return this instance
* @throws java.io.UncheckedIOException if an I/O error occurs
* @see #consumeWithLineContinuation(char, int, IntConsumer)
*/
public SimpleTextParser discardWithLineContinuation(final char lineContinuationChar,
final int len) {
return consumeWithLineContinuation(lineContinuationChar, len, NOOP_CONSUMER);
}
/** Discard characters from the stream while the given predicate returns true. The next call
* to {@link #readChar()} will return either a character that fails the predicate test or -1
* if the end of the stream has been reached. The parser position is updated but the current
* token is not changed.
* @param pred predicate test for characters to discard
* @return this instance
* @throws java.io.UncheckedIOException if an I/O error occurs
* @see #consume(IntPredicate, IntConsumer)
*/
public SimpleTextParser discard(final IntPredicate pred) {
return consume(pred, NOOP_CONSUMER);
}
/** Discard characters from the stream while the given predicate returns true. New line sequences
* prefixed with {@code lineContinuationChar} are skipped. The next call to {@link #readChar()}
* will return either a character that fails the predicate test or -1 if the end of the stream
* has been reached. The parser position is updated but the current token is not changed.
* @param lineContinuationChar character used to indicate skipped new line sequences
* @param pred predicate test for characters to discard
* @return this instance
* @throws java.io.UncheckedIOException if an I/O error occurs
* @see #consumeWithLineContinuation(char, IntPredicate, IntConsumer)
*/
public SimpleTextParser discardWithLineContinuation(final char lineContinuationChar,
final IntPredicate pred) {
return consumeWithLineContinuation(lineContinuationChar, pred, NOOP_CONSUMER);
}
/** Discard a sequence of whitespace characters from the character stream starting from the
* current parser position. The next call to {@link #readChar()} will return either a non-whitespace
* character or -1 if the end of the stream has been reached. The parser position is updated
* but the current token is not changed.
* @return this instance
* @throws java.io.UncheckedIOException if an I/O error occurs
* @see #discardLineWhitespace()
*/
public SimpleTextParser discardWhitespace() {
return discard(SimpleTextParser::isWhitespace);
}
/** Discard the next whitespace characters on the current line. The next call to
* {@link #readChar()} will return either a non-whitespace character on the current line,
* the first character of the newline sequence (indicating the end of the line), or -1
* (indicating the end of the stream). The parser position is updated but the current token
* is not changed.
* @return this instance
* @throws java.io.UncheckedIOException if an I/O error occurs
* @see #discardWhitespace()
*/
public SimpleTextParser discardLineWhitespace() {
return discard(SimpleTextParser::isLineWhitespace);
}
/** Skip over a single new line sequence ("\r", "\n", or "\r\n") located at the current
 * reader position. Nothing is consumed if the next character does not begin such a
 * sequence. The parser position is updated but the current token is not changed.
 * @return this instance
 * @throws java.io.UncheckedIOException if an I/O error occurs
 */
public SimpleTextParser discardNewLineSequence() {
    final int upcoming = peekChar();

    if (upcoming == CR) {
        readChar();

        // treat a directly following LF as part of the same sequence
        if (peekChar() == LF) {
            readChar();
        }
    } else if (upcoming == LF) {
        readChar();
    }

    return this;
}
/** Skip the rest of the current line, including its terminating new line sequence.
 * After this call, {@link #readChar()} returns either the first character of the following
 * line or -1 if the end of the stream has been reached. The parser position is updated but
 * the current token is not changed.
 * @return this instance
 * @throws java.io.UncheckedIOException if an I/O error occurs
 */
public SimpleTextParser discardLine() {
    final IntPredicate withinLine = SimpleTextParser::isNotNewLinePart;

    discard(withinLine);
    discardNewLineSequence();

    return this;
}
/** Read characters from the stream and hand each one to {@code consumer} for as long as
 * {@code pred} accepts the upcoming character. The operation stops, leaving the rejected
 * character unread, when the predicate returns false or when the end of the stream is reached.
 * @param pred predicate test for characters to consume
 * @param consumer object to be passed each consumed character
 * @return this instance
 * @throws java.io.UncheckedIOException if an I/O error occurs
 */
public SimpleTextParser consume(final IntPredicate pred, final IntConsumer consumer) {
    // test the upcoming character before committing to reading it
    for (int upcoming = peekChar(); upcoming != EOF && pred.test(upcoming); upcoming = peekChar()) {
        consumer.accept(readChar());
    }

    return this;
}
/** Read at most {@code len} characters from the stream and hand each one to {@code consumer}.
 * This behaves like {@link #consume(int, IntConsumer)} except that new line sequences
 * prefixed with {@code lineContinuationChar} are skipped. Neither the continuation character
 * nor the skipped new line sequence is passed to the consumer or counted toward {@code len}.
 * @param lineContinuationChar character used to indicate skipped new line sequences
 * @param len number of characters to consume
 * @param consumer function to be passed each consumed character
 * @return this instance
 * @throws java.io.UncheckedIOException if an I/O error occurs
 */
public SimpleTextParser consumeWithLineContinuation(final char lineContinuationChar,
        final int len, final IntConsumer consumer) {
    // number of characters passed to the consumer so far
    int accepted = 0;

    while (accepted < len) {
        final int ch = readChar();
        if (ch == EOF) {
            break;
        }

        if (ch == lineContinuationChar && isNewLinePart(peekChar())) {
            // line continuation: drop the new line and do not count anything
            discardNewLineSequence();
        } else {
            consumer.accept(ch);
            ++accepted;
        }
    }

    return this;
}
/** Read at most {@code len} characters from the stream and hand each one to {@code consumer}.
 * Reading stops once {@code len} characters have been consumed or the end of the stream
 * has been reached, whichever comes first. Nothing is read if {@code len} is not positive.
 * @param len number of characters to consume
 * @param consumer object to be passed each consumed character
 * @return this instance
 * @throws java.io.UncheckedIOException if an I/O error occurs
 */
public SimpleTextParser consume(final int len, final IntConsumer consumer) {
    int remaining = len;

    while (remaining > 0) {
        final int ch = readChar();
        if (ch == EOF) {
            break;
        }

        consumer.accept(ch);
        --remaining;
    }

    return this;
}
/** Read characters from the stream and hand each one to {@code consumer} for as long as
 * {@code pred} accepts the upcoming character. This behaves like
 * {@link #consume(IntPredicate, IntConsumer)} except that new line sequences prefixed with
 * {@code lineContinuationChar} are skipped; the continuation check is made before the
 * predicate is consulted, so the continuation character is never passed to the predicate
 * or the consumer when it is directly followed by a new line.
 * @param lineContinuationChar character used to indicate skipped new line sequences
 * @param pred predicate test for characters to consume
 * @param consumer object to be passed each consumed character
 * @return this instance
 * @throws java.io.UncheckedIOException if an I/O error occurs
 */
public SimpleTextParser consumeWithLineContinuation(final char lineContinuationChar,
        final IntPredicate pred, final IntConsumer consumer) {
    for (int upcoming = peekChar(); upcoming != EOF; upcoming = peekChar()) {
        // look one character past the upcoming one to detect a continued line
        final boolean continuation = upcoming == lineContinuationChar &&
                isNewLinePart(buffer.charAt(1));

        if (continuation) {
            readChar();
            discardNewLineSequence();
            continue;
        }

        if (!pred.test(upcoming)) {
            break;
        }

        consumer.accept(readChar());
    }

    return this;
}
/** Return the next character in the stream but do not advance the parser position.
* The line and column numbers are not changed.
* @return the next character in the stream or -1 if the end of the stream has been
* reached
* @throws java.io.UncheckedIOException if an I/O error occurs
* @see #readChar()
*/
public int peekChar() {
return buffer.peek();
}
/** Return a string containing at most {@code len} characters from the stream but
* without changing the parser position. Characters are added to the string until the
* string has the specified length or the end of the stream is reached. The current token
* is not changed.
* @param len the maximum length of the returned string
* @return a string containing at most {@code len} characters from the stream
* or null if the parser has already reached the end of the stream
* @throws IllegalArgumentException if {@code len} is less than 0 or greater than the
* configured {@link #getMaxStringLength() maximum string length}
* @throws java.io.UncheckedIOException if an I/O error occurs
* @see #next(int)
*/
public String peek(final int len) {
validateRequestedStringLength(len);
return buffer.peekString(len);
}
/** Look ahead in the stream and return the leading run of characters accepted by
 * {@code pred}. Neither the parser position nor the current token is changed.
 * @param pred predicate function passed characters read from the input; reading continues
 *      until the predicate returns false
 * @return string containing characters matching {@code pred} or null if the parser has already
 *      reached the end of the stream
 * @throws IllegalStateException if the length of the produced string exceeds the configured
 *      {@link #getMaxStringLength() maximum string length}
 * @throws java.io.UncheckedIOException if an I/O error occurs
 * @see #next(IntPredicate)
 */
public String peek(final IntPredicate pred) {
    if (!hasMoreCharacters()) {
        return null;
    }

    final StringCollector collector = new StringCollector(lineNumber, columnNumber, pred);

    // index into the buffer relative to the current read position; nothing is consumed
    int offset = 0;
    int ch = buffer.charAt(offset);
    while (ch != EOF && collector.test(ch)) {
        collector.accept(ch);

        ++offset;
        ch = buffer.charAt(offset);
    }

    return collector.getString();
}
/** Compare the {@link #getCurrentToken() current token} with the argument and throw an
* exception if they are not equal. The comparison is case-sensitive. No characters are
* read from the stream.
* @param expected expected token
* @return this instance
* @throws IllegalStateException if no token has been read or {@code expected} does not exactly
* equal the current token
* @see #tryMatch(String)
*/
public SimpleTextParser match(final String expected) {
matchInternal(expected, true, true);
return this;
}
/** Compare the {@link #getCurrentToken() current token} with the argument and throw an
* exception if they are not equal. The comparison is <em>not</em> case-sensitive. No
* characters are read from the stream.
* @param expected expected token
* @return this instance
* @throws IllegalStateException if no token has been read or {@code expected} does not equal
* the current token (ignoring case)
* @see #tryMatchIgnoreCase(String)
*/
public SimpleTextParser matchIgnoreCase(final String expected) {
matchInternal(expected, false, true);
return this;
}
/** Return true if the {@link #getCurrentToken() current token} is equal to the argument.
* The comparison is case-sensitive. Unlike {@link #match(String)}, a mismatch is reported
* through the return value instead of an exception.
* @param expected expected token
* @return true if the argument exactly equals the current token
* @throws IllegalStateException if no token has been read
*/
public boolean tryMatch(final String expected) {
return matchInternal(expected, true, false);
}
/** Return true if the {@link #getCurrentToken() current token} is equal to the argument.
* The comparison is <em>not</em> case-sensitive. Unlike {@link #matchIgnoreCase(String)},
* a mismatch is reported through the return value instead of an exception.
* @param expected expected token
* @return true if the argument equals the current token (ignoring case)
* @throws IllegalStateException if no token has been read
*/
public boolean tryMatchIgnoreCase(final String expected) {
return matchInternal(expected, false, false);
}
/** Shared implementation for the {@code match} and {@code tryMatch} methods: check the
 * current token against a single expected string.
 * @param expected expected token
 * @param caseSensitive if the comparison should be case-sensitive
 * @param throwOnFailure if an exception should be thrown if the argument is not
 *      equal to the current token
 * @return true if the argument is equal to the current token
 * @throws IllegalStateException if no token has been read or {@code expected} does not match the
 *      current token and {@code throwOnFailure} is true
 */
private boolean matchInternal(final String expected, final boolean caseSensitive,
        final boolean throwOnFailure) {
    ensureHasSetToken();

    if (stringsEqual(expected, currentToken, caseSensitive)) {
        return true;
    }

    if (throwOnFailure) {
        throw unexpectedToken("[" + expected + "]");
    }

    return false;
}
/** Return the index of the argument that exactly matches the {@link #getCurrentToken() current token}.
* An exception is thrown if no match is found. String comparisons are case-sensitive. If more
* than one argument matches, the index of the first match is returned.
* @param expected strings to compare with the current token
* @return index of the argument that exactly matches the current token
* @throws IllegalStateException if no token has been read or no match is found among the arguments
* @see #choose(List)
*/
public int choose(final String... expected) {
return choose(Arrays.asList(expected));
}
/** Return the index of the argument that exactly matches the {@link #getCurrentToken() current token}.
* An exception is thrown if no match is found. String comparisons are case-sensitive. If more
* than one element matches, the index of the first match is returned.
* @param expected strings to compare with the current token
* @return index of the argument that exactly matches the current token
* @throws IllegalStateException if no token has been read or no match is found among the arguments
* @see #tryChoose(List)
*/
public int choose(final List<String> expected) {
return chooseInternal(expected, true, true);
}
/** Return the index of the argument that matches the {@link #getCurrentToken() current token},
* ignoring case. An exception is thrown if no match is found. String comparisons are <em>not</em>
* case-sensitive. If more than one argument matches, the index of the first match is returned.
* @param expected strings to compare with the current token
* @return index of the argument that matches the current token (ignoring case)
* @throws IllegalStateException if no token has been read or no match is found among the arguments
* @see #chooseIgnoreCase(List)
*/
public int chooseIgnoreCase(final String... expected) {
return chooseIgnoreCase(Arrays.asList(expected));
}
/** Return the index of the argument that matches the {@link #getCurrentToken() current token},
* ignoring case. An exception is thrown if no match is found. String comparisons are <em>not</em>
* case-sensitive. If more than one element matches, the index of the first match is returned.
* @param expected strings to compare with the current token
* @return index of the argument that matches the current token (ignoring case)
* @throws IllegalStateException if no token has been read or no match is found among the arguments
* @see #tryChooseIgnoreCase(List)
*/
public int chooseIgnoreCase(final List<String> expected) {
return chooseInternal(expected, false, true);
}
/** Return the index of the argument that exactly matches the {@link #getCurrentToken() current token}
* or -1 if no match is found. String comparisons are case-sensitive. If more than one argument
* matches, the index of the first match is returned.
* @param expected strings to compare with the current token
* @return index of the argument that exactly matches the current token or -1 if
* no match is found
* @throws IllegalStateException if no token has been read
* @see #tryChoose(List)
*/
public int tryChoose(final String... expected) {
return tryChoose(Arrays.asList(expected));
}
/** Return the index of the argument that exactly matches the {@link #getCurrentToken() current token}
* or -1 if no match is found. String comparisons are case-sensitive. If more than one element
* matches, the index of the first match is returned.
* @param expected strings to compare with the current token
* @return index of the argument that exactly matches the current token or -1 if
* no match is found
* @throws IllegalStateException if no token has been read
* @see #choose(List)
*/
public int tryChoose(final List<String> expected) {
return chooseInternal(expected, true, false);
}
/** Return the index of the argument that matches the {@link #getCurrentToken() current token}
* or -1 if no match is found. String comparisons are <em>not</em> case-sensitive. If more than
* one argument matches, the index of the first match is returned.
* @param expected strings to compare with the current token
* @return index of the argument that matches the current token (ignoring case) or -1 if
* no match is found
* @throws IllegalStateException if no token has been read
* @see #tryChooseIgnoreCase(List)
*/
public int tryChooseIgnoreCase(final String... expected) {
return tryChooseIgnoreCase(Arrays.asList(expected));
}
/** Return the index of the argument that matches the {@link #getCurrentToken() current token}
* or -1 if no match is found. String comparisons are <em>not</em> case-sensitive. If more than
* one element matches, the index of the first match is returned.
* @param expected strings to compare with the current token
* @return index of the argument that matches the current token (ignoring case) or -1 if
* no match is found
* @throws IllegalStateException if no token has been read
* @see #chooseIgnoreCase(List)
*/
public int tryChooseIgnoreCase(final List<String> expected) {
return chooseInternal(expected, false, false);
}
/** Shared implementation for the {@code choose} and {@code tryChoose} methods: find the
 * position of the first candidate string that matches the current token.
 * @param expected strings to compare with the current token
 * @param caseSensitive if the comparisons should be case-sensitive
 * @param throwOnFailure if an exception should be thrown if no match is found
 * @return the index of the matching argument or -1 if no match is found
 * @throws IllegalStateException if no token has been read or no match is found and
 *      {@code throwOnFailure} is true
 */
private int chooseInternal(final List<String> expected, final boolean caseSensitive,
        final boolean throwOnFailure) {
    ensureHasSetToken();

    // position of the candidate currently being examined
    int idx = -1;
    for (final String candidate : expected) {
        ++idx;
        if (stringsEqual(candidate, currentToken, caseSensitive)) {
            return idx;
        }
    }

    if (!throwOnFailure) {
        return -1;
    }

    throw unexpectedToken("one of " + expected);
}
/** Get an exception indicating that the current token was unexpected. The returned
* exception contains a message with the line number and column of the current token and
* a description of its value. The exception is returned, not thrown; the caller is
* expected to throw it.
* @param expected string describing what was expected
* @return exception indicating that the current token was unexpected
* @see #unexpectedToken(String, Throwable)
*/
public IllegalStateException unexpectedToken(final String expected) {
return unexpectedToken(expected, null);
}
/** Build an exception reporting that the current token is not what the caller expected.
 * The message has the form {@code "expected <expected> but found <token description>"}.
 * The reported position is the start of the current token when a token has been read and
 * the current parser position otherwise.
 * @param expected string describing what was expected
 * @param cause cause of the error
 * @return exception indicating that the current token was unexpected
 */
public IllegalStateException unexpectedToken(final String expected, final Throwable cause) {
    final String msg = "expected " + expected + " but found " + getCurrentTokenDescription();

    final int line;
    final int col;
    if (hasSetToken) {
        line = currentTokenLineNumber;
        col = currentTokenColumnNumber;
    } else {
        line = lineNumber;
        col = columnNumber;
    }

    return parseError(line, col, msg, cause);
}
/** Build an exception describing a parsing error located at the current token. This is
 * equivalent to calling {@link #tokenError(String, Throwable)} with a null cause.
 * @param msg error message
 * @return an exception indicating an error during parsing at the current token position
 */
public IllegalStateException tokenError(final String msg) {
    final Throwable noCause = null;
    return tokenError(msg, noCause);
}
/** Build an exception describing a parsing error located at the current token. If no token
 * has been read yet, the current parser position is reported instead.
 * @param msg error message
 * @param cause the cause of the error; may be null
 * @return an exception indicating an error during parsing at the current token position
 */
public IllegalStateException tokenError(final String msg, final Throwable cause) {
    final int line;
    final int col;
    if (hasSetToken) {
        // report where the current token started
        line = currentTokenLineNumber;
        col = currentTokenColumnNumber;
    } else {
        // no token yet; use the parser position
        line = lineNumber;
        col = columnNumber;
    }

    return parseError(line, col, msg, cause);
}
/** Build an exception describing an error at the current parser position. This is
 * equivalent to calling {@link #parseError(String, Throwable)} with a null cause.
 * @param msg error message
 * @return an exception indicating an error during parsing
 */
public IllegalStateException parseError(final String msg) {
    return parseError(msg, (Throwable) null);
}
/** Build an exception describing an error at the current parser position, i.e. the
 * parser's current line and column numbers.
 * @param msg error message
 * @param cause the cause of the error; may be null
 * @return an exception indicating an error during parsing
 */
public IllegalStateException parseError(final String msg, final Throwable cause) {
    final int line = lineNumber;
    final int col = columnNumber;

    return parseError(line, col, msg, cause);
}
/** Build an exception describing a parsing error at an explicit position. This is equivalent
 * to calling {@link #parseError(int, int, String, Throwable)} with a null cause.
 * @param line line number of the error
 * @param col column number of the error
 * @param msg error message
 * @return an exception indicating an error during parsing
 */
public IllegalStateException parseError(final int line, final int col, final String msg) {
    final Throwable noCause = null;
    return parseError(line, col, msg, noCause);
}
/** Return an exception indicating an error during parsing. The exception message has the
 * form {@code "Parsing failed at line <line>, column <col>: <msg>"}. The line and column
 * numbers are always rendered with plain ASCII digits, independent of the default locale
 * of the JVM.
 * @param line line number of the error
 * @param col column number of the error
 * @param msg error message
 * @param cause the cause of the error; may be null
 * @return an exception indicating an error during parsing
 */
public IllegalStateException parseError(final int line, final int col, final String msg,
        final Throwable cause) {
    // Build the message with plain concatenation rather than String.format: the "%d"
    // conversion localizes digits using the default locale, which makes the reported
    // position vary between environments.
    final String fullMsg = "Parsing failed at line " + line + ", column " + col + ": " + msg;
    return GeometryIOUtils.parseError(fullMsg, cause);
}
/** Store a newly read token along with the position where it started, and record that
 * a token has been set.
 * @param line line number for the start of the token
 * @param col column number for the start of the token
 * @param token token to set
 */
private void setToken(final int line, final int col, final String token) {
    // token text
    this.currentToken = token;

    // token start position
    this.currentTokenLineNumber = line;
    this.currentTokenColumnNumber = col;

    // flag checked by ensureHasSetToken()
    this.hasSetToken = true;
}
/** Produce a human-readable description of the current token for use in error messages.
 * Non-empty tokens are returned wrapped in square brackets. For null or empty tokens, the
 * remaining input is inspected in order to give a more specific description where possible.
 * @return a user-friendly description of the current token.
 */
private String getCurrentTokenDescription() {
    final boolean nullOrEmpty = currentToken == null || currentToken.isEmpty();

    if (nullOrEmpty) {
        // Best effort: look at the remaining input to say something more useful about
        // where the empty token is located. Failures while inspecting the input are
        // deliberately ignored so that the generic descriptions below are used instead.
        try {
            if (!hasMoreCharacters()) {
                return "end of content";
            }
            if (currentToken != null) {
                return hasMoreCharactersOnLine() ?
                        "empty token followed by [" + peek(1) + "]" :
                        "end of line";
            }
        } catch (IllegalStateException ignored) {
            // fall through to the generic descriptions
        }
    }

    if (currentToken == null) {
        return "no current token";
    }

    return currentToken.isEmpty() ?
            "empty token" :
            "[" + currentToken + "]";
}
/** Check that a requested string length lies in the range {@code [0, maxStringLength]}.
 * @param len requested string length
 * @throws IllegalArgumentException if {@code len} is less than 0 or greater than {@code maxStringLength}
 */
private void validateRequestedStringLength(final int len) {
    if (len < 0) {
        throw new IllegalArgumentException("Requested string length cannot be negative; was " + len);
    }

    if (len > maxStringLength) {
        final String msg = "Requested string length of " + len + " exceeds maximum value of " +
                maxStringLength;
        throw new IllegalArgumentException(msg);
    }
}
/** Guard method for operations that require a current token; does nothing if a token
 * has been set and throws otherwise.
 * @throws IllegalStateException if no token read operation has been performed
 */
private void ensureHasSetToken() {
    if (hasSetToken) {
        return;
    }

    throw new IllegalStateException("No token has been read from the character stream");
}
/** Test whether the given character (Unicode code point) is whitespace, as defined
 * by {@link Character#isWhitespace(int)}.
 * @param ch character (Unicode code point) to test
 * @return true if the given character is whitespace
 * @see Character#isWhitespace(int)
 */
public static boolean isWhitespace(final int ch) {
    final boolean whitespace = Character.isWhitespace(ch);
    return whitespace;
}
/** Test whether the given character (Unicode code point) is anything other than
 * whitespace. This is the logical negation of {@link #isWhitespace(int)}.
 * @param ch character (Unicode code point) to test
 * @return true if the given character is not whitespace
 * @see #isWhitespace(int)
 */
public static boolean isNotWhitespace(final int ch) {
    if (isWhitespace(ch)) {
        return false;
    }
    return true;
}
/** Test whether the given character (Unicode code point) is whitespace that cannot
 * appear in a newline sequence (ie, whitespace other than '\r' and '\n').
 * @param ch character (Unicode code point) to test
 * @return true if the given character is a whitespace character not used in newline
 *      sequences
 */
public static boolean isLineWhitespace(final int ch) {
    if (!isWhitespace(ch)) {
        return false;
    }
    return isNotNewLinePart(ch);
}
/** Test whether the given character (Unicode code point) is one of the characters
 * that make up newline sequences (ie, either '\r' or '\n').
 * @param ch character (Unicode code point) to test
 * @return true if the given character is used as part of newline sequences
 */
public static boolean isNewLinePart(final int ch) {
    if (ch == CR) {
        return true;
    }
    return ch == LF;
}
/** Test whether the given character (Unicode code point) is something other than
 * the characters that make up newline sequences (ie, neither '\r' nor '\n').
 * @param ch character (Unicode code point) to test
 * @return true if the given character is not used as part of newline sequences
 * @see #isNewLinePart(int)
 */
public static boolean isNotNewLinePart(final int ch) {
    if (isNewLinePart(ch)) {
        return false;
    }
    return true;
}
/** Test whether the given character (Unicode code point) is either alphabetic
 * or a digit.
 * @param ch character (Unicode code point) to test
 * @return true if the argument is alphanumeric
 * @see Character#isAlphabetic(int)
 * @see Character#isDigit(int)
 */
public static boolean isAlphanumeric(final int ch) {
    if (Character.isAlphabetic(ch)) {
        return true;
    }
    return Character.isDigit(ch);
}
/** Test whether the given character (Unicode code point) is neither alphabetic nor
 * a digit. This is the logical negation of {@link #isAlphanumeric(int)}.
 * @param ch character (Unicode code point) to test
 * @return true if the argument is not alphanumeric
 * @see #isAlphanumeric(int)
 */
public static boolean isNotAlphanumeric(final int ch) {
    if (isAlphanumeric(ch)) {
        return false;
    }
    return true;
}
/** Test whether the given character (Unicode code point) may appear in the string
 * representation of an integer. The accepted characters are
 * <ul>
 *  <li>{@link Character#isDigit(int) digits}</li>
 *  <li>the '-' (minus) character</li>
 *  <li>the '+' (plus) character</li>
 * </ul>
 * @param ch character (Unicode code point) to test
 * @return true if the given character can be used as part of an integer string
 */
public static boolean isIntegerPart(final int ch) {
    switch (ch) {
    case '-':
    case '+':
        // sign characters
        return true;
    default:
        return Character.isDigit(ch);
    }
}
/** Test whether the given character (Unicode code point) may appear in the string
 * representation of a decimal number. The accepted characters are
 * <ul>
 *  <li>{@link Character#isDigit(int) digits}</li>
 *  <li>the '-' (minus) character</li>
 *  <li>the '+' (plus) character</li>
 *  <li>the '.' (period) character</li>
 *  <li>the 'e' character</li>
 *  <li>the 'E' character</li>
 * </ul>
 * @param ch character (Unicode code point) to test
 * @return true if the given character can be used as part of a decimal number string
 */
public static boolean isDecimalPart(final int ch) {
    switch (ch) {
    case '-':
    case '+':
    case '.':
    case 'e':
    case 'E':
        // sign, decimal point, and exponent markers
        return true;
    default:
        return Character.isDigit(ch);
    }
}
/** Compare two strings for equality, optionally ignoring case. Null arguments are
 * permitted: two nulls are considered equal and a null is never equal to a non-null string.
 * @param a first string
 * @param b second string
 * @param caseSensitive comparison is case-sensitive if set to true
 * @return true if the string arguments are considered equal
 */
private static boolean stringsEqual(final String a, final String b, final boolean caseSensitive) {
    if (a == null) {
        // only another null matches a null
        return b == null;
    }

    if (caseSensitive) {
        return a.equals(b);
    }
    return a.equalsIgnoreCase(b);
}
/** Internal helper that accumulates characters from the character stream into a string
 * while guarding against strings longer than the maximum configured string length. The
 * instance acts as both the predicate deciding whether a character is accepted and the
 * consumer that stores accepted characters.
 */
private final class StringCollector implements IntPredicate, IntConsumer {

    /** String builder instance. */
    private final StringBuilder sb = new StringBuilder();

    /** Start position line. */
    private final int line;

    /** Start position column. */
    private final int col;

    /** Character predicate. */
    private final IntPredicate pred;

    /** Construct a new instance with the given start position and character predicate.
     * @param line start position line
     * @param col start position col
     * @param pred character predicate
     */
    StringCollector(final int line, final int col, final IntPredicate pred) {
        this.pred = pred;
        this.line = line;
        this.col = col;
    }

    /** {@inheritDoc}
     *
     * <p>A character is accepted only if the wrapped predicate accepts it and the
     * collected content has not already exceeded the maximum string length.</p>
     */
    @Override
    public boolean test(final int value) {
        // the wrapped predicate is always consulted first
        if (!pred.test(value)) {
            return false;
        }
        return !hasExceededMaxStringLength();
    }

    /** {@inheritDoc}
     *
     * <p>The value is narrowed to a {@code char} and appended to the collected content.</p>
     */
    @Override
    public void accept(final int value) {
        final char ch = (char) value;
        sb.append(ch);
    }

    /** Get the string collected by this instance.
     * @return the string collected by this instance
     * @throws IllegalStateException if the string exceeds the maximum configured length
     */
    public String getString() {
        if (!hasExceededMaxStringLength()) {
            return sb.toString();
        }

        // report the failure at the position where collection started
        throw parseError(line, col, STRING_LENGTH_ERR_MSG + maxStringLength);
    }

    /** Return true if this collector has exceeded the maximum configured string length.
     * @return true if this collector has exceeded the maximum string length
     */
    private boolean hasExceededMaxStringLength() {
        return maxStringLength < sb.length();
    }
}
}