001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.commons.text;
018
019import java.util.Arrays;
020
021import org.apache.commons.lang3.ArrayUtils;
022import org.apache.commons.text.matcher.StringMatcherFactory;
023
024/**
025 * A matcher class that can be queried to determine if a character array
026 * portion matches.
027 * <p>
028 * This class comes complete with various factory methods.
029 * If these do not suffice, you can subclass and implement your own matcher.
030 * </p>
031 *
032 * @since 1.0
033 * @deprecated Deprecated as of 1.3, use {@link StringMatcherFactory} instead. This class will be removed in 2.0.
034 */
035@Deprecated
036public abstract class StrMatcher {
037
038    /**
039     * Class used to define a character for matching purposes.
040     */
041    private static final class CharMatcher extends StrMatcher {
042
043        /** The character to match. */
044        private final char ch;
045
046        /**
047         * Constructor that creates a matcher that matches a single character.
048         *
049         * @param ch  the character to match
050         */
051        private CharMatcher(final char ch) {
052            this.ch = ch;
053        }
054
055        /**
056         * Returns {@code 1} if there is a match, or {@code 0} if there is no match.
057         *
058         * @param buffer  the text content to match against, do not change
059         * @param pos  the starting position for the match, valid for buffer
060         * @param bufferStart  the first active index in the buffer, valid for buffer
061         * @param bufferEnd  the end index of the active buffer, valid for buffer
062         * @return The number of matching characters, or zero if there is no match
063         */
064        @Override
065        public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) {
066            return ch == buffer[pos] ? 1 : 0;
067        }
068    }
069
070    /**
071     * Class used to define a set of characters for matching purposes.
072     */
073    private static final class CharSetMatcher extends StrMatcher {
074
075        /** The set of characters to match. */
076        private final char[] chars;
077
078        /**
079         * Constructor that creates a matcher from a character array.
080         *
081         * @param chars  the characters to match, must not be null
082         */
083        private CharSetMatcher(final char[] chars) {
084            this.chars = chars.clone();
085            Arrays.sort(this.chars);
086        }
087
088        /**
089         * Returns {@code 1} if there is a match, or {@code 0} if there is no match.
090         *
091         * @param buffer  the text content to match against, do not change
092         * @param pos  the starting position for the match, valid for buffer
093         * @param bufferStart  the first active index in the buffer, valid for buffer
094         * @param bufferEnd  the end index of the active buffer, valid for buffer
095         * @return The number of matching characters, or zero if there is no match
096         */
097        @Override
098        public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) {
099            return Arrays.binarySearch(chars, buffer[pos]) >= 0 ? 1 : 0;
100        }
101    }
102
103    /**
104     * Class used to match no characters.
105     */
106    private static final class NoMatcher extends StrMatcher {
107
108        /**
109         * Constructs a new instance of {@code NoMatcher}.
110         */
111        private NoMatcher() {
112        }
113
114        /**
115         * Always returns {@code 0}.
116         *
117         * @param buffer  the text content to match against, do not change
118         * @param pos  the starting position for the match, valid for buffer
119         * @param bufferStart  the first active index in the buffer, valid for buffer
120         * @param bufferEnd  the end index of the active buffer, valid for buffer
121         * @return The number of matching characters, or zero if there is no match
122         */
123        @Override
124        public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) {
125            return 0;
126        }
127    }
128
129    /**
130     * Class used to define a set of characters for matching purposes.
131     */
132    private static final class StringMatcher extends StrMatcher {
133
134        /** The string to match, as a character array. */
135        private final char[] chars;
136
137        /**
138         * Constructor that creates a matcher from a String.
139         *
140         * @param str  the string to match, must not be null
141         */
142        private StringMatcher(final String str) {
143            chars = str.toCharArray();
144        }
145
146        /**
147         * Returns the number of matching characters, or zero if there is no match.
148         *
149         * @param buffer  the text content to match against, do not change
150         * @param pos  the starting position for the match, valid for buffer
151         * @param bufferStart  the first active index in the buffer, valid for buffer
152         * @param bufferEnd  the end index of the active buffer, valid for buffer
153         * @return The number of matching characters, or zero if there is no match
154         */
155        @Override
156        public int isMatch(final char[] buffer, int pos, final int bufferStart, final int bufferEnd) {
157            final int len = chars.length;
158            if (pos + len > bufferEnd) {
159                return 0;
160            }
161            for (int i = 0; i < chars.length; i++, pos++) {
162                if (chars[i] != buffer[pos]) {
163                    return 0;
164                }
165            }
166            return len;
167        }
168
169        @Override
170        public String toString() {
171            return super.toString() + ' ' + Arrays.toString(chars);
172        }
173
174    }
175
176    /**
177     * Class used to match whitespace as per trim().
178     */
179    private static final class TrimMatcher extends StrMatcher {
180
181        /**
182         * Constructs a new instance of {@code TrimMatcher}.
183         */
184        private TrimMatcher() {
185        }
186
187        /**
188         * Returns whether or not the given character matches.
189         *
190         * @param buffer  the text content to match against, do not change
191         * @param pos  the starting position for the match, valid for buffer
192         * @param bufferStart  the first active index in the buffer, valid for buffer
193         * @param bufferEnd  the end index of the active buffer, valid for buffer
194         * @return The number of matching characters, or zero if there is no match
195         */
196        @Override
197        public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) {
198            return buffer[pos] <= 32 ? 1 : 0;
199        }
200    }
201
202    /**
203     * Matches the comma character.
204     */
205    private static final StrMatcher COMMA_MATCHER = new CharMatcher(',');
206
207    /**
208     * Matches the tab character.
209     */
210    private static final StrMatcher TAB_MATCHER = new CharMatcher('\t');
211
212    /**
213     * Matches the space character.
214     */
215    private static final StrMatcher SPACE_MATCHER = new CharMatcher(' ');
216
217    /**
218     * Matches the same characters as StringTokenizer,
219     * namely space, tab, newline, form feed.
220     */
221    private static final StrMatcher SPLIT_MATCHER = new CharSetMatcher(" \t\n\r\f".toCharArray());
222
223    /**
224     * Matches the String trim() whitespace characters.
225     */
226    private static final StrMatcher TRIM_MATCHER = new TrimMatcher();
227
228    /**
229     * Matches the double quote character.
230     */
231    private static final StrMatcher SINGLE_QUOTE_MATCHER = new CharMatcher('\'');
232
233    /**
234     * Matches the double quote character.
235     */
236    private static final StrMatcher DOUBLE_QUOTE_MATCHER = new CharMatcher('"');
237
238    /**
239     * Matches the single or double quote character.
240     */
241    private static final StrMatcher QUOTE_MATCHER = new CharSetMatcher("'\"".toCharArray());
242
243    /**
244     * Matches no characters.
245     */
246    private static final StrMatcher NONE_MATCHER = new NoMatcher();
247
248    /**
249     * Creates a matcher from a character.
250     *
251     * @param ch  the character to match, must not be null
252     * @return a new Matcher for the given char
253     */
254    public static StrMatcher charMatcher(final char ch) {
255        return new CharMatcher(ch);
256    }
257
258    /**
259     * Creates a matcher from a set of characters.
260     *
261     * @param chars  the characters to match, null or empty matches nothing
262     * @return a new matcher for the given char[]
263     */
264    public static StrMatcher charSetMatcher(final char... chars) {
265        if (ArrayUtils.isEmpty(chars)) {
266            return NONE_MATCHER;
267        }
268        if (chars.length == 1) {
269            return new CharMatcher(chars[0]);
270        }
271        return new CharSetMatcher(chars);
272    }
273
274    /**
275     * Creates a matcher from a string representing a set of characters.
276     *
277     * @param chars  the characters to match, null or empty matches nothing
278     * @return a new Matcher for the given characters
279     */
280    public static StrMatcher charSetMatcher(final String chars) {
281        if (chars == null || chars.isEmpty()) {
282            return NONE_MATCHER;
283        }
284        if (chars.length() == 1) {
285            return new CharMatcher(chars.charAt(0));
286        }
287        return new CharSetMatcher(chars.toCharArray());
288    }
289
290    /**
291     * Returns a matcher which matches the comma character.
292     *
293     * @return a matcher for a comma
294     */
295    public static StrMatcher commaMatcher() {
296        return COMMA_MATCHER;
297    }
298
299    /**
300     * Returns a matcher which matches the double quote character.
301     *
302     * @return a matcher for a double quote
303     */
304    public static StrMatcher doubleQuoteMatcher() {
305        return DOUBLE_QUOTE_MATCHER;
306    }
307
308    /**
309     * Matches no characters.
310     *
311     * @return a matcher that matches nothing
312     */
313    public static StrMatcher noneMatcher() {
314        return NONE_MATCHER;
315    }
316
317    /**
318     * Returns a matcher which matches the single or double quote character.
319     *
320     * @return a matcher for a single or double quote
321     */
322    public static StrMatcher quoteMatcher() {
323        return QUOTE_MATCHER;
324    }
325
326    /**
327     * Returns a matcher which matches the single quote character.
328     *
329     * @return a matcher for a single quote
330     */
331    public static StrMatcher singleQuoteMatcher() {
332        return SINGLE_QUOTE_MATCHER;
333    }
334
335    /**
336     * Returns a matcher which matches the space character.
337     *
338     * @return a matcher for a space
339     */
340    public static StrMatcher spaceMatcher() {
341        return SPACE_MATCHER;
342    }
343
344    /**
345     * Matches the same characters as StringTokenizer,
346     * namely space, tab, newline and form feed.
347     *
348     * @return The split matcher
349     */
350    public static StrMatcher splitMatcher() {
351        return SPLIT_MATCHER;
352    }
353
354    /**
355     * Creates a matcher from a string.
356     *
357     * @param str  the string to match, null or empty matches nothing
358     * @return a new Matcher for the given String
359     */
360    public static StrMatcher stringMatcher(final String str) {
361        if (str == null || str.isEmpty()) {
362            return NONE_MATCHER;
363        }
364        return new StringMatcher(str);
365    }
366
367    /**
368     * Returns a matcher which matches the tab character.
369     *
370     * @return a matcher for a tab
371     */
372    public static StrMatcher tabMatcher() {
373        return TAB_MATCHER;
374    }
375
376    /**
377     * Matches the String trim() whitespace characters.
378     *
379     * @return The trim matcher
380     */
381    public static StrMatcher trimMatcher() {
382        return TRIM_MATCHER;
383    }
384
385    /**
386     * Constructs a new instance.
387     */
388    protected StrMatcher() {
389    }
390
391    /**
392     * Returns the number of matching characters, or zero if there is no match.
393     * <p>
394     * This method is called to check for a match.
395     * The parameter {@code pos} represents the current position to be
396     * checked in the string {@code buffer} (a character array which must
397     * not be changed).
398     * The API guarantees that {@code pos} is a valid index for {@code buffer}.
399     * </p>
400     * <p>
401     * The matching code may check one character or many.
402     * It may check characters preceding {@code pos} as well as those after.
403     * </p>
404     * <p>
405     * It must return zero for no match, or a positive number if a match was found.
406     * The number indicates the number of characters that matched.
407     * </p>
408     *
409     * @param buffer  the text content to match against, do not change
410     * @param pos  the starting position for the match, valid for buffer
411     * @return The number of matching characters, or zero if there is no match
412     */
413    public int isMatch(final char[] buffer, final int pos) {
414        return isMatch(buffer, pos, 0, buffer.length);
415    }
416
417    /**
418     * Returns the number of matching characters, or zero if there is no match.
419     * <p>
420     * This method is called to check for a match.
421     * The parameter {@code pos} represents the current position to be
422     * checked in the string {@code buffer} (a character array which must
423     * not be changed).
424     * The API guarantees that {@code pos} is a valid index for {@code buffer}.
425     * </p>
426     * <p>
427     * The character array may be larger than the active area to be matched.
428     * Only values in the buffer between the specified indices may be accessed.
429     * </p>
430     * <p>
431     * The matching code may check one character or many.
432     * It may check characters preceding {@code pos} as well as those
433     * after, so long as no checks exceed the bounds specified.
434     * </p>
435     * <p>
436     * It must return zero for no match, or a positive number if a match was found.
437     * The number indicates the number of characters that matched.
438     * </p>
439     *
440     * @param buffer  the text content to match against, do not change
441     * @param pos  the starting position for the match, valid for buffer
442     * @param bufferStart  the first active index in the buffer, valid for buffer
443     * @param bufferEnd  the end index (exclusive) of the active buffer, valid for buffer
444     * @return The number of matching characters, or zero if there is no match
445     */
446    public abstract int isMatch(char[] buffer, int pos, int bufferStart, int bufferEnd);
447
448}