| Classes in this File | Line Coverage | Branch Coverage | Complexity | ||||
| NumericEntityUnescaper |
|
| 10.666666666666666;10.667 | ||||
| NumericEntityUnescaper$OPTION |
|
| 10.666666666666666;10.667 |
| 1 | /* | |
| 2 | * Licensed to the Apache Software Foundation (ASF) under one or more | |
| 3 | * contributor license agreements. See the NOTICE file distributed with | |
| 4 | * this work for additional information regarding copyright ownership. | |
| 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 | |
| 6 | * (the "License"); you may not use this file except in compliance with | |
| 7 | * the License. You may obtain a copy of the License at | |
| 8 | * | |
| 9 | * http://www.apache.org/licenses/LICENSE-2.0 | |
| 10 | * | |
| 11 | * Unless required by applicable law or agreed to in writing, software | |
| 12 | * distributed under the License is distributed on an "AS IS" BASIS, | |
| 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
| 14 | * See the License for the specific language governing permissions and | |
| 15 | * limitations under the License. | |
| 16 | */ | |
| 17 | package org.apache.commons.lang3.text.translate; | |
| 18 | ||
| 19 | import java.io.IOException; | |
| 20 | import java.io.Writer; | |
| 21 | import java.util.Arrays; | |
| 22 | import java.util.EnumSet; | |
| 23 | ||
| 24 | /** | |
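| 25 | * Translate XML numeric entities of the form &#\d+;? or &#[xX][0-9a-fA-F]+;? to | |
| 26 | * the specific codepoint. | |
| 27 | * | |
| 28 | * Note that the semi-colon is required by default; it is only optional when OPTION.semiColonOptional is passed to the constructor. | |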
| 29 | * | |
| 30 | * @since 3.0 | |
| 31 | * @version $Id: NumericEntityUnescaper.java 1436770 2013-01-22 07:09:45Z ggregory $ | |
| 32 | */ | |
| 33 | public class NumericEntityUnescaper extends CharSequenceTranslator { | |
| 34 | ||
| 35 | 5 | public static enum OPTION { semiColonRequired, semiColonOptional, errorIfNoSemiColon } |
| 36 | ||
| 37 | // TODO?: Create an OptionsSet class to hide some of the conditional logic below | |
| 38 | private final EnumSet<OPTION> options; | |
| 39 | ||
| 40 | /** | |
| 41 | * Create a NumericEntityUnescaper. | |
| 42 | * | |
| 43 | * The constructor takes a list of options, only one type of which is currently | |
| 44 | * available (whether to allow, error or ignore the semi-colon on the end of a | |
| 45 | * numeric entity being missing). | |
| 46 | * | |
| 47 | * For example, to support numeric entities without a ';': | |
| 48 | * new NumericEntityUnescaper(NumericEntityUnescaper.OPTION.semiColonOptional) | |
| 49 | * and to throw an IllegalArgumentException when they're missing: | |
| 50 | * new NumericEntityUnescaper(NumericEntityUnescaper.OPTION.errorIfNoSemiColon) | |
| 51 | * | |
| 52 | * Note that with no options the default is OPTION.semiColonRequired: entities without a ';' are left untranslated. | |
| 53 | * | |
| 54 | * @param options to apply to this unescaper | |
| 55 | */ | |
| 56 | 8 | public NumericEntityUnescaper(final OPTION... options) { |
| 57 | 8 | if(options.length > 0) { |
| 58 | 2 | this.options = EnumSet.copyOf(Arrays.asList(options)); |
| 59 | } else { | |
| 60 | 6 | this.options = EnumSet.copyOf(Arrays.asList(new OPTION[] { OPTION.semiColonRequired })); |
| 61 | } | |
| 62 | 8 | } |
| 63 | ||
| 64 | /** | |
| 65 | * Whether the passed in option is currently set. | |
| 66 | * | |
| 67 | * @param option to check state of | |
| 68 | * @return whether the option is set | |
| 69 | */ | |
| 70 | public boolean isSet(final OPTION option) { | |
| 71 | 7 | return options == null ? false : options.contains(option); |
| 72 | } | |
| 73 | ||
| 74 | /** | |
| 75 | * {@inheritDoc} Returns 0 and writes nothing when no entity is translated at index; otherwise writes the decoded code point and returns the number of chars consumed. NOTE(review): a parsed value above 0x10FFFF is passed to Character.toChars, which rejects invalid code points with IllegalArgumentException - confirm whether returning 0 is intended instead. | |
| 76 | */ | |
| 77 | @Override | |
| 78 | public int translate(final CharSequence input, final int index, final Writer out) throws IOException { | |
| 79 | 131492 | final int seqEnd = input.length(); |
| 80 | // Uses -2 to ensure there is something after the &# | |
| 81 | 131492 | if(input.charAt(index) == '&' && index < seqEnd - 2 && input.charAt(index + 1) == '#') { |
| 82 | 131088 | int start = index + 2; |
| 83 | 131088 | boolean isHex = false; |
| 84 | ||
| 85 | 131088 | final char firstChar = input.charAt(start); |
| 86 | 131088 | if(firstChar == 'x' || firstChar == 'X') { |
| 87 | 131079 | start++; |
| 88 | 131079 | isHex = true; |
| 89 | ||
| 90 | // Check there's more than just an x after the &# | |
| 91 | 131079 | if(start == seqEnd) { |
| 92 | 2 | return 0; |
| 93 | } | |
| 94 | } | |
| 95 | ||
| 96 | 131086 | int end = start; |
| 97 | // Note that this supports character codes without a ; on the end. Hex letters are matched even for decimal entities; the radix-10 parse below then fails and 0 is returned | |
| 98 | 646676 | while(end < seqEnd && ( input.charAt(end) >= '0' && input.charAt(end) <= '9' || |
| 99 | input.charAt(end) >= 'a' && input.charAt(end) <= 'f' || | |
| 100 | input.charAt(end) >= 'A' && input.charAt(end) <= 'F' ) ) | |
| 101 | { | |
| 102 | 515590 | end++; |
| 103 | } | |
| 104 | ||
| 105 | 131086 | final boolean semiNext = end != seqEnd && input.charAt(end) == ';'; |
| 106 | ||
| 107 | 131086 | if(!semiNext) { |
| 108 | 5 | if(isSet(OPTION.semiColonRequired)) { |
| 109 | 3 | return 0; |
| 110 | } else | |
| 111 | 2 | if(isSet(OPTION.errorIfNoSemiColon)) { |
| 112 | 1 | throw new IllegalArgumentException("Semi-colon required at end of numeric entity"); |
| 113 | } | |
| 114 | } | |
| 115 | ||
| 116 | int entityValue; | |
| 117 | try { | |
| 118 | 131082 | if(isHex) { |
| 119 | 131075 | entityValue = Integer.parseInt(input.subSequence(start, end).toString(), 16); |
| 120 | } else { | |
| 121 | 7 | entityValue = Integer.parseInt(input.subSequence(start, end).toString(), 10); |
| 122 | } | |
| 123 | 1 | } catch(final NumberFormatException nfe) { |
| 124 | 1 | return 0; |
| 125 | 131081 | } |
| 126 | ||
| 127 | 131081 | if(entityValue > 0xFFFF) { |
| 128 | 2 | final char[] chrs = Character.toChars(entityValue); |
| 129 | 2 | out.write(chrs[0]); |
| 130 | 2 | out.write(chrs[1]); |
| 131 | 2 | } else { |
| 132 | 131079 | out.write(entityValue); |
| 133 | } | |
| 134 | ||
| 135 | 131081 | return 2 + end - start + (isHex ? 1 : 0) + (semiNext ? 1 : 0); |
| 136 | } | |
| 137 | 404 | return 0; |
| 138 | } | |
| 139 | } |