View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *   https://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing,
13   * software distributed under the License is distributed on an
14   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15   * KIND, either express or implied.  See the License for the
16   * specific language governing permissions and limitations
17   * under the License.
18   */
19  
20  package org.apache.commons.compress.archivers.zip;
21  
22  import java.io.IOException;
23  import java.nio.ByteBuffer;
24  import java.nio.CharBuffer;
25  import java.nio.charset.Charset;
26  import java.nio.charset.CharsetDecoder;
27  import java.nio.charset.CharsetEncoder;
28  import java.nio.charset.CoderResult;
29  import java.nio.charset.CodingErrorAction;
30  
31  /**
32   * A ZipEncoding, which uses a {@link Charset} to encode names.
33   * <p>
34   * The methods of this class are reentrant.
35   * </p>
36   *
37   * @Immutable
38   */
39  final class NioZipEncoding implements ZipEncoding, CharsetAccessor {
40  
41      private static final char REPLACEMENT = '?';
42      private static final byte[] REPLACEMENT_BYTES = { (byte) REPLACEMENT };
43      private static final String REPLACEMENT_STRING = String.valueOf(REPLACEMENT);
44      private static final char[] HEX_CHARS = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' };
45  
46      private static ByteBuffer encodeFully(final CharsetEncoder enc, final CharBuffer cb, final ByteBuffer out) {
47          ByteBuffer o = out;
48          while (cb.hasRemaining()) {
49              final CoderResult result = enc.encode(cb, o, false);
50              if (result.isOverflow()) {
51                  final int increment = estimateIncrementalEncodingSize(enc, cb.remaining());
52                  o = ZipEncodingHelper.growBufferBy(o, increment);
53              }
54          }
55          return o;
56      }
57  
58      private static CharBuffer encodeSurrogate(final CharBuffer cb, final char c) {
59          cb.position(0).limit(6);
60          cb.put('%');
61          cb.put('U');
62  
63          cb.put(HEX_CHARS[c >> 12 & 0x0f]);
64          cb.put(HEX_CHARS[c >> 8 & 0x0f]);
65          cb.put(HEX_CHARS[c >> 4 & 0x0f]);
66          cb.put(HEX_CHARS[c & 0x0f]);
67          cb.flip();
68          return cb;
69      }
70  
71      /**
72       * Estimate the size needed for remaining characters
73       *
74       * @param enc       encoder to use for estimates
75       * @param charCount number of characters remaining
76       * @return estimated size in bytes.
77       */
78      private static int estimateIncrementalEncodingSize(final CharsetEncoder enc, final int charCount) {
79          return (int) Math.ceil(charCount * enc.averageBytesPerChar());
80      }
81  
82      /**
83       * Estimate the initial encoded size (in bytes) for a character buffer.
84       * <p>
85       * The estimate assumes that one character consumes uses the maximum length encoding, whilst the rest use an average size encoding. This accounts for any
86       * BOM for UTF-16, at the expense of a couple of extra bytes for UTF-8 encoded ASCII.
87       * </p>
88       *
89       * @param enc        encoder to use for estimates
90       * @param charChount number of characters in string
91       * @return estimated size in bytes.
92       */
93      private static int estimateInitialBufferSize(final CharsetEncoder enc, final int charChount) {
94          final float first = enc.maxBytesPerChar();
95          final float rest = (charChount - 1) * enc.averageBytesPerChar();
96          return (int) Math.ceil(first + rest);
97      }
98  
99      private final Charset charset;
100 
101     private final boolean useReplacement;
102 
103     /**
104      * Constructs an NioZipEncoding using the given charset.
105      *
106      * @param charset        The character set to use.
107      */
108     NioZipEncoding(final Charset charset) {
109         this.charset = charset;
110         this.useReplacement = ZipEncodingHelper.isUTF8(charset);
111     }
112 
113     /**
114      * @see ZipEncoding#canEncode(String)
115      */
116     @Override
117     public boolean canEncode(final String name) {
118         return newEncoder().canEncode(name);
119     }
120 
121     /**
122      * @see ZipEncoding#decode(byte[])
123      */
124     @Override
125     public String decode(final byte[] data) throws IOException {
126         return newDecoder().decode(ByteBuffer.wrap(data)).toString();
127     }
128 
129     /**
130      * @see ZipEncoding#encode(String)
131      */
132     @Override
133     public ByteBuffer encode(final String name) {
134         final CharsetEncoder enc = newEncoder();
135 
136         final CharBuffer cb = CharBuffer.wrap(name);
137         CharBuffer tmp = null;
138         ByteBuffer out = ByteBuffer.allocate(estimateInitialBufferSize(enc, cb.remaining()));
139 
140         while (cb.hasRemaining()) {
141             final CoderResult res = enc.encode(cb, out, false);
142 
143             if (res.isUnmappable() || res.isMalformed()) {
144 
145                 // write the unmappable characters in utf-16
146                 // pseudo-URL encoding style to ByteBuffer.
147 
148                 final int spaceForSurrogate = estimateIncrementalEncodingSize(enc, 6 * res.length());
149                 if (spaceForSurrogate > out.remaining()) {
150                     // if the destination buffer isn't oversized, assume that the presence of one
151                     // unmappable character makes it likely that there will be more. Find all the
152                     // un-encoded characters and allocate space based on those estimates.
153                     int charCount = 0;
154                     for (int i = cb.position(); i < cb.limit(); i++) {
155                         charCount += !enc.canEncode(cb.get(i)) ? 6 : 1;
156                     }
157                     final int totalExtraSpace = estimateIncrementalEncodingSize(enc, charCount);
158                     out = ZipEncodingHelper.growBufferBy(out, totalExtraSpace - out.remaining());
159                 }
160                 if (tmp == null) {
161                     tmp = CharBuffer.allocate(6);
162                 }
163                 for (int i = 0; i < res.length(); ++i) {
164                     out = encodeFully(enc, encodeSurrogate(tmp, cb.get()), out);
165                 }
166 
167             } else if (res.isOverflow()) {
168                 final int increment = estimateIncrementalEncodingSize(enc, cb.remaining());
169                 out = ZipEncodingHelper.growBufferBy(out, increment);
170 
171             } else if (res.isUnderflow() || res.isError()) {
172                 break;
173             }
174         }
175         // tell the encoder we are done
176         enc.encode(cb, out, true);
177         // may have caused underflow, but that's been ignored traditionally
178 
179         out.limit(out.position());
180         out.rewind();
181         return out;
182     }
183 
184     @Override
185     public Charset getCharset() {
186         return charset;
187     }
188 
189     private CharsetDecoder newDecoder() {
190         if (!useReplacement) {
191             return this.charset.newDecoder().onMalformedInput(CodingErrorAction.REPORT).onUnmappableCharacter(CodingErrorAction.REPORT);
192         }
193         return charset.newDecoder().onMalformedInput(CodingErrorAction.REPLACE).onUnmappableCharacter(CodingErrorAction.REPLACE)
194                 .replaceWith(REPLACEMENT_STRING);
195     }
196 
197     private CharsetEncoder newEncoder() {
198         if (useReplacement) {
199             return charset.newEncoder().onMalformedInput(CodingErrorAction.REPLACE).onUnmappableCharacter(CodingErrorAction.REPLACE)
200                     .replaceWith(REPLACEMENT_BYTES);
201         }
202         return charset.newEncoder().onMalformedInput(CodingErrorAction.REPORT).onUnmappableCharacter(CodingErrorAction.REPORT);
203     }
204 
205 }