1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20 package org.apache.commons.compress.archivers.zip;
21
22 import java.io.IOException;
23 import java.nio.ByteBuffer;
24 import java.nio.CharBuffer;
25 import java.nio.charset.Charset;
26 import java.nio.charset.CharsetDecoder;
27 import java.nio.charset.CharsetEncoder;
28 import java.nio.charset.CoderResult;
29 import java.nio.charset.CodingErrorAction;
30
31
32
33
34
35
36
37
38
39 final class NioZipEncoding implements ZipEncoding, CharsetAccessor {
40
41 private static final char REPLACEMENT = '?';
42 private static final byte[] REPLACEMENT_BYTES = { (byte) REPLACEMENT };
43 private static final String REPLACEMENT_STRING = String.valueOf(REPLACEMENT);
44 private static final char[] HEX_CHARS = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' };
45
46 private static ByteBuffer encodeFully(final CharsetEncoder enc, final CharBuffer cb, final ByteBuffer out) {
47 ByteBuffer o = out;
48 while (cb.hasRemaining()) {
49 final CoderResult result = enc.encode(cb, o, false);
50 if (result.isOverflow()) {
51 final int increment = estimateIncrementalEncodingSize(enc, cb.remaining());
52 o = ZipEncodingHelper.growBufferBy(o, increment);
53 }
54 }
55 return o;
56 }
57
58 private static CharBuffer encodeSurrogate(final CharBuffer cb, final char c) {
59 cb.position(0).limit(6);
60 cb.put('%');
61 cb.put('U');
62
63 cb.put(HEX_CHARS[c >> 12 & 0x0f]);
64 cb.put(HEX_CHARS[c >> 8 & 0x0f]);
65 cb.put(HEX_CHARS[c >> 4 & 0x0f]);
66 cb.put(HEX_CHARS[c & 0x0f]);
67 cb.flip();
68 return cb;
69 }
70
71
72
73
74
75
76
77
78 private static int estimateIncrementalEncodingSize(final CharsetEncoder enc, final int charCount) {
79 return (int) Math.ceil(charCount * enc.averageBytesPerChar());
80 }
81
82
83
84
85
86
87
88
89
90
91
92
93 private static int estimateInitialBufferSize(final CharsetEncoder enc, final int charChount) {
94 final float first = enc.maxBytesPerChar();
95 final float rest = (charChount - 1) * enc.averageBytesPerChar();
96 return (int) Math.ceil(first + rest);
97 }
98
99 private final Charset charset;
100
101 private final boolean useReplacement;
102
103
104
105
106
107
108 NioZipEncoding(final Charset charset) {
109 this.charset = charset;
110 this.useReplacement = ZipEncodingHelper.isUTF8(charset);
111 }
112
113
114
115
116 @Override
117 public boolean canEncode(final String name) {
118 return newEncoder().canEncode(name);
119 }
120
121
122
123
124 @Override
125 public String decode(final byte[] data) throws IOException {
126 return newDecoder().decode(ByteBuffer.wrap(data)).toString();
127 }
128
129
130
131
132 @Override
133 public ByteBuffer encode(final String name) {
134 final CharsetEncoder enc = newEncoder();
135
136 final CharBuffer cb = CharBuffer.wrap(name);
137 CharBuffer tmp = null;
138 ByteBuffer out = ByteBuffer.allocate(estimateInitialBufferSize(enc, cb.remaining()));
139
140 while (cb.hasRemaining()) {
141 final CoderResult res = enc.encode(cb, out, false);
142
143 if (res.isUnmappable() || res.isMalformed()) {
144
145
146
147
148 final int spaceForSurrogate = estimateIncrementalEncodingSize(enc, 6 * res.length());
149 if (spaceForSurrogate > out.remaining()) {
150
151
152
153 int charCount = 0;
154 for (int i = cb.position(); i < cb.limit(); i++) {
155 charCount += !enc.canEncode(cb.get(i)) ? 6 : 1;
156 }
157 final int totalExtraSpace = estimateIncrementalEncodingSize(enc, charCount);
158 out = ZipEncodingHelper.growBufferBy(out, totalExtraSpace - out.remaining());
159 }
160 if (tmp == null) {
161 tmp = CharBuffer.allocate(6);
162 }
163 for (int i = 0; i < res.length(); ++i) {
164 out = encodeFully(enc, encodeSurrogate(tmp, cb.get()), out);
165 }
166
167 } else if (res.isOverflow()) {
168 final int increment = estimateIncrementalEncodingSize(enc, cb.remaining());
169 out = ZipEncodingHelper.growBufferBy(out, increment);
170
171 } else if (res.isUnderflow() || res.isError()) {
172 break;
173 }
174 }
175
176 enc.encode(cb, out, true);
177
178
179 out.limit(out.position());
180 out.rewind();
181 return out;
182 }
183
184 @Override
185 public Charset getCharset() {
186 return charset;
187 }
188
189 private CharsetDecoder newDecoder() {
190 if (!useReplacement) {
191 return this.charset.newDecoder().onMalformedInput(CodingErrorAction.REPORT).onUnmappableCharacter(CodingErrorAction.REPORT);
192 }
193 return charset.newDecoder().onMalformedInput(CodingErrorAction.REPLACE).onUnmappableCharacter(CodingErrorAction.REPLACE)
194 .replaceWith(REPLACEMENT_STRING);
195 }
196
197 private CharsetEncoder newEncoder() {
198 if (useReplacement) {
199 return charset.newEncoder().onMalformedInput(CodingErrorAction.REPLACE).onUnmappableCharacter(CodingErrorAction.REPLACE)
200 .replaceWith(REPLACEMENT_BYTES);
201 }
202 return charset.newEncoder().onMalformedInput(CodingErrorAction.REPORT).onUnmappableCharacter(CodingErrorAction.REPORT);
203 }
204
205 }