/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * https://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.commons.compress.archivers.zip;

import java.io.IOException;
import java.nio.ByteBuffer;

/**
 * An interface for encoders that do a pretty encoding of ZIP file names.
 *
 * <p>
 * There are mostly two implementations, one that uses java.nio {@link java.nio.charset.Charset Charset} and one implementation, which copes with simple 8 bit
 * charsets, because java-1.4 did not support Cp437 in java.nio.
 * </p>
 *
 * <p>
 * The main reason for defining a separate encoding layer comes from the problems with {@link String#getBytes(String) String.getBytes}, which encodes
 * unknown characters as ASCII question marks ('?'). A question mark is not a valid file name character on some operating systems like Windows, which
 * leads to ignored ZIP entries.
 * </p>
 *
 * <p>
 * All implementations should implement this interface in a reentrant way.
 * </p>
 */
public interface ZipEncoding {
    /**
     * Checks whether the given string may be losslessly encoded using this encoding.
     *
     * @param name A file name or ZIP comment.
     * @return Whether the given name may be encoded without any losses.
     */
    boolean canEncode(String name);

    /**
     * Decodes the given byte values into a string using this encoding.
     *
     * @param data The byte values to decode.
     * @return The decoded string.
     * @throws IOException on error
     */
    String decode(byte[] data) throws IOException;

    /**
     * Encodes a file name or a comment to a byte array suitable for storing it to a serialized ZIP entry.
     *
     * <p>
     * Examples for CP 437 (in pseudo-notation, right hand side is C-style notation):
     * </p>
     *
     * <pre>
     *  encode("\u20AC_for_Dollar.txt") = "%U20AC_for_Dollar.txt"
     *  encode("\u00D6lf\u00E4sser.txt") = "\231lf\204sser.txt"
     * </pre>
     *
     * @param name A file name or ZIP comment.
     * @return A byte buffer with a backing array containing the encoded name. Unmappable characters or malformed character sequences are mapped to a sequence
     *         of utf-16 words encoded in the format {@code %Uxxxx}. It is assumed, that the byte buffer is positioned at the beginning of the encoded result,
     *         the byte buffer has a backing array and the limit of the byte buffer points to the end of the encoded result.
     * @throws IOException on error
     */
    ByteBuffer encode(String name) throws IOException;
}