View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   * http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing,
13   * software distributed under the License is distributed on an
14   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15   * KIND, either express or implied.  See the License for the
16   * specific language governing permissions and limitations
17   * under the License.
18   */
19  
20  package org.apache.commons.compress.archivers.zip;
21  
22  import java.io.IOException;
23  import java.nio.ByteBuffer;
24  
25  /**
26   * An interface for encoders that do a pretty encoding of ZIP file names.
27   *
28   * <p>
29   * There are mainly two implementations: one that uses java.nio {@link java.nio.charset.Charset Charset} and one that copes with simple 8-bit
30   * charsets, because Java 1.4 did not support Cp437 in java.nio.
31   * </p>
32   *
33   * <p>
34   * The main reason for defining a separate encoding layer is the behavior of {@link String#getBytes(String) String.getBytes}, which encodes
35   * unknown characters as ASCII question marks ('?'). A question mark is by definition an invalid file name character on some operating systems such as
36   * Windows, which leads to ignored ZIP entries.
37   * </p>
38   *
39   * <p>
40   * All implementations should implement this interface in a reentrant way.
41   * </p>
42   */
43  public interface ZipEncoding {
44      /**
45       * Checks whether the given string can be losslessly encoded using this encoding.
46       *
47       * @param name A file name or ZIP comment.
48       * @return Whether the given name can be encoded without any loss.
49       */
50      boolean canEncode(String name);
51  
52      /**
         * Decodes the given byte values into a string.
         *
53       * @param data The byte values to decode.
54       * @return The decoded string.
55       * @throws IOException on error
56       */
57      String decode(byte[] data) throws IOException;
58  
59      /**
60       * Encodes a file name or a comment to a byte sequence suitable for storing it in a serialized ZIP entry.
61       *
62       * <p>
63       * Examples for CP 437 (in pseudo-notation, right hand side is C-style notation):
64       * </p>
65       *
66       * <pre>
67       *  encode("\u20AC_for_Dollar.txt") = "%U20AC_for_Dollar.txt"
68       *  encode("\u00D6lf\u00E4sser.txt") = "\231lf\204sser.txt"
69       * </pre>
70       *
71       * @param name A file name or ZIP comment.
72       * @return A byte buffer with a backing array containing the encoded name. Unmappable characters or malformed character sequences are mapped to a sequence
73       *         of UTF-16 words encoded in the format {@code %Uxxxx}. It is assumed that the byte buffer is positioned at the beginning of the encoded result,
74       *         that the byte buffer has a backing array, and that the limit of the byte buffer points to the end of the encoded result.
75       * @throws IOException on error
76       */
77      ByteBuffer encode(String name) throws IOException;
78  }