001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing,
013 * software distributed under the License is distributed on an
014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015 * KIND, either express or implied.  See the License for the
016 * specific language governing permissions and limitations
017 * under the License.
018 */
019
020package org.apache.commons.compress.archivers.zip;
021
022import java.io.IOException;
023import java.nio.ByteBuffer;
024
/**
 * An interface for encoders that do a pretty encoding of ZIP file names.
 *
 * <p>
 * There are mostly two implementations: one that uses java.nio {@link java.nio.charset.Charset Charset} and one that copes with simple 8 bit charsets,
 * because java-1.4 did not support Cp437 in java.nio.
 * </p>
 *
 * <p>
 * The main reason for defining a separate encoding layer comes from the problems with {@link String#getBytes(String) String.getBytes}, which encodes
 * unknown characters as ASCII question marks ('?'). A question mark is by definition not allowed in a file name on some operating systems like Windows,
 * which leads to ignored ZIP entries.
 * </p>
 *
 * <p>
 * All implementations should implement this interface in a reentrant way.
 * </p>
 */
public interface ZipEncoding {
    /**
     * Checks whether the given string may be losslessly encoded using this encoding.
     *
     * @param name A file name or ZIP comment.
     * @return Whether the given name may be encoded without any losses.
     */
    boolean canEncode(String name);

    /**
     * Decodes the given byte values to a string using this encoding.
     *
     * @param data The byte values to decode.
     * @return The decoded string.
     * @throws IOException on error
     */
    String decode(byte[] data) throws IOException;

    /**
     * Encodes a file name or a comment to a byte buffer suitable for storing it to a serialized ZIP entry.
     *
     * <p>
     * Examples for CP 437 (in pseudo-notation, right hand side is C-style notation):
     * </p>
     *
     * <pre>
     *  encode("\u20AC_for_Dollar.txt") = "%U20AC_for_Dollar.txt"
     *  encode("\u00D6lf\u00E4sser.txt") = "\231lf\204sser.txt"
     * </pre>
     *
     * @param name A file name or ZIP comment.
     * @return A byte buffer with a backing array containing the encoded name. Unmappable characters or malformed character sequences are mapped to a sequence
     *         of utf-16 words encoded in the format {@code %Uxxxx}. Callers may assume that the byte buffer is positioned at the beginning of the encoded
     *         result, that it has a backing array, and that its limit points to the end of the encoded result.
     * @throws IOException on error
     */
    ByteBuffer encode(String name) throws IOException;
}