/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      https://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.commons.codec.digest;

import static org.junit.jupiter.api.Assertions.assertArrayEquals;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertNotEquals;
import static org.junit.jupiter.api.Assertions.assertThrows;

import java.io.ByteArrayInputStream;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.nio.file.attribute.PosixFilePermissions;
import java.security.MessageDigest;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Objects;
import java.util.stream.Stream;

import org.apache.commons.codec.DecoderException;
import org.apache.commons.codec.binary.Hex;
import org.apache.commons.codec.digest.GitIdentifiers.DirectoryEntry;
import org.junit.jupiter.api.Assumptions;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.io.TempDir;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.Arguments;
import org.junit.jupiter.params.provider.MethodSource;
import org.junit.jupiter.params.provider.ValueSource;

49  /**
50   * Tests {@link GitIdentifiers}.
51   */
52  class GitIdentifiersTest {
53  
54      private static final byte[] ZERO_ID = new byte[20];
55  
56      // Virtual tree:
57      //
58      // link      -> src           (symlink)
59      // link.txt  -> src/hello.txt (symlink)
60      // src/
61      //   hello.txt                (regular file)
62      //   run.sh                   (executable file)
63  
64      /** Content of {@code src/hello.txt}. */
65      private static final byte[] HELLO_CONTENT = "hello\n".getBytes(StandardCharsets.UTF_8);
66      /** SHA-1 blob id of {@link #HELLO_CONTENT}: {@code printf 'hello\n' | git hash-object --stdin} */
67      private static final byte[] HELLO_BLOB_ID_SHA1 = hex("ce013625030ba8dba906f756967f9e9ca394464a");
68      /** SHA-256 blob id of {@link #HELLO_CONTENT}. */
69      private static final byte[] HELLO_BLOB_ID_SHA256 = hex("2cf8d83d9ee29543b34a87727421fdecb7e3f3a183d337639025de576db9ebb4");
70  
71      /** Content of {@code src/run.sh}. */
72      private static final byte[] RUN_CONTENT = "#!/bin/sh\n".getBytes(StandardCharsets.UTF_8);
73      /** SHA-1 blob id of {@link #RUN_CONTENT}: {@code printf '#!/bin/sh\n' | git hash-object --stdin} */
74      private static final byte[] RUN_BLOB_ID_SHA1 = hex("1a2485251c33a70432394c93fb89330ef214bfc9");
75      /** SHA-256 blob id of {@link #RUN_CONTENT}. */
76      private static final byte[] RUN_BLOB_ID_SHA256 = hex("1249034e3cf9007362d695b09b1fbdb4c578903bf10b665749b94743f8177ce1");
77  
78      /** Target of symlink {@code link}. */
79      private static final String LINK_CONTENT = "src";
80      /** SHA-1 blob id of the symlink target {@link #LINK_CONTENT}: {@code printf 'src' | git hash-object --stdin} */
81      private static final byte[] LINK_BLOB_ID_SHA1 = hex("e8310385c56dc4bbe379f43400f3181f6a59f260");
82      /** SHA-256 blob id of the symlink target {@link #LINK_CONTENT}. */
83      private static final byte[] LINK_BLOB_ID_SHA256 = hex("e1bdca538422554ea204da85e0cec156b12b6808473083610ff95ea390843ab6");
84  
85      /** Target of symlink {@code link.txt}. */
86      private static final String LINK_TXT_CONTENT = "src/hello.txt";
87      /** SHA-1 blob id of the symlink target {@link #LINK_TXT_CONTENT}: {@code printf 'src/hello.txt' | git hash-object --stdin} */
88      private static final byte[] LINK_TXT_BLOB_ID_SHA1 = hex("132a953033e00dcff94f5cccb261f52cd1d71173");
89      /** SHA-256 blob id of the symlink target {@link #LINK_TXT_CONTENT}. */
90      private static final byte[] LINK_TXT_BLOB_ID_SHA256 = hex("2499925193a48a84a546a2f7cd3ce7789d4e073ef1e7276fe682bfbb2b636cef");
91  
92      // Tree ids can be recomputed in a git repository with:
93      //   git init /tmp/t && cd /tmp/t
94      // followed by writing the blob objects and calling git mktree.
95  
96      /**
97       * SHA-1 tree id of {@code src/} (hello.txt + run.sh):
98       * <pre>
99       * printf '100644 blob ce013625030ba8dba906f756967f9e9ca394464a\thello.txt\n
100      *         100755 blob 1a2485251c33a70432394c93fb89330ef214bfc9\trun.sh\n' | git mktree
101      * </pre>
102      */
103     private static final byte[] SRC_TREE_ID_SHA1 = hex("5575b4a0141a2287ec2836a620e5d6aa8fb203ba");
104     /**
105      * SHA-256 tree id of {@code src/}:
106      * <pre>
107      * printf '100644 blob 2cf8d83d9ee29543b34a87727421fdecb7e3f3a183d337639025de576db9ebb4\thello.txt\n
108      *         100755 blob 1249034e3cf9007362d695b09b1fbdb4c578903bf10b665749b94743f8177ce1\trun.sh\n' | git mktree
109      * </pre>
110      */
111     private static final byte[] SRC_TREE_ID_SHA256 = hex("5b4e74befcb98e3050c511d02353d00565b2172be0a2bc5de833f011ad27f694");
112 
113     /**
114      * SHA-1 tree id of the main directory (link + link.txt + src/):
115      * <pre>
116      * printf '120000 blob e8310385c56dc4bbe379f43400f3181f6a59f260\tlink\n
117      *         120000 blob 132a953033e00dcff94f5cccb261f52cd1d71173\tlink.txt\n
118      *         040000 tree 5575b4a0141a2287ec2836a620e5d6aa8fb203ba\tsrc\n' | git mktree
119      * </pre>
120      */
121     private static final byte[] MAIN_TREE_ID_SHA1 = hex("3217900fd0a6624cd6aa169c2a9f289f7f34432b");
122     /**
123      * SHA-256 tree id of the main directory:
124      * <pre>
125      * printf '120000 blob e1bdca538422554ea204da85e0cec156b12b6808473083610ff95ea390843ab6\tlink\n
126      *         120000 blob 2499925193a48a84a546a2f7cd3ce7789d4e073ef1e7276fe682bfbb2b636cef\tlink.txt\n
127      *         040000 tree 5b4e74befcb98e3050c511d02353d00565b2172be0a2bc5de833f011ad27f694\tsrc\n' | git mktree
128      * </pre>
129      */
130     private static final byte[] MAIN_TREE_ID_SHA256 = hex("58e9a59940e4d2ae7e374b63fedf3b7bba8cfdc60308f64abd066db137300bcd");
131 
132     static Stream<Arguments> blobIdProvider() {
133         return Stream.of(Arguments.of("DigestUtilsTest/hello.txt", "5f4a83288e67f1be2d6fcdad84165a86c6a970d7"),
134                 Arguments.of("DigestUtilsTest/greetings.txt", "6cf4f797455661e61d1ee6913fc29344f5897243"),
135                 Arguments.of("DigestUtilsTest/subdir/nested.txt", "07a392ddb4dbff06a373a7617939f30b2dcfe719"));
136     }
137 
138     /** Decodes a compile-time hex literal; throws {@link AssertionError} on malformed input. */
139     private static byte[] hex(final String hex) {
140         try {
141             return Hex.decodeHex(hex);
142         } catch (final DecoderException e) {
143             throw new AssertionError(e);
144         }
145     }
146 
147     private static Path resourcePath(final String resourceName) throws Exception {
148         return Paths.get(GitIdentifiersTest.class.getClassLoader().getResource(resourceName).toURI());
149     }
150 
151     static Stream<Arguments> virtualTreeProvider() {
152         return Stream.of(
153                 Arguments.of(MessageDigestAlgorithms.SHA_1, HELLO_BLOB_ID_SHA1, LINK_BLOB_ID_SHA1, LINK_TXT_BLOB_ID_SHA1, RUN_BLOB_ID_SHA1,
154                         SRC_TREE_ID_SHA1, MAIN_TREE_ID_SHA1),
155                 Arguments.of(MessageDigestAlgorithms.SHA_256, HELLO_BLOB_ID_SHA256, LINK_BLOB_ID_SHA256, LINK_TXT_BLOB_ID_SHA256, RUN_BLOB_ID_SHA256,
156                         SRC_TREE_ID_SHA256, MAIN_TREE_ID_SHA256));
157     }
158 
159     @ParameterizedTest
160     @MethodSource("blobIdProvider")
161     void testBlobIdByteArray(final String resourceName, final String expectedSha1Hex) throws Exception {
162         final byte[] data = Files.readAllBytes(resourcePath(resourceName));
163         assertArrayEquals(Hex.decodeHex(expectedSha1Hex), GitIdentifiers.blobId(DigestUtils.getSha1Digest(), data));
164     }
165 
166     @ParameterizedTest
167     @MethodSource("blobIdProvider")
168     void testBlobIdInputStreamWithSize(final String resourceName, final String expectedSha1Hex) throws Exception {
169         final byte[] data = Files.readAllBytes(resourcePath(resourceName));
170         assertArrayEquals(Hex.decodeHex(expectedSha1Hex),
171                 GitIdentifiers.blobId(DigestUtils.getSha1Digest(), data.length, new ByteArrayInputStream(data)));
172     }
173 
174     @ParameterizedTest
175     @MethodSource("blobIdProvider")
176     void testBlobIdPath(final String resourceName, final String expectedSha1Hex) throws Exception {
177         assertArrayEquals(Hex.decodeHex(expectedSha1Hex), GitIdentifiers.blobId(DigestUtils.getSha1Digest(), resourcePath(resourceName)));
178     }
179 
180     @Test
181     void testBlobIdSymlink(@TempDir final Path tempDir) throws Exception {
182         final Path subDir = Files.createDirectory(tempDir.resolve("subdir"));
183         Files.write(subDir.resolve("file.txt"), "hello".getBytes(StandardCharsets.UTF_8));
184         try {
185             final Path linkToDir = Files.createSymbolicLink(tempDir.resolve("link-to-dir"), Paths.get("subdir"));
186             final Path linkToFile = Files.createSymbolicLink(tempDir.resolve("link-to-file"), Paths.get("subdir/file.txt"));
187             final MessageDigest sha1 = DigestUtils.getSha1Digest();
188             assertArrayEquals(Hex.decodeHex("8bbe8a53790056316b23b7c270f10ab6bf6bb1b4"), GitIdentifiers.blobId(sha1, linkToDir));
189             assertArrayEquals(Hex.decodeHex("dfe6ef8392ae13a11ff85419b4fd906d997b6cb7"), GitIdentifiers.blobId(sha1, linkToFile));
190         } catch (final UnsupportedOperationException e) {
191             Assumptions.abort("Symbolic links not supported on this filesystem");
192         }
193     }
194 
195     @Test
196     void testDirectoryEntryConstructor() {
197         assertThrows(NullPointerException.class, () -> new DirectoryEntry(null, GitIdentifiers.FileMode.REGULAR, ZERO_ID));
198         assertThrows(NullPointerException.class, () -> new DirectoryEntry("hello.txt", null, ZERO_ID));
199         assertThrows(NullPointerException.class, () -> new DirectoryEntry("hello.txt", GitIdentifiers.FileMode.REGULAR, null));
200         assertThrows(IllegalArgumentException.class, () -> new DirectoryEntry("/", GitIdentifiers.FileMode.REGULAR, ZERO_ID));
201     }
202 
203     /**
204      * Equality and hash code are based solely on the entry name.
205      */
206     @Test
207     void testDirectoryEntryEqualityBasedOnNameOnly() {
208         final byte[] otherId = new byte[20];
209         Arrays.fill(otherId, (byte) 0xff);
210         final DirectoryEntry regular = new DirectoryEntry("foo", GitIdentifiers.FileMode.REGULAR, ZERO_ID);
211         final DirectoryEntry executable = new DirectoryEntry("foo", GitIdentifiers.FileMode.EXECUTABLE, otherId);
212         // Same name, different type and object id -> equal
213         assertEquals(regular, executable);
214         assertEquals(regular.hashCode(), executable.hashCode());
215         // Different name -> not equal
216         assertNotEquals(regular, new DirectoryEntry("bar", GitIdentifiers.FileMode.REGULAR, ZERO_ID));
217         // Same reference -> equal
218         assertEquals(regular, regular);
219         // Not equal to null or unrelated type
220         assertFalse(regular.equals(null));
221         assertFalse(regular.equals("foo"));
222     }
223 
224     /**
225      * Entries should be sorted by Git sort rule.
226      *
227      * <p>Git compares the names of the entries, but adds a {@code /} at the end of directory entries.</p>
228      */
229     @Test
230     void testDirectoryEntrySortOrder() {
231         final DirectoryEntry alpha = new DirectoryEntry("alpha.txt", GitIdentifiers.FileMode.REGULAR, ZERO_ID);
232         final DirectoryEntry fooTxt = new DirectoryEntry("foo.txt", GitIdentifiers.FileMode.REGULAR, ZERO_ID);
233         final DirectoryEntry fooDir = new DirectoryEntry("foo", GitIdentifiers.FileMode.DIRECTORY, ZERO_ID);
234         final DirectoryEntry foobar = new DirectoryEntry("foobar", GitIdentifiers.FileMode.REGULAR, ZERO_ID);
235         final DirectoryEntry zeta = new DirectoryEntry("zeta.txt", GitIdentifiers.FileMode.REGULAR, ZERO_ID);
236         final List<DirectoryEntry> entries = new ArrayList<>(Arrays.asList(zeta, foobar, fooDir, alpha, fooTxt));
237         entries.sort(DirectoryEntry::compareTo);
238         assertEquals(Arrays.asList(alpha, fooTxt, fooDir, foobar, zeta), entries);
239     }
240 
241     @ParameterizedTest
242     @MethodSource("virtualTreeProvider")
243     void testTreeIdBuilder(final String algorithm, final byte[] helloId, final byte[] linkId, final byte[] linkTxtId, final byte[] runId,
244             final byte[] srcTreeId, final byte[] mainTreeId) throws Exception {
245         final MessageDigest md = DigestUtils.getDigest(algorithm);
246 
247         // Verify individual blob IDs against pre-computed constants.
248         assertArrayEquals(helloId, GitIdentifiers.blobId(md, HELLO_CONTENT));
249         assertArrayEquals(linkId, GitIdentifiers.blobId(md, LINK_CONTENT.getBytes(StandardCharsets.UTF_8)));
250         assertArrayEquals(linkTxtId, GitIdentifiers.blobId(md, LINK_TXT_CONTENT.getBytes(StandardCharsets.UTF_8)));
251         assertArrayEquals(runId, GitIdentifiers.blobId(md, RUN_CONTENT));
252 
253         // Entries are supplied out of order to verify that the builder sorts them correctly.
254         final GitIdentifiers.TreeIdBuilder builder = GitIdentifiers.treeIdBuilder(md);
255         builder.addSymbolicLink("link.txt", LINK_TXT_CONTENT);
256         builder.addFile(GitIdentifiers.FileMode.REGULAR, "src/hello.txt", HELLO_CONTENT);
257         builder.addSymbolicLink("link", LINK_CONTENT);
258         builder.addFile(GitIdentifiers.FileMode.EXECUTABLE, "src/run.sh", RUN_CONTENT);
259 
260         // Check trees
261         assertArrayEquals(mainTreeId, builder.get());
262         assertArrayEquals(srcTreeId, builder.addDirectory("src").get());
263     }
264 
265     @Test
266     void testTreeIdBuilderAddFileInputStream() throws Exception {
267         final MessageDigest md = DigestUtils.getSha1Digest();
268         final byte[] content = "Hello, World!\n".getBytes(StandardCharsets.UTF_8);
269 
270         final GitIdentifiers.TreeIdBuilder byteArrayBuilder = GitIdentifiers.treeIdBuilder(md);
271         byteArrayBuilder.addFile(GitIdentifiers.FileMode.REGULAR, "file.txt", content);
272         final byte[] expected = byteArrayBuilder.get();
273 
274         final GitIdentifiers.TreeIdBuilder sizedStreamBuilder = GitIdentifiers.treeIdBuilder(md);
275         sizedStreamBuilder.addFile(GitIdentifiers.FileMode.REGULAR, "file.txt", content.length, new ByteArrayInputStream(content));
276         assertArrayEquals(expected, sizedStreamBuilder.get());
277     }
278 
279     @Test
280     void testTreeIdBuilderInvalidPathSegments() {
281         final MessageDigest md = DigestUtils.getSha1Digest();
282         final byte[] data = {};
283         // Sole path component
284         assertThrows(IllegalArgumentException.class,
285                 () -> GitIdentifiers.treeIdBuilder(md).addFile(GitIdentifiers.FileMode.REGULAR, "..", data));
286         assertThrows(IllegalArgumentException.class,
287                 () -> GitIdentifiers.treeIdBuilder(md).addDirectory(".."));
288         // Embedded in a longer path
289         assertThrows(IllegalArgumentException.class,
290                 () -> GitIdentifiers.treeIdBuilder(md).addFile(GitIdentifiers.FileMode.REGULAR, "subdir/../file.txt", data));
291         assertThrows(IllegalArgumentException.class,
292                 () -> GitIdentifiers.treeIdBuilder(md).addDirectory("subdir/.."));
293     }
294 
295     @Test
296     void testTreeIdBuilderNestedFileEquivalentToDirectoryAndFile() throws Exception {
297         final MessageDigest md = DigestUtils.getSha1Digest();
298         final byte[] content = "hello\n".getBytes(StandardCharsets.UTF_8);
299 
300         final GitIdentifiers.TreeIdBuilder direct = GitIdentifiers.treeIdBuilder(md);
301         direct.addFile(GitIdentifiers.FileMode.REGULAR, "nested/file.txt", content);
302 
303         final GitIdentifiers.TreeIdBuilder indirect = GitIdentifiers.treeIdBuilder(md);
304         indirect.addDirectory("nested").addFile(GitIdentifiers.FileMode.REGULAR, "file.txt", content);
305 
306         assertArrayEquals(direct.get(), indirect.get());
307     }
308 
309     @ParameterizedTest
310     @ValueSource(strings = {"", "."})
311     void testTreeIdBuilderNoopPathSegments(final String segment) throws Exception {
312         final MessageDigest md = DigestUtils.getSha1Digest();
313         final byte[] content = "hello\n".getBytes(StandardCharsets.UTF_8);
314 
315         // Canonical form
316         final GitIdentifiers.TreeIdBuilder canonical = GitIdentifiers.treeIdBuilder(md);
317         canonical.addFile(GitIdentifiers.FileMode.REGULAR, "subdir/file.txt", content);
318         final byte[] expected = canonical.get();
319 
320         // Leading segment
321         final GitIdentifiers.TreeIdBuilder withLeading = GitIdentifiers.treeIdBuilder(md);
322         withLeading.addFile(GitIdentifiers.FileMode.REGULAR, segment + "/subdir/file.txt", content);
323         assertArrayEquals(expected, withLeading.get());
324 
325         // Intermediate segment
326         final GitIdentifiers.TreeIdBuilder withIntermediate = GitIdentifiers.treeIdBuilder(md);
327         withIntermediate.addFile(GitIdentifiers.FileMode.REGULAR, "subdir/" + segment + "/file.txt", content);
328         assertArrayEquals(expected, withIntermediate.get());
329 
330         // addDirectory with leading/trailing segments
331         final GitIdentifiers.TreeIdBuilder viaDirectory = GitIdentifiers.treeIdBuilder(md);
332         viaDirectory.addDirectory(segment + "/subdir/" + segment).addFile(GitIdentifiers.FileMode.REGULAR, "file.txt", content);
333         assertArrayEquals(expected, viaDirectory.get());
334     }
335 
336     @Test
337     void testTreeIdPath() throws Exception {
338         assertArrayEquals(Hex.decodeHex("e4b21f6d78ceba6eb7c211ac15e3337ec4614e8a"),
339                 GitIdentifiers.treeId(DigestUtils.getSha1Digest(), resourcePath("DigestUtilsTest")));
340     }
341 
342     @ParameterizedTest
343     @MethodSource("virtualTreeProvider")
344     void testTreeIdPathUnix(final String algorithm, final byte[] helloId, final byte[] linkId, final byte[] linkTxtId,
345             final byte[] runId, final byte[] srcTreeId, final byte[] mainTreeId, final @TempDir Path tempDir) throws Exception {
346         final MessageDigest md = DigestUtils.getDigest(algorithm);
347 
348         // Files
349         final Path link = tempDir.resolve("link");
350         final Path linkTxt = tempDir.resolve("link.txt");
351         final Path src = tempDir.resolve("src");
352         final Path hello = src.resolve("hello.txt");
353         final Path run = src.resolve("run.sh");
354 
355         // Create the same structure as the virtual tree.
356         try {
357             Files.createSymbolicLink(link, Paths.get(LINK_CONTENT));
358             Files.createSymbolicLink(linkTxt, Paths.get(LINK_TXT_CONTENT));
359         } catch (final UnsupportedOperationException e) {
360             Assumptions.abort("Symbolic links not supported on this filesystem");
361         }
362         Files.createDirectory(src);
363         Files.write(hello, HELLO_CONTENT);
364         Files.write(run, RUN_CONTENT);
365         Files.setPosixFilePermissions(run, PosixFilePermissions.fromString("rwxr-xr-x"));
366 
367         // Verify individual blob IDs against pre-computed constants.
368         assertArrayEquals(helloId, GitIdentifiers.blobId(md, hello));
369         assertArrayEquals(linkId, GitIdentifiers.blobId(md, link));
370         assertArrayEquals(linkTxtId, GitIdentifiers.blobId(md, linkTxt));
371         assertArrayEquals(runId, GitIdentifiers.blobId(md, run));
372 
373         // Check trees
374         assertArrayEquals(mainTreeId, GitIdentifiers.treeId(md, tempDir));
375         assertArrayEquals(srcTreeId, GitIdentifiers.treeId(md, src));
376     }
377 }