View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.commons.math4.legacy.ml.clustering;
18  
19  import java.util.Arrays;
20  import java.util.List;
21  
22  import org.apache.commons.math4.legacy.exception.MathIllegalArgumentException;
23  import org.apache.commons.math4.legacy.exception.NullArgumentException;
24  import org.junit.Assert;
25  import org.junit.Test;
26  
27  public class DBSCANClustererTest {
28  
29      @Test
30      public void testCluster() {
31          // Test data generated using: http://people.cs.nctu.edu.tw/~rsliang/dbscan/testdatagen.html
32          final DoublePoint[] points = new DoublePoint[] {
33                  new DoublePoint(new double[] { 83.08303244924173, 58.83387754182331 }),
34                  new DoublePoint(new double[] { 45.05445510940626, 23.469642649637535 }),
35                  new DoublePoint(new double[] { 14.96417921432294, 69.0264096390456 }),
36                  new DoublePoint(new double[] { 73.53189604333602, 34.896145021310076 }),
37                  new DoublePoint(new double[] { 73.28498173551634, 33.96860806993209 }),
38                  new DoublePoint(new double[] { 73.45828098873608, 33.92584423092194 }),
39                  new DoublePoint(new double[] { 73.9657889183145, 35.73191006924026 }),
40                  new DoublePoint(new double[] { 74.0074097183533, 36.81735596177168 }),
41                  new DoublePoint(new double[] { 73.41247541410848, 34.27314856695011 }),
42                  new DoublePoint(new double[] { 73.9156256353017, 36.83206791547127 }),
43                  new DoublePoint(new double[] { 74.81499205809087, 37.15682749846019 }),
44                  new DoublePoint(new double[] { 74.03144880081527, 37.57399178552441 }),
45                  new DoublePoint(new double[] { 74.51870941207744, 38.674258946906775 }),
46                  new DoublePoint(new double[] { 74.50754595105536, 35.58903978415765 }),
47                  new DoublePoint(new double[] { 74.51322752749547, 36.030572259100154 }),
48                  new DoublePoint(new double[] { 59.27900996617973, 46.41091720294207 }),
49                  new DoublePoint(new double[] { 59.73744793841615, 46.20015558367595 }),
50                  new DoublePoint(new double[] { 58.81134076672606, 45.71150126331486 }),
51                  new DoublePoint(new double[] { 58.52225539437495, 47.416083617601544 }),
52                  new DoublePoint(new double[] { 58.218626647023484, 47.36228902172297 }),
53                  new DoublePoint(new double[] { 60.27139669447206, 46.606106348801404 }),
54                  new DoublePoint(new double[] { 60.894962462363765, 46.976924697402865 }),
55                  new DoublePoint(new double[] { 62.29048673878424, 47.66970563563518 }),
56                  new DoublePoint(new double[] { 61.03857608977705, 46.212924720020965 }),
57                  new DoublePoint(new double[] { 60.16916214139201, 45.18193661351688 }),
58                  new DoublePoint(new double[] { 59.90036905976012, 47.555364347063005 }),
59                  new DoublePoint(new double[] { 62.33003634144552, 47.83941489877179 }),
60                  new DoublePoint(new double[] { 57.86035536718555, 47.31117930193432 }),
61                  new DoublePoint(new double[] { 58.13715479685925, 48.985960494028404 }),
62                  new DoublePoint(new double[] { 56.131923963548616, 46.8508904252667 }),
63                  new DoublePoint(new double[] { 55.976329887053, 47.46384037658572 }),
64                  new DoublePoint(new double[] { 56.23245975235477, 47.940035191131756 }),
65                  new DoublePoint(new double[] { 58.51687048212625, 46.622885352699086 }),
66                  new DoublePoint(new double[] { 57.85411081905477, 45.95394361577928 }),
67                  new DoublePoint(new double[] { 56.445776311447844, 45.162093662656844 }),
68                  new DoublePoint(new double[] { 57.36691949656233, 47.50097194337286 }),
69                  new DoublePoint(new double[] { 58.243626387557015, 46.114052729681134 }),
70                  new DoublePoint(new double[] { 56.27224595635198, 44.799080066150054 }),
71                  new DoublePoint(new double[] { 57.606924816500396, 46.94291057763621 }),
72                  new DoublePoint(new double[] { 30.18714230041951, 13.877149710431695 }),
73                  new DoublePoint(new double[] { 30.449448810657486, 13.490778346545994 }),
74                  new DoublePoint(new double[] { 30.295018390286714, 13.264889000216499 }),
75                  new DoublePoint(new double[] { 30.160201832884923, 11.89278262341395 }),
76                  new DoublePoint(new double[] { 31.341509791789576, 15.282655921997502 }),
77                  new DoublePoint(new double[] { 31.68601630325429, 14.756873246748 }),
78                  new DoublePoint(new double[] { 29.325963742565364, 12.097849250072613 }),
79                  new DoublePoint(new double[] { 29.54820742388256, 13.613295356975868 }),
80                  new DoublePoint(new double[] { 28.79359608888626, 10.36352064087987 }),
81                  new DoublePoint(new double[] { 31.01284597092308, 12.788479208014905 }),
82                  new DoublePoint(new double[] { 27.58509216737002, 11.47570110601373 }),
83                  new DoublePoint(new double[] { 28.593799561727792, 10.780998203903437 }),
84                  new DoublePoint(new double[] { 31.356105766724795, 15.080316198524088 }),
85                  new DoublePoint(new double[] { 31.25948503636755, 13.674329151166603 }),
86                  new DoublePoint(new double[] { 32.31590076372959, 14.95261758659035 }),
87                  new DoublePoint(new double[] { 30.460413702763617, 15.88402809202671 }),
88                  new DoublePoint(new double[] { 32.56178203062154, 14.586076852632686 }),
89                  new DoublePoint(new double[] { 32.76138648530468, 16.239837325178087 }),
90                  new DoublePoint(new double[] { 30.1829453331884, 14.709592407103628 }),
91                  new DoublePoint(new double[] { 29.55088173528202, 15.0651247180067 }),
92                  new DoublePoint(new double[] { 29.004155302187428, 14.089665298582986 }),
93                  new DoublePoint(new double[] { 29.339624439831823, 13.29096065578051 }),
94                  new DoublePoint(new double[] { 30.997460327576846, 14.551914158277214 }),
95                  new DoublePoint(new double[] { 30.66784126125276, 16.269703107886016 })
96          };
97  
98          final DBSCANClusterer<DoublePoint> transformer =
99                  new DBSCANClusterer<>(2.0, 5);
100         final List<Cluster<DoublePoint>> clusters = transformer.cluster(Arrays.asList(points));
101 
102         final List<DoublePoint> clusterOne =
103                 Arrays.asList(points[3], points[4], points[5], points[6], points[7], points[8], points[9], points[10],
104                               points[11], points[12], points[13], points[14]);
105         final List<DoublePoint> clusterTwo =
106                 Arrays.asList(points[15], points[16], points[17], points[18], points[19], points[20], points[21],
107                               points[22], points[23], points[24], points[25], points[26], points[27], points[28],
108                               points[29], points[30], points[31], points[32], points[33], points[34], points[35],
109                               points[36], points[37], points[38]);
110         final List<DoublePoint> clusterThree =
111                 Arrays.asList(points[39], points[40], points[41], points[42], points[43], points[44], points[45],
112                               points[46], points[47], points[48], points[49], points[50], points[51], points[52],
113                               points[53], points[54], points[55], points[56], points[57], points[58], points[59],
114                               points[60], points[61], points[62]);
115 
116         boolean cluster1Found = false;
117         boolean cluster2Found = false;
118         boolean cluster3Found = false;
119         Assert.assertEquals(3, clusters.size());
120         for (final Cluster<DoublePoint> cluster : clusters) {
121             if (cluster.getPoints().containsAll(clusterOne)) {
122                 cluster1Found = true;
123             }
124             if (cluster.getPoints().containsAll(clusterTwo)) {
125                 cluster2Found = true;
126             }
127             if (cluster.getPoints().containsAll(clusterThree)) {
128                 cluster3Found = true;
129             }
130         }
131         Assert.assertTrue(cluster1Found);
132         Assert.assertTrue(cluster2Found);
133         Assert.assertTrue(cluster3Found);
134     }
135 
136     @Test
137     public void testSingleLink() {
138         final DoublePoint[] points = {
139                 new DoublePoint(new int[] {10, 10}), // A
140                 new DoublePoint(new int[] {12, 9}),
141                 new DoublePoint(new int[] {10, 8}),
142                 new DoublePoint(new int[] {8, 8}),
143                 new DoublePoint(new int[] {8, 6}),
144                 new DoublePoint(new int[] {7, 7}),
145                 new DoublePoint(new int[] {5, 6}),  // B
146                 new DoublePoint(new int[] {14, 8}), // C
147                 new DoublePoint(new int[] {7, 15}), // N - Noise, should not be present
148                 new DoublePoint(new int[] {17, 8}), // D - single-link connected to C should not be present
149         };
150 
151         final DBSCANClusterer<DoublePoint> clusterer = new DBSCANClusterer<>(3, 3);
152         List<Cluster<DoublePoint>> clusters = clusterer.cluster(Arrays.asList(points));
153 
154         Assert.assertEquals(1, clusters.size());
155 
156         final List<DoublePoint> clusterOne =
157                 Arrays.asList(points[0], points[1], points[2], points[3], points[4], points[5], points[6], points[7]);
158         Assert.assertTrue(clusters.get(0).getPoints().containsAll(clusterOne));
159     }
160 
161     @Test
162     public void testGetEps() {
163         final DBSCANClusterer<DoublePoint> transformer = new DBSCANClusterer<>(2.0, 5);
164         Assert.assertEquals(2.0, transformer.getEps(), 0.0);
165     }
166 
167     @Test
168     public void testGetMinPts() {
169         final DBSCANClusterer<DoublePoint> transformer = new DBSCANClusterer<>(2.0, 5);
170         Assert.assertEquals(5, transformer.getMinPts());
171     }
172 
173     @Test(expected = MathIllegalArgumentException.class)
174     public void testNegativeEps() {
175         new DBSCANClusterer<>(-2.0, 5);
176     }
177 
178     @Test(expected = MathIllegalArgumentException.class)
179     public void testNegativeMinPts() {
180         new DBSCANClusterer<>(2.0, -5);
181     }
182 
183     @Test(expected = NullArgumentException.class)
184     public void testNullDataset() {
185         DBSCANClusterer<DoublePoint> clusterer = new DBSCANClusterer<>(2.0, 5);
186         clusterer.cluster(null);
187     }
188 }