/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.commons.functor.aggregator.functions;

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

import org.apache.commons.functor.Function;

/**
 * Aggregator function to be used with subclasses of
 * {@link org.apache.commons.functor.aggregator.AbstractListBackedAggregator}
 * which retrieves the <a
 * href="http://en.wikipedia.org/wiki/Percentile">percentile</a> value for a
 * given percentile. The list is sorted in ascending order and the item at the
 * zero-based index <code>floor((P / 100) * (N - 1))</code> is returned, where
 * P is the requested percentile and N is the number of items in the list.
 * Consequently a percentile of 0 yields the smallest item and a percentile of
 * 100 yields the largest one.
 */
public class DoublePercentileAggregatorFunction implements Function<List<Double>, Double> {
    /** A percentile goes from 0 to 100% and that's it. */
    private static final double MAX_PERCENTAGE = 100.0;

    /**
     * Percentile value to calculate. {@code 0 <= percentile <= 100}
     */
    private final double percentile;

    /**
     * Flag to indicate whether we are going to operate on a copy of the list
     * given or not. In order to compute the percentile, we need to sort the
     * list first (and then choose the item based on the formula given in the
     * class description). This function offers 2 ways of doing the sorting:
     * <ul>
     * <li>by sorting (modifying) the original list (<code>useCopy=false</code>)
     * </li>
     * <li>by operating on a copy of the original list and leaving the original
     * untouched (<code>useCopy=true</code>)</li>
     * </ul>
     * NOTE: While using a copy ensures the original list is untouched, it does
     * mean we are creating a temporary list for the purpose of this computation
     * so it will have an impact on memory!
     */
    private final boolean useCopy;

    /**
     * Similar to {@link #DoublePercentileAggregatorFunction(double, boolean)
     * DoublePercentileAggregatorFunction(percentile,true)}, i.e. the list
     * passed to {@link #evaluate(List)} is left untouched.
     *
     * @param percentile
     *            Percentile this function will return the value for
     * @throws IllegalArgumentException
     *             if <code>percentile</code> is NaN or outside the range
     *             0 to 100 (inclusive)
     */
    public DoublePercentileAggregatorFunction(double percentile) {
        this(percentile, true);
    }

    /**
     * Initializes the function with the given percentile and decides whether
     * the function will modify the original list or not.
     *
     * @param percentile
     *            Percentile this function will return the value for
     * @param useCopy
     *            If set to true, this instance will operate on a copy of the
     *            list and the original list will not be modified; if false,
     *            the original list is sorted in place and will contain the
     *            data in sorted order afterwards
     * @throws IllegalArgumentException
     *             if <code>percentile</code> is NaN or outside the range
     *             0 to 100 (inclusive)
     */
    public DoublePercentileAggregatorFunction(double percentile, boolean useCopy) {
        // NaN must be rejected explicitly: every ordered comparison involving
        // NaN is false, so the range test alone would let it through.
        if (Double.isNaN(percentile) || percentile < 0.0 || percentile > MAX_PERCENTAGE) {
            throw new IllegalArgumentException("Invalid value for percentile: " + percentile);
        }
        this.percentile = percentile;
        this.useCopy = useCopy;
    }

    /**
     * Used internally to compute the rank (zero-based index) of the item in
     * the sorted list for the requested percentile. This is invoked internally
     * from {@link #evaluate(List)}.
     *
     * @param data
     *            List containing data. This cannot be <code>null</code> (throws
     *            <code>NullPointerException</code>). Only the size of the list
     *            is used. For an empty list no exception is thrown here, but
     *            the value returned is not a usable index.
     * @return Index of the item for the requested percentile, that is
     *         <code>floor(percentile * (size - 1) / 100)</code>
     * @see #getPercentile()
     */
    final int computeRank(List<Double> data) {
        int maxRank = data.size() - 1;
        return (int) Math.floor((percentile * maxRank) / MAX_PERCENTAGE);
    }

    /**
     * Sorts the list and picks the item found at the rank of the requested
     * percentile. Whether the original list or a copy of it is sorted depends
     * on {@link #isUseCopy()}.
     *
     * @param data
     *            List to compute the percentile for. When
     *            {@link #isUseCopy()} is <code>false</code> this list is
     *            sorted in place, so it has to be modifiable.
     * @return percentile of the given list or null if list is <code>null</code>
     *         or empty (zero size).
     */
    public Double evaluate(List<Double> data) {
        if (data == null || data.isEmpty()) {
            return null;
        }
        // Sort either a private copy (original untouched) or the list itself.
        List<Double> sorted = useCopy ? new ArrayList<Double>(data) : data;
        Collections.sort(sorted);
        return sorted.get(computeRank(sorted));
    }

    /**
     * Getter for {@link #percentile}.
     *
     * @return Retrieves the percentile this instance will return
     */
    public double getPercentile() {
        return percentile;
    }

    /**
     * Does this instance operate on a copy of the passed in list or on the
     * list itself?
     *
     * @return If true, this instance will operate on a copy of the list and
     *         leave the list passed in untouched; if false, it will sort the
     *         list passed in and then use it to compute the percentile
     * @see DoublePercentileAggregatorFunction#DoublePercentileAggregatorFunction(double,
     *      boolean)
     */
    public boolean isUseCopy() {
        return useCopy;
    }

    /**
     * Returns the fully qualified name of this class.
     *
     * @return Name of this class
     */
    @Override
    public String toString() {
        return DoublePercentileAggregatorFunction.class.getName();
    }
}