implemented Cohen's Kappa Statistics
git-svn-id: https://svn.d4science.research-infrastructures.eu/gcube/trunk/data-analysis/EcologicalEngine@76846 82a268e6-3cf1-43bd-a215-b396298e98cf
This commit is contained in:
parent
1634249c07
commit
029a5dba24
|
@ -18,7 +18,11 @@ public class MathFunctions {
|
|||
System.out.print(a[i]+" ");
|
||||
}
|
||||
*/
|
||||
System.out.println(" "+roundDecimal(300.23454,2));
|
||||
// System.out.println(" "+roundDecimal(300.23454,2));
|
||||
|
||||
// System.out.println(cohensKappaForDichotomy(20, 5, 10, 15));
|
||||
// System.out.println(cohensKappaForDichotomy(45, 15, 25, 15));
|
||||
System.out.println(cohensKappaForDichotomy(25,35,5,35));
|
||||
}
|
||||
|
||||
//rounds to the xth decimal position
|
||||
|
@ -250,4 +254,47 @@ public class MathFunctions {
|
|||
|
||||
return linearpoints;
|
||||
}
|
||||
|
||||
|
||||
public static double cohensKappaForDichotomy(long NumOf_A1_B1, long NumOf_A1_B0, long NumOf_A0_B1, long NumOf_A0_B0){
|
||||
long T = NumOf_A1_B1+NumOf_A1_B0+NumOf_A0_B1+NumOf_A0_B0;
|
||||
|
||||
double Pra = (double)(NumOf_A1_B1+NumOf_A0_B0)/(double) T ;
|
||||
double Pre1 = (double) (NumOf_A1_B1+NumOf_A1_B0) * (double) (NumOf_A1_B1+NumOf_A0_B1)/(double) (T*T);
|
||||
double Pre2 = (double) (NumOf_A0_B0+NumOf_A0_B1) * (double) (NumOf_A0_B0+NumOf_A1_B0)/(double) (T*T);
|
||||
double Pre = Pre1+Pre2;
|
||||
double Kappa = (Pra-Pre)/(1d-Pre);
|
||||
return roundDecimal(Kappa,3);
|
||||
}
|
||||
|
||||
public static String kappaClassificationLandisKoch(double kappa){
|
||||
if (kappa<0)
|
||||
return "Poor";
|
||||
else if ((kappa>=0)&&(kappa<=0.20))
|
||||
return "Slight";
|
||||
else if ((kappa>=0.21)&&(kappa<=0.40))
|
||||
return "Fair";
|
||||
else if ((kappa>=0.41)&&(kappa<=0.60))
|
||||
return "Moderate";
|
||||
else if ((kappa>=0.61)&&(kappa<=0.80))
|
||||
return "Substantial";
|
||||
else if (kappa>=0.81)
|
||||
return "Almost Perfect";
|
||||
else
|
||||
return "Not Applicable";
|
||||
}
|
||||
|
||||
public static String kappaClassificationFleiss(double kappa){
|
||||
if (kappa<0)
|
||||
return "Poor";
|
||||
else if ((kappa>=0)&&(kappa<=0.40))
|
||||
return "Marginal";
|
||||
else if ((kappa>0.4)&&(kappa<=0.75))
|
||||
return "Good";
|
||||
else if (kappa>0.75)
|
||||
return "Excellent";
|
||||
else
|
||||
return "Not Applicable";
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -1,7 +1,6 @@
|
|||
package org.gcube.dataanalysis.ecoengine.evaluation;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.LinkedHashMap;
|
||||
import java.util.List;
|
||||
|
||||
|
@ -16,17 +15,25 @@ import org.gcube.dataanalysis.ecoengine.datatypes.enumtypes.PrimitiveTypes;
|
|||
import org.gcube.dataanalysis.ecoengine.datatypes.enumtypes.TableTemplates;
|
||||
import org.gcube.dataanalysis.ecoengine.interfaces.DataAnalysis;
|
||||
import org.gcube.dataanalysis.ecoengine.utils.DatabaseFactory;
|
||||
import org.gcube.dataanalysis.ecoengine.utils.Operations;
|
||||
|
||||
public class DiscrepancyAnalysis extends DataAnalysis {
|
||||
|
||||
// static String discrepancyQuery = "select distinct a.%1$s as csquareone,b.%2$s as csquaretwo,a.%3$s as firstprob,b.%4$s as secondprob from %5$s as a inner join %6$s as b on a.%1$s=b.%2$s and (a.%3$s<>b.%4$s)";
|
||||
// static String discrepancyQuery = "select distinct a.%1$s as csquareone,b.%2$s as csquaretwo,a.%3$s as firstprob,b.%4$s as secondprob from (select * from %5$s order by %1$s limit %7$s) as a inner join (select * from %6$s order by %2$s limit %7$s) as b on a.%1$s=b.%2$s and (a.%3$s<>b.%4$s)";
|
||||
//version 3
|
||||
/*
|
||||
static String discrepancyQuery = "select * from (select distinct a.%1$s as csquareone,b.%2$s as csquaretwo,a.%3$s as firstprob,b.%4$s as secondprob from " +
|
||||
"(select %1$s , avg(%3$s) as %3$s from (select distinct * from %5$s order by %1$s limit %7$s) as aa group by %1$s) as a " +
|
||||
"left join " +
|
||||
"(select %2$s , avg(%4$s) as %4$s from (select distinct * from %6$s order by %2$s limit %7$s) as aa group by %2$s) as b " +
|
||||
"on a.%1$s=b.%2$s) as sel where firstprob<>secondprob";
|
||||
*/
|
||||
|
||||
static String discrepancyQuery = "select * from (select distinct a.%1$s as csquareone,b.%2$s as csquaretwo,a.%3$s as firstprob,b.%4$s as secondprob from " +
|
||||
"(select %1$s , avg(%3$s) as %3$s from (select distinct * from %5$s order by %1$s limit %7$s) as aa group by %1$s) as a " +
|
||||
"left join " +
|
||||
"(select %2$s , avg(%4$s) as %4$s from (select distinct * from %6$s order by %2$s limit %7$s) as aa group by %2$s) as b " +
|
||||
"on a.%1$s=b.%2$s) as sel";
|
||||
|
||||
static String getNumberOfElementsQuery = "select count(*) from %1$s";
|
||||
private static int minElements = 100;
|
||||
|
@ -38,10 +45,19 @@ public class DiscrepancyAnalysis extends DataAnalysis {
|
|||
List<Float> errors;
|
||||
double mean;
|
||||
double variance;
|
||||
int numberoferrors;
|
||||
int numberofvectors;
|
||||
double kthreshold;
|
||||
long numberoferrors;
|
||||
long numberofvectors;
|
||||
long numberofcomparisons;
|
||||
float maxerror;
|
||||
String maxdiscrepancyPoint;
|
||||
long numHigher = 0;
|
||||
long numLower = 0;
|
||||
long agreementA1B1=0;
|
||||
long agreementA0B0=0;
|
||||
long agreementA1B0=0;
|
||||
long agreementA0B1=0;
|
||||
|
||||
private LinkedHashMap<String, String> output;
|
||||
|
||||
@Override
|
||||
|
@ -60,6 +76,8 @@ public class DiscrepancyAnalysis extends DataAnalysis {
|
|||
PrimitiveType p6 = new PrimitiveType(Float.class.getName(), null, PrimitiveTypes.NUMBER, "ComparisonThreshold","the comparison threshold","0.1");
|
||||
PrimitiveType p7 = new PrimitiveType(Integer.class.getName(), null, PrimitiveTypes.NUMBER, "MaxSamples","the comparison threshold","10000");
|
||||
|
||||
PrimitiveType p8 = new PrimitiveType(Float.class.getName(), null, PrimitiveTypes.NUMBER, "KThreshold", "Threshold for K-Statistic: over this threshold values will be considered 1 for agreement calculation. Default is 0.5","0.5");
|
||||
|
||||
parameters.add(p1);
|
||||
parameters.add(p2);
|
||||
parameters.add(p3);
|
||||
|
@ -68,6 +86,7 @@ public class DiscrepancyAnalysis extends DataAnalysis {
|
|||
parameters.add(p13);
|
||||
parameters.add(p6);
|
||||
parameters.add(p7);
|
||||
parameters.add(p8);
|
||||
|
||||
DatabaseType.addDefaultDBPars(parameters);
|
||||
return parameters;
|
||||
|
@ -84,6 +103,15 @@ public class DiscrepancyAnalysis extends DataAnalysis {
|
|||
String FirstTable = config.getParam("FirstTable");
|
||||
String SecondTable = config.getParam("SecondTable");
|
||||
String maxSamples = config.getParam("MaxSamples");
|
||||
|
||||
String kthresholdString = config.getParam("KThreshold");
|
||||
kthreshold = 0.5;
|
||||
try{
|
||||
kthreshold = Double.parseDouble(kthresholdString);
|
||||
}catch(Exception e){}
|
||||
|
||||
AnalysisLogger.getLogger().trace("Using Cohen's Kappa Threshold: "+kthreshold);
|
||||
|
||||
int maxCompElements = maxElements;
|
||||
if (maxSamples!=null && maxSamples.length()>0){
|
||||
int maxx = Integer.parseInt(maxSamples);
|
||||
|
@ -104,20 +132,18 @@ public class DiscrepancyAnalysis extends DataAnalysis {
|
|||
output.put("NUMBER_OF_ERRORS", "0");
|
||||
output.put("NUMBER_OF_COMPARISONS", "" + numberofvectors);
|
||||
output.put("ACCURACY", "100.0");
|
||||
output.put("MAXIMUM_ERROR", "-");
|
||||
output.put("MAXIMUM_ERROR", "0");
|
||||
output.put("MAXIMUM_ERROR_POINT", "-");
|
||||
output.put("COHENS_KAPPA", "1");
|
||||
output.put("COHENS_KAPPA_CLASSIFICATION_LANDIS_KOCH", MathFunctions.kappaClassificationLandisKoch(1));
|
||||
output.put("COHENS_KAPPA_CLASSIFICATION_FLEISS", MathFunctions.kappaClassificationFleiss(1));
|
||||
output.put("TREND", "STATIONARY");
|
||||
|
||||
return output;
|
||||
}
|
||||
|
||||
// String query = String.format(discrepancyQuery, FirstTableCsquareColumn, SecondTableCsquareColumn, FirstTableProbabilityColumn, SecondTableProbabilityColumn, FirstTable, SecondTable);
|
||||
// List<Object> nelementsQ = DatabaseFactory.executeSQLQuery(DatabaseUtils.countElementsStatement(FirstTable),connection);
|
||||
// int nelements = Integer.parseInt(""+nelementsQ.get(0));
|
||||
|
||||
// int nelements = Math.min(Operations.calcNumOfRepresentativeElements(nPoints, minElements),maxCompElements);
|
||||
int nelements = nPoints;
|
||||
|
||||
AnalysisLogger.getLogger().trace("Number Of Elements to take: "+nelements);
|
||||
String query = String.format(discrepancyQuery, FirstTableCsquareColumn, SecondTableCsquareColumn, FirstTableProbabilityColumn, SecondTableProbabilityColumn, FirstTable, SecondTable,""+nelements);
|
||||
AnalysisLogger.getLogger().trace("Number Of Elements to take: "+numberofvectors);
|
||||
String query = String.format(discrepancyQuery, FirstTableCsquareColumn, SecondTableCsquareColumn, FirstTableProbabilityColumn, SecondTableProbabilityColumn, FirstTable, SecondTable,""+numberofvectors);
|
||||
|
||||
AnalysisLogger.getLogger().debug("Discrepancy Calculation - Query to perform :" + query);
|
||||
List<Object> takePoints = DatabaseFactory.executeSQLQuery(query, connection);
|
||||
|
@ -129,21 +155,29 @@ public class DiscrepancyAnalysis extends DataAnalysis {
|
|||
threshold = Float.parseFloat(config.getParam("ComparisonThreshold"));
|
||||
analyzeCompareList(takePoints);
|
||||
calcDiscrepancy();
|
||||
|
||||
|
||||
float accuracy = 100;
|
||||
if (processedRecords>0)
|
||||
accuracy = (1 - (float) numberoferrors / (float) numberofcomparisons) * 100;
|
||||
|
||||
if (maxdiscrepancyPoint==null)
|
||||
maxdiscrepancyPoint="-";
|
||||
|
||||
double kappa = MathFunctions.cohensKappaForDichotomy(agreementA1B1, agreementA1B0, agreementA0B1, agreementA0B0);
|
||||
AnalysisLogger.getLogger().debug("Discrepancy Calculation - Calculated Cohen's Kappa:" + kappa);
|
||||
|
||||
output = new LinkedHashMap<String, String>();
|
||||
output.put("MEAN", "" + MathFunctions.roundDecimal(mean,2));
|
||||
output.put("VARIANCE", "" + MathFunctions.roundDecimal(variance,2));
|
||||
output.put("NUMBER_OF_ERRORS", "" + numberoferrors);
|
||||
output.put("NUMBER_OF_COMPARISONS", "" + nelements);
|
||||
|
||||
float accuracy = 100;
|
||||
if (processedRecords>0)
|
||||
accuracy = (1 - (float) numberoferrors / (float) nelements) * 100;
|
||||
|
||||
|
||||
output.put("NUMBER_OF_COMPARISONS", "" + numberofcomparisons);
|
||||
output.put("ACCURACY", "" + MathFunctions.roundDecimal(accuracy,2));
|
||||
output.put("MAXIMUM_ERROR", "" + MathFunctions.roundDecimal(maxerror,2));
|
||||
output.put("MAXIMUM_ERROR_POINT", "" + maxdiscrepancyPoint);
|
||||
output.put("MAXIMUM_ERROR_POINT", maxdiscrepancyPoint);
|
||||
output.put("COHENS_KAPPA", "" + kappa);
|
||||
output.put("COHENS_KAPPA_CLASSIFICATION_LANDIS_KOCH", MathFunctions.kappaClassificationLandisKoch(kappa));
|
||||
output.put("COHENS_KAPPA_CLASSIFICATION_FLEISS", MathFunctions.kappaClassificationFleiss(kappa));
|
||||
|
||||
if (numLower>numHigher)
|
||||
output.put("TREND", "CONTRACTION");
|
||||
else if (numLower<numHigher)
|
||||
|
@ -173,14 +207,16 @@ public class DiscrepancyAnalysis extends DataAnalysis {
|
|||
}
|
||||
|
||||
|
||||
long numHigher = 0;
|
||||
long numLower = 0;
|
||||
|
||||
public void analyzeCompareList(List<Object> points) {
|
||||
errors = new ArrayList<Float>();
|
||||
|
||||
if (points != null) {
|
||||
maxerror = 0;
|
||||
for (Object vector : points) {
|
||||
//number of comparison equals to the aggregation
|
||||
numberofcomparisons++;
|
||||
|
||||
Object[] elements = (Object[]) vector;
|
||||
String csquare = (String) elements[0];
|
||||
float probabilityPoint1 = 0;
|
||||
|
@ -203,6 +239,16 @@ public class DiscrepancyAnalysis extends DataAnalysis {
|
|||
else if (probabilityPoint2<probabilityPoint1)
|
||||
numLower++;
|
||||
}
|
||||
|
||||
//calculations for Cohen's Kappa
|
||||
if ((probabilityPoint1>=kthreshold) && (probabilityPoint2>=kthreshold))
|
||||
agreementA1B1++;
|
||||
else if ((probabilityPoint1<kthreshold) && (probabilityPoint2<kthreshold))
|
||||
agreementA0B0++;
|
||||
if ((probabilityPoint1>=kthreshold) && (probabilityPoint2<kthreshold))
|
||||
agreementA1B0++;
|
||||
if ((probabilityPoint1<kthreshold) && (probabilityPoint2>=kthreshold))
|
||||
agreementA0B1++;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue