weka.clusterers
Class SemiSupClustererEvaluation

java.lang.Object
  extended by weka.clusterers.ClusterEvaluation
      extended by weka.clusterers.SemiSupClustererEvaluation

public class SemiSupClustererEvaluation
extends ClusterEvaluation

Class for evaluating clustering models - extends ClusterEvaluation.java

Implements different clustering evaluation metrics


Field Summary
protected  boolean m_ClassIsNominal
          Is the class nominal or numeric?
protected  java.lang.String[] m_ClassNames
          The names of the classes.
protected  double[][] m_ConfusionMatrix
          Array for storing the confusion matrix.
protected  double m_Entropy
          Entropy of the clustering
protected  int m_goodPairs
           
protected  double m_KLDivergence
          KL Divergence of the clustering
protected  Instances m_LabeledTrain
          All labeled training instances
protected  java.util.ArrayList m_labeledTrainPairs
          Training pairs
protected  double m_MIMetric
          MI metric of the clustering
protected  int m_NumClasses
          The number of underlying classes
protected  int m_NumClusters
          The number of produced clusters
protected  double m_Objective
          Objective function of the clustering
protected  double m_Purity
          Purity of the clustering
protected  Instances m_Test
          All test instances
protected  double m_TotalCost
          The total cost of predictions (includes instance weights)
protected  int m_totalPairs
          If the class is not nominal, the confusion matrix is not needed; pair counts are computed directly
protected  int m_trueGoodPairs
           
protected  Instances m_UnlabeledTrain
          All unlabeled training instances
protected  double m_WeightTestCorrect
          The weight of all correctly categorized test instances.
protected  double m_WeightTestIncorrect
          The weight of all incorrectly categorized test instances.
protected  double m_WeightTestUnclassified
          The weight of all uncategorized test instances.
protected  double m_WeightTestWithClass
          The weight of test instances that had a class assigned to them.
 
Constructor Summary
SemiSupClustererEvaluation(java.util.ArrayList labeledTrainPairs, Instances test, int numClasses, int numClusters)
           
SemiSupClustererEvaluation(Instances test, int numClasses, int numClusters)
           
 
Method Summary
 double entropy()
           
 void evaluateModel(Clusterer clusterer, Instances testInstances, Instances unlabeledTest)
          Evaluates the semi-supervised clusterer on a given set of test instances
 void evaluateModelOnce(Clusterer clusterer, Instance testWithoutLabel, int classValue)
          Evaluates the semi-supervised clusterer on a given test instance
 java.lang.String globalInfo()
          Returns a string describing this evaluator
 double klDivergence()
           
protected  double[] makeDistribution(int predictedCluster)
          Converts a single prediction into a probability distribution with all zero probabilities except the predicted value, which has probability 1.0.
 double mutualInformation()
           
 double numDiffClassPairs()
           
 double numSameClassPairs()
           
 double objectiveFunction()
           
 double pairwiseFMeasure()
           
 double pairwisePrecision()
           
 double pairwiseRecall()
           
 double purity()
           
 java.lang.String toMatrixString(java.lang.String title)
          Outputs the performance statistics as a classification confusion matrix.
 java.lang.String toSummaryString()
           
protected  void updateStatsForClusterer(double[] distrib, int classValue)
          Updates all the statistics about a clusterer's performance for the current test instance.
 
Methods inherited from class weka.clusterers.ClusterEvaluation
clusterResultsToString, crossValidateModel, evaluateClusterer, evaluateClusterer, getClassesToClusters, getClusterAssignments, getNumClusters, main, setClusterer, setDoXval, setFolds, setSeed
 
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
 

Field Detail

m_Purity

protected double m_Purity
Purity of the clustering


m_Entropy

protected double m_Entropy
Entropy of the clustering


m_Objective

protected double m_Objective
Objective function of the clustering


m_MIMetric

protected double m_MIMetric
MI metric of the clustering


m_KLDivergence

protected double m_KLDivergence
KL Divergence of the clustering


m_NumClasses

protected int m_NumClasses
The number of underlying classes


m_NumClusters

protected int m_NumClusters
The number of produced clusters


m_LabeledTrain

protected Instances m_LabeledTrain
All labeled training instances


m_UnlabeledTrain

protected Instances m_UnlabeledTrain
All unlabeled training instances


m_Test

protected Instances m_Test
All test instances


m_labeledTrainPairs

protected java.util.ArrayList m_labeledTrainPairs
Training pairs


m_WeightTestIncorrect

protected double m_WeightTestIncorrect
The weight of all incorrectly categorized test instances.


m_WeightTestCorrect

protected double m_WeightTestCorrect
The weight of all correctly categorized test instances.


m_WeightTestUnclassified

protected double m_WeightTestUnclassified
The weight of all uncategorized test instances.


m_WeightTestWithClass

protected double m_WeightTestWithClass
The weight of test instances that had a class assigned to them.


m_ConfusionMatrix

protected double[][] m_ConfusionMatrix
Array for storing the confusion matrix.


m_ClassNames

protected java.lang.String[] m_ClassNames
The names of the classes.


m_ClassIsNominal

protected boolean m_ClassIsNominal
Is the class nominal or numeric?


m_totalPairs

protected int m_totalPairs
If the class is not nominal, the confusion matrix is not needed; pair counts are computed directly


m_goodPairs

protected int m_goodPairs

m_trueGoodPairs

protected int m_trueGoodPairs

m_TotalCost

protected double m_TotalCost
The total cost of predictions (includes instance weights)

Constructor Detail

SemiSupClustererEvaluation

public SemiSupClustererEvaluation(Instances test,
                                  int numClasses,
                                  int numClusters)

SemiSupClustererEvaluation

public SemiSupClustererEvaluation(java.util.ArrayList labeledTrainPairs,
                                  Instances test,
                                  int numClasses,
                                  int numClusters)
Method Detail

toSummaryString

public java.lang.String toSummaryString()

globalInfo

public java.lang.String globalInfo()
Returns a string describing this evaluator

Returns:
a description of the evaluator suitable for displaying in the explorer/experimenter gui

evaluateModel

public void evaluateModel(Clusterer clusterer,
                          Instances testInstances,
                          Instances unlabeledTest)
                   throws java.lang.Exception
Evaluates the semi-supervised clusterer on a given set of test instances

Parameters:
clusterer - semi-supervised clusterer
testInstances - set of test instances for evaluation
Throws:
java.lang.Exception - if model could not be evaluated successfully

evaluateModelOnce

public void evaluateModelOnce(Clusterer clusterer,
                              Instance testWithoutLabel,
                              int classValue)
                       throws java.lang.Exception
Evaluates the semi-supervised clusterer on a given test instance

Parameters:
clusterer - semi-supervised clusterer
Throws:
java.lang.Exception - if model could not be evaluated successfully

makeDistribution

protected double[] makeDistribution(int predictedCluster)
Converts a single prediction into a probability distribution with all zero probabilities except the predicted value, which has probability 1.0.

Returns:
the probability distribution

updateStatsForClusterer

protected void updateStatsForClusterer(double[] distrib,
                                       int classValue)
Updates all the statistics about a clusterer's performance for the current test instance.

Parameters:
distrib - the probabilities assigned to each class
Throws:
java.lang.Exception - if the class of the instance is not set

objectiveFunction

public final double objectiveFunction()

purity

public final double purity()

entropy

public final double entropy()

klDivergence

public final double klDivergence()

mutualInformation

public final double mutualInformation()

toMatrixString

public java.lang.String toMatrixString(java.lang.String title)
                                throws java.lang.Exception
Outputs the performance statistics as a classification confusion matrix. For each class value, shows the distribution of predicted class values.

Parameters:
title - the title for the confusion matrix
Returns:
the confusion matrix as a String
Throws:
java.lang.Exception - if the class is numeric

pairwisePrecision

public final double pairwisePrecision()

pairwiseRecall

public final double pairwiseRecall()

pairwiseFMeasure

public final double pairwiseFMeasure()

numSameClassPairs

public final double numSameClassPairs()

numDiffClassPairs

public final double numDiffClassPairs()