|
||||||||||
PREV CLASS NEXT CLASS | FRAMES NO FRAMES | |||||||||
SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD |
java.lang.Object
weka.clusterers.Clusterer
weka.clusterers.HAC
Field Summary | |
static int |
COMPLETE_LINK
|
static int |
GROUP_AVERAGE
|
protected java.util.HashMap |
m_checksumHash
A 'checksum hash' where indices are hashed to the sum of their attribute values |
protected double[] |
m_checksumPerturb
|
protected int[] |
m_clusterAssignments
temporary variable holding cluster assignments |
protected int |
m_clusterID
ID of current cluster |
protected java.util.ArrayList |
m_clusters
holds the clusters |
protected double[][] |
m_distanceMatrix
distance matrix |
protected java.lang.String |
m_dotFileName
Dot file name for dumping graph for tree visualization |
protected java.io.PrintWriter |
m_dotWriter
Writer for the dot file used for dumping graph for tree visualization |
protected java.util.HashMap |
m_instancesHash
instance hash |
protected boolean |
m_isDistanceBased
Is the metric (and hence the algorithm) relying on similarities or distances? |
protected int |
m_linkingType
Default linking method |
protected double |
m_mergeThreshold
The threshold distance beyond which no clusters are merged (except for one - TODO) |
protected Metric |
m_metric
metric used to calculate similarity/distance |
protected boolean |
m_metricBuilt
Has the metric been constructed? A fix for multiple buildClusterer calls |
protected java.lang.String |
m_metricName
|
protected int |
m_numClusters
Number of clusters |
protected int |
m_numCurrentClusters
Number of clusters in the process |
protected int |
m_numSeededClusters
Number of seeded clusters |
protected java.util.Random |
m_randomGen
|
protected int |
m_randomSeed
holds the random Seed, useful for random selection initialization |
protected java.util.HashMap |
m_reverseInstancesHash
reverse instance hash |
protected boolean |
m_seedable
seeding |
protected java.util.HashMap |
m_SeedHash
holds the ([seed instance] -> [clusterLabel of seed instance]) mapping |
protected int |
m_StartingIndexOfTest
starting index of test data in unlabeledData if transductive clustering |
protected boolean |
m_verbose
verbose? |
static int |
SINGLE_LINK
cluster similarity type |
static Tag[] |
TAGS_LINKING
|
Constructor Summary | |
HAC()
empty constructor, required to call using Class.forName |
|
HAC(Metric metric)
|
Method Summary | |
void |
buildClusterer(Instances data)
Cluster given instances. |
void |
buildClusterer(Instances labeledData,
Instances unlabeledData,
int classIndex,
int numClusters)
Clusters unlabeledData and labeledData (with labels removed), using labeledData as seeds |
void |
buildClusterer(Instances labeledData,
Instances unlabeledData,
int classIndex,
int numClusters,
int startingIndexOfTest)
Clusters unlabeledData and labeledData (with labels removed), using labeledData as seeds |
void |
buildClusterer(Instances data,
int num_clusters)
Cluster given instances to form the specified number of clusters. |
protected void |
checkClusters()
|
protected void |
cluster()
Internal method that produces the actual clusters |
protected double |
clusterDistance(Cluster cluster1,
Cluster cluster2)
internal method that returns the distance between two clusters |
int |
clusterInstance(Instance instance)
Clusters an instance. |
protected void |
createDistanceMatrix()
Fill the distance matrix with values using the metric |
protected double |
distance(Instance instance,
Cluster cluster)
internal method that returns the distance between an instance and a cluster |
protected Instances |
filterInstanceDescriptions(Instances instances)
If some of the attributes start with "__", form a separate Instances set with descriptions and filter them out of the argument dataset. |
java.util.ArrayList |
getClusters()
Computes the final clusters from the cluster assignments, for external access |
Instances |
getInstances()
Return training instances |
java.util.ArrayList |
getIntClusters()
Computes the clusters from the cluster assignments |
SelectedTag |
getLinkingType()
Get the linking type |
double |
getMergeThreshold()
Get the merge threshold |
Metric |
getMetric()
Get the distance metric |
int |
getNumClusters()
Return the number of clusters |
java.lang.String[] |
getOptions()
Gets the current settings of Greedy Agglomerative Clustering |
int |
getRandomSeed()
Return the random number seed |
boolean |
getSeedable()
Returns whether seeding is turned on |
java.util.HashMap |
getSeedHash()
returns the SeedHash |
Clusterer |
getThisClusterer()
We always want to implement SemiSupClusterer from a class extending Clusterer. |
boolean |
getVerbose()
get the verbosity level of the clusterer |
protected void |
hashInstances(Instances data)
Create the hashtable from given Instances; keys are numeric indices, values are actual Instances |
protected void |
initClusterAssignments()
Update the clusterAssignments for all points in two clusters that are about to be merged |
protected void |
initConstraints()
Internal method that initializes distances between seed clusters to POSITIVE_INFINITY |
java.util.Enumeration |
listOptions()
Returns an enumeration describing the available options |
static void |
main(java.lang.String[] argv)
|
protected boolean |
matchInstance(Instance instance1,
Instance instance2)
Internal method: check if two instances match on their attribute values |
protected Cluster |
mergeClusters(int cluster1Idx,
int cluster2Idx)
Internal method to merge two clusters and update distances |
protected double |
mergeStep()
Internal method that finds two most similar clusters and merges them |
java.lang.String |
metricName()
Get the distance metric name |
int |
numberOfClusters()
A duplicate function to conform to Clusterer abstract class. |
double |
objectiveFunction()
returns objective function, needed for compatibility with SemiSupClusterer |
void |
printCluster(int i)
Outputs the specified cluster |
void |
printClusters()
Outputs the current clustering |
void |
printIntClusters()
Outputs the current clustering |
static int[] |
randomSubset(int numIdxs,
int maxIdx)
Get an array of random indices out of n possible values. |
void |
resetClusterer()
Reset all values that have been learned |
void |
seedClusterer(java.util.HashMap SeedHash)
Read the seeds from a hashtable, where every key is an instance and every value is: a FastVector of Doubles: [(Double) probInCluster0 ... |
void |
setInstances(Instances instances)
Sets training instances |
void |
setLinkingType(SelectedTag linkingType)
Set the type of clustering |
void |
setMergeThreshold(double threshold)
Set the merge threshold |
void |
setMetric(Metric m)
Set the distance metric |
void |
setNumClusters(int n)
Set the number of clusters to generate |
void |
setOptions(java.lang.String[] options)
Parses a given list of options. |
void |
setRandomSeed(int s)
Set the random number seed |
void |
setSeedable(boolean seedable)
Turn seeding on and off |
void |
setSeedHash(java.util.HashMap seedhash)
Set the m_SeedHash |
void |
setVerbose(boolean verbose)
set the verbosity level of the clusterer |
void |
trainClusterer(Instances instances)
Train the clusterer using specified parameters |
protected void |
unhashClusters()
Assuming m_clusters contains the clusters of indices, convert it to clusters containing actual instances |
Methods inherited from class weka.clusterers.Clusterer |
forName, makeCopies |
Methods inherited from class java.lang.Object |
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait |
Field Detail |
protected int m_numClusters
protected int m_numCurrentClusters
protected int m_clusterID
protected int m_numSeededClusters
protected java.lang.String m_dotFileName
protected java.io.PrintWriter m_dotWriter
protected java.util.ArrayList m_clusters
protected int[] m_clusterAssignments
protected double[][] m_distanceMatrix
public static final int SINGLE_LINK
public static final int COMPLETE_LINK
public static final int GROUP_AVERAGE
public static final Tag[] TAGS_LINKING
protected int m_linkingType
protected int m_StartingIndexOfTest
protected boolean m_seedable
protected java.util.HashMap m_SeedHash
protected java.util.HashMap m_checksumHash
protected double[] m_checksumPerturb
protected int m_randomSeed
protected java.util.Random m_randomGen
protected java.util.HashMap m_instancesHash
protected java.util.HashMap m_reverseInstancesHash
protected double m_mergeThreshold
protected boolean m_verbose
protected Metric m_metric
protected java.lang.String m_metricName
protected boolean m_isDistanceBased
protected boolean m_metricBuilt
Constructor Detail |
public HAC()
public HAC(Metric metric)
Method Detail |
public void setInstances(Instances instances)
public Instances getInstances()
getInstances
in interface SemiSupClusterer
public void setNumClusters(int n)
setNumClusters
in interface SemiSupClusterer
n
- the number of clusters to generate
public void setMergeThreshold(double threshold)
public double getMergeThreshold()
public void setMetric(Metric m)
setMetric
in interface SemiSupClusterer
public Metric getMetric()
public java.lang.String metricName()
public Clusterer getThisClusterer()
getThisClusterer
in interface SemiSupClusterer
public void buildClusterer(Instances data, int num_clusters) throws java.lang.Exception
data
- instances to be clustered
num_clusters
- number of clusters to create
java.lang.Exception
- if something goes wrong.
public void buildClusterer(Instances labeledData, Instances unlabeledData, int classIndex, int numClusters) throws java.lang.Exception
labeledData
- labeled instances to be used as seeds
unlabeledData
- unlabeled instances
classIndex
- attribute index in labeledData which holds class info
numClusters
- number of clusters
java.lang.Exception
- if something goes wrong.
public void buildClusterer(Instances labeledData, Instances unlabeledData, int classIndex, int numClusters, int startingIndexOfTest) throws java.lang.Exception
buildClusterer
in interface SemiSupClusterer
labeledData
- labeled instances to be used as seeds
unlabeledData
- unlabeled instances
classIndex
- attribute index in labeledData which holds class info
numClusters
- number of clusters
startingIndexOfTest
- from where test data starts in unlabeledData, useful if clustering is transductive
java.lang.Exception
- if something goes wrong.
public void buildClusterer(Instances data) throws java.lang.Exception
buildClusterer
in interface SemiSupClusterer
buildClusterer
in class Clusterer
data
- instances to be clustered
java.lang.Exception
- if something goes wrong.
protected Instances filterInstanceDescriptions(Instances instances) throws java.lang.Exception
java.lang.Exception
public void resetClusterer() throws java.lang.Exception
resetClusterer
in interface SemiSupClusterer
java.lang.Exception
public void setSeedHash(java.util.HashMap seedhash)
public void setRandomSeed(int s)
s
- the seed
public int getRandomSeed()
public void setSeedable(boolean seedable)
seedable
- should seeding be done?
public boolean getSeedable()
public void seedClusterer(java.util.HashMap SeedHash)
seedClusterer
in interface SemiSupClusterer
SeedHash
- HashMap of seeding parameters
public java.util.HashMap getSeedHash()
protected void hashInstances(Instances data)
data
- Instances
protected void unhashClusters() throws java.lang.Exception
java.lang.Exception
protected void createDistanceMatrix() throws java.lang.Exception
java.lang.Exception
public void setLinkingType(SelectedTag linkingType)
public SelectedTag getLinkingType()
protected void initConstraints()
protected void cluster() throws java.lang.Exception
java.lang.Exception
protected double mergeStep() throws java.lang.Exception
java.lang.Exception
public java.util.ArrayList getIntClusters() throws java.lang.Exception
java.lang.Exception
- if clusters could not be computed successfully
public java.util.ArrayList getClusters() throws java.lang.Exception
getClusters
in interface SemiSupClusterer
java.lang.Exception
- if clusters could not be computed successfully
protected double clusterDistance(Cluster cluster1, Cluster cluster2)
protected void checkClusters()
protected Cluster mergeClusters(int cluster1Idx, int cluster2Idx) throws java.lang.Exception
java.lang.Exception
protected void initClusterAssignments()
public void printClusters() throws java.lang.Exception
java.lang.Exception
- if something goes wrong
public void printCluster(int i) throws java.lang.Exception
java.lang.Exception
- if something goes wrong
public void printIntClusters() throws java.lang.Exception
java.lang.Exception
- if something goes wrong
public int clusterInstance(Instance instance) throws java.lang.Exception
clusterInstance
in class Clusterer
instance
- the instance to cluster.
java.lang.Exception
- if something goes wrong.
protected boolean matchInstance(Instance instance1, Instance instance2)
protected double distance(Instance instance, Cluster cluster) throws java.lang.Exception
java.lang.Exception
public void setVerbose(boolean verbose)
setVerbose
in interface SemiSupClusterer
verbose
- messages on (true) or off (false)
public boolean getVerbose()
public java.util.Enumeration listOptions()
listOptions
in interface OptionHandler
public void setOptions(java.lang.String[] options) throws java.lang.Exception
-A <0-100>
Acuity.
-C <0-100>
Cutoff.
setOptions
in interface OptionHandler
options
- the list of options as an array of strings
java.lang.Exception
- if an option is not supported
public java.lang.String[] getOptions()
getOptions
in interface OptionHandler
public void trainClusterer(Instances instances) throws java.lang.Exception
trainClusterer
in interface SemiSupClusterer
instances
- Instances to be used for training
java.lang.Exception
public double objectiveFunction()
objectiveFunction
in interface SemiSupClusterer
public int getNumClusters()
getNumClusters
in interface SemiSupClusterer
public int numberOfClusters()
numberOfClusters
in class Clusterer
public static int[] randomSubset(int numIdxs, int maxIdx)
maxIdx
- the maximum index of the set
numIdxs
- number of indexes to return
public static void main(java.lang.String[] argv)
|
||||||||||
PREV CLASS NEXT CLASS | FRAMES NO FRAMES | |||||||||
SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD |