weka.classifiers.sparse
Class SVMlight

java.lang.Object
  extended by weka.classifiers.Classifier
      extended by weka.classifiers.DistributionClassifier
          extended by weka.classifiers.sparse.SVMlight
All Implemented Interfaces:
java.lang.Cloneable, OptionHandler, java.io.Serializable

public class SVMlight
extends DistributionClassifier
implements OptionHandler

A wrapper for the SVMlight package by Thorsten Joachims. For more information, see

http://www.cs.cornell.edu/People/tj/svm_light

Valid options are listed under setOptions(java.lang.String[]).

See Also:
Serialized Form

Field Summary
static int KERNEL_LINEAR
          Kernel type
static int KERNEL_POLYNOMIAL
           
static int KERNEL_RBF
           
static int KERNEL_SIGMOID_TANH
           
protected  boolean m_autoBounds
           
protected  boolean m_biased
          Use biased hyperplane (i.e. x*w+b>0) instead of unbiased hyperplane (i.e. x*w>0)
protected  java.lang.String m_binPath
          Path to the directory where SVM-light executables are located
protected  boolean m_bufferedMode
          Is classification done via temporary files or via a buffer?
protected  double m_C
          trade-off between training error and margin (default 0 corresponds to [avg. x*x]^-1)
protected  double m_c1
          parameter c in sigmoid/poly kernel
protected  double m_costFactor
          Cost: cost-factor, by which training errors on positive examples outweigh errors on negative examples
protected  int m_d
          Parameter d in polynomial kernel
protected  boolean m_debug
          Output debugging information
protected  double m_gamma
          Parameter gamma in rbf kernel
protected  int m_kernelType
           
protected  double m_maxMargin
          SVM-light predictions are positive or negative margins; to convert to a distribution we need min/max margin values...
protected  double m_minMargin
           
protected  int m_mode
           
protected  java.lang.String m_modelFilename
           
protected  java.lang.String m_modelFilenameBase
          Name of the file where a model will be temporarily created
protected  java.lang.String m_predictionFilename
           
protected  java.lang.String m_predictionFilenameBase
          Name of the file where predictions will be temporarily stored unless buffered IO is used
protected  java.io.BufferedReader m_procReader
           
protected  java.io.BufferedWriter m_procWriter
           
protected  boolean m_removeInconsistentExamples
          remove inconsistent training examples and retrain
protected  double m_s
          Parameter s in sigmoid/polynomial kernel
protected  boolean m_svmTrained
          Has the SVM been trained
protected  java.io.File m_tempDirFile
           
protected  java.lang.String m_tempDirPath
          Path to the directory where temporary files will be stored
protected  java.lang.String m_testFilename
           
protected  java.lang.String m_testFilenameBase
          Name of the temporary file where a test instance is dumped if buffered IO is not used
protected  Instances m_train
          The training instances used for classification.
protected  java.lang.String m_trainFilename
           
protected  java.lang.String m_trainFilenameBase
          Name of the temporary file where training data will be dumped temporarily
protected  int m_verbosityLevel
          verbosity level
protected  double m_width
          Epsilon width of tube for regression
static int SVM_MODE_CLASSIFICATION
          SVM-light can work in classification, regression and preference ranking modes
static int SVM_MODE_PREFERENCE_RANKING
           
static int SVM_MODE_REGRESSION
           
static Tag[] TAGS_KERNEL_TYPE
           
static Tag[] TAGS_SVM_MODE
           
 
Constructor Summary
SVMlight()
          A default constructor
 
Method Summary
 void buildClassifier(Instances instances)
          Generates the classifier.
protected  double classifySVMlight(Instance instance)
          Launch an SVM-light process and classify a given instance
protected  void cleanupIO()
          The buffered version of SVM-light needs to release some I/O resources before exiting
static java.lang.String concatStringArray(java.lang.String[] strings)
          A little helper to create a single String from an array of Strings
 double[] distributionForInstance(Instance instance)
          Calculates the class membership probabilities for the given test instance.
protected  void dumpInstance(Instance instance, java.io.File file)
          Dump a single instance into a file in SVM-light format
protected  void dumpTrainingData(Instances instances)
          Dump training instances into a file in SVM-light format
protected  void finalize()
          Take care of closing the SVM-light process before the object is destroyed
 boolean getAutoBounds()
          Get whether min/max margins are determined automatically
 boolean getBiased()
          Get whether the hyperplane is biased (i.e. x*w+b>0) instead of unbiased hyperplane (i.e. x*w>0)
 java.lang.String getBinPath()
          Get the path for the binaries
 boolean getBufferedMode()
          See whether SVM-light is operating via in/out buffers or via temporary files
 double getC()
          Get the trade-off between training error and margin (default 0 corresponds to [avg. x*x]^-1)
 double getC1()
          Get parameter c in sigmoid/poly kernel
 double getCostFactor()
          Get cost-factor, by which training errors on positive examples outweigh errors on negative examples
 int getD()
          Get parameter d in polynomial kernel
 boolean getDebug()
          See whether debugging output is on/off
 double getGamma()
          Get parameter gamma in rbf kernel
 SelectedTag getKernelType()
          Get the SVM-light kernel type
 double getMaxMargin()
          Get the maxMargin that an SVM can return
 double getMinMargin()
          Get the minMargin that an SVM can return
 SelectedTag getMode()
          return the SVM-light mode
 java.lang.String[] getOptions()
          Gets the current settings
 boolean getRemoveInconsistentExamples()
          Get whether the inconsistent examples are removed and retraining follows
 double getS()
          Get parameter s in sigmoid/polynomial kernel
 java.lang.String getTempDirPath()
          Get the path for the temporary files
 int getVerbosityLevel()
          Get verbosity level, can be anything between 0 and 3
 double getWidth()
          Get the epsilon width of tube for regression
 java.util.Enumeration listOptions()
          Returns an enumeration describing the available options.
static void main(java.lang.String[] argv)
          Main method for testing this class.
protected  double readPrediction(java.io.File file)
          Read the prediction of SVM-light
 void setAutoBounds(boolean autoBounds)
          Set whether min/max margins are determined automatically
 void setBiased(boolean biased)
          Set whether the hyperplane is biased (i.e. x*w+b>0) instead of unbiased hyperplane (i.e. x*w>0)
 void setBinPath(java.lang.String binPath)
          Set the path for the binary files
protected  void setBounds(Instances data)
          Set the bounds using "extreme" training examples - TODO!
 void setBufferedMode(boolean bufferedMode)
          Set SVM-light to operate via in/out buffers or via temporary files
 void setC(double C)
          Set the trade-off between training error and margin (default 0 corresponds to [avg. x*x]^-1)
 void setC1(double c1)
          Set parameter c in sigmoid/poly kernel
 void setCostFactor(double costFactor)
          Set cost-factor, by which training errors on positive examples outweigh errors on negative examples
 void setD(int d)
          Set parameter d in polynomial kernel
 void setDebug(boolean debug)
          Turn debugging output on/off
 void setGamma(double gamma)
          Set parameter gamma in rbf kernel
 void setKernelType(SelectedTag kernelType)
          Set the kernel type for SVM-light
 void setMaxMargin(double maxMargin)
          Set the maxMargin that an SVM can return
 void setMinMargin(double minMargin)
          Set the minMargin that an SVM can return
 void setMode(SelectedTag mode)
          Set the mode of the SVM
 void setOptions(java.lang.String[] options)
          Parses a given list of options.
 void setRemoveInconsistentExamples(boolean removeInconsistentExamples)
          Set whether the inconsistent examples are removed and retraining follows
 void setS(double s)
          Set parameter s in sigmoid/polynomial kernel
 void setTempDirPath(java.lang.String tempDirPath)
          Set the path for the temporary files
 void setVerbosityLevel(int verbosity)
          Set verbosity level, can be anything between 0 and 3
 void setWidth(double width)
          Set the epsilon width of tube for regression
 java.lang.String toString()
          Returns a description of this classifier.
 boolean trained()
          Check whether the SVM has been trained
protected  void trainSVMlight()
          Launch an SVM-light process assuming that the training data has been dumped
 
Methods inherited from class weka.classifiers.DistributionClassifier
calculateEntropy, calculateLabeledInstanceMargin, calculateMargin, classifyInstance
 
Methods inherited from class weka.classifiers.Classifier
forName, makeCopies
 
Methods inherited from class java.lang.Object
clone, equals, getClass, hashCode, notify, notifyAll, wait, wait, wait
 

Field Detail

m_train

protected Instances m_train
The training instances used for classification.


m_svmTrained

protected boolean m_svmTrained
Has the SVM been trained


m_debug

protected boolean m_debug
Output debugging information


m_binPath

protected java.lang.String m_binPath
Path to the directory where SVM-light executables are located


m_tempDirPath

protected java.lang.String m_tempDirPath
Path to the directory where temporary files will be stored


m_tempDirFile

protected java.io.File m_tempDirFile

m_trainFilenameBase

protected java.lang.String m_trainFilenameBase
Name of the temporary file where training data will be dumped temporarily


m_trainFilename

protected java.lang.String m_trainFilename

m_testFilenameBase

protected java.lang.String m_testFilenameBase
Name of the temporary file where a test instance is dumped if buffered IO is not used


m_testFilename

protected java.lang.String m_testFilename

m_modelFilenameBase

protected java.lang.String m_modelFilenameBase
Name of the file where a model will be temporarily created


m_modelFilename

protected java.lang.String m_modelFilename

m_predictionFilenameBase

protected java.lang.String m_predictionFilenameBase
Name of the file where predictions will be temporarily stored unless buffered IO is used


m_predictionFilename

protected java.lang.String m_predictionFilename

m_maxMargin

protected double m_maxMargin
SVM-light predictions are positive or negative margins; to convert to a distribution we need min/max margin values...


m_minMargin

protected double m_minMargin

m_autoBounds

protected boolean m_autoBounds

m_bufferedMode

protected boolean m_bufferedMode
Is classification done via temporary files or via a buffer?


m_procReader

protected java.io.BufferedReader m_procReader

m_procWriter

protected java.io.BufferedWriter m_procWriter

m_verbosityLevel

protected int m_verbosityLevel
verbosity level


SVM_MODE_CLASSIFICATION

public static final int SVM_MODE_CLASSIFICATION
SVM-light can work in classification, regression and preference ranking modes

See Also:
Constant Field Values

SVM_MODE_REGRESSION

public static final int SVM_MODE_REGRESSION
See Also:
Constant Field Values

SVM_MODE_PREFERENCE_RANKING

public static final int SVM_MODE_PREFERENCE_RANKING
See Also:
Constant Field Values

TAGS_SVM_MODE

public static final Tag[] TAGS_SVM_MODE

m_mode

protected int m_mode

m_C

protected double m_C
trade-off between training error and margin (default 0 corresponds to [avg. x*x]^-1)


m_width

protected double m_width
Epsilon width of tube for regression


m_costFactor

protected double m_costFactor
Cost: cost-factor, by which training errors on positive examples outweigh errors on negative examples


m_biased

protected boolean m_biased
Use biased hyperplane (i.e. x*w+b>0) instead of unbiased hyperplane (i.e. x*w>0)


m_removeInconsistentExamples

protected boolean m_removeInconsistentExamples
remove inconsistent training examples and retrain


KERNEL_LINEAR

public static final int KERNEL_LINEAR
Kernel type

See Also:
Constant Field Values

KERNEL_POLYNOMIAL

public static final int KERNEL_POLYNOMIAL
See Also:
Constant Field Values

KERNEL_RBF

public static final int KERNEL_RBF
See Also:
Constant Field Values

KERNEL_SIGMOID_TANH

public static final int KERNEL_SIGMOID_TANH
See Also:
Constant Field Values

TAGS_KERNEL_TYPE

public static final Tag[] TAGS_KERNEL_TYPE

m_kernelType

protected int m_kernelType

m_d

protected int m_d
Parameter d in polynomial kernel


m_gamma

protected double m_gamma
Parameter gamma in rbf kernel


m_s

protected double m_s
Parameter s in sigmoid/polynomial kernel


m_c1

protected double m_c1
parameter c in sigmoid/poly kernel

Constructor Detail

SVMlight

public SVMlight()
A default constructor

Method Detail

finalize

protected void finalize()
Take care of closing the SVM-light process before the object is destroyed


cleanupIO

protected void cleanupIO()
The buffered version of SVM-light needs to release some I/O resources before exiting


buildClassifier

public void buildClassifier(Instances instances)
                     throws java.lang.Exception
Generates the classifier.

Specified by:
buildClassifier in class Classifier
Parameters:
instances - set of instances serving as training data
Throws:
java.lang.Exception - if the classifier has not been generated successfully

setBounds

protected void setBounds(Instances data)
Set the bounds using "extreme" training examples - TODO!


dumpTrainingData

protected void dumpTrainingData(Instances instances)
Dump training instances into a file in SVM-light format

Parameters:
instances - the training instances

dumpInstance

protected void dumpInstance(Instance instance,
                            java.io.File file)
Dump a single instance into a file in SVM-light format

Parameters:
instance - an instance
file - the file where instance will be dumped

trainSVMlight

protected void trainSVMlight()
Launch an SVM-light process assuming that the training data has been dumped


classifySVMlight

protected double classifySVMlight(Instance instance)
Launch an SVM-light process and classify a given instance

Parameters:
instance - an instance that must be classified

readPrediction

protected double readPrediction(java.io.File file)
Read the prediction of SVM-light

Parameters:
file - file where the prediction is stored

distributionForInstance

public double[] distributionForInstance(Instance instance)
                                 throws java.lang.Exception
Calculates the class membership probabilities for the given test instance.

Specified by:
distributionForInstance in class DistributionClassifier
Parameters:
instance - the instance to be classified
Returns:
predicted class probability distribution
Throws:
java.lang.Exception - if an error occurred during the prediction

trained

public boolean trained()
Check whether the SVM has been trained

Returns:
true if the SVM has been trained and is ready to classify instances

listOptions

public java.util.Enumeration listOptions()
Returns an enumeration describing the available options.

Specified by:
listOptions in interface OptionHandler
Returns:
an enumeration of all the available options.

setOptions

public void setOptions(java.lang.String[] options)
                throws java.lang.Exception
Parses a given list of options. Valid options are:

-D
output debugging information

Specified by:
setOptions in interface OptionHandler
Parameters:
options - the list of options as an array of strings
Throws:
java.lang.Exception - if an option is not supported

getOptions

public java.lang.String[] getOptions()
Gets the current settings

Specified by:
getOptions in interface OptionHandler
Returns:
an array of strings suitable for passing to setOptions()

setDebug

public void setDebug(boolean debug)
Turn debugging output on/off

Parameters:
debug - if true, SVM-light output and other debugging info will be printed

getDebug

public boolean getDebug()
See whether debugging output is on/off


setBufferedMode

public void setBufferedMode(boolean bufferedMode)
Set SVM-light to operate via in/out buffers or via temporary files

Parameters:
bufferedMode - if true, SVM-light classification is performed via stdin/stdout

getBufferedMode

public boolean getBufferedMode()
See whether SVM-light is operating via in/out buffers or via temporary files


setVerbosityLevel

public void setVerbosityLevel(int verbosity)
Set verbosity level, can be anything between 0 and 3

Parameters:
verbosity - Verbosity level for SVM-light

getVerbosityLevel

public int getVerbosityLevel()
Get verbosity level, can be anything between 0 and 3


setMode

public void setMode(SelectedTag mode)
Set the mode of the SVM

Parameters:
mode - one of classification, regression and preference ranking

getMode

public SelectedTag getMode()
return the SVM-light mode

Returns:
one of classification, regression and preference ranking

setWidth

public void setWidth(double width)
Set the epsilon width of tube for regression


getWidth

public double getWidth()
Get the epsilon width of tube for regression


setC

public void setC(double C)
Set the trade-off between training error and margin (default 0 corresponds to [avg. x*x]^-1)


getC

public double getC()
Get the trade-off between training error and margin (default 0 corresponds to [avg. x*x]^-1)


setCostFactor

public void setCostFactor(double costFactor)
Set cost-factor, by which training errors on positive examples outweigh errors on negative examples


getCostFactor

public double getCostFactor()
Get cost-factor, by which training errors on positive examples outweigh errors on negative examples


setBiased

public void setBiased(boolean biased)
Set whether the hyperplane is biased (i.e. x*w+b>0) instead of unbiased hyperplane (i.e. x*w>0)

Parameters:
biased - if true, the hyperplane will be biased

getBiased

public boolean getBiased()
Get whether the hyperplane is biased (i.e. x*w+b>0) instead of unbiased hyperplane (i.e. x*w>0)


setRemoveInconsistentExamples

public void setRemoveInconsistentExamples(boolean removeInconsistentExamples)
Set whether the inconsistent examples are removed and retraining follows

Parameters:
removeInconsistentExamples - if true, inconsistent training examples are removed and the SVM is retrained

getRemoveInconsistentExamples

public boolean getRemoveInconsistentExamples()
Get whether the inconsistent examples are removed and retraining follows


setKernelType

public void setKernelType(SelectedTag kernelType)
Set the kernel type for SVM-light


getKernelType

public SelectedTag getKernelType()
Get the SVM-light kernel type

Returns:
kernel type

setD

public void setD(int d)
Set parameter d in polynomial kernel


getD

public int getD()
Get parameter d in polynomial kernel


setGamma

public void setGamma(double gamma)
Set parameter gamma in rbf kernel


getGamma

public double getGamma()
Get parameter gamma in rbf kernel


setS

public void setS(double s)
Set parameter s in sigmoid/polynomial kernel


getS

public double getS()
Get parameter s in sigmoid/polynomial kernel


setC1

public void setC1(double c1)
Set parameter c in sigmoid/poly kernel


getC1

public double getC1()
Get parameter c in sigmoid/poly kernel


setMaxMargin

public void setMaxMargin(double maxMargin)
Set the maxMargin that an SVM can return


getMaxMargin

public double getMaxMargin()
Get the maxMargin that an SVM can return


setMinMargin

public void setMinMargin(double minMargin)
Set the minMargin that an SVM can return


getMinMargin

public double getMinMargin()
Get the minMargin that an SVM can return


setAutoBounds

public void setAutoBounds(boolean autoBounds)
Set whether min/max margins are determined automatically


getAutoBounds

public boolean getAutoBounds()
Get whether min/max margins are determined automatically


toString

public java.lang.String toString()
Returns a description of this classifier.

Returns:
a description of this classifier as a string.

setTempDirPath

public void setTempDirPath(java.lang.String tempDirPath)
Set the path for the temporary files

Parameters:
tempDirPath - a full path to the temporary directory

getTempDirPath

public java.lang.String getTempDirPath()
Get the path for the temporary files


setBinPath

public void setBinPath(java.lang.String binPath)
Set the path for the binary files


getBinPath

public java.lang.String getBinPath()
Get the path for the binaries


concatStringArray

public static java.lang.String concatStringArray(java.lang.String[] strings)
A little helper to create a single String from an array of Strings

Parameters:
strings - an array of strings

main

public static void main(java.lang.String[] argv)
Main method for testing this class.

Parameters:
argv - should contain command line options (see setOptions)