# Mira implementation
import util

PRINT = True


class MiraClassifier:
    """
    MIRA (margin-infused relaxed algorithm) multi-class linear classifier.

    One weight vector is kept per legal label; a datum is assigned the label
    whose weight vector has the highest score against it.

    Note that the variable 'datum' in this code refers to a counter of
    features (not to a raw samples.Datum).
    """

    def __init__(self, legalLabels, max_iterations):
        """
        Store the label set and iteration budget and zero the weights.

        legalLabels    -- iterable of every label the classifier may output
        max_iterations -- number of passes over the training data per C value
        """
        self.legalLabels = legalLabels
        self.type = "mira"
        self.automaticTuning = False
        # Cap on the MIRA step size; used when automatic tuning is off.
        self.C = 0.001
        self.max_iterations = max_iterations
        self.initializeWeightsToZero()

    def initializeWeightsToZero(self):
        "Resets the weights of each label to zero vectors"
        self.weights = {}
        for label in self.legalLabels:
            # util.Counter is the data structure classify() expects here.
            self.weights[label] = util.Counter()

    def train(self, trainingData, trainingLabels, validationData, validationLabels):
        """
        Outer shell around trainAndTune (course scaffolding, logic unchanged).

        Records the feature names of the first training datum in
        self.features, picks the grid of C values to try (three fixed values
        when self.automaticTuning is set, otherwise just self.C) and
        delegates to trainAndTune, returning whatever it returns.
        """
        # Feature names are taken from the first datum only.
        self.features = trainingData[0].keys()

        if self.automaticTuning:
            Cgrid = [0.002, 0.004, 0.008]
        else:
            Cgrid = [self.C]

        return self.trainAndTune(trainingData, trainingLabels,
                                 validationData, validationLabels, Cgrid)

    def trainAndTune(self, trainingData, trainingLabels, validationData,
                     validationLabels, Cgrid):
        """
        Set self.weights using MIRA, tuning the step-size cap C.

        For every C in Cgrid the classifier is trained for
        self.max_iterations passes over the training data, always starting
        from the weights that were in place when this method was called.
        The weights that classify the most validation data correctly are
        stored in self.weights; on a tie the smallest C wins.  If Cgrid is
        empty the weights are left untouched.

        A datum is a counter from features to values; it is assumed to be
        dict-like, with `*` between two counters giving their dot product
        (this is how classify() already uses util.Counter).

        Returns None.
        """
        initialWeights = self._copyWeights(self.weights)

        bestWeights = None
        bestCorrect = -1
        bestC = None

        for C in Cgrid:
            # Every candidate C starts from the same initial weights.
            self.weights = self._copyWeights(initialWeights)

            for _ in range(self.max_iterations):
                for datum, trueLabel in zip(trainingData, trainingLabels):
                    guess = self.classify([datum])[0]
                    if guess == trueLabel:
                        continue

                    normSquared = datum * datum
                    if normSquared == 0:
                        # An all-zero datum cannot move any score, and
                        # would divide by zero in the step size below.
                        continue

                    guessWeights = self.weights[guess]
                    trueWeights = self.weights[trueLabel]

                    # Smallest step that makes the true label beat the
                    # guess by a margin of 1, capped at C.
                    margin = guessWeights * datum - trueWeights * datum + 1.0
                    tau = min(C, margin / (2.0 * normSquared))

                    for feature in datum.keys():
                        step = tau * datum[feature]
                        trueWeights[feature] = (
                            self._valueOf(trueWeights, feature) + step)
                        guessWeights[feature] = (
                            self._valueOf(guessWeights, feature) - step)

            guesses = self.classify(validationData)
            correct = 0
            for guess, label in zip(guesses, validationLabels):
                if guess == label:
                    correct += 1

            isBetter = correct > bestCorrect
            isTieWithSmallerC = correct == bestCorrect and C < bestC
            if isBetter or isTieWithSmallerC:
                bestCorrect = correct
                bestC = C
                bestWeights = self._copyWeights(self.weights)

        if bestWeights is None:
            # Empty Cgrid: nothing was trained, keep the original weights.
            self.weights = initialWeights
        else:
            self.weights = bestWeights

    def classify(self, data):
        """
        Classifies each datum as the label that most closely matches the
        prototype vector for that label, i.e. the label whose weight vector
        has the highest product with the datum.

        data -- iterable of datums (feature counters)

        Returns a list with one guessed label per datum, in input order.
        """
        guesses = []
        for datum in data:
            vectors = util.Counter()
            for l in self.legalLabels:
                vectors[l] = self.weights[l] * datum
            guesses.append(vectors.argMax())
        return guesses

    def findHighOddsFeatures(self, label1, label2):
        """
        Returns a list of the 100 features with the greatest difference in
        feature values w_label1 - w_label2

        Features are ordered from the largest difference downwards; fewer
        than 100 are returned when the two weight vectors hold fewer
        features between them.  A feature missing from one vector counts as
        weight 0 there.
        """
        firstWeights = self.weights[label1]
        secondWeights = self.weights[label2]

        # Union of both feature sets, keeping first-seen order so that the
        # (stable) sort below breaks ties deterministically.
        featuresOdds = list(firstWeights.keys())
        seen = set(featuresOdds)
        for feature in secondWeights.keys():
            if feature not in seen:
                seen.add(feature)
                featuresOdds.append(feature)

        def weightDifference(feature):
            return (self._valueOf(firstWeights, feature)
                    - self._valueOf(secondWeights, feature))

        featuresOdds.sort(key=weightDifference, reverse=True)
        return featuresOdds[:100]

    def _copyWeights(self, weights):
        "Returns a label -> util.Counter copy that shares no counters with weights"
        snapshot = {}
        for label in weights:
            vector = util.Counter()
            for feature in weights[label].keys():
                vector[feature] = weights[label][feature]
            snapshot[label] = vector
        return snapshot

    def _valueOf(self, counter, feature):
        "Returns counter[feature], treating a missing feature as 0"
        if feature in counter:
            return counter[feature]
        return 0