# multiagentTestClasses.py # ------------------------ # Licensing Information: Please do not distribute or publish solutions to this # project. You are free to use and extend these projects for educational # purposes. The Pacman AI projects were developed at UC Berkeley, primarily by # John DeNero (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu). # Student side autograding was added by Brad Miller, Nick Hay, and Pieter # Abbeel in Spring 2013. # For more info, see http://inst.eecs.berkeley.edu/~cs188/pacman/pacman.html # A minimax tree which interfaces like gameState # state.getNumAgents() # state.isWin() # state.isLose() # state.generateSuccessor(agentIndex, action) # state.getScore() # used by multiAgents.scoreEvaluationFunction, which is the default # import testClasses import json from collections import defaultdict from pprint import PrettyPrinter pp = PrettyPrinter() from game import Agent from pacman import GameState from ghostAgents import RandomGhost, DirectionalGhost import random, math, traceback, sys, os import layout, pacman import autograder # import grading VERBOSE = False class MultiagentTreeState(object): def __init__(self, problem, state): self.problem = problem self.state = state def generateSuccessor(self, agentIndex, action): if VERBOSE: print "generateSuccessor(%s, %s, %s) -> %s" % (self.state, agentIndex, action, self.problem.stateToSuccessorMap[self.state][action]) successor = self.problem.stateToSuccessorMap[self.state][action] self.problem.generatedStates.add(successor) return MultiagentTreeState(self.problem, successor) def getScore(self): if VERBOSE: print "getScore(%s) -> %s" % (self.state, self.problem.evaluation[self.state]) if self.state not in self.problem.evaluation: raise Exception('getScore() called on non-terminal state or before maximum depth achieved.') return float(self.problem.evaluation[self.state]) def getLegalActions(self, agentIndex=0): if VERBOSE: print "getLegalActions(%s) -> %s" % (self.state, self.problem.stateToActions[self.state]) #if len(self.problem.stateToActions[self.state]) == 0: # print "WARNING: getLegalActions called on leaf state %s" % (self.state,) return list(self.problem.stateToActions[self.state]) def isWin(self): if VERBOSE: print "isWin(%s) -> %s" % (self.state, self.state in self.problem.winStates) return self.state in self.problem.winStates def isLose(self): if VERBOSE: print "isLose(%s) -> %s" % (self.state, self.state in self.problem.loseStates) return self.state in self.problem.loseStates def getNumAgents(self): if VERBOSE: print "getNumAgents(%s) -> %s" % (self.state, self.problem.numAgents) return self.problem.numAgents class MultiagentTreeProblem(object): def __init__(self, numAgents, startState, winStates, loseStates, successors, evaluation): self.startState = MultiagentTreeState(self, startState) self.numAgents = numAgents self.winStates = winStates self.loseStates = loseStates self.evaluation = evaluation self.successors = successors self.reset() self.stateToSuccessorMap = defaultdict(dict) self.stateToActions = defaultdict(list) for state, action, nextState in successors: self.stateToActions[state].append(action) self.stateToSuccessorMap[state][action] = nextState def reset(self): self.generatedStates = set([self.startState.state]) def parseTreeProblem(testDict): numAgents = int(testDict["num_agents"]) startState = testDict["start_state"] winStates = set(testDict["win_states"].split(" ")) loseStates = set(testDict["lose_states"].split(" ")) successors = [] evaluation = {} for line in testDict["evaluation"].split('\n'): tokens = line.split() if len(tokens) == 2: state, value = tokens evaluation[state] = float(value) else: raise Exception, "[parseTree] Bad evaluation line: |%s|" % (line,) for line in testDict["successors"].split('\n'): tokens = line.split() if len(tokens) == 3: state, action, nextState = tokens successors.append((state, action, nextState)) else: raise Exception, "[parseTree] Bad successor line: |%s|" % (line,) return MultiagentTreeProblem(numAgents, startState, winStates, loseStates, successors, evaluation) def run(lay, layName, pac, ghosts, disp, nGames=1, name='games'): """ Runs a few games and outputs their statistics. """ starttime = time.time() print '*** Running %s on' % name, layName, '%d time(s).' % nGames games = pacman.runGames(lay, pac, ghosts, disp, nGames, False, catchExceptions=True, timeout=120) print '*** Finished running %s on' % name, layName, 'after %d seconds.' % (time.time() - starttime) stats = {'time': time.time() - starttime, 'wins': [g.state.isWin() for g in games].count(True), 'games': games, 'scores': [g.state.getScore() for g in games], 'timeouts': [g.agentTimeout for g in games].count(True), 'crashes': [g.agentCrashed for g in games].count(True)} print '*** Won %d out of %d games. Average score: %f ***' % (stats['wins'], len(games), sum(stats['scores']) * 1.0 / len(games)) return stats class GradingAgent(Agent): def __init__(self, seed, studentAgent, optimalActions, altDepthActions, partialPlyBugActions): # save student agent and actions of refernce agents self.studentAgent = studentAgent self.optimalActions = optimalActions self.altDepthActions = altDepthActions self.partialPlyBugActions = partialPlyBugActions # create fields for storing specific wrong actions self.suboptimalMoves = [] self.wrongStatesExplored = -1 # boolean vectors represent types of implementation the student could have self.actionsConsistentWithOptimal = [True for i in range(len(optimalActions[0]))] self.actionsConsistentWithAlternativeDepth = [True for i in range(len(altDepthActions[0]))] self.actionsConsistentWithPartialPlyBug = [True for i in range(len(partialPlyBugActions[0]))] # keep track of elapsed moves self.stepCount = 0 self.seed = seed def registerInitialState(self, state): if 'registerInitialState' in dir(self.studentAgent): self.studentAgent.registerInitialState(state) random.seed(self.seed) def getAction(self, state): GameState.getAndResetExplored() studentAction = (self.studentAgent.getAction(state), len(GameState.getAndResetExplored())) optimalActions = self.optimalActions[self.stepCount] altDepthActions = self.altDepthActions[self.stepCount] partialPlyBugActions = self.partialPlyBugActions[self.stepCount] studentOptimalAction = False curRightStatesExplored = False; for i in range(len(optimalActions)): if studentAction[0] in optimalActions[i][0]: studentOptimalAction = True else: self.actionsConsistentWithOptimal[i] = False if studentAction[1] == int(optimalActions[i][1]): curRightStatesExplored = True if not curRightStatesExplored and self.wrongStatesExplored < 0: self.wrongStatesExplored = 1 for i in range(len(altDepthActions)): if studentAction[0] not in altDepthActions[i]: self.actionsConsistentWithAlternativeDepth[i] = False for i in range(len(partialPlyBugActions)): if studentAction[0] not in partialPlyBugActions[i]: self.actionsConsistentWithPartialPlyBug[i] = False if not studentOptimalAction: self.suboptimalMoves.append((state, studentAction[0], optimalActions[0][0][0])) self.stepCount += 1 random.seed(self.seed + self.stepCount) return optimalActions[0][0][0] def getSuboptimalMoves(self): return self.suboptimalMoves def getWrongStatesExplored(self): return self.wrongStatesExplored def checkFailure(self): """ Return +n if have n suboptimal moves. Return -1 if have only off by one depth moves. Return 0 otherwise. """ if self.wrongStatesExplored > 0: return -3 if self.actionsConsistentWithOptimal.count(True) > 0: return 0 elif self.actionsConsistentWithPartialPlyBug.count(True) > 0: return -2 elif self.actionsConsistentWithAlternativeDepth.count(True) > 0: return -1 else: return len(self.suboptimalMoves) class PolyAgent(Agent): def __init__(self, seed, multiAgents, ourPacOptions, depth): # prepare our pacman agents solutionAgents, alternativeDepthAgents, partialPlyBugAgents = self.construct_our_pacs(multiAgents, ourPacOptions) for p in solutionAgents: p.depth = depth for p in partialPlyBugAgents: p.depth = depth for p in alternativeDepthAgents[:2]: p.depth = max(1, depth - 1) for p in alternativeDepthAgents[2:]: p.depth = depth + 1 self.solutionAgents = solutionAgents self.alternativeDepthAgents = alternativeDepthAgents self.partialPlyBugAgents = partialPlyBugAgents # prepare fields for storing the results self.optimalActionLists = [] self.alternativeDepthLists = [] self.partialPlyBugLists = [] self.seed = seed self.stepCount = 0 def select(self, list, indices): """ Return a sublist of elements given by indices in list. """ return [list[i] for i in indices] def construct_our_pacs(self, multiAgents, keyword_dict): pacs_without_stop = [multiAgents.StaffMultiAgentSearchAgent(**keyword_dict) for i in range(3)] keyword_dict['keepStop'] = 'True' pacs_with_stop = [multiAgents.StaffMultiAgentSearchAgent(**keyword_dict) for i in range(3)] keyword_dict['usePartialPlyBug'] = 'True' partial_ply_bug_pacs = [multiAgents.StaffMultiAgentSearchAgent(**keyword_dict)] keyword_dict['keepStop'] = 'False' partial_ply_bug_pacs = partial_ply_bug_pacs + [multiAgents.StaffMultiAgentSearchAgent(**keyword_dict)] for pac in pacs_with_stop + pacs_without_stop + partial_ply_bug_pacs: pac.verbose = False ourpac = [pacs_with_stop[0], pacs_without_stop[0]] alternative_depth_pacs = self.select(pacs_with_stop + pacs_without_stop, [1, 4, 2, 5]) return (ourpac, alternative_depth_pacs, partial_ply_bug_pacs) def registerInitialState(self, state): for agent in self.solutionAgents + self.alternativeDepthAgents: if 'registerInitialState' in dir(agent): agent.registerInitialState(state) random.seed(self.seed) def getAction(self, state): # survey agents GameState.getAndResetExplored() optimalActionLists = [] for agent in self.solutionAgents: optimalActionLists.append((agent.getBestPacmanActions(state)[0], len(GameState.getAndResetExplored()))) alternativeDepthLists = [agent.getBestPacmanActions(state)[0] for agent in self.alternativeDepthAgents] partialPlyBugLists = [agent.getBestPacmanActions(state)[0] for agent in self.partialPlyBugAgents] # record responses self.optimalActionLists.append(optimalActionLists) self.alternativeDepthLists.append(alternativeDepthLists) self.partialPlyBugLists.append(partialPlyBugLists) self.stepCount += 1 random.seed(self.seed + self.stepCount) return optimalActionLists[0][0][0] def getTraces(self): # return traces from individual agents return (self.optimalActionLists, self.alternativeDepthLists, self.partialPlyBugLists) class PacmanGameTreeTest(testClasses.TestCase): def __init__(self, question, testDict): super(PacmanGameTreeTest, self).__init__(question, testDict) self.seed = int(self.testDict['seed']) self.alg = self.testDict['alg'] self.layout_text = self.testDict['layout'] self.layout_name = self.testDict['layoutName'] self.depth = int(self.testDict['depth']) self.max_points = int(self.testDict['max_points']) def execute(self, grades, moduleDict, solutionDict): # load student code and staff code solutions multiAgents = moduleDict['multiAgents'] studentAgent = getattr(multiAgents, self.alg)(depth=self.depth) allActions = map(lambda x: json.loads(x), solutionDict['optimalActions'].split('\n')) altDepthActions = map(lambda x: json.loads(x), solutionDict['altDepthActions'].split('\n')) partialPlyBugActions = map(lambda x: json.loads(x), solutionDict['partialPlyBugActions'].split('\n')) # set up game state and play a game random.seed(self.seed) lay = layout.Layout([l.strip() for l in self.layout_text.split('\n')]) pac = GradingAgent(self.seed, studentAgent, allActions, altDepthActions, partialPlyBugActions) # check return codes and assign grades disp = self.question.getDisplay() stats = run(lay, self.layout_name, pac, [DirectionalGhost(i + 1) for i in range(2)], disp, name=self.alg) if stats['timeouts'] > 0: self.addMessage('Agent timed out on smallClassic. No credit') return self.testFail(grades) if stats['crashes'] > 0: self.addMessage('Agent crashed on smallClassic. No credit') return self.testFail(grades) code = pac.checkFailure() if code == 0: return self.testPass(grades) elif code == -3: if pac.getWrongStatesExplored() >=0: self.addMessage('Bug: Wrong number of states expanded.') return self.testFail(grades) else: return self.testPass(grades) elif code == -2: self.addMessage('Bug: Partial Ply Bug') return self.testFail(grades) elif code == -1: self.addMessage('Bug: Search depth off by 1') return self.testFail(grades) elif code > 0: moves = pac.getSuboptimalMoves() state, studentMove, optMove = random.choice(moves) self.addMessage('Bug: Suboptimal moves') self.addMessage('State:%s\nStudent Move:%s\nOptimal Move:%s' % (state, studentMove, optMove)) return self.testFail(grades) def writeList(self, handle, name, list): handle.write('%s: """\n' % name) for l in list: handle.write('%s\n' % json.dumps(l)) handle.write('"""\n') def writeSolution(self, moduleDict, filePath): # load module, set seed, create ghosts and macman, run game multiAgents = moduleDict['multiAgents'] random.seed(self.seed) lay = layout.Layout([l.strip() for l in self.layout_text.split('\n')]) if self.alg == 'ExpectimaxAgent': ourPacOptions = {'expectimax': 'True'} elif self.alg == 'AlphaBetaAgent': ourPacOptions = {'alphabeta': 'True'} else: ourPacOptions = {} pac = PolyAgent(self.seed, multiAgents, ourPacOptions, self.depth) disp = self.question.getDisplay() run(lay, self.layout_name, pac, [DirectionalGhost(i + 1) for i in range(2)], disp, name=self.alg) (optimalActions, altDepthActions, partialPlyBugActions) = pac.getTraces() # recover traces and record to file handle = open(filePath, 'w') self.writeList(handle, 'optimalActions', optimalActions) self.writeList(handle, 'altDepthActions', altDepthActions) self.writeList(handle, 'partialPlyBugActions', partialPlyBugActions) handle.close() class GraphGameTreeTest(testClasses.TestCase): def __init__(self, question, testDict): super(GraphGameTreeTest, self).__init__(question, testDict) self.problem = parseTreeProblem(testDict) self.alg = self.testDict['alg'] self.diagram = self.testDict['diagram'].split('\n') self.depth = int(self.testDict['depth']) def solveProblem(self, multiAgents): self.problem.reset() studentAgent = getattr(multiAgents, self.alg)(depth=self.depth) action = studentAgent.getAction(self.problem.startState) generated = self.problem.generatedStates return action, " ".join([str(s) for s in sorted(generated)]) def addDiagram(self): self.addMessage('Tree:') for line in self.diagram: self.addMessage(line) def execute(self, grades, moduleDict, solutionDict): multiAgents = moduleDict['multiAgents'] goldAction = solutionDict['action'] goldGenerated = solutionDict['generated'] action, generated = self.solveProblem(multiAgents) fail = False if action != goldAction: self.addMessage('Incorrect move for depth=%s' % (self.depth,)) self.addMessage(' Student move: %s\n Optimal move: %s' % (action, goldAction)) fail = True if generated != goldGenerated: self.addMessage('Incorrect generated nodes for depth=%s' % (self.depth,)) self.addMessage(' Student generated nodes: %s\n Correct generated nodes: %s' % (generated, goldGenerated)) fail = True if fail: self.addDiagram() return self.testFail(grades) else: return self.testPass(grades) def writeSolution(self, moduleDict, filePath): multiAgents = moduleDict['multiAgents'] action, generated = self.solveProblem(multiAgents) with open(filePath, 'w') as handle: handle.write('# This is the solution file for %s.\n' % self.path) handle.write('action: "%s"\n' % (action,)) handle.write('generated: "%s"\n' % (generated,)) return True import time from util import TimeoutFunction class EvalAgentTest(testClasses.TestCase): def __init__(self, question, testDict): super(EvalAgentTest, self).__init__(question, testDict) self.layoutName = testDict['layoutName'] self.agentName = testDict['agentName'] self.ghosts = eval(testDict['ghosts']) self.maxTime = int(testDict['maxTime']) self.seed = int(testDict['randomSeed']) self.numGames = int(testDict['numGames']) self.scoreMinimum = int(testDict['scoreMinimum']) if 'scoreMinimum' in testDict else None self.nonTimeoutMinimum = int(testDict['nonTimeoutMinimum']) if 'nonTimeoutMinimum' in testDict else None self.winsMinimum = int(testDict['winsMinimum']) if 'winsMinimum' in testDict else None self.scoreThresholds = [int(s) for s in testDict.get('scoreThresholds','').split()] self.nonTimeoutThresholds = [int(s) for s in testDict.get('nonTimeoutThresholds','').split()] self.winsThresholds = [int(s) for s in testDict.get('winsThresholds','').split()] self.maxPoints = sum([len(t) for t in [self.scoreThresholds, self.nonTimeoutThresholds, self.winsThresholds]]) self.agentArgs = testDict.get('agentArgs', '') def execute(self, grades, moduleDict, solutionDict): startTime = time.time() agentType = getattr(moduleDict['multiAgents'], self.agentName) agentOpts = pacman.parseAgentArgs(self.agentArgs) if self.agentArgs != '' else {} agent = agentType(**agentOpts) lay = layout.getLayout(self.layoutName, 3) disp = self.question.getDisplay() random.seed(self.seed) games = pacman.runGames(lay, agent, self.ghosts, disp, self.numGames, False, catchExceptions=True, timeout=self.maxTime) totalTime = time.time() - startTime stats = {'time': totalTime, 'wins': [g.state.isWin() for g in games].count(True), 'games': games, 'scores': [g.state.getScore() for g in games], 'timeouts': [g.agentTimeout for g in games].count(True), 'crashes': [g.agentCrashed for g in games].count(True)} averageScore = sum(stats['scores']) / float(len(stats['scores'])) nonTimeouts = self.numGames - stats['timeouts'] wins = stats['wins'] def gradeThreshold(value, minimum, thresholds, name): points = 0 passed = (minimum == None) or (value >= minimum) if passed: for t in thresholds: if value >= t: points += 1 return (passed, points, value, minimum, thresholds, name) results = [gradeThreshold(averageScore, self.scoreMinimum, self.scoreThresholds, "average score"), gradeThreshold(nonTimeouts, self.nonTimeoutMinimum, self.nonTimeoutThresholds, "games not timed out"), gradeThreshold(wins, self.winsMinimum, self.winsThresholds, "wins")] totalPoints = 0 for passed, points, value, minimum, thresholds, name in results: if minimum == None and len(thresholds)==0: continue # print passed, points, value, minimum, thresholds, name totalPoints += points if not passed: assert points == 0 self.addMessage("%s %s (fail: below minimum value %s)" % (value, name, minimum)) else: self.addMessage("%s %s (%s of %s points)" % (value, name, points, len(thresholds))) if minimum != None: self.addMessage(" Grading scheme:") self.addMessage(" < %s: fail" % (minimum,)) if len(thresholds)==0 or minimum != thresholds[0]: self.addMessage(" >= %s: 0 points" % (minimum,)) for idx, threshold in enumerate(thresholds): self.addMessage(" >= %s: %s points" % (threshold, idx+1)) elif len(thresholds) > 0: self.addMessage(" Grading scheme:") self.addMessage(" < %s: 0 points" % (thresholds[0],)) for idx, threshold in enumerate(thresholds): self.addMessage(" >= %s: %s points" % (threshold, idx+1)) if any([not passed for passed, _, _, _, _, _ in results]): totalPoints = 0 return self.testPartial(grades, totalPoints, self.maxPoints) def writeSolution(self, moduleDict, filePath): handle = open(filePath, 'w') handle.write('# This is the solution file for %s.\n' % self.path) handle.write('# File intentionally blank.\n') handle.close() return True