From ddfc8986670e89e87535e737e777cc6e43076ac1 Mon Sep 17 00:00:00 2001 From: Shayak Banerjee Date: Sun, 17 Dec 2017 15:11:34 -0500 Subject: [PATCH] Refactor to move all testing functions into their own script --- board.py | 1 - game.py | 52 +++---------------------------- learning.py | 54 ++++++++++++++++++++++++--------- player.py | 6 ++++ test_scripts.py | 69 ++++++++++++++++++++++++++++++++++++++++++ ultimate_learning.json | 1 - ultimateplayer.py | 14 ++++++++- 7 files changed, 132 insertions(+), 65 deletions(-) create mode 100644 test_scripts.py delete mode 100644 ultimate_learning.json diff --git a/board.py b/board.py index 56b1989..17d501e 100644 --- a/board.py +++ b/board.py @@ -54,7 +54,6 @@ def makeMove(self, who, i, j, verbose=True): # who is PLAYER_X or PLAYER_O if self.board[i][j] != GridStates.EMPTY: print 'That location is not empty' return - #print '%s moves'%(who) self.board[i][j] = who #self.printBoard() self.determineBoardState() diff --git a/game.py b/game.py index 3cd4afd..6b70cd7 100644 --- a/game.py +++ b/game.py @@ -1,13 +1,4 @@ from board import TTTBoardDecision, GridStates, TTTBoard -from ultimateboard import UTTTBoard, UTTTBoardDecision -from player import RandomTTTPlayer, RLTTTPlayer -from ultimateplayer import RandomUTTTPlayer, RLUTTTPlayer -from learning import NNUltimateLearning -from plotting import drawXYPlotByFactor -import os - -LEARNING_FILE = 'ultimate_player_nn1.h5' -WIN_PCT_FILE = 'win_pct_player_1.csv' class GameSequence(object): def __init__(self, numberOfGames, player1, player2, BoardClass=TTTBoard, BoardDecisionClass=TTTBoardDecision): @@ -18,6 +9,8 @@ def __init__(self, numberOfGames, player1, player2, BoardClass=TTTBoard, BoardDe self.BoardDecisionClass = BoardDecisionClass def playAGame(self, board): + self.player1.startNewGame() + self.player2.startNewGame() while board.getBoardDecision() == self.BoardDecisionClass.ACTIVE: self.player1.setBoard(board, GridStates.PLAYER_X) self.player2.setBoard(board, GridStates.PLAYER_O) @@ -27,6 +20,8 @@ def playAGame(self, board): pState2 = self.player2.makeNextMove() self.player1.learnFromMove(pState2) self.player2.learnFromMove(pState2) + self.player1.finishGame() + self.player2.finishGame() return board.getBoardDecision() def playGamesAndGetWinPercent(self): @@ -38,42 +33,3 @@ def playGamesAndGetWinPercent(self): float(results.count(self.BoardDecisionClass.WON_O))/float(self.numberOfGames), \ float(results.count(self.BoardDecisionClass.DRAW))/float(self.numberOfGames) return (xpct, opct, drawpct) - -def playTTTAndPlotResults(): - learningPlayer = RLTTTPlayer() - randomPlayer = RandomTTTPlayer() - results = [] - numberOfSetsOfGames = 40 - for i in range(numberOfSetsOfGames): - games = GameSequence(100, learningPlayer, randomPlayer) - results.append(games.playGamesAndGetWinPercent()) - plotValues = {'X Win Fraction': zip(range(numberOfSetsOfGames), map(lambda x: x[0], results)), - 'O Win Fraction': zip(range(numberOfSetsOfGames), map(lambda x: x[1], results)), - 'Draw Fraction': zip(range(numberOfSetsOfGames), map(lambda x: x[2], results))} - drawXYPlotByFactor(plotValues, 'Set Number', 'Fraction') - -def playUltimateAndPlotResults(): - learningPlayer = RLUTTTPlayer(NNUltimateLearning) - randomPlayer = RandomUTTTPlayer() - results = [] - numberOfSetsOfGames = 40 - if os.path.isfile(LEARNING_FILE): - learningPlayer.loadLearning(LEARNING_FILE) - for i in range(numberOfSetsOfGames): - games = GameSequence(100, learningPlayer, randomPlayer, BoardClass=UTTTBoard, BoardDecisionClass=UTTTBoardDecision) - results.append(games.playGamesAndGetWinPercent()) - learningPlayer.saveLearning(LEARNING_FILE) - writeResultsToFile(results) - plotValues = {'X Win Fraction': zip(range(numberOfSetsOfGames), map(lambda x: x[0], results)), - 'O Win Fraction': zip(range(numberOfSetsOfGames), map(lambda x: x[1], results)), - 'Draw Fraction': zip(range(numberOfSetsOfGames), map(lambda x: x[2], results))} - drawXYPlotByFactor(plotValues, 'Set Number', 'Fraction') - -def writeResultsToFile(results): - with open(WIN_PCT_FILE, 'a') as outfile: - for result in results: - outfile.write('%s,%s,%s\n'%(result[0], result[1], result[2])) - -if __name__ == '__main__': - #playTTTAndPlotResults() - playUltimateAndPlotResults() \ No newline at end of file diff --git a/learning.py b/learning.py index 265baba..18af3f9 100644 --- a/learning.py +++ b/learning.py @@ -24,6 +24,12 @@ def loadModel(self, filename): # Use only if also saving the intermediate state above raise NotImplementedError + def resetForNewGame(self): + pass + + def gameOver(self): + pass + class TableLearning(GenericLearning): def __init__(self, DecisionClass=TTTBoardDecision): self.values = {} @@ -35,7 +41,7 @@ def getBoardStateValue(self, player, board, boardState): self.values[boardState] = 1.0 if player == GridStates.PLAYER_X else 0.0 if decision == self.DecisionClass.WON_O: self.values[boardState] = 1.0 if player == GridStates.PLAYER_O else 0.0 - if boardState not in self.values: + if decision == self.DecisionClass.DRAW or boardState not in self.values: self.values[boardState] = 0.5 return self.values[boardState] @@ -48,7 +54,6 @@ def learnFromMove(self, player, board, prevBoardState): def printValues(self): from pprint import pprint - #pprint(filter(lambda x: x!=0.5, self.values.values())) pprint(self.values) print 'Total number of states: %s' % (len(self.values)) print 'Total number of knowledgeable states: %s' % (len(filter(lambda x: x!=0.5, self.values.values()))) @@ -62,45 +67,66 @@ def loadLearning(self, filename): class NNUltimateLearning(GenericLearning): STATE_TO_NUMBER_MAP = {GridStates.EMPTY: 0, GridStates.PLAYER_O: -1, GridStates.PLAYER_X: 1} + TABLE_LEARNING_FILE = 'table_learning.json' def __init__(self, DecisionClass=TTTBoardDecision): self.DecisionClass = DecisionClass + self.values = {} self.initializeModel() def initializeModel(self): self.model = Sequential() self.model.add(Dense(81, input_dim=81, activation='relu')) - self.model.add(Dense(81, activation='relu')) - self.model.add(Dense(1, activation='sigmoid')) + #self.model.add(Dense(81, activation='relu')) + self.model.add(Dense(1, activation='linear', kernel_initializer='glorot_uniform')) self.model.compile(loss='mean_squared_error', optimizer='adam', metrics=['accuracy']) plot_model(self.model, to_file='model.png') + #self.initialModelTraining(self.TABLE_LEARNING_FILE) + + def initialModelTraining(self, jsonFile): + import os + if os.path.isfile(jsonFile): + self.values = json.load(open(jsonFile, 'r')) + self.gameOver() + + def resetForNewGame(self): + self.values = {} + + def gameOver(self): + boardStates, predYs = [], [] + for (k,v) in self.values.iteritems(): + boardStates.append(self.convertBoardStateToInput(k)) + predYs.append(v) + self.trainModel(boardStates, predYs) def convertBoardStateToInput(self, boardState): - return np.asarray([map(lambda x: self.STATE_TO_NUMBER_MAP.get(x), boardState)]) + return map(lambda x: self.STATE_TO_NUMBER_MAP.get(x), boardState) - def trainModel(self, boardState, y): - self.model.fit(self.convertBoardStateToInput(boardState), np.asarray([y]), verbose=0) + def trainModel(self, boardStates, y): + self.model.fit(np.asarray(boardStates), np.asarray(y), verbose=0) def getPrediction(self, boardState): - return self.model.predict(self.convertBoardStateToInput(boardState))[0] + return self.model.predict(np.asarray([self.convertBoardStateToInput(boardState)]))[0] def getBoardStateValue(self, player, board, boardState): #TODO: Can batch the inputs to do several predictions at once decision = board.getBoardDecision() predY = self.getPrediction(boardState)[0] if decision == self.DecisionClass.WON_X: - predY = 1.0 if player == GridStates.PLAYER_X else 0.0 - self.trainModel(boardState, predY) + predY = 1.0 if player == GridStates.PLAYER_X else 0.0 #TODO: Explore using -1.0 instead of 0.0 + self.values[boardState] = predY if decision == self.DecisionClass.WON_O: predY = 1.0 if player == GridStates.PLAYER_O else 0.0 - self.trainModel(boardState, predY) + self.values[boardState] = predY + if decision == self.DecisionClass.DRAW: + predY = 0.5 + self.values[boardState] = predY return predY def learnFromMove(self, player, board, prevBoardState): curBoardState = board.getBoardState() curBoardStateValue = self.getBoardStateValue(player, board, curBoardState) - prevBoardStateValue = self.getPrediction(prevBoardState) - trainY = prevBoardStateValue + 0.2 * (curBoardStateValue - prevBoardStateValue) - self.trainModel(prevBoardState, trainY) + prevBoardStateValue = self.getPrediction(prevBoardState)[0] + self.values[prevBoardState] = prevBoardStateValue + 0.2 * (curBoardStateValue - prevBoardStateValue) def printValues(self): pass diff --git a/player.py b/player.py index e10a027..e2d0cc9 100644 --- a/player.py +++ b/player.py @@ -20,6 +20,12 @@ def makeNextMove(self): def learnFromMove(self, prevBoardState): raise NotImplementedError + def startNewGame(self): + pass + + def finishGame(self): + pass + class RandomTTTPlayer(TTTPlayer): def makeNextMove(self): previousState = self.board.getBoardState() diff --git a/test_scripts.py b/test_scripts.py new file mode 100644 index 0000000..3d11319 --- /dev/null +++ b/test_scripts.py @@ -0,0 +1,69 @@ +from ultimateboard import UTTTBoard, UTTTBoardDecision +from player import RandomTTTPlayer, RLTTTPlayer +from ultimateplayer import RandomUTTTPlayer, RLUTTTPlayer +from learning import NNUltimateLearning, TableLearning +from plotting import drawXYPlotByFactor +import os, csv +from game import GameSequence + +LEARNING_FILE = 'ultimate_player_nn1.h5' +WIN_PCT_FILE = 'win_pct_player_1.csv' + +def playTTTAndPlotResults(): + learningPlayer = RLTTTPlayer() + randomPlayer = RandomTTTPlayer() + results = [] + numberOfSetsOfGames = 5 + for i in range(numberOfSetsOfGames): + games = GameSequence(1000, learningPlayer, randomPlayer) + results.append(games.playGamesAndGetWinPercent()) + plotValues = {'X Win Fraction': zip(range(numberOfSetsOfGames), map(lambda x: x[0], results)), + 'O Win Fraction': zip(range(numberOfSetsOfGames), map(lambda x: x[1], results)), + 'Draw Fraction': zip(range(numberOfSetsOfGames), map(lambda x: x[2], results))} + drawXYPlotByFactor(plotValues, 'Set Number', 'Fraction') + +def playUltimateAndPlotResults(): + learningPlayer = RLUTTTPlayer(NNUltimateLearning) + randomPlayer = RandomUTTTPlayer() + results = [] + numberOfSetsOfGames = 50 + if os.path.isfile(LEARNING_FILE): + learningPlayer.loadLearning(LEARNING_FILE) + for i in range(numberOfSetsOfGames): + games = GameSequence(100, learningPlayer, randomPlayer, BoardClass=UTTTBoard, BoardDecisionClass=UTTTBoardDecision) + results.append(games.playGamesAndGetWinPercent()) + learningPlayer.saveLearning(LEARNING_FILE) + writeResultsToFile(results) + plotValues = {'X Win Fraction': zip(range(numberOfSetsOfGames), map(lambda x: x[0], results)), + 'O Win Fraction': zip(range(numberOfSetsOfGames), map(lambda x: x[1], results)), + 'Draw Fraction': zip(range(numberOfSetsOfGames), map(lambda x: x[2], results))} + drawXYPlotByFactor(plotValues, 'Set Number', 'Fraction') + +def playUltimateForTraining(): + learningPlayer = RLUTTTPlayer(TableLearning) + randomPlayer = RandomUTTTPlayer() + games = GameSequence(4000, learningPlayer, randomPlayer, BoardClass=UTTTBoard, BoardDecisionClass=UTTTBoardDecision) + games.playGamesAndGetWinPercent() + learningPlayer.saveLearning(NNUltimateLearning.TABLE_LEARNING_FILE) + +def writeResultsToFile(results): + with open(WIN_PCT_FILE, 'a') as outfile: + for result in results: + outfile.write('%s,%s,%s\n'%(result[0], result[1], result[2])) + +def plotResultsFromFile(resultsFile): + results = [] + with open(resultsFile, 'r') as infile: + reader = csv.reader(infile) + results = map(tuple, reader) + numberOfSetsOfGames = len(results) + plotValues = {'X Win Fraction': zip(range(numberOfSetsOfGames), map(lambda x: x[0], results)), + 'O Win Fraction': zip(range(numberOfSetsOfGames), map(lambda x: x[1], results)), + 'Draw Fraction': zip(range(numberOfSetsOfGames), map(lambda x: x[2], results))} + drawXYPlotByFactor(plotValues, 'Set Number', 'Fraction') + +if __name__ == '__main__': + #playTTTAndPlotResults() + #playUltimateForTraining() + #playUltimateAndPlotResults() + plotResultsFromFile('results/ultmate_nn1_results.csv') diff --git a/ultimate_learning.json b/ultimate_learning.json deleted file mode 100644 index 81750b9..0000000 --- a/ultimate_learning.json +++ /dev/null @@ -1 +0,0 @@ -{ \ No newline at end of file diff --git a/ultimateplayer.py b/ultimateplayer.py index cfead6d..63fd552 100644 --- a/ultimateplayer.py +++ b/ultimateplayer.py @@ -1,5 +1,5 @@ from ultimateboard import UTTTBoardDecision, UTTTBoard -from learning import TableLearning, NNUltimateLearning +from learning import TableLearning import random class UTTTPlayer(object): @@ -20,6 +20,12 @@ def makeNextMove(self): def learnFromMove(self, prevBoardState): raise NotImplementedError + def startNewGame(self): + pass + + def finishGame(self): + pass + class RandomUTTTPlayer(UTTTPlayer): def makeNextMove(self): previousState = self.board.getBoardState() @@ -50,6 +56,12 @@ def testNextMove(self, state, boardLocation, placeOnBoard): boardCopy[loc] = self.player return ''.join(boardCopy) + def startNewGame(self): + self.learningAlgo.resetForNewGame() + + def finishGame(self): + self.learningAlgo.gameOver() + def makeNextMove(self): previousState = self.board.getBoardState() if self.isBoardActive():