Refactor to move all testing functions into their own script
shayakbanerjee committed Dec 17, 2017
1 parent 37f34ea commit ddfc898
Showing 7 changed files with 132 additions and 65 deletions.
1 change: 0 additions & 1 deletion board.py
@@ -54,7 +54,6 @@ def makeMove(self, who, i, j, verbose=True): # who is PLAYER_X or PLAYER_O
if self.board[i][j] != GridStates.EMPTY:
print 'That location is not empty'
return
#print '%s moves'%(who)
self.board[i][j] = who
#self.printBoard()
self.determineBoardState()
52 changes: 4 additions & 48 deletions game.py
@@ -1,13 +1,4 @@
from board import TTTBoardDecision, GridStates, TTTBoard
from ultimateboard import UTTTBoard, UTTTBoardDecision
from player import RandomTTTPlayer, RLTTTPlayer
from ultimateplayer import RandomUTTTPlayer, RLUTTTPlayer
from learning import NNUltimateLearning
from plotting import drawXYPlotByFactor
import os

LEARNING_FILE = 'ultimate_player_nn1.h5'
WIN_PCT_FILE = 'win_pct_player_1.csv'

class GameSequence(object):
def __init__(self, numberOfGames, player1, player2, BoardClass=TTTBoard, BoardDecisionClass=TTTBoardDecision):
@@ -18,6 +9,8 @@ def __init__(self, numberOfGames, player1, player2, BoardClass=TTTBoard, BoardDecisionClass=TTTBoardDecision):
self.BoardDecisionClass = BoardDecisionClass

def playAGame(self, board):
self.player1.startNewGame()
self.player2.startNewGame()
while board.getBoardDecision() == self.BoardDecisionClass.ACTIVE:
self.player1.setBoard(board, GridStates.PLAYER_X)
self.player2.setBoard(board, GridStates.PLAYER_O)
@@ -27,6 +20,8 @@ def playAGame(self, board):
pState2 = self.player2.makeNextMove()
self.player1.learnFromMove(pState2)
self.player2.learnFromMove(pState2)
self.player1.finishGame()
self.player2.finishGame()
return board.getBoardDecision()

def playGamesAndGetWinPercent(self):
@@ -38,42 +33,3 @@ def playGamesAndGetWinPercent(self):
float(results.count(self.BoardDecisionClass.WON_O))/float(self.numberOfGames), \
float(results.count(self.BoardDecisionClass.DRAW))/float(self.numberOfGames)
return (xpct, opct, drawpct)

def playTTTAndPlotResults():
learningPlayer = RLTTTPlayer()
randomPlayer = RandomTTTPlayer()
results = []
numberOfSetsOfGames = 40
for i in range(numberOfSetsOfGames):
games = GameSequence(100, learningPlayer, randomPlayer)
results.append(games.playGamesAndGetWinPercent())
plotValues = {'X Win Fraction': zip(range(numberOfSetsOfGames), map(lambda x: x[0], results)),
'O Win Fraction': zip(range(numberOfSetsOfGames), map(lambda x: x[1], results)),
'Draw Fraction': zip(range(numberOfSetsOfGames), map(lambda x: x[2], results))}
drawXYPlotByFactor(plotValues, 'Set Number', 'Fraction')

def playUltimateAndPlotResults():
learningPlayer = RLUTTTPlayer(NNUltimateLearning)
randomPlayer = RandomUTTTPlayer()
results = []
numberOfSetsOfGames = 40
if os.path.isfile(LEARNING_FILE):
learningPlayer.loadLearning(LEARNING_FILE)
for i in range(numberOfSetsOfGames):
games = GameSequence(100, learningPlayer, randomPlayer, BoardClass=UTTTBoard, BoardDecisionClass=UTTTBoardDecision)
results.append(games.playGamesAndGetWinPercent())
learningPlayer.saveLearning(LEARNING_FILE)
writeResultsToFile(results)
plotValues = {'X Win Fraction': zip(range(numberOfSetsOfGames), map(lambda x: x[0], results)),
'O Win Fraction': zip(range(numberOfSetsOfGames), map(lambda x: x[1], results)),
'Draw Fraction': zip(range(numberOfSetsOfGames), map(lambda x: x[2], results))}
drawXYPlotByFactor(plotValues, 'Set Number', 'Fraction')

def writeResultsToFile(results):
with open(WIN_PCT_FILE, 'a') as outfile:
for result in results:
outfile.write('%s,%s,%s\n'%(result[0], result[1], result[2]))

if __name__ == '__main__':
#playTTTAndPlotResults()
playUltimateAndPlotResults()
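
As an aside, with the experiment drivers moved out, a minimal way to exercise what remains in game.py might look like the sketch below. It assumes only the classes visible in this diff; the real entry points now live in test_scripts.py.

from game import GameSequence
from player import RandomTTTPlayer, RLTTTPlayer

# One set of 100 plain tic-tac-toe games: RL player as X, random player as O
learner = RLTTTPlayer()
opponent = RandomTTTPlayer()
games = GameSequence(100, learner, opponent)
xpct, opct, drawpct = games.playGamesAndGetWinPercent()
print('X: %.2f  O: %.2f  Draw: %.2f' % (xpct, opct, drawpct))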
54 changes: 40 additions & 14 deletions learning.py
@@ -24,6 +24,12 @@ def loadModel(self, filename):
# Use only if also saving the intermediate state above
raise NotImplementedError

def resetForNewGame(self):
pass

def gameOver(self):
pass

class TableLearning(GenericLearning):
def __init__(self, DecisionClass=TTTBoardDecision):
self.values = {}
@@ -35,7 +41,7 @@ def getBoardStateValue(self, player, board, boardState):
self.values[boardState] = 1.0 if player == GridStates.PLAYER_X else 0.0
if decision == self.DecisionClass.WON_O:
self.values[boardState] = 1.0 if player == GridStates.PLAYER_O else 0.0
if boardState not in self.values:
if decision == self.DecisionClass.DRAW or boardState not in self.values:
self.values[boardState] = 0.5
return self.values[boardState]

@@ -48,7 +54,6 @@ def learnFromMove(self, player, board, prevBoardState):

def printValues(self):
from pprint import pprint
#pprint(filter(lambda x: x!=0.5, self.values.values()))
pprint(self.values)
print 'Total number of states: %s' % (len(self.values))
print 'Total number of knowledgeable states: %s' % (len(filter(lambda x: x!=0.5, self.values.values())))
@@ -62,45 +67,66 @@ def loadLearning(self, filename):

class NNUltimateLearning(GenericLearning):
STATE_TO_NUMBER_MAP = {GridStates.EMPTY: 0, GridStates.PLAYER_O: -1, GridStates.PLAYER_X: 1}
TABLE_LEARNING_FILE = 'table_learning.json'

def __init__(self, DecisionClass=TTTBoardDecision):
self.DecisionClass = DecisionClass
self.values = {}
self.initializeModel()

def initializeModel(self):
self.model = Sequential()
self.model.add(Dense(81, input_dim=81, activation='relu'))
self.model.add(Dense(81, activation='relu'))
self.model.add(Dense(1, activation='sigmoid'))
#self.model.add(Dense(81, activation='relu'))
self.model.add(Dense(1, activation='linear', kernel_initializer='glorot_uniform'))
self.model.compile(loss='mean_squared_error', optimizer='adam', metrics=['accuracy'])
plot_model(self.model, to_file='model.png')
#self.initialModelTraining(self.TABLE_LEARNING_FILE)

def initialModelTraining(self, jsonFile):
import os
if os.path.isfile(jsonFile):
self.values = json.load(open(jsonFile, 'r'))
self.gameOver()

def resetForNewGame(self):
self.values = {}

def gameOver(self):
boardStates, predYs = [], []
for (k,v) in self.values.iteritems():
boardStates.append(self.convertBoardStateToInput(k))
predYs.append(v)
self.trainModel(boardStates, predYs)

def convertBoardStateToInput(self, boardState):
return np.asarray([map(lambda x: self.STATE_TO_NUMBER_MAP.get(x), boardState)])
return map(lambda x: self.STATE_TO_NUMBER_MAP.get(x), boardState)

def trainModel(self, boardState, y):
self.model.fit(self.convertBoardStateToInput(boardState), np.asarray([y]), verbose=0)
def trainModel(self, boardStates, y):
self.model.fit(np.asarray(boardStates), np.asarray(y), verbose=0)

def getPrediction(self, boardState):
return self.model.predict(self.convertBoardStateToInput(boardState))[0]
return self.model.predict(np.asarray([self.convertBoardStateToInput(boardState)]))[0]

def getBoardStateValue(self, player, board, boardState): #TODO: Can batch the inputs to do several predictions at once
decision = board.getBoardDecision()
predY = self.getPrediction(boardState)[0]
if decision == self.DecisionClass.WON_X:
predY = 1.0 if player == GridStates.PLAYER_X else 0.0
self.trainModel(boardState, predY)
predY = 1.0 if player == GridStates.PLAYER_X else 0.0 #TODO: Explore using -1.0 instead of 0.0
self.values[boardState] = predY
if decision == self.DecisionClass.WON_O:
predY = 1.0 if player == GridStates.PLAYER_O else 0.0
self.trainModel(boardState, predY)
self.values[boardState] = predY
if decision == self.DecisionClass.DRAW:
predY = 0.5
self.values[boardState] = predY
return predY

def learnFromMove(self, player, board, prevBoardState):
curBoardState = board.getBoardState()
curBoardStateValue = self.getBoardStateValue(player, board, curBoardState)
prevBoardStateValue = self.getPrediction(prevBoardState)
trainY = prevBoardStateValue + 0.2 * (curBoardStateValue - prevBoardStateValue)
self.trainModel(prevBoardState, trainY)
prevBoardStateValue = self.getPrediction(prevBoardState)[0]
self.values[prevBoardState] = prevBoardStateValue + 0.2 * (curBoardStateValue - prevBoardStateValue)

def printValues(self):
pass
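
The net effect of the learning.py changes is a TD(0)-style update kept in an in-memory dictionary during play, with a single model fit per game instead of one fit per move. A rough standalone sketch of that pattern follows; the names here are illustrative and not part of this diff.

import numpy as np

ALPHA = 0.2  # same step size used in learnFromMove above

def td_update(values, prev_state, prev_value, cur_value):
    # Move the cached value of the previous board state toward the current state's value
    values[prev_state] = prev_value + ALPHA * (cur_value - prev_value)

def fit_once_per_game(model, values, encode_state):
    # Batch every state visited this game into one training call
    xs = np.asarray([encode_state(s) for s in values])
    ys = np.asarray([values[s] for s in values])
    model.fit(xs, ys, verbose=0)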
6 changes: 6 additions & 0 deletions player.py
@@ -20,6 +20,12 @@ def makeNextMove(self):
def learnFromMove(self, prevBoardState):
raise NotImplementedError

def startNewGame(self):
pass

def finishGame(self):
pass

class RandomTTTPlayer(TTTPlayer):
def makeNextMove(self):
previousState = self.board.getBoardState()
69 changes: 69 additions & 0 deletions test_scripts.py
@@ -0,0 +1,69 @@
from ultimateboard import UTTTBoard, UTTTBoardDecision
from player import RandomTTTPlayer, RLTTTPlayer
from ultimateplayer import RandomUTTTPlayer, RLUTTTPlayer
from learning import NNUltimateLearning, TableLearning
from plotting import drawXYPlotByFactor
import os, csv
from game import GameSequence

LEARNING_FILE = 'ultimate_player_nn1.h5'
WIN_PCT_FILE = 'win_pct_player_1.csv'

def playTTTAndPlotResults():
learningPlayer = RLTTTPlayer()
randomPlayer = RandomTTTPlayer()
results = []
numberOfSetsOfGames = 5
for i in range(numberOfSetsOfGames):
games = GameSequence(1000, learningPlayer, randomPlayer)
results.append(games.playGamesAndGetWinPercent())
plotValues = {'X Win Fraction': zip(range(numberOfSetsOfGames), map(lambda x: x[0], results)),
'O Win Fraction': zip(range(numberOfSetsOfGames), map(lambda x: x[1], results)),
'Draw Fraction': zip(range(numberOfSetsOfGames), map(lambda x: x[2], results))}
drawXYPlotByFactor(plotValues, 'Set Number', 'Fraction')

def playUltimateAndPlotResults():
learningPlayer = RLUTTTPlayer(NNUltimateLearning)
randomPlayer = RandomUTTTPlayer()
results = []
numberOfSetsOfGames = 50
if os.path.isfile(LEARNING_FILE):
learningPlayer.loadLearning(LEARNING_FILE)
for i in range(numberOfSetsOfGames):
games = GameSequence(100, learningPlayer, randomPlayer, BoardClass=UTTTBoard, BoardDecisionClass=UTTTBoardDecision)
results.append(games.playGamesAndGetWinPercent())
learningPlayer.saveLearning(LEARNING_FILE)
writeResultsToFile(results)
plotValues = {'X Win Fraction': zip(range(numberOfSetsOfGames), map(lambda x: x[0], results)),
'O Win Fraction': zip(range(numberOfSetsOfGames), map(lambda x: x[1], results)),
'Draw Fraction': zip(range(numberOfSetsOfGames), map(lambda x: x[2], results))}
drawXYPlotByFactor(plotValues, 'Set Number', 'Fraction')

def playUltimateForTraining():
learningPlayer = RLUTTTPlayer(TableLearning)
randomPlayer = RandomUTTTPlayer()
games = GameSequence(4000, learningPlayer, randomPlayer, BoardClass=UTTTBoard, BoardDecisionClass=UTTTBoardDecision)
games.playGamesAndGetWinPercent()
learningPlayer.saveLearning(NNUltimateLearning.TABLE_LEARNING_FILE)

def writeResultsToFile(results):
with open(WIN_PCT_FILE, 'a') as outfile:
for result in results:
outfile.write('%s,%s,%s\n'%(result[0], result[1], result[2]))

def plotResultsFromFile(resultsFile):
results = []
with open(resultsFile, 'r') as infile:
reader = csv.reader(infile)
results = map(tuple, reader)
numberOfSetsOfGames = len(results)
plotValues = {'X Win Fraction': zip(range(numberOfSetsOfGames), map(lambda x: x[0], results)),
'O Win Fraction': zip(range(numberOfSetsOfGames), map(lambda x: x[1], results)),
'Draw Fraction': zip(range(numberOfSetsOfGames), map(lambda x: x[2], results))}
drawXYPlotByFactor(plotValues, 'Set Number', 'Fraction')

if __name__ == '__main__':
#playTTTAndPlotResults()
#playUltimateForTraining()
#playUltimateAndPlotResults()
plotResultsFromFile('results/ultmate_nn1_results.csv')
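
One caveat when reading percentages back in plotResultsFromFile: csv.reader yields strings, so the tuples recovered from the file are text rather than numbers. If the plotting routine expects floats, a conversion along these lines may be needed (a sketch under that assumption, using the layout written by writeResultsToFile above):

import csv

def readResultsAsFloats(resultsFile):
    # Each row is 'xpct,opct,drawpct' as written by writeResultsToFile
    with open(resultsFile, 'r') as infile:
        return [tuple(float(v) for v in row) for row in csv.reader(infile) if row]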
1 change: 0 additions & 1 deletion ultimate_learning.json

This file was deleted.

14 changes: 13 additions & 1 deletion ultimateplayer.py
@@ -1,5 +1,5 @@
from ultimateboard import UTTTBoardDecision, UTTTBoard
from learning import TableLearning, NNUltimateLearning
from learning import TableLearning
import random

class UTTTPlayer(object):
@@ -20,6 +20,12 @@ def makeNextMove(self):
def learnFromMove(self, prevBoardState):
raise NotImplementedError

def startNewGame(self):
pass

def finishGame(self):
pass

class RandomUTTTPlayer(UTTTPlayer):
def makeNextMove(self):
previousState = self.board.getBoardState()
@@ -50,6 +56,12 @@ def testNextMove(self, state, boardLocation, placeOnBoard):
boardCopy[loc] = self.player
return ''.join(boardCopy)

def startNewGame(self):
self.learningAlgo.resetForNewGame()

def finishGame(self):
self.learningAlgo.gameOver()

def makeNextMove(self):
previousState = self.board.getBoardState()
if self.isBoardActive():