From 283fa419d900249d0befef6b0d37e7bafea33ea2 Mon Sep 17 00:00:00 2001 From: Donato Meoli Date: Mon, 7 Oct 2019 12:13:29 +0200 Subject: [PATCH] moved util functions to utils.py, moved probability learners from learning.py to probabilistic_learning.py with tests, fixed typos and fixed imports in .ipynb files (#1120) * changed queue to set in AC3 Changed queue to set in AC3 (as in the pseudocode of the original algorithm) to reduce the number of consistency-check due to the redundancy of the same arcs in queue. For example, on the harder1 configuration of the Sudoku CSP the number consistency-check has been reduced from 40464 to 12562! * re-added test commented by mistake * added the mentioned AC4 algorithm for constraint propagation AC3 algorithm has non-optimal worst case time-complexity O(cd^3 ), while AC4 algorithm runs in O(cd^2) worst case time * added doctest in Sudoku for AC4 and and the possibility of choosing the constant propagation algorithm in mac inference * removed useless doctest for AC4 in Sudoku because AC4's tests are already present in test_csp.py * added map coloring SAT problems * fixed typo errors and removed unnecessary brackets * reformulated the map coloring problem * Revert "reformulated the map coloring problem" This reverts commit 20ab0e5afa238a0556e68f173b07ad32d0779d3b. * Revert "fixed typo errors and removed unnecessary brackets" This reverts commit f743146c43b28e0525b0f0b332faebc78c15946f. * Revert "added map coloring SAT problems" This reverts commit 9e0fa550e85081cf5b92fb6a3418384ab5a9fdfd. * Revert "removed useless doctest for AC4 in Sudoku because AC4's tests are already present in test_csp.py" This reverts commit b3cd24c511a82275f5b43c9f176396e6ba05f67e. * Revert "added doctest in Sudoku for AC4 and and the possibility of choosing the constant propagation algorithm in mac inference" This reverts commit 6986247481a05f1e558b93b2bf3cdae395f9c4ee. 
* Revert "added the mentioned AC4 algorithm for constraint propagation" This reverts commit 03551fbf2aa3980b915d4b6fefcbc70f24547b03. * added map coloring SAT problem * fixed build error * Revert "added map coloring SAT problem" This reverts commit 93af259e4811ddd775429f8a334111b9dd9e268c. * Revert "fixed build error" This reverts commit 6641c2c861728f3d43d3931ef201c6f7093cbc96. * added map coloring SAT problem * removed redundant parentheses * added Viterbi algorithm * added monkey & bananas planning problem * simplified condition in search.py * added tests for monkey & bananas planning problem * removed monkey & bananas planning problem * Revert "removed monkey & bananas planning problem" This reverts commit 9d37ae0def15b9e058862cb465da13d2eb926968. * Revert "added tests for monkey & bananas planning problem" This reverts commit 24041e9a1a0ab936f7a2608e3662c8efec559382. * Revert "simplified condition in search.py" This reverts commit 6d229ce9bde5033802aca29ad3047f37ee6d870d. * Revert "added monkey & bananas planning problem" This reverts commit c74933a8905de7bb569bcaed7230930780560874. * defined the PlanningProblem as a specialization of a search.Problem & fixed typo errors * fixed doctest in logic.py * fixed doctest for cascade_distribution * added ForwardPlanner and tests * added __lt__ implementation for Expr * added more tests * renamed forward planner * Revert "renamed forward planner" This reverts commit c4139e50e3a75a036607f4627717d70ad0919554. 
* renamed forward planner class & added doc * added backward planner and tests * fixed mdp4e.py doctests * removed ignore_delete_lists_heuristic flag * fixed heuristic for forward and backward planners * added SATPlan and tests * fixed ignore delete lists heuristic in forward and backward planners * fixed backward planner and added tests * updated doc * added nary csp definition and examples * added CSPlan and tests * fixed CSPlan * added book's cryptarithmetic puzzle example * fixed typo errors in test_csp * fixed #1111 * added sortedcontainers to yml and doc to CSPlan * added tests for n-ary csp * fixed utils.extend * updated test_probability.py * converted static methods to functions * added AC3b and AC4 with heuristic and tests * added conflict-driven clause learning sat solver * added tests for cdcl and heuristics * fixed probability.py * fixed import * fixed kakuro * added Martelli and Montanari rule-based unification algorithm * removed duplicate standardize_variables * renamed variables known as built-in functions * fixed typos in learning.py * renamed some files and fixed typos * fixed typos * fixed typos * fixed tests * removed unify_mm * remove unnecessary brackets * fixed tests * moved utility functions to utils.py * fixed typos * moved utils function to utils.py, separated probability learning classes from learning.py, fixed typos and fixed imports in .ipynb files * added missing learners * fixed Travis build * fixed typos * fixed typos * fixed typos * fixed typos * fixed typos in agents files * fixed imports in agent files --- agents.py | 14 +- agents4e.py | 6 +- csp.ipynb | 13 +- deep_learning4e.py | 142 ++-- knowledge.py | 6 +- knowledge_FOIL.ipynb | 14 +- learning.ipynb | 12 +- learning.py | 1100 +++++++++++--------------- learning4e.py | 762 +++++++++--------- learning_apps.ipynb | 12 +- logic.py | 20 +- probabilistic_learning.py | 154 ++++ reinforcement_learning.ipynb | 13 +- requirements.txt | 2 +- tests/test_agents.py | 54 +- 
tests/test_agents4e.py | 51 +- tests/test_deep_learning4e.py | 41 +- tests/test_learning.py | 157 ++-- tests/test_learning4e.py | 76 +- tests/test_probabilistic_learning.py | 38 + tests/test_utils.py | 55 +- text.py | 2 +- utils.py | 73 +- utils4e.py | 2 +- 24 files changed, 1400 insertions(+), 1419 deletions(-) create mode 100644 probabilistic_learning.py create mode 100644 tests/test_probabilistic_learning.py diff --git a/agents.py b/agents.py index 0cab77eb2..6c01aa5b4 100644 --- a/agents.py +++ b/agents.py @@ -333,8 +333,7 @@ def run(self, steps=1000): def list_things_at(self, location, tclass=Thing): """Return all things exactly at a given location.""" - return [thing for thing in self.things - if thing.location == location and isinstance(thing, tclass)] + return [thing for thing in self.things if thing.location == location and isinstance(thing, tclass)] def some_things_at(self, location, tclass=Thing): """Return true if at least one of the things at location @@ -993,9 +992,8 @@ def is_done(self): else: print("Death by {} [-1000].".format(explorer[0].killed_by)) else: - print("Explorer climbed out {}." - .format( - "with Gold [+1000]!" if Gold() not in self.things else "without Gold [+0]")) + print("Explorer climbed out {}.".format("with Gold [+1000]!" 
+ if Gold() not in self.things else "without Gold [+0]")) return True # TODO: Arrow needs to be implemented @@ -1012,9 +1010,9 @@ def compare_agents(EnvFactory, AgentFactories, n=10, steps=1000): >>> environment = TrivialVacuumEnvironment >>> agents = [ModelBasedVacuumAgent, ReflexVacuumAgent] >>> result = compare_agents(environment, agents) - >>> performance_ModelBasedVacummAgent = result[0][1] - >>> performance_ReflexVacummAgent = result[1][1] - >>> performance_ReflexVacummAgent <= performance_ModelBasedVacummAgent + >>> performance_ModelBasedVacuumAgent = result[0][1] + >>> performance_ReflexVacuumAgent = result[1][1] + >>> performance_ReflexVacuumAgent <= performance_ModelBasedVacuumAgent True """ envs = [EnvFactory() for i in range(n)] diff --git a/agents4e.py b/agents4e.py index c25397783..fab36a46c 100644 --- a/agents4e.py +++ b/agents4e.py @@ -1012,9 +1012,9 @@ def compare_agents(EnvFactory, AgentFactories, n=10, steps=1000): >>> environment = TrivialVacuumEnvironment >>> agents = [ModelBasedVacuumAgent, ReflexVacuumAgent] >>> result = compare_agents(environment, agents) - >>> performance_ModelBasedVacummAgent = result[0][1] - >>> performance_ReflexVacummAgent = result[1][1] - >>> performance_ReflexVacummAgent <= performance_ModelBasedVacummAgent + >>> performance_ModelBasedVacuumAgent = result[0][1] + >>> performance_ReflexVacuumAgent = result[1][1] + >>> performance_ReflexVacuumAgent <= performance_ModelBasedVacuumAgent True """ envs = [EnvFactory() for i in range(n)] diff --git a/csp.ipynb b/csp.ipynb index 163cc6b1e..5d490846b 100644 --- a/csp.ipynb +++ b/csp.ipynb @@ -16,7 +16,7 @@ "outputs": [], "source": [ "from csp import *\n", - "from notebook import psource, pseudocode, plot_NQueens\n", + "from notebook import psource, plot_NQueens\n", "%matplotlib inline\n", "\n", "# Hide warnings in the matplotlib sections\n", @@ -3068,8 +3068,17 @@ "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.8" + }, + "pycharm": { + 
"stem_cell": { + "cell_type": "raw", + "source": [], + "metadata": { + "collapsed": false + } + } } }, "nbformat": 4, "nbformat_minor": 1 -} +} \ No newline at end of file diff --git a/deep_learning4e.py b/deep_learning4e.py index dadf19d6b..18c41f54e 100644 --- a/deep_learning4e.py +++ b/deep_learning4e.py @@ -1,3 +1,5 @@ +"""Deep learning. (Chapters 20)""" + import math import random import statistics @@ -8,24 +10,20 @@ from keras.models import Sequential from keras.preprocessing import sequence -from utils4e import sigmoid, dotproduct, softmax1D, conv1D, GaussianKernel, element_wise_product, \ - vector_add, random_weights, scalar_vector_product, matrix_multiplication, map_vector, mse_loss - - -# DEEP NEURAL NETWORKS. (Chapter 19) -# ________________________________________________ -# 19.3 Models -# 19.3.1 Computational Graphs and Layers +from utils4e import (sigmoid, dotproduct, softmax1D, conv1D, GaussianKernel, element_wise_product, vector_add, + random_weights, scalar_vector_product, matrix_multiplication, map_vector, mse_loss) class Node: """ - A node in a computational graph. Contains the pointer to all its parents. + A node in a computational graph contains the pointer to all its parents. :param val: value of current node. :param parents: a container of all parents of current node. 
""" - def __init__(self, val=None, parents=[]): + def __init__(self, val=None, parents=None): + if parents is None: + parents = [] self.val = val self.parents = parents @@ -35,7 +33,7 @@ def __repr__(self): class NNUnit(Node): """ - A single unit of a layer in a Neural Network + A single unit of a layer in a neural network :param weights: weights between parent nodes and current node :param value: value of current node """ @@ -59,11 +57,8 @@ def forward(self, inputs): raise NotImplementedError -# 19.3.2 Output Layers - - class OutputLayer(Layer): - """Example of a 1D softmax output layer in 19.3.2""" + """1D softmax output layer in 19.3.2""" def __init__(self, size=3): super(OutputLayer, self).__init__(size) @@ -77,7 +72,7 @@ def forward(self, inputs): class InputLayer(Layer): - """Example of a 1D input layer. Layer size is the same as input vector size.""" + """1D input layer. Layer size is the same as input vector size.""" def __init__(self, size=3): super(InputLayer, self).__init__(size) @@ -90,9 +85,6 @@ def forward(self, inputs): return inputs -# 19.3.3 Hidden Layers - - class DenseLayer(Layer): """ 1D dense layer in a neural network. @@ -121,9 +113,6 @@ def forward(self, inputs): return res -# 19.3.4 Convolutional networks - - class ConvLayer1D(Layer): """ 1D convolution layer of in neural network. @@ -137,10 +126,10 @@ def __init__(self, size=3, kernel_size=3): node.weights = GaussianKernel(kernel_size) def forward(self, features): - # Each node in layer takes a channel in the features. + # each node in layer takes a channel in the features. assert len(self.nodes) == len(features) res = [] - # compute the convolution output of each channel, store it in node.val. 
+ # compute the convolution output of each channel, store it in node.val for node, feature in zip(self.nodes, features): out = conv1D(feature, node.weights) res.append(out) @@ -148,12 +137,11 @@ def forward(self, features): return res -# 19.3.5 Pooling and Downsampling - - class MaxPoolingLayer1D(Layer): - """1D max pooling layer in a neural network. - :param kernel_size: max pooling area size""" + """ + 1D max pooling layer in a neural network. + :param kernel_size: max pooling area size + """ def __init__(self, size=3, kernel_size=3): super(MaxPoolingLayer1D, self).__init__(size) @@ -174,38 +162,30 @@ def forward(self, features): return res -# ____________________________________________________________________ -# 19.4 optimization algorithms - - def init_examples(examples, idx_i, idx_t, o_units): """Init examples from dataset.examples.""" inputs, targets = {}, {} - # random.shuffle(examples) for i, e in enumerate(examples): - # Input values of e + # input values of e inputs[i] = [e[i] for i in idx_i] if o_units > 1: - # One-Hot representation of e's target + # one-hot representation of e's target t = [0 for i in range(o_units)] t[e[idx_t]] = 1 targets[i] = t else: - # Target value of e + # target value of e targets[i] = [e[idx_t]] return inputs, targets -# 19.4.1 Stochastic gradient descent - - def gradient_descent(dataset, net, loss, epochs=1000, l_rate=0.01, batch_size=1, verbose=None): """ - gradient descent algorithm to update the learnable parameters of a network. - :return: the updated network. + Gradient descent algorithm to update the learnable parameters of a network. 
+ :return: the updated network """ examples = dataset.examples # init data @@ -233,13 +213,11 @@ def gradient_descent(dataset, net, loss, epochs=1000, l_rate=0.01, batch_size=1, return net -# 19.4.2 Other gradient-based optimization algorithms - - -def adam_optimizer(dataset, net, loss, epochs=1000, rho=(0.9, 0.999), delta=1 / 10 ** 8, l_rate=0.001, batch_size=1, - verbose=None): +def adam_optimizer(dataset, net, loss, epochs=1000, rho=(0.9, 0.999), delta=1 / 10 ** 8, + l_rate=0.001, batch_size=1, verbose=None): """ - Adam optimizer in Figure 19.6 to update the learnable parameters of a network. + [Figure 19.6] + Adam optimizer to update the learnable parameters of a network. Required parameters are similar to gradient descent. :return the updated network """ @@ -292,14 +270,11 @@ def adam_optimizer(dataset, net, loss, epochs=1000, rho=(0.9, 0.999), delta=1 / return net -# 19.4.3 Back-propagation - - def BackPropagation(inputs, targets, theta, net, loss): """ The back-propagation algorithm for multilayer networks in only one epoch, to calculate gradients of theta - :param inputs: A batch of inputs in an array. Each input is an iterable object. - :param targets: A batch of targets in an array. Each target is an iterable object. + :param inputs: a batch of inputs in an array. Each input is an iterable object. + :param targets: a batch of targets in an array. Each target is an iterable object. :param theta: parameters to be updated. :param net: a list of predefined layer objects representing their linear sequence. :param loss: a predefined loss function taking array of inputs and targets. 
@@ -321,19 +296,19 @@ def BackPropagation(inputs, targets, theta, net, loss): i_val = inputs[e] t_val = targets[e] - # Forward pass and compute batch loss + # forward pass and compute batch loss for i in range(1, n_layers): layer_out = net[i].forward(i_val) i_val = layer_out batch_loss += loss(t_val, layer_out) - # Initialize delta + # initialize delta delta = [[] for _ in range(n_layers)] previous = [layer_out[i] - t_val[i] for i in range(o_units)] h_layers = n_layers - 1 - - # Backward pass + + # backward pass for i in range(h_layers, 0, -1): layer = net[i] derivative = [layer.activation.derivative(node.val) for node in layer.nodes] @@ -349,11 +324,8 @@ def BackPropagation(inputs, targets, theta, net, loss): return total_gradients, batch_loss -# 19.4.5 Batch normalization - - class BatchNormalizationLayer(Layer): - """Example of a batch normalization layer.""" + """Batch normalization layer.""" def __init__(self, size, epsilon=0.001): super(BatchNormalizationLayer, self).__init__(size) @@ -378,19 +350,20 @@ def forward(self, inputs): def get_batch(examples, batch_size=1): - """split examples into multiple batches""" + """Split examples into multiple batches""" for i in range(0, len(examples), batch_size): yield examples[i: i + batch_size] -# example of NNs - - -def neural_net_learner(dataset, hidden_layer_sizes=[4], learning_rate=0.01, epochs=100, optimizer=gradient_descent, - batch_size=1, verbose=None): - """Example of a simple dense multilayer neural network. - :param hidden_layer_sizes: size of hidden layers in the form of a list""" +def NeuralNetLearner(dataset, hidden_layer_sizes=None, learning_rate=0.01, epochs=100, + optimizer=gradient_descent, batch_size=1, verbose=None): + """ + Simple dense multilayer neural network. 
+ :param hidden_layer_sizes: size of hidden layers in the form of a list + """ + if hidden_layer_sizes is None: + hidden_layer_sizes = [4] input_size = len(dataset.inputs) output_size = len(dataset.values[dataset.target]) @@ -404,8 +377,8 @@ def neural_net_learner(dataset, hidden_layer_sizes=[4], learning_rate=0.01, epoc raw_net.append(DenseLayer(hidden_input_size, output_size)) # update parameters of the network - learned_net = optimizer(dataset, raw_net, mse_loss, epochs, l_rate=learning_rate, batch_size=batch_size, - verbose=verbose) + learned_net = optimizer(dataset, raw_net, mse_loss, epochs, l_rate=learning_rate, + batch_size=batch_size, verbose=verbose) def predict(example): n_layers = len(learned_net) @@ -423,9 +396,9 @@ def predict(example): return predict -def perceptron_learner(dataset, learning_rate=0.01, epochs=100, verbose=None): +def PerceptronLearner(dataset, learning_rate=0.01, epochs=100, verbose=None): """ - Example of a simple perceptron neural network. + Simple perceptron neural network. """ input_size = len(dataset.inputs) output_size = len(dataset.values[dataset.target]) @@ -443,17 +416,14 @@ def predict(example): return predict -# ____________________________________________________________________ -# 19.6 Recurrent neural networks - - -def simple_rnn_learner(train_data, val_data, epochs=2): +def SimpleRNNLearner(train_data, val_data, epochs=2): """ - rnn example for text sentimental analysis + RNN example for text sentimental analysis. :param train_data: a tuple of (training data, targets) Training data: ndarray taking training examples, while each example is coded by embedding - Targets: ndarry taking targets of each example. Each target is mapped to an integer. + Targets: ndarray taking targets of each example. Each target is mapped to an integer. 
:param val_data: a tuple of (validation data, targets) + :param epochs: number of epochs :return: a keras model """ @@ -479,7 +449,7 @@ def simple_rnn_learner(train_data, val_data, epochs=2): def keras_dataset_loader(dataset, max_length=500): """ - helper function to load keras datasets + Helper function to load keras datasets. :param dataset: keras data set type :param max_length: max length of each input sequence """ @@ -491,10 +461,14 @@ def keras_dataset_loader(dataset, max_length=500): return (X_train[10:], y_train[10:]), (X_val, y_val), (X_train[:10], y_train[:10]) -def auto_encoder_learner(inputs, encoding_size, epochs=200): - """simple example of linear auto encoder learning producing the input itself. +def AutoencoderLearner(inputs, encoding_size, epochs=200): + """ + Simple example of linear auto encoder learning producing the input itself. :param inputs: a batch of input data in np.ndarray type - :param encoding_size: int, the size of encoding layer""" + :param encoding_size: int, the size of encoding layer + :param epochs: number of epochs + :return: a keras model + """ # init data input_size = len(inputs[0]) diff --git a/knowledge.py b/knowledge.py index d237090ee..eaeacf7d9 100644 --- a/knowledge.py +++ b/knowledge.py @@ -1,4 +1,4 @@ -"""Knowledge in learning, Chapter 19""" +"""Knowledge in learning (Chapter 19)""" from random import shuffle from math import log @@ -13,10 +13,12 @@ # ______________________________________________________________________________ -def current_best_learning(examples, h, examples_so_far=[]): +def current_best_learning(examples, h, examples_so_far=None): """ [Figure 19.2] The hypothesis is a list of dictionaries, with each dictionary representing a disjunction.""" + if examples_so_far is None: + examples_so_far = [] if not examples: return h diff --git a/knowledge_FOIL.ipynb b/knowledge_FOIL.ipynb index 63e943416..4cefd7f69 100644 --- a/knowledge_FOIL.ipynb +++ b/knowledge_FOIL.ipynb @@ -18,8 +18,7 @@ "outputs": [], 
"source": [ "from knowledge import *\n", - "\n", - "from notebook import pseudocode, psource" + "from notebook import psource" ] }, { @@ -624,8 +623,17 @@ "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.5" + }, + "pycharm": { + "stem_cell": { + "cell_type": "raw", + "source": [], + "metadata": { + "collapsed": false + } + } } }, "nbformat": 4, "nbformat_minor": 2 -} +} \ No newline at end of file diff --git a/learning.ipynb b/learning.ipynb index aecd5d2d3..0cadd4e7b 100644 --- a/learning.ipynb +++ b/learning.ipynb @@ -16,6 +16,7 @@ "outputs": [], "source": [ "from learning import *\n", + "from probabilistic_learning import *\n", "from notebook import *" ] }, @@ -2247,8 +2248,17 @@ "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.5.2" + }, + "pycharm": { + "stem_cell": { + "cell_type": "raw", + "source": [], + "metadata": { + "collapsed": false + } + } } }, "nbformat": 4, "nbformat_minor": 2 -} +} \ No newline at end of file diff --git a/learning.py b/learning.py index 7fe536f96..31aabe30f 100644 --- a/learning.py +++ b/learning.py @@ -1,4 +1,4 @@ -"""Learn to estimate functions from examples. (Chapters 18, 20)""" +"""Learning from examples. 
(Chapters 18)""" import copy import heapq @@ -7,46 +7,46 @@ from collections import defaultdict from statistics import mean, stdev -from utils import ( - removeall, unique, product, mode, argmax, argmax_random_tie, isclose, gaussian, - dotproduct, vector_add, scalar_vector_product, weighted_sample_with_replacement, - weighted_sampler, num_or_str, normalize, clip, sigmoid, print_table, - open_data, sigmoid_derivative, probability, norm, matrix_multiplication, relu, relu_derivative, - tanh, tanh_derivative, leaky_relu_derivative, elu, elu_derivative, - mean_boolean_error) +from probabilistic_learning import NaiveBayesLearner +from utils import (remove_all, unique, mode, argmax, argmax_random_tie, isclose, dotproduct, vector_add, + scalar_vector_product, weighted_sample_with_replacement, num_or_str, normalize, clip, sigmoid, + print_table, open_data, sigmoid_derivative, probability, relu, relu_derivative, tanh, + tanh_derivative, leaky_relu_derivative, elu, elu_derivative, mean_boolean_error, random_weights) class DataSet: - """A data set for a machine learning problem. It has the following fields: + """ + A data set for a machine learning problem. It has the following fields: d.examples A list of examples. Each one is a list of attribute values. d.attrs A list of integers to index into an example, so example[attr] gives a value. Normally the same as range(len(d.examples[0])). - d.attrnames Optional list of mnemonic names for corresponding attrs. + d.attr_names Optional list of mnemonic names for corresponding attrs. d.target The attribute that a learning algorithm will try to predict. By default the final attribute. d.inputs The list of attrs without the target. d.values A list of lists: each sublist is the set of possible values for the corresponding attribute. If initially None, - it is computed from the known examples by self.setproblem. + it is computed from the known examples by self.set_problem. If not None, an erroneous value raises ValueError. 
- d.distance A function from a pair of examples to a nonnegative number. + d.distance A function from a pair of examples to a non-negative number. Should be symmetric, etc. Defaults to mean_boolean_error since that can handle any field types. d.name Name of the data set (for output display only). d.source URL or other source where the data came from. d.exclude A list of attribute indexes to exclude from d.inputs. Elements - of this list can either be integers (attrs) or attrnames. + of this list can either be integers (attrs) or attr_names. Normally, you call the constructor and you're done; then you just - access fields like d.examples and d.target and d.inputs.""" + access fields like d.examples and d.target and d.inputs. + """ - def __init__(self, examples=None, attrs=None, attrnames=None, target=-1, - inputs=None, values=None, distance=mean_boolean_error, - name='', source='', exclude=()): - """Accepts any of DataSet's fields. Examples can also be a + def __init__(self, examples=None, attrs=None, attr_names=None, target=-1, inputs=None, + values=None, distance=mean_boolean_error, name='', source='', exclude=()): + """ + Accepts any of DataSet's fields. Examples can also be a string or file from which to parse examples using parse_csv. - Optional parameter: exclude, as documented in .setproblem(). + Optional parameter: exclude, as documented in .set_problem(). >>> DataSet(examples='1, 2, 3') """ @@ -56,7 +56,7 @@ def __init__(self, examples=None, attrs=None, attrnames=None, target=-1, self.distance = distance self.got_values_flag = bool(values) - # Initialize .examples from string or list or data directory + # initialize .examples from string or list or data directory if isinstance(examples, str): self.examples = parse_csv(examples) elif examples is None: @@ -64,39 +64,40 @@ def __init__(self, examples=None, attrs=None, attrnames=None, target=-1, else: self.examples = examples - # Attrs are the indices of examples, unless otherwise stated. 
+ # attrs are the indices of examples, unless otherwise stated. if self.examples is not None and attrs is None: attrs = list(range(len(self.examples[0]))) self.attrs = attrs - # Initialize .attrnames from string, list, or by default - if isinstance(attrnames, str): - self.attrnames = attrnames.split() + # initialize .attr_names from string, list, or by default + if isinstance(attr_names, str): + self.attr_names = attr_names.split() else: - self.attrnames = attrnames or attrs - self.setproblem(target, inputs=inputs, exclude=exclude) + self.attr_names = attr_names or attrs + self.set_problem(target, inputs=inputs, exclude=exclude) - def setproblem(self, target, inputs=None, exclude=()): - """Set (or change) the target and/or inputs. + def set_problem(self, target, inputs=None, exclude=()): + """ + Set (or change) the target and/or inputs. This way, one DataSet can be used multiple ways. inputs, if specified, is a list of attributes, or specify exclude as a list of attributes - to not use in inputs. Attributes can be -n .. n, or an attrname. - Also computes the list of possible values, if that wasn't done yet.""" - self.target = self.attrnum(target) - exclude = list(map(self.attrnum, exclude)) + to not use in inputs. Attributes can be -n .. n, or an attr_name. + Also computes the list of possible values, if that wasn't done yet. 
+ """ + self.target = self.attr_num(target) + exclude = list(map(self.attr_num, exclude)) if inputs: - self.inputs = removeall(self.target, inputs) + self.inputs = remove_all(self.target, inputs) else: - self.inputs = [a for a in self.attrs - if a != self.target and a not in exclude] + self.inputs = [a for a in self.attrs if a != self.target and a not in exclude] if not self.values: self.update_values() self.check_me() def check_me(self): """Check that my fields make sense.""" - assert len(self.attrnames) == len(self.attrs) + assert len(self.attr_names) == len(self.attrs) assert self.target in self.attrs assert self.target not in self.inputs assert set(self.inputs).issubset(set(self.attrs)) @@ -115,12 +116,12 @@ def check_example(self, example): for a in self.attrs: if example[a] not in self.values[a]: raise ValueError('Bad value {} for attribute {} in {}' - .format(example[a], self.attrnames[a], example)) + .format(example[a], self.attr_names[a], example)) - def attrnum(self, attr): + def attr_num(self, attr): """Returns the number used for attr, which can be a name, or -n .. 
n-1.""" if isinstance(attr, str): - return self.attrnames.index(attr) + return self.attr_names.index(attr) elif attr < 0: return len(self.attrs) + attr else: @@ -131,13 +132,12 @@ def update_values(self): def sanitize(self, example): """Return a copy of example, with non-input attributes replaced by None.""" - return [attr_i if i in self.inputs else None - for i, attr_i in enumerate(example)] + return [attr_i if i in self.inputs else None for i, attr_i in enumerate(example)] def classes_to_numbers(self, classes=None): """Converts class names to numbers.""" if not classes: - # If classes were not given, extract them from values + # if classes were not given, extract them from values classes = sorted(self.values[self.target]) for item in self.examples: item[self.target] = classes.index(item[self.target]) @@ -153,17 +153,19 @@ def split_values_by_classes(self): target_names = self.values[self.target] for v in self.examples: - item = [a for a in v if a not in target_names] # Remove target from item - buckets[v[self.target]].append(item) # Add item to bucket of its class + item = [a for a in v if a not in target_names] # remove target from item + buckets[v[self.target]].append(item) # add item to bucket of its class return buckets def find_means_and_deviations(self): - """Finds the means and standard deviations of self.dataset. - means : A dictionary for each class/target. Holds a list of the means + """ + Finds the means and standard deviations of self.dataset. + means : a dictionary for each class/target. Holds a list of the means of the features for the class. - deviations: A dictionary for each class/target. Holds a list of the sample - standard deviations of the features for the class.""" + deviations: a dictionary for each class/target. Holds a list of the sample + standard deviations of the features for the class. 
+ """ target_names = self.values[self.target] feature_numbers = len(self.inputs) @@ -173,13 +175,13 @@ def find_means_and_deviations(self): deviations = defaultdict(lambda: [0] * feature_numbers) for t in target_names: - # Find all the item feature values for item in class t - features = [[] for i in range(feature_numbers)] + # find all the item feature values for item in class t + features = [[] for _ in range(feature_numbers)] for item in item_buckets[t]: for i in range(feature_numbers): features[i].append(item[i]) - # Calculate means and deviations fo the class + # calculate means and deviations fo the class for i in range(feature_numbers): means[t][i] = mean(features[i]) deviations[t][i] = stdev(features[i]) @@ -187,285 +189,182 @@ def find_means_and_deviations(self): return means, deviations def __repr__(self): - return ''.format( - self.name, len(self.examples), len(self.attrs)) - - -# ______________________________________________________________________________ + return ''.format(self.name, len(self.examples), len(self.attrs)) def parse_csv(input, delim=','): - r"""Input is a string consisting of lines, each line has comma-delimited + r""" + Input is a string consisting of lines, each line has comma-delimited fields. Convert this into a list of lists. Blank lines are skipped. Fields that look like numbers are converted to numbers. The delim defaults to ',' but '\t' and None are also reasonable values. >>> parse_csv('1, 2, 3 \n 0, 2, na') - [[1, 2, 3], [0, 2, 'na']]""" + [[1, 2, 3], [0, 2, 'na']] + """ lines = [line for line in input.splitlines() if line.strip()] return [list(map(num_or_str, line.split(delim))) for line in lines] -# ______________________________________________________________________________ - - -class CountingProbDist: - """A probability distribution formed by observing and counting examples. 
- If p is an instance of this class and o is an observed value, then - there are 3 main operations: - p.add(o) increments the count for observation o by 1. - p.sample() returns a random element from the distribution. - p[o] returns the probability for o (as in a regular ProbDist).""" - - def __init__(self, observations=None, default=0): - """Create a distribution, and optionally add in some observations. - By default this is an unsmoothed distribution, but saying default=1, - for example, gives you add-one smoothing.""" - if observations is None: - observations = [] - self.dictionary = {} - self.n_obs = 0 - self.default = default - self.sampler = None - - for o in observations: - self.add(o) - - def add(self, o): - """Add an observation o to the distribution.""" - self.smooth_for(o) - self.dictionary[o] += 1 - self.n_obs += 1 - self.sampler = None - - def smooth_for(self, o): - """Include o among the possible observations, whether or not - it's been observed yet.""" - if o not in self.dictionary: - self.dictionary[o] = self.default - self.n_obs += self.default - self.sampler = None - - def __getitem__(self, item): - """Return an estimate of the probability of item.""" - self.smooth_for(item) - return self.dictionary[item] / self.n_obs - - # (top() and sample() are not used in this module, but elsewhere.) - - def top(self, n): - """Return (count, obs) tuples for the n most frequent observations.""" - return heapq.nlargest(n, [(v, k) for (k, v) in self.dictionary.items()]) - - def sample(self): - """Return a random sample from the distribution.""" - if self.sampler is None: - self.sampler = weighted_sampler(list(self.dictionary.keys()), - list(self.dictionary.values())) - return self.sampler() - - -# ______________________________________________________________________________ - - -def PluralityLearner(dataset): - """A very dumb algorithm: always pick the result that was most popular - in the training data. 
Makes a baseline for comparison.""" - most_popular = mode([e[dataset.target] for e in dataset.examples]) - - def predict(example): - """Always return same result: the most popular from the training set.""" - return most_popular - - return predict +def err_ratio(predict, dataset, examples=None, verbose=0): + """ + Return the proportion of the examples that are NOT correctly predicted. + verbose - 0: No output; 1: Output wrong; 2 (or greater): Output correct + """ + examples = examples or dataset.examples + if len(examples) == 0: + return 0.0 + right = 0 + for example in examples: + desired = example[dataset.target] + output = predict(dataset.sanitize(example)) + if output == desired: + right += 1 + if verbose >= 2: + print(' OK: got {} for {}'.format(desired, example)) + elif verbose: + print('WRONG: got {}, expected {} for {}'.format(output, desired, example)) + return 1 - (right / len(examples)) -# ______________________________________________________________________________ +def grade_learner(predict, tests): + """ + Grades the given learner based on how many tests it passes. + tests is a list with each element in the form: (values, output). + """ + return mean(int(predict(X) == y) for X, y in tests) -def NaiveBayesLearner(dataset, continuous=True, simple=False): - if simple: - return NaiveBayesSimple(dataset) - if continuous: - return NaiveBayesContinuous(dataset) +def train_test_split(dataset, start=None, end=None, test_split=None): + """ + If you are giving 'start' and 'end' as parameters, + then it will return the testing set from index 'start' to 'end' + and the rest for training. + If you give 'test_split' as a parameter then it will return + test_split * 100% as the testing set and the rest as + training set. 
+ """ + examples = dataset.examples + if test_split is None: + train = examples[:start] + examples[end:] + val = examples[start:end] else: - return NaiveBayesDiscrete(dataset) - - -def NaiveBayesSimple(distribution): - """A simple naive bayes classifier that takes as input a dictionary of - CountingProbDist objects and classifies items according to these distributions. - The input dictionary is in the following form: - (ClassName, ClassProb): CountingProbDist""" - target_dist = {c_name: prob for c_name, prob in distribution.keys()} - attr_dists = {c_name: count_prob for (c_name, _), count_prob in distribution.items()} - - def predict(example): - """Predict the target value for example. Calculate probabilities for each - class and pick the max.""" - - def class_probability(targetval): - attr_dist = attr_dists[targetval] - return target_dist[targetval] * product(attr_dist[a] for a in example) - - return argmax(target_dist.keys(), key=class_probability) - - return predict - - -def NaiveBayesDiscrete(dataset): - """Just count how many times each value of each input attribute - occurs, conditional on the target value. Count the different - target values too.""" - - target_vals = dataset.values[dataset.target] - target_dist = CountingProbDist(target_vals) - attr_dists = {(gv, attr): CountingProbDist(dataset.values[attr]) - for gv in target_vals - for attr in dataset.inputs} - for example in dataset.examples: - targetval = example[dataset.target] - target_dist.add(targetval) - for attr in dataset.inputs: - attr_dists[targetval, attr].add(example[attr]) - - def predict(example): - """Predict the target value for example. 
Consider each possible value, - and pick the most likely by looking at each attribute independently.""" - - def class_probability(targetval): - return (target_dist[targetval] * - product(attr_dists[targetval, attr][example[attr]] - for attr in dataset.inputs)) + total_size = len(examples) + val_size = int(total_size * test_split) + train_size = total_size - val_size + train = examples[:train_size] + val = examples[train_size:total_size] - return argmax(target_vals, key=class_probability) + return train, val - return predict +def cross_validation_wrapper(learner, dataset, k=10, trials=1): + """ + [Figure 18.8] + Return the optimal value of size having minimum error on validation set. + errT: a training error array, indexed by size + errV: a validation error array, indexed by size + """ + errs = [] + size = 1 + while True: + errT, errV = cross_validation(learner, dataset, size, k, trials) + # check for convergence provided err_val is not empty + if errT and not isclose(errT[-1], errT, rel_tol=1e-6): + best_size = 0 + min_val = math.inf + i = 0 + while i < size: + if errs[i] < min_val: + min_val = errs[i] + best_size = i + i += 1 + return learner(dataset, best_size) + errs.append(errV) + size += 1 -def NaiveBayesContinuous(dataset): - """Count how many times each target value occurs. - Also, find the means and deviations of input attribute values for each target value.""" - means, deviations = dataset.find_means_and_deviations() - target_vals = dataset.values[dataset.target] - target_dist = CountingProbDist(target_vals) +def cross_validation(learner, dataset, size=None, k=10, trials=1): + """ + Do k-fold cross_validate and return their mean. + That is, keep out 1/k of the examples for testing on each of k runs. + Shuffle the examples first; if trials>1, average over several shuffles. 
+ Returns Training error, Validation error + """ + k = k or len(dataset.examples) + if trials > 1: + trial_errT = 0 + trial_errV = 0 + for t in range(trials): + errT, errV = cross_validation(learner, dataset, size, k, trials) + trial_errT += errT + trial_errV += errV + return trial_errT / trials, trial_errV / trials + else: + fold_errT = 0 + fold_errV = 0 + n = len(dataset.examples) + examples = dataset.examples + random.shuffle(dataset.examples) + for fold in range(k): + train_data, val_data = train_test_split(dataset, fold * (n // k), (fold + 1) * (n // k)) + dataset.examples = train_data + h = learner(dataset, size) + fold_errT += err_ratio(h, dataset, train_data) + fold_errV += err_ratio(h, dataset, val_data) + # reverting back to original once test is completed + dataset.examples = examples + return fold_errT / k, fold_errV / k - def predict(example): - """Predict the target value for example. Consider each possible value, - and pick the most likely by looking at each attribute independently.""" - def class_probability(targetval): - prob = target_dist[targetval] - for attr in dataset.inputs: - prob *= gaussian(means[targetval][attr], deviations[targetval][attr], example[attr]) - return prob +def leave_one_out(learner, dataset, size=None): + """Leave one out cross-validation over the dataset.""" + return cross_validation(learner, dataset, size, len(dataset.examples)) - return argmax(target_vals, key=class_probability) - return predict +# TODO learning_curve needs to be fixed +def learning_curve(learner, dataset, trials=10, sizes=None): + if sizes is None: + sizes = list(range(2, len(dataset.examples) - 10, 2)) + def score(learner, size): + random.shuffle(dataset.examples) + return train_test_split(learner, dataset, 0, size) -# ______________________________________________________________________________ + return [(size, mean([score(learner, size) for _ in range(trials)])) for size in sizes] -def NearestNeighborLearner(dataset, k=1): - """k-NearestNeighbor: the 
k nearest neighbors vote.""" +def PluralityLearner(dataset): + """ + A very dumb algorithm: always pick the result that was most popular + in the training data. Makes a baseline for comparison. + """ + most_popular = mode([e[dataset.target] for e in dataset.examples]) def predict(example): - """Find the k closest items, and have them vote for the best.""" - best = heapq.nsmallest(k, ((dataset.distance(e, example), e) - for e in dataset.examples)) - return mode(e[dataset.target] for (d, e) in best) + """Always return same result: the most popular from the training set.""" + return most_popular return predict -# ______________________________________________________________________________ - - -def truncated_svd(X, num_val=2, max_iter=1000): - """Compute the first component of SVD.""" - - def normalize_vec(X, n=2): - """Normalize two parts (:m and m:) of the vector.""" - X_m = X[:m] - X_n = X[m:] - norm_X_m = norm(X_m, n) - Y_m = [x / norm_X_m for x in X_m] - norm_X_n = norm(X_n, n) - Y_n = [x / norm_X_n for x in X_n] - return Y_m + Y_n - - def remove_component(X): - """Remove components of already obtained eigen vectors from X.""" - X_m = X[:m] - X_n = X[m:] - for eivec in eivec_m: - coeff = dotproduct(X_m, eivec) - X_m = [x1 - coeff * x2 for x1, x2 in zip(X_m, eivec)] - for eivec in eivec_n: - coeff = dotproduct(X_n, eivec) - X_n = [x1 - coeff * x2 for x1, x2 in zip(X_n, eivec)] - return X_m + X_n - - m, n = len(X), len(X[0]) - A = [[0] * (n + m) for _ in range(n + m)] - for i in range(m): - for j in range(n): - A[i][m + j] = A[m + j][i] = X[i][j] - - eivec_m = [] - eivec_n = [] - eivals = [] - - for _ in range(num_val): - X = [random.random() for _ in range(m + n)] - X = remove_component(X) - X = normalize_vec(X) - - for i in range(max_iter): - old_X = X - X = matrix_multiplication(A, [[x] for x in X]) - X = [x[0] for x in X] - X = remove_component(X) - X = normalize_vec(X) - # check for convergence - if norm([x1 - x2 for x1, x2 in zip(old_X, X)]) <= 1e-10: - 
break - - projected_X = matrix_multiplication(A, [[x] for x in X]) - projected_X = [x[0] for x in projected_X] - new_eigenvalue = norm(projected_X, 1) / norm(X, 1) - ev_m = X[:m] - ev_n = X[m:] - if new_eigenvalue < 0: - new_eigenvalue = -new_eigenvalue - ev_m = [-ev_m_i for ev_m_i in ev_m] - eivals.append(new_eigenvalue) - eivec_m.append(ev_m) - eivec_n.append(ev_n) - return (eivec_m, eivec_n, eivals) - - -# ______________________________________________________________________________ - - class DecisionFork: - """A fork of a decision tree holds an attribute to test, and a dict - of branches, one for each of the attribute's values.""" + """ + A fork of a decision tree holds an attribute to test, and a dict + of branches, one for each of the attribute's values. + """ - def __init__(self, attr, attrname=None, default_child=None, branches=None): + def __init__(self, attr, attr_name=None, default_child=None, branches=None): """Initialize by saying what attribute this node tests.""" self.attr = attr - self.attrname = attrname or attr + self.attr_name = attr_name or attr self.default_child = default_child self.branches = branches or {} def __call__(self, example): """Given an example, classify it using the attribute and the branches.""" - attrvalue = example[self.attr] - if attrvalue in self.branches: - return self.branches[attrvalue](example) + attr_val = example[self.attr] + if attr_val in self.branches: + return self.branches[attr_val](example) else: # return default class when attribute is unknown return self.default_child(example) @@ -475,15 +374,14 @@ def add(self, val, subtree): self.branches[val] = subtree def display(self, indent=0): - name = self.attrname + name = self.attr_name print('Test', name) for (val, subtree) in self.branches.items(): print(' ' * 4 * indent, name, '=', val, '==>', end=' ') subtree.display(indent + 1) - print() # newline def __repr__(self): - return ('DecisionFork({0!r}, {1!r}, {2!r})'.format(self.attr, self.attrname, self.branches)) + 
return 'DecisionFork({0!r}, {1!r}, {2!r})'.format(self.attr, self.attr_name, self.branches) class DecisionLeaf: @@ -495,16 +393,13 @@ def __init__(self, result): def __call__(self, example): return self.result - def display(self, indent=0): + def display(self): print('RESULT =', self.result) def __repr__(self): return repr(self.result) -# ______________________________________________________________________________ - - def DecisionTreeLearner(dataset): """[Figure 18.5]""" @@ -513,21 +408,22 @@ def DecisionTreeLearner(dataset): def decision_tree_learning(examples, attrs, parent_examples=()): if len(examples) == 0: return plurality_value(parent_examples) - elif all_same_class(examples): + if all_same_class(examples): return DecisionLeaf(examples[0][target]) - elif len(attrs) == 0: + if len(attrs) == 0: return plurality_value(examples) - else: - A = choose_attribute(attrs, examples) - tree = DecisionFork(A, dataset.attrnames[A], plurality_value(examples)) - for (v_k, exs) in split_by(A, examples): - subtree = decision_tree_learning(exs, removeall(A, attrs), examples) - tree.add(v_k, subtree) - return tree + A = choose_attribute(attrs, examples) + tree = DecisionFork(A, dataset.attr_names[A], plurality_value(examples)) + for (v_k, exs) in split_by(A, examples): + subtree = decision_tree_learning(exs, remove_all(A, attrs), examples) + tree.add(v_k, subtree) + return tree def plurality_value(examples): - """Return the most popular target value for this set of examples. - (If target is binary, this is the majority; otherwise plurality.)""" + """ + Return the most popular target value for this set of examples. + (If target is binary, this is the majority; otherwise plurality). 
+ """ popular = argmax_random_tie(values[target], key=lambda v: count(target, v, examples)) return DecisionLeaf(popular) @@ -548,64 +444,30 @@ def information_gain(attr, examples): """Return the expected reduction in entropy from splitting by attr.""" def I(examples): - return information_content([count(target, v, examples) - for v in values[target]]) + return information_content([count(target, v, examples) for v in values[target]]) N = len(examples) - remainder = sum((len(examples_i) / N) * I(examples_i) - for (v, examples_i) in split_by(attr, examples)) + remainder = sum((len(examples_i) / N) * I(examples_i) for (v, examples_i) in split_by(attr, examples)) return I(examples) - remainder def split_by(attr, examples): """Return a list of (val, examples) pairs for each val of attr.""" - return [(v, [e for e in examples if e[attr] == v]) - for v in values[attr]] + return [(v, [e for e in examples if e[attr] == v]) for v in values[attr]] return decision_tree_learning(dataset.examples, dataset.inputs) def information_content(values): """Number of bits to represent the probability distribution in values.""" - probabilities = normalize(removeall(0, values)) + probabilities = normalize(remove_all(0, values)) return sum(-p * math.log2(p) for p in probabilities) -# ______________________________________________________________________________ - - -def RandomForest(dataset, n=5): - """An ensemble of Decision Trees trained using bagging and feature bagging.""" - - def data_bagging(dataset, m=0): - """Sample m examples with replacement""" - n = len(dataset.examples) - return weighted_sample_with_replacement(m or n, dataset.examples, [1] * n) - - def feature_bagging(dataset, p=0.7): - """Feature bagging with probability p to retain an attribute""" - inputs = [i for i in dataset.inputs if probability(p)] - return inputs or dataset.inputs - - def predict(example): - print([predictor(example) for predictor in predictors]) - return mode(predictor(example) for predictor in 
predictors) - - predictors = [DecisionTreeLearner(DataSet(examples=data_bagging(dataset), - attrs=dataset.attrs, - attrnames=dataset.attrnames, - target=dataset.target, - inputs=feature_bagging(dataset))) for _ in range(n)] - - return predict - - -# ______________________________________________________________________________ - -# A decision list is implemented as a list of (test, value) pairs. - - def DecisionListLearner(dataset): - """[Figure 18.11]""" + """ + [Figure 18.11] + A decision list implemented as a list of (test, value) pairs. + """ def decision_list_learning(examples): if not examples: @@ -616,8 +478,10 @@ def decision_list_learning(examples): return [(t, o)] + decision_list_learning(examples - examples_t) def find_examples(examples): - """Find a set of examples that all have the same outcome under - some test. Return a tuple of the test, outcome, and examples.""" + """ + Find a set of examples that all have the same outcome under + some test. Return a tuple of the test, outcome, and examples. + """ raise NotImplementedError def passes(example, test): @@ -635,16 +499,112 @@ def predict(example): return predict -# ______________________________________________________________________________ +def NearestNeighborLearner(dataset, k=1): + """k-NearestNeighbor: the k nearest neighbors vote.""" + + def predict(example): + """Find the k closest items, and have them vote for the best.""" + best = heapq.nsmallest(k, ((dataset.distance(e, example), e) for e in dataset.examples)) + return mode(e[dataset.target] for (d, e) in best) + + return predict + + +def LinearLearner(dataset, learning_rate=0.01, epochs=100): + """ + [Section 18.6.3] + Linear classifier with hard threshold. 
+ """ + idx_i = dataset.inputs + idx_t = dataset.target + examples = dataset.examples + num_examples = len(examples) + + # X transpose + X_col = [dataset.values[i] for i in idx_i] # vertical columns of X + + # add dummy + ones = [1 for _ in range(len(examples))] + X_col = [ones] + X_col + + # initialize random weights + num_weights = len(idx_i) + 1 + w = random_weights(min_value=-0.5, max_value=0.5, num_weights=num_weights) + + for epoch in range(epochs): + err = [] + # pass over all examples + for example in examples: + x = [1] + example + y = dotproduct(w, x) + t = example[idx_t] + err.append(t - y) + + # update weights + for i in range(len(w)): + w[i] = w[i] + learning_rate * (dotproduct(err, X_col[i]) / num_examples) + def predict(example): + x = [1] + example + return dotproduct(w, x) -def NeuralNetLearner(dataset, hidden_layer_sizes=[3], learning_rate=0.01, epochs=100, activation=sigmoid): - """Layered feed-forward network. + return predict + + +def LogisticLinearLeaner(dataset, learning_rate=0.01, epochs=100): + """ + [Section 18.6.4] + Linear classifier with logistic regression. 
+ """ + idx_i = dataset.inputs + idx_t = dataset.target + examples = dataset.examples + num_examples = len(examples) + + # X transpose + X_col = [dataset.values[i] for i in idx_i] # vertical columns of X + + # add dummy + ones = [1 for _ in range(len(examples))] + X_col = [ones] + X_col + + # initialize random weights + num_weights = len(idx_i) + 1 + w = random_weights(min_value=-0.5, max_value=0.5, num_weights=num_weights) + + for epoch in range(epochs): + err = [] + h = [] + # pass over all examples + for example in examples: + x = [1] + example + y = sigmoid(dotproduct(w, x)) + h.append(sigmoid_derivative(y)) + t = example[idx_t] + err.append(t - y) + + # update weights + for i in range(len(w)): + buffer = [x * y for x, y in zip(err, h)] + w[i] = w[i] + learning_rate * (dotproduct(buffer, X_col[i]) / num_examples) + + def predict(example): + x = [1] + example + return sigmoid(dotproduct(w, x)) + + return predict + + +def NeuralNetLearner(dataset, hidden_layer_sizes=None, learning_rate=0.01, epochs=100, activation=sigmoid): + """ + Layered feed-forward network. 
hidden_layer_sizes: List of number of hidden units per hidden layer learning_rate: Learning rate of gradient descent epochs: Number of passes over the dataset """ + if hidden_layer_sizes is None: + hidden_layer_sizes = [3] i_units = len(dataset.inputs) o_units = len(dataset.values[dataset.target]) @@ -653,21 +613,21 @@ def NeuralNetLearner(dataset, hidden_layer_sizes=[3], learning_rate=0.01, epochs learned_net = BackPropagationLearner(dataset, raw_net, learning_rate, epochs, activation) def predict(example): - # Input nodes + # input nodes i_nodes = learned_net[0] - # Activate input layer + # activate input layer for v, n in zip(example, i_nodes): n.value = v - # Forward pass + # forward pass for layer in learned_net[1:]: for node in layer: inc = [n.value for n in node.inputs] in_val = dotproduct(inc, node.weights) node.value = node.activation(in_val) - # Hypothesis + # hypothesis o_nodes = learned_net[-1] prediction = find_max_node(o_nodes) return prediction @@ -675,24 +635,20 @@ def predict(example): return predict -def random_weights(min_value, max_value, num_weights): - return [random.uniform(min_value, max_value) for _ in range(num_weights)] - - def BackPropagationLearner(dataset, net, learning_rate, epochs, activation=sigmoid): - """[Figure 18.23] The back-propagation algorithm for multilayer networks""" - # Initialise weights + """ + [Figure 18.23] + The back-propagation algorithm for multilayer networks. + """ + # initialise weights for layer in net: for node in layer: - node.weights = random_weights(min_value=-0.5, max_value=0.5, - num_weights=len(node.weights)) + node.weights = random_weights(min_value=-0.5, max_value=0.5, num_weights=len(node.weights)) examples = dataset.examples - ''' - As of now dataset.target gives an int instead of list, - Changing dataset class will have effect on all the learners. - Will be taken care of later. 
- ''' + # As of now dataset.target gives an int instead of list, + # Changing dataset class will have effect on all the learners. + # Will be taken care of later. o_nodes = net[-1] i_nodes = net[0] o_units = len(o_nodes) @@ -703,31 +659,31 @@ def BackPropagationLearner(dataset, net, learning_rate, epochs, activation=sigmo inputs, targets = init_examples(examples, idx_i, idx_t, o_units) for epoch in range(epochs): - # Iterate over each example + # iterate over each example for e in range(len(examples)): i_val = inputs[e] t_val = targets[e] - # Activate input layer + # activate input layer for v, n in zip(i_val, i_nodes): n.value = v - # Forward pass + # forward pass for layer in net[1:]: for node in layer: inc = [n.value for n in node.inputs] in_val = dotproduct(inc, node.weights) node.value = node.activation(in_val) - # Initialize delta + # initialize delta delta = [[] for _ in range(n_layers)] - # Compute outer layer delta + # compute outer layer delta - # Error for the MSE cost function + # error for the MSE cost function err = [t_val[i] - o_nodes[i].value for i in range(o_units)] - # Calculate delta at output + # calculate delta at output if node.activation == sigmoid: delta[-1] = [sigmoid_derivative(o_nodes[i].value) * err[i] for i in range(o_units)] elif node.activation == relu: @@ -739,7 +695,7 @@ def BackPropagationLearner(dataset, net, learning_rate, epochs, activation=sigmo else: delta[-1] = [leaky_relu_derivative(o_nodes[i].value) * err[i] for i in range(o_units)] - # Backward pass + # backward pass h_layers = n_layers - 2 for i in range(h_layers, 0, -1): layer = net[i] @@ -765,7 +721,7 @@ def BackPropagationLearner(dataset, net, learning_rate, epochs, activation=sigmo delta[i] = [leaky_relu_derivative(layer[j].value) * dotproduct(w[j], delta[i + 1]) for j in range(h_units)] - # Update weights + # update weights for i in range(1, n_layers): layer = net[i] inc = [node.value for node in net[i - 1]] @@ -788,19 +744,20 @@ def PerceptronLearner(dataset, 
learning_rate=0.01, epochs=100): def predict(example): o_nodes = learned_net[1] - # Forward pass + # forward pass for node in o_nodes: in_val = dotproduct(example, node.weights) node.value = node.activation(in_val) - # Hypothesis + # hypothesis return find_max_node(o_nodes) return predict class NNUnit: - """Single Unit of Multiple Layer Neural Network + """ + Single Unit of Multiple Layer Neural Network inputs: Incoming connections weights: Weights to incoming connections """ @@ -813,17 +770,18 @@ def __init__(self, activation=sigmoid, weights=None, inputs=None): def network(input_units, hidden_layer_sizes, output_units, activation=sigmoid): - """Create Directed Acyclic Network of given number layers. + """ + Create Directed Acyclic Network of given number layers. hidden_layers_sizes : List number of neuron units in each hidden layer excluding input and output layers """ layers_sizes = [input_units] + hidden_layer_sizes + [output_units] - net = [[NNUnit(activation) for n in range(size)] + net = [[NNUnit(activation) for _ in range(size)] for size in layers_sizes] n_layers = len(net) - # Make Connection + # make connection for i in range(1, n_layers): for n in net[i]: for k in net[i - 1]: @@ -836,16 +794,16 @@ def init_examples(examples, idx_i, idx_t, o_units): inputs, targets = {}, {} for i, e in enumerate(examples): - # Input values of e + # input values of e inputs[i] = [e[i] for i in idx_i] if o_units > 1: - # One-Hot representation of e's target + # one-hot representation of e's target t = [0 for i in range(o_units)] t[e[idx_t]] = 1 targets[i] = t else: - # Target value of e + # target value of e targets[i] = [e[idx_t]] return inputs, targets @@ -855,50 +813,6 @@ def find_max_node(nodes): return nodes.index(argmax(nodes, key=lambda node: node.value)) -# ______________________________________________________________________________ - - -def LinearLearner(dataset, learning_rate=0.01, epochs=100): - """Define with learner = LinearLearner(data); infer with 
learner(x).""" - idx_i = dataset.inputs - idx_t = dataset.target # As of now, dataset.target gives only one index. - examples = dataset.examples - num_examples = len(examples) - - # X transpose - X_col = [dataset.values[i] for i in idx_i] # vertical columns of X - - # Add dummy - ones = [1 for _ in range(len(examples))] - X_col = [ones] + X_col - - # Initialize random weights - num_weights = len(idx_i) + 1 - w = random_weights(min_value=-0.5, max_value=0.5, num_weights=num_weights) - - for epoch in range(epochs): - err = [] - # Pass over all examples - for example in examples: - x = [1] + example - y = dotproduct(w, x) - t = example[idx_t] - err.append(t - y) - - # update weights - for i in range(len(w)): - w[i] = w[i] + learning_rate * (dotproduct(err, X_col[i]) / num_examples) - - def predict(example): - x = [1] + example - return dotproduct(w, x) - - return predict - - -# ______________________________________________________________________________ - - def EnsembleLearner(learners): """Given a list of learning algorithms, have them vote.""" @@ -913,48 +827,40 @@ def predict(example): return train -# ______________________________________________________________________________ - - -def AdaBoost(L, K): +def ada_boost(dataset, L, K): """[Figure 18.34]""" - def train(dataset): - examples, target = dataset.examples, dataset.target - N = len(examples) - epsilon = 1 / (2 * N) - w = [1 / N] * N - h, z = [], [] - for k in range(K): - h_k = L(dataset, w) - h.append(h_k) - error = sum(weight for example, weight in zip(examples, w) - if example[target] != h_k(example)) - - # Avoid divide-by-0 from either 0% or 100% error rates: - error = clip(error, epsilon, 1 - epsilon) - for j, example in enumerate(examples): - if example[target] == h_k(example): - w[j] *= error / (1 - error) - w = normalize(w) - z.append(math.log((1 - error) / error)) - return WeightedMajority(h, z) - - return train - - -def WeightedMajority(predictors, weights): + examples, target = dataset.examples, 
dataset.target + N = len(examples) + epsilon = 1 / (2 * N) + w = [1 / N] * N + h, z = [], [] + for k in range(K): + h_k = L(dataset, w) + h.append(h_k) + error = sum(weight for example, weight in zip(examples, w) if example[target] != h_k(example)) + # avoid divide-by-0 from either 0% or 100% error rates + error = clip(error, epsilon, 1 - epsilon) + for j, example in enumerate(examples): + if example[target] == h_k(example): + w[j] *= error / (1 - error) + w = normalize(w) + z.append(math.log((1 - error) / error)) + return weighted_majority(h, z) + + +def weighted_majority(predictors, weights): """Return a predictor that takes a weighted vote.""" def predict(example): - return weighted_mode((predictor(example) for predictor in predictors), - weights) + return weighted_mode((predictor(example) for predictor in predictors), weights) return predict def weighted_mode(values, weights): - """Return the value with the greatest total weight. + """ + Return the value with the greatest total weight. 
>>> weighted_mode('abbaa', [1, 2, 3, 1, 2]) 'b' """ @@ -964,13 +870,36 @@ def weighted_mode(values, weights): return max(totals, key=totals.__getitem__) -# _____________________________________________________________________________ -# Adapting an unweighted learner for AdaBoost +def RandomForest(dataset, n=5): + """An ensemble of Decision Trees trained using bagging and feature bagging.""" + + def data_bagging(dataset, m=0): + """Sample m examples with replacement""" + n = len(dataset.examples) + return weighted_sample_with_replacement(m or n, dataset.examples, [1] * n) + + def feature_bagging(dataset, p=0.7): + """Feature bagging with probability p to retain an attribute""" + inputs = [i for i in dataset.inputs if probability(p)] + return inputs or dataset.inputs + + def predict(example): + print([predictor(example) for predictor in predictors]) + return mode(predictor(example) for predictor in predictors) + + predictors = [DecisionTreeLearner(DataSet(examples=data_bagging(dataset), attrs=dataset.attrs, + attr_names=dataset.attr_names, target=dataset.target, + inputs=feature_bagging(dataset))) for _ in range(n)] + + return predict def WeightedLearner(unweighted_learner): - """Given a learner that takes just an unweighted dataset, return - one that takes also a weight for each example. [p. 749 footnote 14]""" + """ + [Page 749 footnote 14] + Given a learner that takes just an unweighted dataset, return + one that takes also a weight for each example. + """ def train(dataset, weights): return unweighted_learner(replicated_dataset(dataset, weights)) @@ -987,7 +916,8 @@ def replicated_dataset(dataset, weights, n=None): def weighted_replicate(seq, weights, n): - """Return n selections from seq, with the count of each element of + """ + Return n selections from seq, with the count of each element of seq proportional to the corresponding weight (filling in fractions randomly). 
def flatten(seqs):
    """
    Concatenate a sequence of lists into a single flat list.
    >>> flatten([[1, 2], [3]])
    [1, 2, 3]
    """
    # A single comprehension is linear in the total number of items,
    # unlike sum(seqs, []), which rebuilds the accumulator list on every
    # + and is therefore quadratic; it also accepts tuples/iterables as
    # the inner sequences, which sum(seqs, []) does not.
    return [item for seq in seqs for item in seq]
- - -orings = DataSet(name='orings', target='Distressed', - attrnames="Rings Distressed Temp Pressure Flightnum") +orings = DataSet(name='orings', target='Distressed', attr_names='Rings Distressed Temp Pressure Flightnum') zoo = DataSet(name='zoo', target='type', exclude=['name'], - attrnames="name hair feathers eggs milk airborne aquatic " + - "predator toothed backbone breathes venomous fins legs tail " + - "domestic catsize type") + attr_names='name hair feathers eggs milk airborne aquatic predator toothed backbone ' + 'breathes venomous fins legs tail domestic catsize type') -iris = DataSet(name="iris", target="class", - attrnames="sepal-len sepal-width petal-len petal-width class") - - -# ______________________________________________________________________________ -# The Restaurant example from [Figure 18.2] +iris = DataSet(name='iris', target='class', attr_names='sepal-len sepal-width petal-len petal-width class') def RestaurantDataSet(examples=None): - """Build a DataSet of Restaurant waiting examples. [Figure 18.3]""" + """ + [Figure 18.3] + Build a DataSet of Restaurant waiting examples. + """ return DataSet(name='restaurant', target='Wait', examples=examples, - attrnames='Alternate Bar Fri/Sat Hungry Patrons Price ' + - 'Raining Reservation Type WaitEstimate Wait') + attr_names='Alternate Bar Fri/Sat Hungry Patrons Price Raining Reservation Type WaitEstimate Wait') restaurant = RestaurantDataSet() -def T(attrname, branches): - branches = {value: (child if isinstance(child, DecisionFork) - else DecisionLeaf(child)) +def T(attr_name, branches): + branches = {value: (child if isinstance(child, DecisionFork) else DecisionLeaf(child)) for value, child in branches.items()} - return DecisionFork(restaurant.attrnum(attrname), attrname, print, branches) + return DecisionFork(restaurant.attr_num(attr_name), attr_name, print, branches) -""" [Figure 18.2] +""" +[Figure 18.2] A decision tree for deciding whether to wait for a table at a hotel. 
""" @@ -1187,8 +976,7 @@ def T(attrname, branches): {'Yes': 'Yes', 'No': T('Bar', {'No': 'No', 'Yes': 'Yes'})}), - 'Yes': T('Fri/Sat', {'No': 'No', 'Yes': 'Yes'})} - ), + 'Yes': T('Fri/Sat', {'No': 'No', 'Yes': 'Yes'})}), '10-30': T('Hungry', {'No': 'Yes', 'Yes': T('Alternate', @@ -1206,30 +994,30 @@ def gen(): example[restaurant.target] = waiting_decision_tree(example) return example - return RestaurantDataSet([gen() for i in range(n)]) - - -# ______________________________________________________________________________ -# Artificial, generated datasets. + return RestaurantDataSet([gen() for _ in range(n)]) def Majority(k, n): - """Return a DataSet with n k-bit examples of the majority problem: - k random bits followed by a 1 if more than half the bits are 1, else 0.""" + """ + Return a DataSet with n k-bit examples of the majority problem: + k random bits followed by a 1 if more than half the bits are 1, else 0. + """ examples = [] for i in range(n): - bits = [random.choice([0, 1]) for i in range(k)] + bits = [random.choice([0, 1]) for _ in range(k)] bits.append(int(sum(bits) > k / 2)) examples.append(bits) - return DataSet(name="majority", examples=examples) + return DataSet(name='majority', examples=examples) -def Parity(k, n, name="parity"): - """Return a DataSet with n k-bit examples of the parity problem: - k random bits followed by a 1 if an odd number of bits are 1, else 0.""" +def Parity(k, n, name='parity'): + """ + Return a DataSet with n k-bit examples of the parity problem: + k random bits followed by a 1 if an odd number of bits are 1, else 0. 
+ """ examples = [] for i in range(n): - bits = [random.choice([0, 1]) for i in range(k)] + bits = [random.choice([0, 1]) for _ in range(k)] bits.append(sum(bits) % 2) examples.append(bits) return DataSet(name=name, examples=examples) @@ -1237,31 +1025,29 @@ def Parity(k, n, name="parity"): def Xor(n): """Return a DataSet with n examples of 2-input xor.""" - return Parity(2, n, name="xor") + return Parity(2, n, name='xor') def ContinuousXor(n): """2 inputs are chosen uniformly from (0.0 .. 2.0]; output is xor of ints.""" examples = [] for i in range(n): - x, y = [random.uniform(0.0, 2.0) for i in '12'] - examples.append([x, y, int(x) != int(y)]) - return DataSet(name="continuous xor", examples=examples) + x, y = [random.uniform(0.0, 2.0) for _ in '12'] + examples.append([x, y, x != y]) + return DataSet(name='continuous xor', examples=examples) -# ______________________________________________________________________________ +def compare(algorithms=None, datasets=None, k=10, trials=1): + """ + Compare various learners on various datasets using cross-validation. + Print results as a table. + """ + # default list of algorithms + algorithms = algorithms or [PluralityLearner, NaiveBayesLearner, NearestNeighborLearner, DecisionTreeLearner] + # default list of datasets + datasets = datasets or [iris, orings, zoo, restaurant, SyntheticRestaurant(20), + Majority(7, 100), Parity(7, 100), Xor(100)] -def compare(algorithms=None, datasets=None, k=10, trials=1): - """Compare various learners on various datasets using cross-validation. 
- Print results as a table.""" - algorithms = algorithms or [PluralityLearner, NaiveBayesLearner, # default list - NearestNeighborLearner, DecisionTreeLearner] # of algorithms - - datasets = datasets or [iris, orings, zoo, restaurant, SyntheticRestaurant(20), # default list - Majority(7, 100), Parity(7, 100), Xor(100)] # of datasets - - print_table([[a.__name__.replace('Learner', '')] + - [cross_validation(a, d, k, trials) for d in datasets] - for a in algorithms], - header=[''] + [d.name[0:7] for d in datasets], numfmt='%.2f') + print_table([[a.__name__.replace('Learner', '')] + [cross_validation(a, d, k=k, trials=trials) for d in datasets] + for a in algorithms], header=[''] + [d.name[0:7] for d in datasets], numfmt='%.2f') diff --git a/learning4e.py b/learning4e.py index c8bdd44f2..5cf63dda4 100644 --- a/learning4e.py +++ b/learning4e.py @@ -1,3 +1,5 @@ +"""Learning from examples. (Chapters 18)""" + import copy import heapq import math @@ -5,49 +7,46 @@ from collections import defaultdict from statistics import mean, stdev -from utils4e import ( - removeall, unique, mode, argmax_random_tie, isclose, dotproduct, weighted_sample_with_replacement, - num_or_str, normalize, clip, print_table, open_data, probability, random_weights, - mean_boolean_error) - - -# Learn to estimate functions from examples. (Chapters 18) -# ______________________________________________________________________________ -# 18.2 Supervised learning. -# define supervised learning dataset and utility functions/ +from probabilistic_learning import NaiveBayesLearner +from utils import sigmoid, sigmoid_derivative +from utils4e import (remove_all, unique, mode, argmax_random_tie, isclose, dotproduct, weighted_sample_with_replacement, + num_or_str, normalize, clip, print_table, open_data, probability, random_weights, + mean_boolean_error) class DataSet: - """A data set for a machine learning problem. It has the following fields: + """ + A data set for a machine learning problem. 
It has the following fields: d.examples A list of examples. Each one is a list of attribute values. d.attrs A list of integers to index into an example, so example[attr] gives a value. Normally the same as range(len(d.examples[0])). - d.attrnames Optional list of mnemonic names for corresponding attrs. + d.attr_names Optional list of mnemonic names for corresponding attrs. d.target The attribute that a learning algorithm will try to predict. By default the final attribute. d.inputs The list of attrs without the target. d.values A list of lists: each sublist is the set of possible values for the corresponding attribute. If initially None, - it is computed from the known examples by self.setproblem. + it is computed from the known examples by self.set_problem. If not None, an erroneous value raises ValueError. - d.distance A function from a pair of examples to a nonnegative number. + d.distance A function from a pair of examples to a non-negative number. Should be symmetric, etc. Defaults to mean_boolean_error since that can handle any field types. d.name Name of the data set (for output display only). d.source URL or other source where the data came from. d.exclude A list of attribute indexes to exclude from d.inputs. Elements - of this list can either be integers (attrs) or attrnames. + of this list can either be integers (attrs) or attr_names. Normally, you call the constructor and you're done; then you just - access fields like d.examples and d.target and d.inputs.""" + access fields like d.examples and d.target and d.inputs. + """ - def __init__(self, examples=None, attrs=None, attrnames=None, target=-1, - inputs=None, values=None, distance=mean_boolean_error, - name='', source='', exclude=()): - """Accepts any of DataSet's fields. Examples can also be a + def __init__(self, examples=None, attrs=None, attr_names=None, target=-1, inputs=None, + values=None, distance=mean_boolean_error, name='', source='', exclude=()): + """ + Accepts any of DataSet's fields. 
Examples can also be a string or file from which to parse examples using parse_csv. - Optional parameter: exclude, as documented in .setproblem(). + Optional parameter: exclude, as documented in .set_problem(). >>> DataSet(examples='1, 2, 3') """ @@ -57,7 +56,7 @@ def __init__(self, examples=None, attrs=None, attrnames=None, target=-1, self.distance = distance self.got_values_flag = bool(values) - # Initialize .examples from string or list or data directory + # initialize .examples from string or list or data directory if isinstance(examples, str): self.examples = parse_csv(examples) elif examples is None: @@ -65,39 +64,40 @@ def __init__(self, examples=None, attrs=None, attrnames=None, target=-1, else: self.examples = examples - # Attrs are the indices of examples, unless otherwise stated. + # attrs are the indices of examples, unless otherwise stated. if self.examples is not None and attrs is None: attrs = list(range(len(self.examples[0]))) self.attrs = attrs - # Initialize .attrnames from string, list, or by default - if isinstance(attrnames, str): - self.attrnames = attrnames.split() + # initialize .attr_names from string, list, or by default + if isinstance(attr_names, str): + self.attr_names = attr_names.split() else: - self.attrnames = attrnames or attrs - self.setproblem(target, inputs=inputs, exclude=exclude) + self.attr_names = attr_names or attrs + self.set_problem(target, inputs=inputs, exclude=exclude) - def setproblem(self, target, inputs=None, exclude=()): - """Set (or change) the target and/or inputs. + def set_problem(self, target, inputs=None, exclude=()): + """ + Set (or change) the target and/or inputs. This way, one DataSet can be used multiple ways. inputs, if specified, is a list of attributes, or specify exclude as a list of attributes - to not use in inputs. Attributes can be -n .. n, or an attrname. 
- Also computes the list of possible values, if that wasn't done yet.""" - self.target = self.attrnum(target) - exclude = list(map(self.attrnum, exclude)) + to not use in inputs. Attributes can be -n .. n, or an attr_name. + Also computes the list of possible values, if that wasn't done yet. + """ + self.target = self.attr_num(target) + exclude = list(map(self.attr_num, exclude)) if inputs: - self.inputs = removeall(self.target, inputs) + self.inputs = remove_all(self.target, inputs) else: - self.inputs = [a for a in self.attrs - if a != self.target and a not in exclude] + self.inputs = [a for a in self.attrs if a != self.target and a not in exclude] if not self.values: self.update_values() self.check_me() def check_me(self): """Check that my fields make sense.""" - assert len(self.attrnames) == len(self.attrs) + assert len(self.attr_names) == len(self.attrs) assert self.target in self.attrs assert self.target not in self.inputs assert set(self.inputs).issubset(set(self.attrs)) @@ -116,12 +116,12 @@ def check_example(self, example): for a in self.attrs: if example[a] not in self.values[a]: raise ValueError('Bad value {} for attribute {} in {}' - .format(example[a], self.attrnames[a], example)) + .format(example[a], self.attr_names[a], example)) - def attrnum(self, attr): + def attr_num(self, attr): """Returns the number used for attr, which can be a name, or -n .. 
n-1.""" if isinstance(attr, str): - return self.attrnames.index(attr) + return self.attr_names.index(attr) elif attr < 0: return len(self.attrs) + attr else: @@ -132,13 +132,12 @@ def update_values(self): def sanitize(self, example): """Return a copy of example, with non-input attributes replaced by None.""" - return [attr_i if i in self.inputs else None - for i, attr_i in enumerate(example)] + return [attr_i if i in self.inputs else None for i, attr_i in enumerate(example)] def classes_to_numbers(self, classes=None): """Converts class names to numbers.""" if not classes: - # If classes were not given, extract them from values + # if classes were not given, extract them from values classes = sorted(self.values[self.target]) for item in self.examples: item[self.target] = classes.index(item[self.target]) @@ -154,17 +153,19 @@ def split_values_by_classes(self): target_names = self.values[self.target] for v in self.examples: - item = [a for a in v if a not in target_names] # Remove target from item - buckets[v[self.target]].append(item) # Add item to bucket of its class + item = [a for a in v if a not in target_names] # remove target from item + buckets[v[self.target]].append(item) # add item to bucket of its class return buckets def find_means_and_deviations(self): - """Finds the means and standard deviations of self.dataset. - means : A dictionary for each class/target. Holds a list of the means + """ + Finds the means and standard deviations of self.dataset. + means : a dictionary for each class/target. Holds a list of the means of the features for the class. - deviations: A dictionary for each class/target. Holds a list of the sample - standard deviations of the features for the class.""" + deviations: a dictionary for each class/target. Holds a list of the sample + standard deviations of the features for the class. 
+ """ target_names = self.values[self.target] feature_numbers = len(self.inputs) @@ -174,13 +175,13 @@ def find_means_and_deviations(self): deviations = defaultdict(lambda: [0] * feature_numbers) for t in target_names: - # Find all the item feature values for item in class t - features = [[] for i in range(feature_numbers)] + # find all the item feature values for item in class t + features = [[] for _ in range(feature_numbers)] for item in item_buckets[t]: for i in range(feature_numbers): features[i].append(item[i]) - # Calculate means and deviations fo the class + # calculate means and deviations fo the class for i in range(feature_numbers): means[t][i] = mean(features[i]) deviations[t][i] = stdev(features[i]) @@ -188,44 +189,177 @@ def find_means_and_deviations(self): return means, deviations def __repr__(self): - return ''.format( - self.name, len(self.examples), len(self.attrs)) - - -# ______________________________________________________________________________ + return ''.format(self.name, len(self.examples), len(self.attrs)) def parse_csv(input, delim=','): - r"""Input is a string consisting of lines, each line has comma-delimited + r""" + Input is a string consisting of lines, each line has comma-delimited fields. Convert this into a list of lists. Blank lines are skipped. Fields that look like numbers are converted to numbers. The delim defaults to ',' but '\t' and None are also reasonable values. >>> parse_csv('1, 2, 3 \n 0, 2, na') - [[1, 2, 3], [0, 2, 'na']]""" + [[1, 2, 3], [0, 2, 'na']] + """ lines = [line for line in input.splitlines() if line.strip()] return [list(map(num_or_str, line.split(delim))) for line in lines] -# ______________________________________________________________________________ -# 18.3 Learning decision trees +def err_ratio(predict, dataset, examples=None, verbose=0): + """ + Return the proportion of the examples that are NOT correctly predicted. 
+ verbose - 0: No output; 1: Output wrong; 2 (or greater): Output correct + """ + examples = examples or dataset.examples + if len(examples) == 0: + return 0.0 + right = 0 + for example in examples: + desired = example[dataset.target] + output = predict(dataset.sanitize(example)) + if output == desired: + right += 1 + if verbose >= 2: + print(' OK: got {} for {}'.format(desired, example)) + elif verbose: + print('WRONG: got {}, expected {} for {}'.format(output, desired, example)) + return 1 - (right / len(examples)) + + +def grade_learner(predict, tests): + """ + Grades the given learner based on how many tests it passes. + tests is a list with each element in the form: (values, output). + """ + return mean(int(predict(X) == y) for X, y in tests) + + +def train_test_split(dataset, start=None, end=None, test_split=None): + """ + If you are giving 'start' and 'end' as parameters, + then it will return the testing set from index 'start' to 'end' + and the rest for training. + If you give 'test_split' as a parameter then it will return + test_split * 100% as the testing set and the rest as + training set. + """ + examples = dataset.examples + if test_split is None: + train = examples[:start] + examples[end:] + val = examples[start:end] + else: + total_size = len(examples) + val_size = int(total_size * test_split) + train_size = total_size - val_size + train = examples[:train_size] + val = examples[train_size:total_size] + + return train, val + + +def model_selection(learner, dataset, k=10, trials=1): + """ + [Figure 18.8] + Return the optimal value of size having minimum error on validation set. 
+    err: a validation error array, indexed by size
+    """
+    errs = []
+    size = 1
+    while True:
+        err = cross_validation(learner, dataset, size, k, trials)
+        # check for convergence provided errs is not empty
+        if errs and isclose(errs[-1], err, rel_tol=1e-6):
+            best_size = 0
+            min_val = math.inf
+            i = 0
+            while i < size:
+                if errs[i] < min_val:
+                    min_val = errs[i]
+                    best_size = i
+                i += 1
+            return learner(dataset, best_size)
+        errs.append(err)
+        size += 1
+
+
+def cross_validation(learner, dataset, size=None, k=10, trials=1):
+    """
+    Do k-fold cross_validate and return their mean.
+    That is, keep out 1/k of the examples for testing on each of k runs.
+    Shuffle the examples first; if trials>1, average over several shuffles.
+    Returns Training error
+    """
+    k = k or len(dataset.examples)
+    if trials > 1:
+        trial_errs = 0
+        for t in range(trials):
+            errs = cross_validation(learner, dataset, size, k, trials=1)
+            trial_errs += errs
+        return trial_errs / trials
+    else:
+        fold_errs = 0
+        n = len(dataset.examples)
+        examples = dataset.examples
+        random.shuffle(dataset.examples)
+        for fold in range(k):
+            train_data, val_data = train_test_split(dataset, fold * (n // k), (fold + 1) * (n // k))
+            dataset.examples = train_data
+            h = learner(dataset, size)
+            fold_errs += err_ratio(h, dataset, train_data)
+            # reverting back to original once test is completed
+            dataset.examples = examples
+        return fold_errs / k
+
+
+def leave_one_out(learner, dataset, size=None):
+    """Leave one out cross-validation over the dataset."""
+    return cross_validation(learner, dataset, size, len(dataset.examples))
+
+
+# TODO learning_curve needs to be fixed
+def learning_curve(learner, dataset, trials=10, sizes=None):
+    if sizes is None:
+        sizes = list(range(2, len(dataset.examples) - 10, 2))
+
+    def score(learner, size):
+        random.shuffle(dataset.examples)
+        return train_test_split(learner, dataset, 0, size)
+
+    return [(size, mean([score(learner, size) for _ in range(trials)])) for size in sizes]
+
+
+def PluralityLearner(dataset): + """ + A very dumb algorithm: always pick the result that was most popular + in the training data. Makes a baseline for comparison. + """ + most_popular = mode([e[dataset.target] for e in dataset.examples]) + + def predict(example): + """Always return same result: the most popular from the training set.""" + return most_popular + + return predict class DecisionFork: - """A fork of a decision tree holds an attribute to test, and a dict - of branches, one for each of the attribute's values.""" + """ + A fork of a decision tree holds an attribute to test, and a dict + of branches, one for each of the attribute's values. + """ - def __init__(self, attr, attrname=None, default_child=None, branches=None): + def __init__(self, attr, attr_name=None, default_child=None, branches=None): """Initialize by saying what attribute this node tests.""" self.attr = attr - self.attrname = attrname or attr + self.attr_name = attr_name or attr self.default_child = default_child self.branches = branches or {} def __call__(self, example): """Given an example, classify it using the attribute and the branches.""" - attrvalue = example[self.attr] - if attrvalue in self.branches: - return self.branches[attrvalue](example) + attr_val = example[self.attr] + if attr_val in self.branches: + return self.branches[attr_val](example) else: # return default class when attribute is unknown return self.default_child(example) @@ -235,16 +369,14 @@ def add(self, val, subtree): self.branches[val] = subtree def display(self, indent=0): - name = self.attrname + name = self.attr_name print('Test', name) for (val, subtree) in self.branches.items(): print(' ' * 4 * indent, name, '=', val, '==>', end=' ') subtree.display(indent + 1) - print() # newline def __repr__(self): - return ('DecisionFork({0!r}, {1!r}, {2!r})' - .format(self.attr, self.attrname, self.branches)) + return 'DecisionFork({0!r}, {1!r}, {2!r})'.format(self.attr, self.attr_name, self.branches) class DecisionLeaf: 
@@ -256,37 +388,37 @@ def __init__(self, result): def __call__(self, example): return self.result - def display(self, indent=0): + def display(self): print('RESULT =', self.result) def __repr__(self): return repr(self.result) -# decision tree learning in Figure 18.5 - - def DecisionTreeLearner(dataset): + """[Figure 18.5]""" + target, values = dataset.target, dataset.values def decision_tree_learning(examples, attrs, parent_examples=()): if len(examples) == 0: return plurality_value(parent_examples) - elif all_same_class(examples): + if all_same_class(examples): return DecisionLeaf(examples[0][target]) - elif len(attrs) == 0: + if len(attrs) == 0: return plurality_value(examples) - else: - A = choose_attribute(attrs, examples) - tree = DecisionFork(A, dataset.attrnames[A], plurality_value(examples)) - for (v_k, exs) in split_by(A, examples): - subtree = decision_tree_learning(exs, removeall(A, attrs), examples) - tree.add(v_k, subtree) - return tree + A = choose_attribute(attrs, examples) + tree = DecisionFork(A, dataset.attr_names[A], plurality_value(examples)) + for (v_k, exs) in split_by(A, examples): + subtree = decision_tree_learning(exs, remove_all(A, attrs), examples) + tree.add(v_k, subtree) + return tree def plurality_value(examples): - """Return the most popular target value for this set of examples. - (If target is binary, this is the majority; otherwise plurality.)""" + """ + Return the most popular target value for this set of examples. + (If target is binary, this is the majority; otherwise plurality). 
+ """ popular = argmax_random_tie(values[target], key=lambda v: count(target, v, examples)) return DecisionLeaf(popular) @@ -307,190 +439,31 @@ def information_gain(attr, examples): """Return the expected reduction in entropy from splitting by attr.""" def I(examples): - return information_content([count(target, v, examples) - for v in values[target]]) + return information_content([count(target, v, examples) for v in values[target]]) N = len(examples) - remainder = sum((len(examples_i) / N) * I(examples_i) - for (v, examples_i) in split_by(attr, examples)) + remainder = sum((len(examples_i) / N) * I(examples_i) for (v, examples_i) in split_by(attr, examples)) return I(examples) - remainder def split_by(attr, examples): """Return a list of (val, examples) pairs for each val of attr.""" - return [(v, [e for e in examples if e[attr] == v]) - for v in values[attr]] + return [(v, [e for e in examples if e[attr] == v]) for v in values[attr]] return decision_tree_learning(dataset.examples, dataset.inputs) def information_content(values): """Number of bits to represent the probability distribution in values.""" - probabilities = normalize(removeall(0, values)) + probabilities = normalize(remove_all(0, values)) return sum(-p * math.log2(p) for p in probabilities) -# ______________________________________________________________________________ -# 18.4 Model selection and optimization - - -def model_selection(learner, dataset, k=10, trials=1): - """[Fig 18.8] - Return the optimal value of size having minimum error - on validation set. 
- err_train: A training error array, indexed by size - err_val: A validation error array, indexed by size +def DecisionListLearner(dataset): """ - errs = [] - size = 1 - - while True: - err = cross_validation(learner, size, dataset, k, trials) - # Check for convergence provided err_val is not empty - if err and not isclose(err[-1], err, rel_tol=1e-6): - best_size = 0 - min_val = math.inf - - i = 0 - while i < size: - if errs[i] < min_val: - min_val = errs[i] - best_size = i - i += 1 - return learner(dataset, best_size) - errs.append(err) - size += 1 - - -def cross_validation(learner, size, dataset, k=10, trials=1): - """Do k-fold cross_validate and return their mean. - That is, keep out 1/k of the examples for testing on each of k runs. - Shuffle the examples first; if trials>1, average over several shuffles. - Returns Training error, Validation error""" - k = k or len(dataset.examples) - if trials > 1: - trial_errs = 0 - for t in range(trials): - errs = cross_validation(learner, size, dataset, k=10, trials=1) - trial_errs += errs - return trial_errs / trials - else: - fold_errs = 0 - n = len(dataset.examples) - examples = dataset.examples - random.shuffle(dataset.examples) - for fold in range(k): - train_data, val_data = train_test_split(dataset, fold * (n // k), (fold + 1) * (n // k)) - dataset.examples = train_data - h = learner(dataset, size) - fold_errs += err_ratio(h, dataset, train_data) - - # Reverting back to original once test is completed - dataset.examples = examples - return fold_errs / k - - -def cross_validation_nosize(learner, dataset, k=10, trials=1): - """Do k-fold cross_validate and return their mean. - That is, keep out 1/k of the examples for testing on each of k runs. - Shuffle the examples first; if trials>1, average over several shuffles. 
- Returns Training error, Validation error""" - k = k or len(dataset.examples) - if trials > 1: - trial_errs = 0 - for t in range(trials): - errs = cross_validation(learner, dataset, k=10, trials=1) - trial_errs += errs - return trial_errs / trials - else: - fold_errs = 0 - n = len(dataset.examples) - examples = dataset.examples - random.shuffle(dataset.examples) - for fold in range(k): - train_data, val_data = train_test_split(dataset, fold * (n // k), (fold + 1) * (n // k)) - dataset.examples = train_data - h = learner(dataset) - fold_errs += err_ratio(h, dataset, train_data) - - # Reverting back to original once test is completed - dataset.examples = examples - return fold_errs / k - - -def err_ratio(predict, dataset, examples=None, verbose=0): - """Return the proportion of the examples that are NOT correctly predicted. - verbose - 0: No output; 1: Output wrong; 2 (or greater): Output correct""" - examples = examples or dataset.examples - if len(examples) == 0: - return 0.0 - right = 0 - for example in examples: - desired = example[dataset.target] - output = predict(dataset.sanitize(example)) - if output == desired: - right += 1 - if verbose >= 2: - print(' OK: got {} for {}'.format(desired, example)) - elif verbose: - print('WRONG: got {}, expected {} for {}'.format( - output, desired, example)) - return 1 - (right / len(examples)) - - -def train_test_split(dataset, start=None, end=None, test_split=None): - """If you are giving 'start' and 'end' as parameters, - then it will return the testing set from index 'start' to 'end' - and the rest for training. - If you give 'test_split' as a parameter then it will return - test_split * 100% as the testing set and the rest as - training set. + [Figure 18.11] + A decision list implemented as a list of (test, value) pairs. 
""" - examples = dataset.examples - if test_split == None: - train = examples[:start] + examples[end:] - val = examples[start:end] - else: - total_size = len(examples) - val_size = int(total_size * test_split) - train_size = total_size - val_size - train = examples[:train_size] - val = examples[train_size:total_size] - - return train, val - - -def grade_learner(predict, tests): - """Grades the given learner based on how many tests it passes. - tests is a list with each element in the form: (values, output).""" - return mean(int(predict(X) == y) for X, y in tests) - - -def leave_one_out(learner, dataset, size=None): - """Leave one out cross-validation over the dataset.""" - return cross_validation(learner, size, dataset, k=len(dataset.examples)) - -# TODO learning_curve needs to fixed -def learning_curve(learner, dataset, trials=10, sizes=None): - if sizes is None: - sizes = list(range(2, len(dataset.examples) - 10, 2)) - - def score(learner, size): - random.shuffle(dataset.examples) - return train_test_split(learner, dataset, 0, size) - - return [(size, mean([score(learner, size) for t in range(trials)])) - for size in sizes] - - -# ______________________________________________________________________________ -# 18.5 The theory Of learning - - -def DecisionListLearner(dataset): - """A decision list is implemented as a list of (test, value) pairs.[Figure 18.11]""" - - # TODO: where are the tests from? def decision_list_learning(examples): if not examples: return [(True, False)] @@ -500,13 +473,14 @@ def decision_list_learning(examples): return [(t, o)] + decision_list_learning(examples - examples_t) def find_examples(examples): - """Find a set of examples that all have the same outcome under - some test. Return a tuple of the test, outcome, and examples.""" + """ + Find a set of examples that all have the same outcome under + some test. Return a tuple of the test, outcome, and examples. 
+ """ raise NotImplementedError def passes(example, test): """Does the example pass the test?""" - return test.test(example) raise NotImplementedError def predict(example): @@ -520,36 +494,44 @@ def predict(example): return predict -# ______________________________________________________________________________ -# 18.6 Linear regression and classification +def NearestNeighborLearner(dataset, k=1): + """k-NearestNeighbor: the k nearest neighbors vote.""" + + def predict(example): + """Find the k closest items, and have them vote for the best.""" + best = heapq.nsmallest(k, ((dataset.distance(e, example), e) for e in dataset.examples)) + return mode(e[dataset.target] for (d, e) in best) + + return predict def LinearLearner(dataset, learning_rate=0.01, epochs=100): - """Define with learner = LinearLearner(data); infer with learner(x).""" + """ + [Section 18.6.4] + Linear classifier with hard threshold. + """ idx_i = dataset.inputs - idx_t = dataset.target # As of now, dataset.target gives only one index. + idx_t = dataset.target examples = dataset.examples num_examples = len(examples) # X transpose X_col = [dataset.values[i] for i in idx_i] # vertical columns of X - # Add dummy + # add dummy ones = [1 for _ in range(len(examples))] X_col = [ones] + X_col - # Initialize random weights + # initialize random weights num_weights = len(idx_i) + 1 w = random_weights(min_value=-0.5, max_value=0.5, num_weights=num_weights) for epoch in range(epochs): err = [] - # Pass over all examples + # pass over all examples for example in examples: x = [1] + example y = dotproduct(w, x) - # if threshold: - # y = threshold(y) t = example[idx_t] err.append(t - y) @@ -565,7 +547,10 @@ def predict(example): def LogisticLinearLeaner(dataset, learning_rate=0.01, epochs=100): - """Define logistic regression classifier in 18.6.5""" + """ + [Section 18.6.5] + Linear classifier with logistic regression. 
+ """ idx_i = dataset.inputs idx_t = dataset.target examples = dataset.examples @@ -574,59 +559,37 @@ def LogisticLinearLeaner(dataset, learning_rate=0.01, epochs=100): # X transpose X_col = [dataset.values[i] for i in idx_i] # vertical columns of X - # Add dummy + # add dummy ones = [1 for _ in range(len(examples))] X_col = [ones] + X_col - # Initialize random weights + # initialize random weights num_weights = len(idx_i) + 1 w = random_weights(min_value=-0.5, max_value=0.5, num_weights=num_weights) for epoch in range(epochs): err = [] h = [] - # Pass over all examples + # pass over all examples for example in examples: x = [1] + example - y = 1 / (1 + math.exp(-dotproduct(w, x))) - h.append(y * (1 - y)) + y = sigmoid(dotproduct(w, x)) + h.append(sigmoid_derivative(y)) t = example[idx_t] err.append(t - y) # update weights for i in range(len(w)): buffer = [x * y for x, y in zip(err, h)] - # w[i] = w[i] + learning_rate * (dotproduct(err, X_col[i]) / num_examples) w[i] = w[i] + learning_rate * (dotproduct(buffer, X_col[i]) / num_examples) def predict(example): x = [1] + example - return 1 / (1 + math.exp(-dotproduct(w, x))) - - return predict - - -# ______________________________________________________________________________ -# 18.7 Nonparametric models - - -def NearestNeighborLearner(dataset, k=1): - """k-NearestNeighbor: the k nearest neighbors vote.""" - - def predict(example): - """Find the k closest items, and have them vote for the best.""" - example.pop(dataset.target) - best = heapq.nsmallest(k, ((dataset.distance(e, example), e) - for e in dataset.examples)) - return mode(e[dataset.target] for (d, e) in best) + return sigmoid(dotproduct(w, x)) return predict -# ______________________________________________________________________________ -# 18.8 Ensemble learning - - def EnsembleLearner(learners): """Given a list of learning algorithms, have them vote.""" @@ -641,6 +604,49 @@ def predict(example): return train +def ada_boost(dataset, L, K): + """[Figure 
18.34]""" + + examples, target = dataset.examples, dataset.target + N = len(examples) + epsilon = 1 / (2 * N) + w = [1 / N] * N + h, z = [], [] + for k in range(K): + h_k = L(dataset, w) + h.append(h_k) + error = sum(weight for example, weight in zip(examples, w) if example[target] != h_k(example)) + # avoid divide-by-0 from either 0% or 100% error rates + error = clip(error, epsilon, 1 - epsilon) + for j, example in enumerate(examples): + if example[target] == h_k(example): + w[j] *= error / (1 - error) + w = normalize(w) + z.append(math.log((1 - error) / error)) + return weighted_majority(h, z) + + +def weighted_majority(predictors, weights): + """Return a predictor that takes a weighted vote.""" + + def predict(example): + return weighted_mode((predictor(example) for predictor in predictors), weights) + + return predict + + +def weighted_mode(values, weights): + """ + Return the value with the greatest total weight. + >>> weighted_mode('abbaa', [1, 2, 3, 1, 2]) + 'b' + """ + totals = defaultdict(int) + for v, w in zip(values, weights): + totals[v] += w + return max(totals, key=totals.__getitem__) + + def RandomForest(dataset, n=5): """An ensemble of Decision Trees trained using bagging and feature bagging.""" @@ -658,70 +664,19 @@ def predict(example): print([predictor(example) for predictor in predictors]) return mode(predictor(example) for predictor in predictors) - predictors = [DecisionTreeLearner(DataSet(examples=data_bagging(dataset), - attrs=dataset.attrs, - attrnames=dataset.attrnames, - target=dataset.target, + predictors = [DecisionTreeLearner(DataSet(examples=data_bagging(dataset), attrs=dataset.attrs, + attr_names=dataset.attr_names, target=dataset.target, inputs=feature_bagging(dataset))) for _ in range(n)] return predict -def AdaBoost(L, K): - """[Figure 18.34]""" - - def train(dataset): - examples, target = dataset.examples, dataset.target - N = len(examples) - epsilon = 1 / (2 * N) - w = [1 / N] * N - h, z = [], [] - for k in range(K): - h_k = 
L(dataset, w) - h.append(h_k) - error = sum(weight for example, weight in zip(examples, w) - if example[target] != h_k(example)) - - # Avoid divide-by-0 from either 0% or 100% error rates: - error = clip(error, epsilon, 1 - epsilon) - for j, example in enumerate(examples): - if example[target] == h_k(example): - w[j] *= error / (1 - error) - w = normalize(w) - z.append(math.log((1 - error) / error)) - return WeightedMajority(h, z) - - return train - - -def WeightedMajority(predictors, weights): - """Return a predictor that takes a weighted vote.""" - - def predict(example): - return weighted_mode((predictor(example) for predictor in predictors), - weights) - - return predict - - -def weighted_mode(values, weights): - """Return the value with the greatest total weight. - >>> weighted_mode('abbaa', [1, 2, 3, 1, 2]) - 'b' - """ - totals = defaultdict(int) - for v, w in zip(values, weights): - totals[v] += w - return max(totals, key=totals.__getitem__) - - -# _____________________________________________________________________________ -# Adapting an unweighted learner for AdaBoost - - def WeightedLearner(unweighted_learner): - """Given a learner that takes just an unweighted dataset, return - one that takes also a weight for each example. [p. 749 footnote 14]""" + """ + [Page 749 footnote 14] + Given a learner that takes just an unweighted dataset, return + one that takes also a weight for each example. + """ def train(dataset, weights): return unweighted_learner(replicated_dataset(dataset, weights)) @@ -738,7 +693,8 @@ def replicated_dataset(dataset, weights, n=None): def weighted_replicate(seq, weights, n): - """Return n selections from seq, with the count of each element of + """ + Return n selections from seq, with the count of each element of seq proportional to the corresponding weight (filling in fractions randomly). 
>>> weighted_replicate('ABC', [1, 2, 1], 4) @@ -752,48 +708,39 @@ def weighted_replicate(seq, weights, n): weighted_sample_with_replacement(n - sum(wholes), seq, fractions)) -def flatten(seqs): return sum(seqs, []) - - -# _____________________________________________________________________________ -# Functions for testing learners on examples -# The rest of this file gives datasets for machine learning problems. +def flatten(seqs): + return sum(seqs, []) -orings = DataSet(name='orings', target='Distressed', - attrnames="Rings Distressed Temp Pressure Flightnum") +orings = DataSet(name='orings', target='Distressed', attr_names='Rings Distressed Temp Pressure Flightnum') zoo = DataSet(name='zoo', target='type', exclude=['name'], - attrnames="name hair feathers eggs milk airborne aquatic " + - "predator toothed backbone breathes venomous fins legs tail " + - "domestic catsize type") - -iris = DataSet(name="iris", target="class", - attrnames="sepal-len sepal-width petal-len petal-width class") - + attr_names='name hair feathers eggs milk airborne aquatic predator toothed backbone ' + 'breathes venomous fins legs tail domestic catsize type') -# ______________________________________________________________________________ -# The Restaurant example from [Figure 18.2] +iris = DataSet(name='iris', target='class', attr_names='sepal-len sepal-width petal-len petal-width class') def RestaurantDataSet(examples=None): - """Build a DataSet of Restaurant waiting examples. [Figure 18.3]""" + """ + [Figure 18.3] + Build a DataSet of Restaurant waiting examples. 
+ """ return DataSet(name='restaurant', target='Wait', examples=examples, - attrnames='Alternate Bar Fri/Sat Hungry Patrons Price ' + - 'Raining Reservation Type WaitEstimate Wait') + attr_names='Alternate Bar Fri/Sat Hungry Patrons Price Raining Reservation Type WaitEstimate Wait') restaurant = RestaurantDataSet() -def T(attrname, branches): - branches = {value: (child if isinstance(child, DecisionFork) - else DecisionLeaf(child)) +def T(attr_name, branches): + branches = {value: (child if isinstance(child, DecisionFork) else DecisionLeaf(child)) for value, child in branches.items()} - return DecisionFork(restaurant.attrnum(attrname), attrname, print, branches) + return DecisionFork(restaurant.attr_num(attr_name), attr_name, print, branches) -""" [Figure 18.2] +""" +[Figure 18.2] A decision tree for deciding whether to wait for a table at a hotel. """ @@ -806,8 +753,7 @@ def T(attrname, branches): {'Yes': 'Yes', 'No': T('Bar', {'No': 'No', 'Yes': 'Yes'})}), - 'Yes': T('Fri/Sat', {'No': 'No', 'Yes': 'Yes'})} - ), + 'Yes': T('Fri/Sat', {'No': 'No', 'Yes': 'Yes'})}), '10-30': T('Hungry', {'No': 'Yes', 'Yes': T('Alternate', @@ -825,30 +771,30 @@ def gen(): example[restaurant.target] = waiting_decision_tree(example) return example - return RestaurantDataSet([gen() for i in range(n)]) - - -# ______________________________________________________________________________ -# Artificial, generated datasets. + return RestaurantDataSet([gen() for _ in range(n)]) def Majority(k, n): - """Return a DataSet with n k-bit examples of the majority problem: - k random bits followed by a 1 if more than half the bits are 1, else 0.""" + """ + Return a DataSet with n k-bit examples of the majority problem: + k random bits followed by a 1 if more than half the bits are 1, else 0. 
+ """ examples = [] for i in range(n): - bits = [random.choice([0, 1]) for i in range(k)] + bits = [random.choice([0, 1]) for _ in range(k)] bits.append(int(sum(bits) > k / 2)) examples.append(bits) - return DataSet(name="majority", examples=examples) + return DataSet(name='majority', examples=examples) -def Parity(k, n, name="parity"): - """Return a DataSet with n k-bit examples of the parity problem: - k random bits followed by a 1 if an odd number of bits are 1, else 0.""" +def Parity(k, n, name='parity'): + """ + Return a DataSet with n k-bit examples of the parity problem: + k random bits followed by a 1 if an odd number of bits are 1, else 0. + """ examples = [] for i in range(n): - bits = [random.choice([0, 1]) for i in range(k)] + bits = [random.choice([0, 1]) for _ in range(k)] bits.append(sum(bits) % 2) examples.append(bits) return DataSet(name=name, examples=examples) @@ -856,27 +802,29 @@ def Parity(k, n, name="parity"): def Xor(n): """Return a DataSet with n examples of 2-input xor.""" - return Parity(2, n, name="xor") + return Parity(2, n, name='xor') def ContinuousXor(n): """2 inputs are chosen uniformly from (0.0 .. 2.0]; output is xor of ints.""" examples = [] for i in range(n): - x, y = [random.uniform(0.0, 2.0) for i in '12'] - examples.append([x, y, int(x) != int(y)]) - return DataSet(name="continuous xor", examples=examples) + x, y = [random.uniform(0.0, 2.0) for _ in '12'] + examples.append([x, y, x != y]) + return DataSet(name='continuous xor', examples=examples) def compare(algorithms=None, datasets=None, k=10, trials=1): - """Compare various learners on various datasets using cross-validation. - Print results as a table.""" - algorithms = algorithms or [NearestNeighborLearner, DecisionTreeLearner] # default list of algorithms + """ + Compare various learners on various datasets using cross-validation. + Print results as a table. 
+ """ + # default list of algorithms + algorithms = algorithms or [PluralityLearner, NaiveBayesLearner, NearestNeighborLearner, DecisionTreeLearner] - datasets = datasets or [iris, orings, zoo, restaurant, SyntheticRestaurant(20), # default list - Majority(7, 100), Parity(7, 100), Xor(100)] # of datasets + # default list of datasets + datasets = datasets or [iris, orings, zoo, restaurant, SyntheticRestaurant(20), + Majority(7, 100), Parity(7, 100), Xor(100)] - print_table([[a.__name__.replace('Learner', '')] + - [cross_validation_nosize(a, d, k, trials) for d in datasets] - for a in algorithms], - header=[''] + [d.name[0:7] for d in datasets], numfmt='{0:.2f}') + print_table([[a.__name__.replace('Learner', '')] + [cross_validation(a, d, k=k, trials=trials) for d in datasets] + for a in algorithms], header=[''] + [d.name[0:7] for d in datasets], numfmt='%.2f') diff --git a/learning_apps.ipynb b/learning_apps.ipynb index 6d5a27a45..dd45b11b5 100644 --- a/learning_apps.ipynb +++ b/learning_apps.ipynb @@ -16,6 +16,7 @@ "outputs": [], "source": [ "from learning import *\n", + "from probabilistic_learning import *\n", "from notebook import *" ] }, @@ -971,8 +972,17 @@ "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.0" + }, + "pycharm": { + "stem_cell": { + "cell_type": "raw", + "source": [], + "metadata": { + "collapsed": false + } + } } }, "nbformat": 4, "nbformat_minor": 2 -} +} \ No newline at end of file diff --git a/logic.py b/logic.py index 60da6294d..7f4d259dd 100644 --- a/logic.py +++ b/logic.py @@ -40,10 +40,8 @@ from agents import Agent, Glitter, Bump, Stench, Breeze, Scream from csp import parse_neighbors, UniversalDict from search import astar_search, PlanRoute -from utils import ( - removeall, unique, first, argmax, probability, - isnumber, issequence, Expr, expr, subexpressions, - extend) +from utils import (remove_all, unique, first, argmax, probability, isnumber, + issequence, Expr, expr, subexpressions, extend) # 
______________________________________________________________________________ @@ -508,7 +506,7 @@ def pl_resolve(ci, cj): for di in disjuncts(ci): for dj in disjuncts(cj): if di == ~dj or ~di == dj: - clauses.append(associate('|', unique(removeall(di, disjuncts(ci)) + removeall(dj, disjuncts(cj))))) + clauses.append(associate('|', unique(remove_all(di, disjuncts(ci)) + remove_all(dj, disjuncts(cj))))) return clauses @@ -714,13 +712,13 @@ def dpll(clauses, symbols, model, branching_heuristic=no_branching_heuristic): return model P, value = find_pure_symbol(symbols, unknown_clauses) if P: - return dpll(clauses, removeall(P, symbols), extend(model, P, value), branching_heuristic) + return dpll(clauses, remove_all(P, symbols), extend(model, P, value), branching_heuristic) P, value = find_unit_clause(clauses, model) if P: - return dpll(clauses, removeall(P, symbols), extend(model, P, value), branching_heuristic) + return dpll(clauses, remove_all(P, symbols), extend(model, P, value), branching_heuristic) P, value = branching_heuristic(symbols, unknown_clauses) - return (dpll(clauses, removeall(P, symbols), extend(model, P, value), branching_heuristic) or - dpll(clauses, removeall(P, symbols), extend(model, P, not value), branching_heuristic)) + return (dpll(clauses, remove_all(P, symbols), extend(model, P, value), branching_heuristic) or + dpll(clauses, remove_all(P, symbols), extend(model, P, not value), branching_heuristic)) def find_pure_symbol(symbols, clauses): @@ -950,8 +948,8 @@ def pl_binary_resolution(ci, cj): for di in disjuncts(ci): for dj in disjuncts(cj): if di == ~dj or ~di == dj: - return pl_binary_resolution(associate('|', removeall(di, disjuncts(ci))), - associate('|', removeall(dj, disjuncts(cj)))) + return pl_binary_resolution(associate('|', remove_all(di, disjuncts(ci))), + associate('|', remove_all(dj, disjuncts(cj)))) return associate('|', unique(disjuncts(ci) + disjuncts(cj))) diff --git a/probabilistic_learning.py b/probabilistic_learning.py new 
file mode 100644 index 000000000..4b78ef2d9 --- /dev/null +++ b/probabilistic_learning.py @@ -0,0 +1,154 @@ +"""Learning probabilistic models. (Chapters 20)""" + +import heapq + +from utils import weighted_sampler, argmax, product, gaussian + + +class CountingProbDist: + """ + A probability distribution formed by observing and counting examples. + If p is an instance of this class and o is an observed value, then + there are 3 main operations: + p.add(o) increments the count for observation o by 1. + p.sample() returns a random element from the distribution. + p[o] returns the probability for o (as in a regular ProbDist). + """ + + def __init__(self, observations=None, default=0): + """ + Create a distribution, and optionally add in some observations. + By default this is an unsmoothed distribution, but saying default=1, + for example, gives you add-one smoothing. + """ + if observations is None: + observations = [] + self.dictionary = {} + self.n_obs = 0 + self.default = default + self.sampler = None + + for o in observations: + self.add(o) + + def add(self, o): + """Add an observation o to the distribution.""" + self.smooth_for(o) + self.dictionary[o] += 1 + self.n_obs += 1 + self.sampler = None + + def smooth_for(self, o): + """ + Include o among the possible observations, whether or not + it's been observed yet. + """ + if o not in self.dictionary: + self.dictionary[o] = self.default + self.n_obs += self.default + self.sampler = None + + def __getitem__(self, item): + """Return an estimate of the probability of item.""" + self.smooth_for(item) + return self.dictionary[item] / self.n_obs + + # (top() and sample() are not used in this module, but elsewhere.) 
+ + def top(self, n): + """Return (count, obs) tuples for the n most frequent observations.""" + return heapq.nlargest(n, [(v, k) for (k, v) in self.dictionary.items()]) + + def sample(self): + """Return a random sample from the distribution.""" + if self.sampler is None: + self.sampler = weighted_sampler(list(self.dictionary.keys()), list(self.dictionary.values())) + return self.sampler() + + +def NaiveBayesLearner(dataset, continuous=True, simple=False): + if simple: + return NaiveBayesSimple(dataset) + if continuous: + return NaiveBayesContinuous(dataset) + else: + return NaiveBayesDiscrete(dataset) + + +def NaiveBayesSimple(distribution): + """ + A simple naive bayes classifier that takes as input a dictionary of + CountingProbDist objects and classifies items according to these distributions. + The input dictionary is in the following form: + (ClassName, ClassProb): CountingProbDist + """ + target_dist = {c_name: prob for c_name, prob in distribution.keys()} + attr_dists = {c_name: count_prob for (c_name, _), count_prob in distribution.items()} + + def predict(example): + """Predict the target value for example. Calculate probabilities for each + class and pick the max.""" + + def class_probability(target_val): + attr_dist = attr_dists[target_val] + return target_dist[target_val] * product(attr_dist[a] for a in example) + + return argmax(target_dist.keys(), key=class_probability) + + return predict + + +def NaiveBayesDiscrete(dataset): + """ + Just count how many times each value of each input attribute + occurs, conditional on the target value. Count the different + target values too. 
+ """ + + target_vals = dataset.values[dataset.target] + target_dist = CountingProbDist(target_vals) + attr_dists = {(gv, attr): CountingProbDist(dataset.values[attr]) for gv in target_vals for attr in dataset.inputs} + for example in dataset.examples: + target_val = example[dataset.target] + target_dist.add(target_val) + for attr in dataset.inputs: + attr_dists[target_val, attr].add(example[attr]) + + def predict(example): + """ + Predict the target value for example. Consider each possible value, + and pick the most likely by looking at each attribute independently. + """ + + def class_probability(target_val): + return (target_dist[target_val] * product(attr_dists[target_val, attr][example[attr]] + for attr in dataset.inputs)) + + return argmax(target_vals, key=class_probability) + + return predict + + +def NaiveBayesContinuous(dataset): + """ + Count how many times each target value occurs. + Also, find the means and deviations of input attribute values for each target value. + """ + means, deviations = dataset.find_means_and_deviations() + + target_vals = dataset.values[dataset.target] + target_dist = CountingProbDist(target_vals) + + def predict(example): + """Predict the target value for example. 
Consider each possible value, + and pick the most likely by looking at each attribute independently.""" + + def class_probability(target_val): + prob = target_dist[target_val] + for attr in dataset.inputs: + prob *= gaussian(means[target_val][attr], deviations[target_val][attr], example[attr]) + return prob + + return argmax(target_vals, key=class_probability) + + return predict diff --git a/reinforcement_learning.ipynb b/reinforcement_learning.ipynb index a8f6adc2c..ee3b6a5eb 100644 --- a/reinforcement_learning.ipynb +++ b/reinforcement_learning.ipynb @@ -17,7 +17,7 @@ }, "outputs": [], "source": [ - "from rl import *" + "from reinforcement_learning import *" ] }, { @@ -628,8 +628,17 @@ "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.3" + }, + "pycharm": { + "stem_cell": { + "cell_type": "raw", + "source": [], + "metadata": { + "collapsed": false + } + } } }, "nbformat": 4, "nbformat_minor": 1 -} +} \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index ce8246bfa..5a6603dd8 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,6 @@ pytest sortedcontainers -networkx==1.11 +networkx jupyter pandas matplotlib diff --git a/tests/test_agents.py b/tests/test_agents.py index 64e8dc209..3b3182389 100644 --- a/tests/test_agents.py +++ b/tests/test_agents.py @@ -4,11 +4,10 @@ from agents import Agent from agents import Direction -from agents import ReflexVacuumAgent, ModelBasedVacuumAgent, TrivialVacuumEnvironment, compare_agents, \ - RandomVacuumAgent, TableDrivenVacuumAgent, TableDrivenAgentProgram, RandomAgentProgram, \ - SimpleReflexAgentProgram, ModelBasedReflexAgentProgram -from agents import Wall, Gold, Explorer, Thing, Bump, Glitter, WumpusEnvironment, Pit, \ - VacuumEnvironment, Dirt +from agents import (ReflexVacuumAgent, ModelBasedVacuumAgent, TrivialVacuumEnvironment, compare_agents, + RandomVacuumAgent, TableDrivenVacuumAgent, TableDrivenAgentProgram, RandomAgentProgram, + 
SimpleReflexAgentProgram, ModelBasedReflexAgentProgram, Wall, Gold, Explorer, Thing, Bump, Glitter, + WumpusEnvironment, Pit, VacuumEnvironment, Dirt) random.seed("aima-python") @@ -61,7 +60,7 @@ def test_add(): def test_RandomAgentProgram(): - # create a list of all the actions a vacuum cleaner can perform + # create a list of all the actions a Vacuum cleaner can perform list = ['Right', 'Left', 'Suck', 'NoOp'] # create a program and then an object of the RandomAgentProgram program = RandomAgentProgram(list) @@ -102,8 +101,7 @@ def test_TableDrivenAgent(): ((loc_B, 'Clean'), (loc_A, 'Dirty')): 'Suck', ((loc_B, 'Dirty'), (loc_B, 'Clean')): 'Left', ((loc_A, 'Dirty'), (loc_A, 'Clean'), (loc_B, 'Dirty')): 'Suck', - ((loc_B, 'Dirty'), (loc_B, 'Clean'), (loc_A, 'Dirty')): 'Suck' - } + ((loc_B, 'Dirty'), (loc_B, 'Clean'), (loc_A, 'Dirty')): 'Suck'} # create an program and then an object of the TableDrivenAgent program = TableDrivenAgentProgram(table) @@ -185,7 +183,7 @@ def matches(self, state): loc_A = (0, 0) loc_B = (1, 0) - # create rules for a two-state vacuum environment + # create rules for a two-state Vacuum Environment rules = [Rule((loc_A, "Dirty"), "Suck"), Rule((loc_A, "Clean"), "Right"), Rule((loc_B, "Dirty"), "Suck"), Rule((loc_B, "Clean"), "Left")] @@ -236,8 +234,8 @@ def test_compare_agents(): agents = [ModelBasedVacuumAgent, ReflexVacuumAgent] result = compare_agents(environment, agents) - performance_ModelBasedVacummAgent = result[0][1] - performance_ReflexVacummAgent = result[1][1] + performance_ModelBasedVacuumAgent = result[0][1] + performance_ReflexVacuumAgent = result[1][1] # The performance of ModelBasedVacuumAgent will be at least as good as that of # ReflexVacuumAgent, since ModelBasedVacuumAgent can identify when it has @@ -245,7 +243,7 @@ def test_compare_agents(): # NoOp leading to 0 performance change, whereas ReflexVacuumAgent cannot # identify the terminal state and thus will keep moving, leading to worse # performance compared to 
ModelBasedVacuumAgent. - assert performance_ReflexVacummAgent <= performance_ModelBasedVacummAgent + assert performance_ReflexVacuumAgent <= performance_ModelBasedVacuumAgent def test_TableDrivenAgentProgram(): @@ -254,8 +252,7 @@ def test_TableDrivenAgentProgram(): (('bar', 1),): 'action3', (('bar', 2),): 'action1', (('foo', 1), ('foo', 1),): 'action2', - (('foo', 1), ('foo', 2),): 'action3', - } + (('foo', 1), ('foo', 2),): 'action3'} agent_program = TableDrivenAgentProgram(table) assert agent_program(('foo', 1)) == 'action1' assert agent_program(('foo', 2)) == 'action3' @@ -272,19 +269,19 @@ def constant_prog(percept): def test_VacuumEnvironment(): - # Initialize Vacuum Environment + # initialize Vacuum Environment v = VacuumEnvironment(6, 6) - # Get an agent + # get an agent agent = ModelBasedVacuumAgent() agent.direction = Direction(Direction.R) v.add_thing(agent) v.add_thing(Dirt(), location=(2, 1)) - # Check if things are added properly + # check if things are added properly assert len([x for x in v.things if isinstance(x, Wall)]) == 20 assert len([x for x in v.things if isinstance(x, Dirt)]) == 1 - # Let the action begin! + # let the action begin! 
assert v.percept(agent) == ("Clean", "None") v.execute_action(agent, "Forward") assert v.percept(agent) == ("Dirty", "None") @@ -302,38 +299,37 @@ def test_WumpusEnvironment(): def constant_prog(percept): return percept - # Initialize Wumpus Environment + # initialize Wumpus Environment w = WumpusEnvironment(constant_prog) - # Check if things are added properly + # check if things are added properly assert len([x for x in w.things if isinstance(x, Wall)]) == 20 assert any(map(lambda x: isinstance(x, Gold), w.things)) assert any(map(lambda x: isinstance(x, Explorer), w.things)) assert not any(map(lambda x: not isinstance(x, Thing), w.things)) - # Check that gold and wumpus are not present on (1,1) - assert not any(map(lambda x: isinstance(x, Gold) or isinstance(x, WumpusEnvironment), - w.list_things_at((1, 1)))) + # check that gold and wumpus are not present on (1,1) + assert not any(map(lambda x: isinstance(x, Gold) or isinstance(x, WumpusEnvironment), w.list_things_at((1, 1)))) - # Check if w.get_world() segments objects correctly + # check if w.get_world() segments objects correctly assert len(w.get_world()) == 6 for row in w.get_world(): assert len(row) == 6 - # Start the game! + # start the game! 
agent = [x for x in w.things if isinstance(x, Explorer)][0] gold = [x for x in w.things if isinstance(x, Gold)][0] pit = [x for x in w.things if isinstance(x, Pit)][0] assert not w.is_done() - # Check Walls + # check Walls agent.location = (1, 2) percepts = w.percept(agent) assert len(percepts) == 5 assert any(map(lambda x: isinstance(x, Bump), percepts[0])) - # Check Gold + # check Gold agent.location = gold.location percepts = w.percept(agent) assert any(map(lambda x: isinstance(x, Glitter), percepts[4])) @@ -341,7 +337,7 @@ def constant_prog(percept): percepts = w.percept(agent) assert not any(map(lambda x: isinstance(x, Glitter), percepts[4])) - # Check agent death + # check agent death agent.location = pit.location assert w.in_danger(agent) assert not agent.alive @@ -355,7 +351,7 @@ def test_WumpusEnvironmentActions(): def constant_prog(percept): return percept - # Initialize Wumpus Environment + # initialize Wumpus Environment w = WumpusEnvironment(constant_prog) agent = [x for x in w.things if isinstance(x, Explorer)][0] diff --git a/tests/test_agents4e.py b/tests/test_agents4e.py index d94a86141..a84e67e7f 100644 --- a/tests/test_agents4e.py +++ b/tests/test_agents4e.py @@ -4,10 +4,9 @@ from agents4e import Agent, WumpusEnvironment, Explorer, Thing, Gold, Pit, Bump, Glitter from agents4e import Direction -from agents4e import ReflexVacuumAgent, ModelBasedVacuumAgent, TrivialVacuumEnvironment, compare_agents, \ - RandomVacuumAgent, TableDrivenVacuumAgent, TableDrivenAgentProgram, RandomAgentProgram, \ - SimpleReflexAgentProgram, ModelBasedReflexAgentProgram -from agents4e import Wall, VacuumEnvironment, Dirt +from agents4e import (ReflexVacuumAgent, ModelBasedVacuumAgent, TrivialVacuumEnvironment, compare_agents, + RandomVacuumAgent, TableDrivenVacuumAgent, TableDrivenAgentProgram, RandomAgentProgram, + SimpleReflexAgentProgram, ModelBasedReflexAgentProgram, Wall, VacuumEnvironment, Dirt) random.seed("aima-python") @@ -60,7 +59,7 @@ def test_add(): def 
test_RandomAgentProgram(): - # create a list of all the actions a vacuum cleaner can perform + # create a list of all the actions a Vacuum cleaner can perform list = ['Right', 'Left', 'Suck', 'NoOp'] # create a program and then an object of the RandomAgentProgram program = RandomAgentProgram(list) @@ -101,8 +100,7 @@ def test_TableDrivenAgent(): ((loc_B, 'Clean'), (loc_A, 'Dirty')): 'Suck', ((loc_B, 'Dirty'), (loc_B, 'Clean')): 'Left', ((loc_A, 'Dirty'), (loc_A, 'Clean'), (loc_B, 'Dirty')): 'Suck', - ((loc_B, 'Dirty'), (loc_B, 'Clean'), (loc_A, 'Dirty')): 'Suck' - } + ((loc_B, 'Dirty'), (loc_B, 'Clean'), (loc_A, 'Dirty')): 'Suck'} # create an program and then an object of the TableDrivenAgent program = TableDrivenAgentProgram(table) @@ -183,7 +181,7 @@ def matches(self, state): loc_A = (0, 0) loc_B = (1, 0) - # create rules for a two-state vacuum environment + # create rules for a two-state Vacuum Environment rules = [Rule((loc_A, "Dirty"), "Suck"), Rule((loc_A, "Clean"), "Right"), Rule((loc_B, "Dirty"), "Suck"), Rule((loc_B, "Clean"), "Left")] @@ -234,8 +232,8 @@ def test_compare_agents(): agents = [ModelBasedVacuumAgent, ReflexVacuumAgent] result = compare_agents(environment, agents) - performance_ModelBasedVacummAgent = result[0][1] - performance_ReflexVacummAgent = result[1][1] + performance_ModelBasedVacuumAgent = result[0][1] + performance_ReflexVacuumAgent = result[1][1] # The performance of ModelBasedVacuumAgent will be at least as good as that of # ReflexVacuumAgent, since ModelBasedVacuumAgent can identify when it has @@ -243,7 +241,7 @@ def test_compare_agents(): # NoOp leading to 0 performance change, whereas ReflexVacuumAgent cannot # identify the terminal state and thus will keep moving, leading to worse # performance compared to ModelBasedVacuumAgent. 
- assert performance_ReflexVacummAgent <= performance_ModelBasedVacummAgent + assert performance_ReflexVacuumAgent <= performance_ModelBasedVacuumAgent def test_TableDrivenAgentProgram(): @@ -252,12 +250,11 @@ def test_TableDrivenAgentProgram(): (('bar', 1),): 'action3', (('bar', 2),): 'action1', (('foo', 1), ('foo', 1),): 'action2', - (('foo', 1), ('foo', 2),): 'action3', - } + (('foo', 1), ('foo', 2),): 'action3'} agent_program = TableDrivenAgentProgram(table) assert agent_program(('foo', 1)) == 'action1' assert agent_program(('foo', 2)) == 'action3' - assert agent_program(('invalid percept',)) == None + assert agent_program(('invalid percept',)) is None def test_Agent(): @@ -270,19 +267,19 @@ def constant_prog(percept): def test_VacuumEnvironment(): - # Initialize Vacuum Environment + # initialize Vacuum Environment v = VacuumEnvironment(6, 6) - # Get an agent + # get an agent agent = ModelBasedVacuumAgent() agent.direction = Direction(Direction.R) v.add_thing(agent) v.add_thing(Dirt(), location=(2, 1)) - # Check if things are added properly + # check if things are added properly assert len([x for x in v.things if isinstance(x, Wall)]) == 20 assert len([x for x in v.things if isinstance(x, Dirt)]) == 1 - # Let the action begin! + # let the action begin! 
assert v.percept(agent) == ("Clean", "None") v.execute_action(agent, "Forward") assert v.percept(agent) == ("Dirty", "None") @@ -300,37 +297,37 @@ def test_WumpusEnvironment(): def constant_prog(percept): return percept - # Initialize Wumpus Environment + # initialize Wumpus Environment w = WumpusEnvironment(constant_prog) - # Check if things are added properly + # check if things are added properly assert len([x for x in w.things if isinstance(x, Wall)]) == 20 assert any(map(lambda x: isinstance(x, Gold), w.things)) assert any(map(lambda x: isinstance(x, Explorer), w.things)) assert not any(map(lambda x: not isinstance(x, Thing), w.things)) - # Check that gold and wumpus are not present on (1,1) + # check that gold and wumpus are not present on (1,1) assert not any(map(lambda x: isinstance(x, Gold) or isinstance(x, WumpusEnvironment), w.list_things_at((1, 1)))) - # Check if w.get_world() segments objects correctly + # check if w.get_world() segments objects correctly assert len(w.get_world()) == 6 for row in w.get_world(): assert len(row) == 6 - # Start the game! + # start the game! 
agent = [x for x in w.things if isinstance(x, Explorer)][0] gold = [x for x in w.things if isinstance(x, Gold)][0] pit = [x for x in w.things if isinstance(x, Pit)][0] assert not w.is_done() - # Check Walls + # check Walls agent.location = (1, 2) percepts = w.percept(agent) assert len(percepts) == 5 assert any(map(lambda x: isinstance(x, Bump), percepts[0])) - # Check Gold + # check Gold agent.location = gold.location percepts = w.percept(agent) assert any(map(lambda x: isinstance(x, Glitter), percepts[4])) @@ -338,7 +335,7 @@ def constant_prog(percept): percepts = w.percept(agent) assert not any(map(lambda x: isinstance(x, Glitter), percepts[4])) - # Check agent death + # check agent death agent.location = pit.location assert w.in_danger(agent) assert not agent.alive @@ -352,7 +349,7 @@ def test_WumpusEnvironmentActions(): def constant_prog(percept): return percept - # Initialize Wumpus Environment + # initialize Wumpus Environment w = WumpusEnvironment(constant_prog) agent = [x for x in w.things if isinstance(x, Explorer)][0] diff --git a/tests/test_deep_learning4e.py b/tests/test_deep_learning4e.py index d0a05bc49..2a611076c 100644 --- a/tests/test_deep_learning4e.py +++ b/tests/test_deep_learning4e.py @@ -9,11 +9,11 @@ def test_neural_net(): - iris = DataSet(name="iris") - classes = ["setosa", "versicolor", "virginica"] + iris = DataSet(name='iris') + classes = ['setosa', 'versicolor', 'virginica'] iris.classes_to_numbers(classes) - nn_adam = neural_net_learner(iris, [4], learning_rate=0.001, epochs=200, optimizer=adam_optimizer) - nn_gd = neural_net_learner(iris, [4], learning_rate=0.15, epochs=100, optimizer=gradient_descent) + nnl_adam = NeuralNetLearner(iris, [4], learning_rate=0.001, epochs=200, optimizer=adam_optimizer) + nnl_gd = NeuralNetLearner(iris, [4], learning_rate=0.15, epochs=100, optimizer=gradient_descent) tests = [([5.0, 3.1, 0.9, 0.1], 0), ([5.1, 3.5, 1.0, 0.0], 0), ([4.9, 3.3, 1.1, 0.1], 0), @@ -23,25 +23,25 @@ def test_neural_net(): ([7.5, 
4.1, 6.2, 2.3], 2), ([7.3, 4.0, 6.1, 2.4], 2), ([7.0, 3.3, 6.1, 2.5], 2)] - assert grade_learner(nn_adam, tests) >= 1 / 3 - assert grade_learner(nn_gd, tests) >= 1 / 3 - assert err_ratio(nn_adam, iris) < 0.21 - assert err_ratio(nn_gd, iris) < 0.21 + assert grade_learner(nnl_adam, tests) >= 1 / 3 + assert grade_learner(nnl_gd, tests) >= 1 / 3 + assert err_ratio(nnl_adam, iris) < 0.21 + assert err_ratio(nnl_gd, iris) < 0.21 def test_perceptron(): - iris = DataSet(name="iris") - classes = ["setosa", "versicolor", "virginica"] + iris = DataSet(name='iris') + classes = ['setosa', 'versicolor', 'virginica'] iris.classes_to_numbers(classes) - perceptron = perceptron_learner(iris, learning_rate=0.01, epochs=100) + pl = PerceptronLearner(iris, learning_rate=0.01, epochs=100) tests = [([5, 3, 1, 0.1], 0), ([5, 3.5, 1, 0], 0), ([6, 3, 4, 1.1], 1), ([6, 2, 3.5, 1], 1), ([7.5, 4, 6, 2], 2), ([7, 3, 6, 2.5], 2)] - assert grade_learner(perceptron, tests) > 1 / 2 - assert err_ratio(perceptron, iris) < 0.4 + assert grade_learner(pl, tests) > 1 / 2 + assert err_ratio(pl, iris) < 0.4 def test_rnn(): @@ -49,20 +49,19 @@ def test_rnn(): train, val, test = keras_dataset_loader(data) train = (train[0][:1000], train[1][:1000]) val = (val[0][:200], val[1][:200]) - model = simple_rnn_learner(train, val) - score = model.evaluate(test[0][:200], test[1][:200], verbose=0) - acc = score[1] - assert acc >= 0.3 + rnn = SimpleRNNLearner(train, val) + score = rnn.evaluate(test[0][:200], test[1][:200], verbose=0) + assert score[1] >= 0.3 def test_auto_encoder(): - iris = DataSet(name="iris") - classes = ["setosa", "versicolor", "virginica"] + iris = DataSet(name='iris') + classes = ['setosa', 'versicolor', 'virginica'] iris.classes_to_numbers(classes) inputs = np.asarray(iris.examples) - model = auto_encoder_learner(inputs, 100) + al = AutoencoderLearner(inputs, 100) print(inputs[0]) - print(model.predict(inputs[:1])) + print(al.predict(inputs[:1])) if __name__ == "__main__": diff --git 
a/tests/test_learning.py b/tests/test_learning.py index 1cf24984f..1590a4d33 100644 --- a/tests/test_learning.py +++ b/tests/test_learning.py @@ -11,8 +11,8 @@ def test_exclude(): def test_parse_csv(): - Iris = open_data('iris.csv').read() - assert parse_csv(Iris)[0] == [5.1, 3.5, 1.4, 0.2, 'setosa'] + iris = open_data('iris.csv').read() + assert parse_csv(iris)[0] == [5.1, 3.5, 1.4, 0.2, 'setosa'] def test_weighted_mode(): @@ -24,99 +24,37 @@ def test_weighted_replicate(): def test_means_and_deviation(): - iris = DataSet(name="iris") - + iris = DataSet(name='iris') means, deviations = iris.find_means_and_deviations() - - assert round(means["setosa"][0], 3) == 5.006 - assert round(means["versicolor"][0], 3) == 5.936 - assert round(means["virginica"][0], 3) == 6.588 - - assert round(deviations["setosa"][0], 3) == 0.352 - assert round(deviations["versicolor"][0], 3) == 0.516 - assert round(deviations["virginica"][0], 3) == 0.636 + assert round(means['setosa'][0], 3) == 5.006 + assert round(means['versicolor'][0], 3) == 5.936 + assert round(means['virginica'][0], 3) == 6.588 + assert round(deviations['setosa'][0], 3) == 0.352 + assert round(deviations['versicolor'][0], 3) == 0.516 + assert round(deviations['virginica'][0], 3) == 0.636 def test_plurality_learner(): - zoo = DataSet(name="zoo") - - pL = PluralityLearner(zoo) - assert pL([1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 4, 1, 0, 1]) == "mammal" - - -def test_naive_bayes(): - iris = DataSet(name="iris") - - # Discrete - nBD = NaiveBayesLearner(iris, continuous=False) - assert nBD([5, 3, 1, 0.1]) == "setosa" - assert nBD([6, 3, 4, 1.1]) == "versicolor" - assert nBD([7.7, 3, 6, 2]) == "virginica" - - # Continuous - nBC = NaiveBayesLearner(iris, continuous=True) - assert nBC([5, 3, 1, 0.1]) == "setosa" - assert nBC([6, 5, 3, 1.5]) == "versicolor" - assert nBC([7, 3, 6.5, 2]) == "virginica" - - # Simple - data1 = 'a' * 50 + 'b' * 30 + 'c' * 15 - dist1 = CountingProbDist(data1) - data2 = 'a' * 30 + 'b' * 45 + 'c' * 20 - 
dist2 = CountingProbDist(data2) - data3 = 'a' * 20 + 'b' * 20 + 'c' * 35 - dist3 = CountingProbDist(data3) - - dist = {('First', 0.5): dist1, ('Second', 0.3): dist2, ('Third', 0.2): dist3} - nBS = NaiveBayesLearner(dist, simple=True) - assert nBS('aab') == 'First' - assert nBS(['b', 'b']) == 'Second' - assert nBS('ccbcc') == 'Third' + zoo = DataSet(name='zoo') + pl = PluralityLearner(zoo) + assert pl([1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 4, 1, 0, 1]) == 'mammal' def test_k_nearest_neighbors(): - iris = DataSet(name="iris") - kNN = NearestNeighborLearner(iris, k=3) - assert kNN([5, 3, 1, 0.1]) == "setosa" - assert kNN([5, 3, 1, 0.1]) == "setosa" - assert kNN([6, 5, 3, 1.5]) == "versicolor" - assert kNN([7.5, 4, 6, 2]) == "virginica" - - -def test_truncated_svd(): - test_mat = [[17, 0], - [0, 11]] - _, _, eival = truncated_svd(test_mat) - assert isclose(eival[0], 17) - assert isclose(eival[1], 11) - - test_mat = [[17, 0], - [0, -34]] - _, _, eival = truncated_svd(test_mat) - assert isclose(eival[0], 34) - assert isclose(eival[1], 17) - - test_mat = [[1, 0, 0, 0, 2], - [0, 0, 3, 0, 0], - [0, 0, 0, 0, 0], - [0, 2, 0, 0, 0]] - _, _, eival = truncated_svd(test_mat) - assert isclose(eival[0], 3) - assert isclose(eival[1], 5 ** 0.5) - - test_mat = [[3, 2, 2], - [2, 3, -2]] - _, _, eival = truncated_svd(test_mat) - assert isclose(eival[0], 5) - assert isclose(eival[1], 3) + iris = DataSet(name='iris') + knn = NearestNeighborLearner(iris, k=3) + assert knn([5, 3, 1, 0.1]) == 'setosa' + assert knn([5, 3, 1, 0.1]) == 'setosa' + assert knn([6, 5, 3, 1.5]) == 'versicolor' + assert knn([7.5, 4, 6, 2]) == 'virginica' def test_decision_tree_learner(): - iris = DataSet(name="iris") - dTL = DecisionTreeLearner(iris) - assert dTL([5, 3, 1, 0.1]) == "setosa" - assert dTL([6, 5, 3, 1.5]) == "versicolor" - assert dTL([7.5, 4, 6, 2]) == "virginica" + iris = DataSet(name='iris') + dtl = DecisionTreeLearner(iris) + assert dtl([5, 3, 1, 0.1]) == 'setosa' + assert dtl([6, 5, 3, 1.5]) == 
'versicolor' + assert dtl([7.5, 4, 6, 2]) == 'virginica' def test_information_content(): @@ -129,22 +67,22 @@ def test_information_content(): def test_random_forest(): - iris = DataSet(name="iris") - rF = RandomForest(iris) - tests = [([5.0, 3.0, 1.0, 0.1], "setosa"), - ([5.1, 3.3, 1.1, 0.1], "setosa"), - ([6.0, 5.0, 3.0, 1.0], "versicolor"), - ([6.1, 2.2, 3.5, 1.0], "versicolor"), - ([7.5, 4.1, 6.2, 2.3], "virginica"), - ([7.3, 3.7, 6.1, 2.5], "virginica")] - assert grade_learner(rF, tests) >= 1 / 3 + iris = DataSet(name='iris') + rf = RandomForest(iris) + tests = [([5.0, 3.0, 1.0, 0.1], 'setosa'), + ([5.1, 3.3, 1.1, 0.1], 'setosa'), + ([6.0, 5.0, 3.0, 1.0], 'versicolor'), + ([6.1, 2.2, 3.5, 1.0], 'versicolor'), + ([7.5, 4.1, 6.2, 2.3], 'virginica'), + ([7.3, 3.7, 6.1, 2.5], 'virginica')] + assert grade_learner(rf, tests) >= 1 / 3 def test_neural_network_learner(): - iris = DataSet(name="iris") - classes = ["setosa", "versicolor", "virginica"] + iris = DataSet(name='iris') + classes = ['setosa', 'versicolor', 'virginica'] iris.classes_to_numbers(classes) - nNL = NeuralNetLearner(iris, [5], 0.15, 75) + nnl = NeuralNetLearner(iris, [5], 0.15, 75) tests = [([5.0, 3.1, 0.9, 0.1], 0), ([5.1, 3.5, 1.0, 0.0], 0), ([4.9, 3.3, 1.1, 0.1], 0), @@ -154,22 +92,22 @@ def test_neural_network_learner(): ([7.5, 4.1, 6.2, 2.3], 2), ([7.3, 4.0, 6.1, 2.4], 2), ([7.0, 3.3, 6.1, 2.5], 2)] - assert grade_learner(nNL, tests) >= 1 / 3 - assert err_ratio(nNL, iris) < 0.21 + assert grade_learner(nnl, tests) >= 1 / 3 + assert err_ratio(nnl, iris) < 0.21 def test_perceptron(): - iris = DataSet(name="iris") + iris = DataSet(name='iris') iris.classes_to_numbers() - perceptron = PerceptronLearner(iris) + pl = PerceptronLearner(iris) tests = [([5, 3, 1, 0.1], 0), ([5, 3.5, 1, 0], 0), ([6, 3, 4, 1.1], 1), ([6, 2, 3.5, 1], 1), ([7.5, 4, 6, 2], 2), ([7, 3, 6, 2.5], 2)] - assert grade_learner(perceptron, tests) > 1 / 2 - assert err_ratio(perceptron, iris) < 0.4 + assert grade_learner(pl, tests) > 1 / 
2 + assert err_ratio(pl, iris) < 0.4 def test_random_weights(): @@ -182,20 +120,19 @@ def test_random_weights(): assert min_value <= weight <= max_value -def test_adaBoost(): - iris = DataSet(name="iris") +def test_ada_boost(): + iris = DataSet(name='iris') iris.classes_to_numbers() - WeightedPerceptron = WeightedLearner(PerceptronLearner) - AdaBoostLearner = AdaBoost(WeightedPerceptron, 5) - adaBoost = AdaBoostLearner(iris) + wl = WeightedLearner(PerceptronLearner) + ab = ada_boost(iris, wl, 5) tests = [([5, 3, 1, 0.1], 0), ([5, 3.5, 1, 0], 0), ([6, 3, 4, 1.1], 1), ([6, 2, 3.5, 1], 1), ([7.5, 4, 6, 2], 2), ([7, 3, 6, 2.5], 2)] - assert grade_learner(adaBoost, tests) > 4 / 6 - assert err_ratio(adaBoost, iris) < 0.25 + assert grade_learner(ab, tests) > 4 / 6 + assert err_ratio(ab, iris) < 0.25 if __name__ == "__main__": diff --git a/tests/test_learning4e.py b/tests/test_learning4e.py index 82cf835dc..987a9bffc 100644 --- a/tests/test_learning4e.py +++ b/tests/test_learning4e.py @@ -1,6 +1,7 @@ import pytest -from learning import * +from deep_learning4e import PerceptronLearner +from learning4e import * random.seed("aima-python") @@ -11,8 +12,8 @@ def test_exclude(): def test_parse_csv(): - Iris = open_data('iris.csv').read() - assert parse_csv(Iris)[0] == [5.1, 3.5, 1.4, 0.2, 'setosa'] + iris = open_data('iris.csv').read() + assert parse_csv(iris)[0] == [5.1, 3.5, 1.4, 0.2, 'setosa'] def test_weighted_mode(): @@ -24,25 +25,37 @@ def test_weighted_replicate(): def test_means_and_deviation(): - iris = DataSet(name="iris") - + iris = DataSet(name='iris') means, deviations = iris.find_means_and_deviations() + assert round(means['setosa'][0], 3) == 5.006 + assert round(means['versicolor'][0], 3) == 5.936 + assert round(means['virginica'][0], 3) == 6.588 + assert round(deviations['setosa'][0], 3) == 0.352 + assert round(deviations['versicolor'][0], 3) == 0.516 + assert round(deviations['virginica'][0], 3) == 0.636 + + +def test_plurality_learner(): + zoo = 
DataSet(name='zoo') + pl = PluralityLearner(zoo) + assert pl([1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 4, 1, 0, 1]) == 'mammal' - assert round(means["setosa"][0], 3) == 5.006 - assert round(means["versicolor"][0], 3) == 5.936 - assert round(means["virginica"][0], 3) == 6.588 - assert round(deviations["setosa"][0], 3) == 0.352 - assert round(deviations["versicolor"][0], 3) == 0.516 - assert round(deviations["virginica"][0], 3) == 0.636 +def test_k_nearest_neighbors(): + iris = DataSet(name='iris') + knn = NearestNeighborLearner(iris, k=3) + assert knn([5, 3, 1, 0.1]) == 'setosa' + assert knn([5, 3, 1, 0.1]) == 'setosa' + assert knn([6, 5, 3, 1.5]) == 'versicolor' + assert knn([7.5, 4, 6, 2]) == 'virginica' def test_decision_tree_learner(): - iris = DataSet(name="iris") - dTL = DecisionTreeLearner(iris) - assert dTL([5, 3, 1, 0.1]) == "setosa" - assert dTL([6, 5, 3, 1.5]) == "versicolor" - assert dTL([7.5, 4, 6, 2]) == "virginica" + iris = DataSet(name='iris') + dtl = DecisionTreeLearner(iris) + assert dtl([5, 3, 1, 0.1]) == 'setosa' + assert dtl([6, 5, 3, 1.5]) == 'versicolor' + assert dtl([7.5, 4, 6, 2]) == 'virginica' def test_information_content(): @@ -55,15 +68,15 @@ def test_information_content(): def test_random_forest(): - iris = DataSet(name="iris") - rF = RandomForest(iris) - tests = [([5.0, 3.0, 1.0, 0.1], "setosa"), - ([5.1, 3.3, 1.1, 0.1], "setosa"), - ([6.0, 5.0, 3.0, 1.0], "versicolor"), - ([6.1, 2.2, 3.5, 1.0], "versicolor"), - ([7.5, 4.1, 6.2, 2.3], "virginica"), - ([7.3, 3.7, 6.1, 2.5], "virginica")] - assert grade_learner(rF, tests) >= 1 / 3 + iris = DataSet(name='iris') + rf = RandomForest(iris) + tests = [([5.0, 3.0, 1.0, 0.1], 'setosa'), + ([5.1, 3.3, 1.1, 0.1], 'setosa'), + ([6.0, 5.0, 3.0, 1.0], 'versicolor'), + ([6.1, 2.2, 3.5, 1.0], 'versicolor'), + ([7.5, 4.1, 6.2, 2.3], 'virginica'), + ([7.3, 3.7, 6.1, 2.5], 'virginica')] + assert grade_learner(rf, tests) >= 1 / 3 def test_random_weights(): @@ -76,20 +89,19 @@ def test_random_weights(): assert 
min_value <= weight <= max_value -def test_adaBoost(): - iris = DataSet(name="iris") +def test_ada_boost(): + iris = DataSet(name='iris') iris.classes_to_numbers() - WeightedPerceptron = WeightedLearner(PerceptronLearner) - AdaBoostLearner = AdaBoost(WeightedPerceptron, 5) - adaBoost = AdaBoostLearner(iris) + wl = WeightedLearner(PerceptronLearner) + ab = ada_boost(iris, wl, 5) tests = [([5, 3, 1, 0.1], 0), ([5, 3.5, 1, 0], 0), ([6, 3, 4, 1.1], 1), ([6, 2, 3.5, 1], 1), ([7.5, 4, 6, 2], 2), ([7, 3, 6, 2.5], 2)] - assert grade_learner(adaBoost, tests) > 4 / 6 - assert err_ratio(adaBoost, iris) < 0.25 + assert grade_learner(ab, tests) > 4 / 6 + assert err_ratio(ab, iris) < 0.25 if __name__ == "__main__": diff --git a/tests/test_probabilistic_learning.py b/tests/test_probabilistic_learning.py new file mode 100644 index 000000000..bd37b6ebb --- /dev/null +++ b/tests/test_probabilistic_learning.py @@ -0,0 +1,38 @@ +import random + +import pytest + +from learning import DataSet +from probabilistic_learning import * + +random.seed("aima-python") + + +def test_naive_bayes(): + iris = DataSet(name='iris') + # discrete + nbd = NaiveBayesLearner(iris, continuous=False) + assert nbd([5, 3, 1, 0.1]) == 'setosa' + assert nbd([6, 3, 4, 1.1]) == 'versicolor' + assert nbd([7.7, 3, 6, 2]) == 'virginica' + # continuous + nbc = NaiveBayesLearner(iris, continuous=True) + assert nbc([5, 3, 1, 0.1]) == 'setosa' + assert nbc([6, 5, 3, 1.5]) == 'versicolor' + assert nbc([7, 3, 6.5, 2]) == 'virginica' + # simple + data1 = 'a' * 50 + 'b' * 30 + 'c' * 15 + dist1 = CountingProbDist(data1) + data2 = 'a' * 30 + 'b' * 45 + 'c' * 20 + dist2 = CountingProbDist(data2) + data3 = 'a' * 20 + 'b' * 20 + 'c' * 35 + dist3 = CountingProbDist(data3) + dist = {('First', 0.5): dist1, ('Second', 0.3): dist2, ('Third', 0.2): dist3} + nbs = NaiveBayesLearner(dist, simple=True) + assert nbs('aab') == 'First' + assert nbs(['b', 'b']) == 'Second' + assert nbs('ccbcc') == 'Third' + + +if __name__ == "__main__": + 
pytest.main() diff --git a/tests/test_utils.py b/tests/test_utils.py index 5ccafe157..672784bef 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -15,17 +15,17 @@ def test_sequence(): assert sequence(([1, 2], [3, 4], [5, 6])) == ([1, 2], [3, 4], [5, 6]) -def test_removeall_list(): - assert removeall(4, []) == [] - assert removeall(4, [1, 2, 3, 4]) == [1, 2, 3] - assert removeall(4, [4, 1, 4, 2, 3, 4, 4]) == [1, 2, 3] - assert removeall(1, [2, 3, 4, 5, 6]) == [2, 3, 4, 5, 6] +def test_remove_all_list(): + assert remove_all(4, []) == [] + assert remove_all(4, [1, 2, 3, 4]) == [1, 2, 3] + assert remove_all(4, [4, 1, 4, 2, 3, 4, 4]) == [1, 2, 3] + assert remove_all(1, [2, 3, 4, 5, 6]) == [2, 3, 4, 5, 6] -def test_removeall_string(): - assert removeall('s', '') == '' - assert removeall('s', 'This is a test. Was a test.') == 'Thi i a tet. Wa a tet.' - assert removeall('a', 'artificial intelligence: a modern approach') == 'rtificil intelligence: modern pproch' +def test_remove_all_string(): + assert remove_all('s', '') == '' + assert remove_all('s', 'This is a test. Was a test.') == 'Thi i a tet. Wa a tet.' 
+ assert remove_all('a', 'artificial intelligence: a modern approach') == 'rtificil intelligence: modern pproch' def test_unique(): @@ -261,6 +261,34 @@ def test_sigmoid_derivative(): assert sigmoid_derivative(value) == -6 +def test_truncated_svd(): + test_mat = [[17, 0], + [0, 11]] + _, _, eival = truncated_svd(test_mat) + assert isclose(eival[0], 17) + assert isclose(eival[1], 11) + + test_mat = [[17, 0], + [0, -34]] + _, _, eival = truncated_svd(test_mat) + assert isclose(eival[0], 34) + assert isclose(eival[1], 17) + + test_mat = [[1, 0, 0, 0, 2], + [0, 0, 3, 0, 0], + [0, 0, 0, 0, 0], + [0, 2, 0, 0, 0]] + _, _, eival = truncated_svd(test_mat) + assert isclose(eival[0], 3) + assert isclose(eival[1], 5 ** 0.5) + + test_mat = [[3, 2, 2], + [2, 3, -2]] + _, _, eival = truncated_svd(test_mat) + assert isclose(eival[0], 5) + assert isclose(eival[1], 3) + + def test_weighted_choice(): choices = [('a', 0.5), ('b', 0.3), ('c', 0.2)] choice = weighted_choice(choices) @@ -340,11 +368,10 @@ def test_expr(): assert expr('P & Q <=> Q & P') == Expr('<=>', (P & Q), (Q & P)) assert expr('P(x) | P(y) & Q(z)') == (P(x) | (P(y) & Q(z))) # x is grandparent of z if x is parent of y and y is parent of z: - assert (expr('GP(x, z) <== P(x, y) & P(y, z)') - == Expr('<==', GP(x, z), P(x, y) & P(y, z))) + assert (expr('GP(x, z) <== P(x, y) & P(y, z)') == Expr('<==', GP(x, z), P(x, y) & P(y, z))) -def test_min_priorityqueue(): +def test_min_priority_queue(): queue = PriorityQueue(f=lambda x: x[1]) queue.append((1, 100)) queue.append((2, 30)) @@ -360,7 +387,7 @@ def test_min_priorityqueue(): assert len(queue) == 2 -def test_max_priorityqueue(): +def test_max_priority_queue(): queue = PriorityQueue(order='max', f=lambda x: x[1]) queue.append((1, 100)) queue.append((2, 30)) @@ -368,7 +395,7 @@ def test_max_priorityqueue(): assert queue.pop() == (1, 100) -def test_priorityqueue_with_objects(): +def test_priority_queue_with_objects(): class Test: def __init__(self, a, b): self.a = a diff --git 
a/text.py b/text.py index 3a2d9d7aa..bf1809f96 100644 --- a/text.py +++ b/text.py @@ -5,7 +5,7 @@ working on a tiny sample of Unix manual pages.""" from utils import argmin, argmax, hashabledict -from learning import CountingProbDist +from probabilistic_learning import CountingProbDist import search from math import log, exp diff --git a/utils.py b/utils.py index 897147539..75d4547cf 100644 --- a/utils.py +++ b/utils.py @@ -25,7 +25,7 @@ def sequence(iterable): else tuple([iterable])) -def removeall(item, seq): +def remove_all(item, seq): """Return a copy of seq (or string) with all occurrences of item removed.""" if isinstance(seq, str): return seq.replace(item, '') @@ -305,7 +305,7 @@ def manhattan_distance(X, Y): def mean_boolean_error(X, Y): - return mean(int(x != y) for x, y in zip(X, Y)) + return mean(x != y for x, y in zip(X, Y)) def hamming_distance(X, Y): @@ -329,6 +329,10 @@ def norm(X, n=2): return sum([x ** n for x in X]) ** (1 / n) +def random_weights(min_value, max_value, num_weights): + return [random.uniform(min_value, max_value) for _ in range(num_weights)] + + def clip(x, lowest, highest): """Return x clipped to the range [lowest..highest].""" return max(lowest, min(x, highest)) @@ -414,6 +418,71 @@ def isclose(a, b, rel_tol=1e-09, abs_tol=0.0): """Return true if numbers a and b are close to each other.""" return abs(a - b) <= max(rel_tol * max(abs(a), abs(b)), abs_tol) + +def truncated_svd(X, num_val=2, max_iter=1000): + """Compute the first component of SVD.""" + + def normalize_vec(X, n=2): + """Normalize two parts (:m and m:) of the vector.""" + X_m = X[:m] + X_n = X[m:] + norm_X_m = norm(X_m, n) + Y_m = [x / norm_X_m for x in X_m] + norm_X_n = norm(X_n, n) + Y_n = [x / norm_X_n for x in X_n] + return Y_m + Y_n + + def remove_component(X): + """Remove components of already obtained eigen vectors from X.""" + X_m = X[:m] + X_n = X[m:] + for eivec in eivec_m: + coeff = dotproduct(X_m, eivec) + X_m = [x1 - coeff * x2 for x1, x2 in zip(X_m, 
eivec)] + for eivec in eivec_n: + coeff = dotproduct(X_n, eivec) + X_n = [x1 - coeff * x2 for x1, x2 in zip(X_n, eivec)] + return X_m + X_n + + m, n = len(X), len(X[0]) + A = [[0] * (n + m) for _ in range(n + m)] + for i in range(m): + for j in range(n): + A[i][m + j] = A[m + j][i] = X[i][j] + + eivec_m = [] + eivec_n = [] + eivals = [] + + for _ in range(num_val): + X = [random.random() for _ in range(m + n)] + X = remove_component(X) + X = normalize_vec(X) + + for i in range(max_iter): + old_X = X + X = matrix_multiplication(A, [[x] for x in X]) + X = [x[0] for x in X] + X = remove_component(X) + X = normalize_vec(X) + # check for convergence + if norm([x1 - x2 for x1, x2 in zip(old_X, X)]) <= 1e-10: + break + + projected_X = matrix_multiplication(A, [[x] for x in X]) + projected_X = [x[0] for x in projected_X] + new_eigenvalue = norm(projected_X, 1) / norm(X, 1) + ev_m = X[:m] + ev_n = X[m:] + if new_eigenvalue < 0: + new_eigenvalue = -new_eigenvalue + ev_m = [-ev_m_i for ev_m_i in ev_m] + eivals.append(new_eigenvalue) + eivec_m.append(ev_m) + eivec_n.append(ev_n) + return eivec_m, eivec_n, eivals + + # ______________________________________________________________________________ # Grid Functions diff --git a/utils4e.py b/utils4e.py index 2681602ac..792fa9e22 100644 --- a/utils4e.py +++ b/utils4e.py @@ -90,7 +90,7 @@ def sequence(iterable): else tuple([iterable])) -def removeall(item, seq): +def remove_all(item, seq): """Return a copy of seq (or string) with all occurrences of item removed.""" if isinstance(seq, str): return seq.replace(item, '')