diff --git a/src/fastkde/empiricalCharacteristicFunction.py b/src/fastkde/empiricalCharacteristicFunction.py
index fe03502..d19c538 100644
--- a/src/fastkde/empiricalCharacteristicFunction.py
+++ b/src/fastkde/empiricalCharacteristicFunction.py
@@ -25,11 +25,14 @@ def __init__( self,\
 
       input_data : The input data. Array like with shape = (nvariables,npoints).
 
-      tgrids : The frequency-space grids to which to transform the data
+      tgrids : The frequency-space grids to which to transform the
+               data
 
                A list of frequency arrays for each variable dimension.
 
-      use_fft_approximation : Flag whether to use the nuFFT approximation to the DFT
-
+      use_fft_approximation : Flag whether to use the nuFFT approximation
+                              to the DFT
+
       be_verbose : Flags whether to be verbose
 
@@ -47,7 +50,10 @@ def __init__( self,\
         dshape = npy.shape(input_data)
         rank = len(dshape)
         if(rank != 2):
-            raise ValueError("input_data must be a rank-2 array of shape [nvariables,ndatapoints]; got rank = {}".format(rank))
+            raise ValueError(\
+                "input_data must be a rank-2 array of shape [nvariables,ndatapoints]; "
+                "got rank = {}".format(rank)
+            )
         #Extract the number of variables
         self.nvariables = dshape[0]
         #Extract the number of data points
@@ -59,11 +65,15 @@ def __init__( self,\
         try:
             gridRank = len(self.tgrids)
-        except:
+        except TypeError:
             raise ValueError("Could not determine the number of tgrids")
 
         if gridRank != self.nvariables:
-            raise ValueError("The rank of tgrids should be {}. It is {}".format(gridRank,self.nvariables))
+            raise ValueError(
+                "The rank of tgrids should be {}. It is {}".format(
+                    self.nvariables,gridRank
+                )
+            )
 
         #Check for regularity if we are doing nuFFT
         if(self.use_fft_approximation):
@@ -80,7 +90,10 @@ def __init__( self,\
                 tolerance = dt/1e6
                 #Check that all these differences are less than 1/1e6
                 if(not all(abs(deltaT_diff < tolerance))):
-                    raise ValueError("All grids in tgrids must be regularly spaced if use_fft_approximation is True")
+                    raise ValueError(
+                        "All grids in tgrids must be regularly spaced if "
+                        "use_fft_approximation is True"
+                    )
 
         #Set verbosity
         self.be_verbose = be_verbose
@@ -100,14 +113,14 @@ def __init__( self,\
             frequency_grids[v,:len(tgrids[v])] = tgrids[v]
 
         #Simply pass in the input data as provided
-        preparedInputData = input_data
 
         #Calculate the ECF
         if(self.use_fft_approximation):
             #Calculate the ECF using the fast method
             myECF = nufft.nuifft( \
                           abscissas = input_data, \
-                          ordinates = npy.ones([input_data.shape[1]],dtype=npy.complex128), \
+                          ordinates = npy.ones(\
+                              [input_data.shape[1]],dtype=npy.complex128), \
                           frequency_grids = frequency_grids, \
                           missing_freq_val = fill_value, \
                           precision = precision, \
@@ -116,7 +129,8 @@ def __init__( self,\
             #Calculate the ECF using the slow (direct, but exact) method
             myECF = nufft.idft( \
                           abscissas = input_data, \
-                          ordinates = npy.ones([input_data.shape[1]],dtype=npy.complex128), \
+                          ordinates = npy.ones(\
+                              [input_data.shape[1]],dtype=npy.complex128), \
                           frequency_grids = frequency_grids, \
                           missing_freq_val = fill_value)
 
@@ -126,7 +140,11 @@ def __init__( self,\
             #Save the ECF in the object
             self.ECF = myECF/myECF[mid_point_accessor]
         else:
-            raise RuntimeError("Midpoint of ECF is 0.0. min(ECF) = {}, max(ECF) = {}".format(npy.amin(myECF),npy.amax(myECF)))
+            raise RuntimeError(\
+                "Midpoint of ECF is 0.0. min(ECF) = {}, max(ECF) = {}".format(\
+                    npy.amin(myECF),npy.amax(myECF)
+                )
+            )
 
         return
diff --git a/src/fastkde/fastKDE.py b/src/fastkde/fastKDE.py
index 64c41b9..a7d4b2c 100644
--- a/src/fastkde/fastKDE.py
+++ b/src/fastkde/fastKDE.py
@@ -10,11 +10,12 @@
 import fastkde.floodFillSearch as flood
 
-#A simple timer for comparing ECF calculation methods
+# A simple timer for comparing ECF calculation methods
 class Timer():
     def __init__(self,n=None): self.n = n
     def __enter__(self): self.start = time.time()
-    def __exit__(self, *args): print("N = {}, t = {} seconds".format(self.n,time.time() - self.start))
+    def __exit__(self, *args):
+        print("N = {}, t = {} seconds".format(self.n,time.time() - self.start))
 
 def next_highest_power_of_two(number):
     """Returns the nearest power of two that is greater than or equal to number"""
@@ -67,56 +68,69 @@ def __init__( self,\
                 spaced and they should have a length that is a power of two
                 plus one (e.g., 33).
 
-        log_axes : Flags whether axes should be log spaced (i.e., the PDF is calculated
-                   based on log(data) and then transformed back to sample space). Should
-                   be a logical value (True or False) or a list of logical values with an item
-                   for each variable (i.e, len(log_axes) == shape(data)[0]) specifying which
-                   axes should use log spacing. If only True or False is given, that value
-                   is used for all variables.
-
-        num_points_per_sigma : the number of points on the data grid per standard
-                               deviation; this influences the total size of the axes that are
-                               automatically calculated if no other aspects of the grid are specified.
-
-        num_points : the number of points to use for the pdf grid. If provided as a scalar,
-                     each axis will have the same number of points. Otherwise, it should be an
-                     iterable with a value for each axis length. Axis lengths should be a power
-                     of two plus one (e.g., 33)
-
+        log_axes : Flags whether axes should be log spaced (i.e., the
+                   PDF is calculated based on log(data) and then
+                   transformed back to sample space). Should be a
+                   logical value (True or False) or a list of logical
+                   values with an item for each variable (i.e.,
+                   len(log_axes) == shape(data)[0]) specifying which
+                   axes should use log spacing. If only True or False
+                   is given, that value is used for all variables.
+
+        num_points_per_sigma : the number of points on the data grid per
+                               standard deviation; this influences the total
+                               size of the axes that are automatically
+                               calculated if no other aspects of the grid are
+                               specified.
+
+        num_points : the number of points to use for the pdf grid. If
+                     provided as a scalar, each axis will have the same
+                     number of points. Otherwise, it should be an
+                     iterable with a value for each axis length. Axis
+                     lengths should be a power of two plus one (e.g.,
+                     33)
+
         deltaX : if given, this specifies the spacing between domain values.
 
-        do_approximate_ecf : flags whether to approximate the ECF using a (much faster)
-                             FFT. In tests, this is accurate to ~1e-14 over low
-                             frequencies, but is inaccurate to ~1e-2 for the highest ~5%
-                             of frequencies.
-
-        ecf_precision : sets the precision of the approximate ECF. If set to 2, it uses
-                        double precision accuracy; 1 otherwise
-
-        do_fft : flags whether to calculate phiSC and its FFT to obtain
-                 pdf
-
-        do_save_marginals : flags whether to calculate and save the marginal distributions
-
-        frac_contiguous_hyper_volumes : the fraction of contiguous hypervolumes of the ECF, that
-                                        are above the ECF threshold, to use in the density estimate
-
-        num_contiguous_hyper_volumes : like frac_contiguous_hyper_volumes, but specify an integer number
-                                       to use. frac_contiguous_hyper_volumes will be ignored if this
-                                       is provided as an argument.
-
-        positive_shift : translate the PDF vertically such that the estimate is positive or
-                         0 everywhere
-
-        count_threshold : this argument does nothing; it has been deprecated. It is kept as an argument for backward
-                          compatibility.
-
-        axis_expansion_factor : sets the amount by which the KDE grid will be expanded relative to the original min-max
-                                spread for each variable: 1.0 means a 100% (2x) expansion in the range. Such an expansion
-                                is necessary to avoid kernel power from one end of the grid leaking into the opposite end
-                                due to the perioidicity of the Fourier transform.
-
+        do_approximate_ecf : flags whether to approximate the ECF using a (much
+                             faster) FFT. In tests, this is accurate to ~1e-14
+                             over low frequencies, but is inaccurate to ~1e-2
+                             for the highest ~5% of frequencies.
+
+        ecf_precision : sets the precision of the approximate ECF. If set
+                        to 2, it uses double precision accuracy; 1
+                        otherwise
+
+        do_fft : flags whether to calculate phiSC and its FFT to
+                 obtain pdf
+
+        do_save_marginals : flags whether to calculate and save the marginal
+                            distributions
+
+        frac_contiguous_hyper_volumes : the fraction of contiguous hypervolumes of
+                                        the ECF, that are above the ECF threshold,
+                                        to use in the density estimate
+
+        num_contiguous_hyper_volumes : like frac_contiguous_hyper_volumes, but
+                                       specify an integer number to use.
+                                       frac_contiguous_hyper_volumes will be
+                                       ignored if this is provided as an argument.
+
+        positive_shift : translate the PDF vertically such that the estimate
+                         is positive or 0 everywhere
+
+        count_threshold : this argument does nothing; it has been deprecated.
+                          It is kept as an argument for backward compatibility.
+
+        axis_expansion_factor : sets the amount by which the KDE grid will be
+                                expanded relative to the original min-max spread
+                                for each variable: 1.0 means a 100% (2x) expansion
+                                in the range. Such an expansion is necessary to
+                                avoid kernel power from one end of the grid
+                                leaking into the opposite end due to the
+                                periodicity of the Fourier transform.
+
         Returns: a fastKDE object
 
         """
 
@@ -126,50 +140,57 @@ def vprint(msg):
             if be_verbose:
                 print(msg)
 
-        addOne = True #Force x grids to be (2**n) + 1
+        addOne = True  # Force x grids to be (2**n) + 1
 
         if(data is not None):
 
-            #Save the original data for the marginal calculation
+            # Save the original data for the marginal calculation
            original_data = npy.array(data)
            data = npy.array(data)
 
-            #First check the rank of the data
+            # First check the rank of the data
            data_rank = len(npy.shape(data))
 
-            #If the data are a vector, promote the data to a rank-1 array with only 1 column
+            # If the data are a vector, promote the data to a rank-1 array with only 1 column
            if(data_rank == 1):
                data = npy.array(original_data[npy.newaxis,:],dtype=npy.float_)
            else:
                data = npy.array(original_data,dtype=npy.float_)
            if(data_rank > 2):
-                raise ValueError("data must be a rank-2 array of shape [num_variables,num_data_points]")
+                raise ValueError( \
+                    "data must be a rank-2 array of shape [num_variables,num_data_points]"
+                )
 
-            #Set the rank of the data
+            # Set the rank of the data
            self.data_rank = data_rank
 
-            #Set the number of variables
+            # Set the number of variables
            self.num_variables = npy.shape(data)[0]
-            #Set the number of data points
+            # Set the number of data points
            self.num_data_points = npy.shape(data)[1]
 
-            #Check if we need log axes for any variables
+            # Check if we need log axes for any variables
            try:
-                #Check if an iterable was provided for log_axes
+                # Check if an iterable was provided for log_axes
                log_axes[0]
-            except:
-                #Otherwise set the array to be a list filled with the same value
+            except TypeError:
+                # Otherwise set the array to be a list filled with the same value
                log_axes = self.num_variables*[log_axes]
 
-            #Save the log_axes variable
+            # Save the log_axes variable
            self.log_axes = log_axes
 
-            #Loop over variables and take the logarithm of any with log axes
+            # Loop over variables and take the logarithm of any with log axes
            for v in range(self.num_variables):
-                #Take the logarithm of the given variable
+                # Take the logarithm of the given variable
                if log_axes[v]:
-                    #Check wheter all the data are positive
+                    # Check whether all the data are positive
                    if(npy.amin(data[v,:]) <= 0):
-                        raise ValueError("logarithmic axes were specified for variable {}, but that variable contains values less than 0: min = {}".format(v,npy.amin(data[v,:])))
+                        raise ValueError(\
+                            (\
+                                "logarithmic axes were specified for variable {}, but " +
+                                "that variable contains values less than 0: min = {}"\
+                            ).format(v,npy.amin(data[v,:]))
+                        )
 
                    data[v,:] = npy.log(data[v,:])
 
@@ -178,46 +199,50 @@ def vprint(msg):
 
            if num_contiguous_hyper_volumes is not None:
                self.frac_contiguous_hyper_volumes = num_contiguous_hyper_volumes
 
-            vprint("Operating on data with num_variables = {}, num_data_points = {}".format(self.num_variables,self.num_data_points))
+            vprint(\
+                (
+                    "Operating on data with num_variables = {}, " +
+                    "num_data_points = {}"
+                ).format(self.num_variables,self.num_data_points))
 
        else:
            self.num_data_points = 0
 
-        #Create a variable to hold the original PDF and axes
+        # Create a variable to hold the original PDF and axes
        self.originalPDF = None
        self.originalAxes = None
 
-        #Store the do_fft flag
+        # Store the do_fft flag
        self.do_fft = do_fft
 
-        #Save the marginals flag
+        # Save the marginals flag
        self.do_save_marginals = do_save_marginals
        if self.num_variables == 1:
            self.do_save_marginals = False
 
-        #Set whether to approximate the ECF using the FFT method
+        # Set whether to approximate the ECF using the FFT method
        self.do_approximate_ecf = do_approximate_ecf
 
-        #Set the approximate ECF precision
+        # Set the approximate ECF precision
        self.ecf_precision = ecf_precision
 
-        #Preinitialize the ecf threshold
+        # Preinitialize the ecf threshold
        self.ecfThreshold = None
 
-        #Flag whether to save the transformed kernel
+        # Flag whether to save the transformed kernel
        self.do_save_transformed_kernel = do_save_transformed_kernel
 
-        #initialize the kernel and its transform
+        # initialize the kernel and its transform
        self.kappaSC = None
        self.kSC = None
 
        self.positive_shift = positive_shift
 
-        #***********************
-        # Calculate the x grids
-        #***********************
+        # ***********************
+        # Calculate the x grids
+        # ***********************
        if(axes is None):
 
-            #Get the range of the data
+            # Get the range of the data
            self.xMin = npy.amin(data,1)
            self.xMax = npy.amax(data,1)
 
@@ -225,28 +250,30 @@ def vprint(msg):
            vprint("\tminima: {}".format(self.xMin))
            vprint("\tmaxima: {}".format(self.xMax))
 
-            #Get the grid mid-points
+            # Get the grid mid-points
            midPoint = 0.5*(self.xMax + self.xMin)
 
-            #inflate the range by axis_expansion_factor to ensure that KDE power doesn't leak through
-            #the periodic axis boundary
+            # inflate the range by axis_expansion_factor to ensure that KDE power
+            # doesn't leak through the periodic axis boundary
            self.xMin += axis_expansion_factor*(self.xMin-midPoint)
            self.xMax += axis_expansion_factor*(self.xMax-midPoint)
 
            if num_points is None:
 
-                #Calculate the number of standard deviations there
-                # are in the data range
+                # Calculate the number of standard deviations there
+                # are in the data range
                dataRange = self.xMax - self.xMin
                numSigma = dataRange/npy.std(data,axis=1)
 
-                #Set the number of points for each dimensions
-                self.numXPoints = npy.array([next_highest_power_of_two(ns * num_points_per_sigma) + int(addOne) for ns in numSigma])
+                # Set the number of points for each dimension
+                self.numXPoints = npy.array(\
+                    [next_highest_power_of_two(ns * num_points_per_sigma)
+                     + int(addOne) for ns in numSigma])
            else:
-                #If we can iterate through
+                # If we can iterate through
                try:
                    lenNum = len(num_points)
                    isIterable = True
-                except:
+                except TypeError:
                    isIterable = False
                    lenNum = 1
 
@@ -254,43 +281,57 @@ def vprint(msg):
            if isIterable:
                if lenNum == self.num_variables:
                    self.numXPoints = num_points
                else:
-                    raise ValueError("len(num_points) = {}, but it should match num_variables = {}".format(lenNum,self.num_variables))
+                    raise ValueError(\
+                        (
+                            "len(num_points) = {}, but it should match " +
+                            "num_variables = {}"
+                        ).format(lenNum,self.num_variables)
+                    )
            else:
                self.numXPoints = npy.array(self.num_variables*(num_points,))
 
-            #Set the grids for each dimension
-            self.axes = [ npy.linspace(xmin,xmax,np) for xmin,xmax,np in zip(self.xMin,self.xMax,self.numXPoints)]
+            # Set the grids for each dimension
+            self.axes = [ \
+                npy.linspace(xmin,xmax,np)
+                for xmin,xmax,np in zip(self.xMin,self.xMax,self.numXPoints)
+            ]
 
-            vprint("Grids created with xmin: {}, xmax: {}, npoints: {}".format(self.xMin,self.xMax,self.numXPoints))
+            vprint(\
+                (
+                    "Grids created with xmin: {}, xmax: {}, " +
+                    "npoints: {}"
+                ).format(self.xMin,self.xMax,self.numXPoints)
+            )
        else:
-            #Set the xgrid from the function argument
+            # Set the xgrid from the function argument
            self.axes = axes
            self.xMin = npy.array([npy.amin(xg) for xg in axes])
            self.xMax = npy.array([npy.amax(xg) for xg in axes])
            self.numXPoints = npy.array([len(xg) for xg in axes])
 
-        #Get the grid mid-points
+        # Get the grid mid-points
        self.midPoint = 0.5*(self.xMax + self.xMin)
 
-        #Set the midpoint of the incoming grid
+        # Set the midpoint of the incoming grid
        self.dataMid = 0.5*(self.xMax + self.xMin)
-        #Set the range to be +/- pi
+        # Set the range to be +/- pi
        self.dataNorm = (self.xMax - self.xMin)/(2.*npy.pi)
 
-        #Get the grid spacings
+        # Get the grid spacings
        self.deltaX = npy.array([ xg[1] - xg[0] for xg in self.axes])
 
-        #Save xgrids as axes for backward compatibility
+        # Save xgrids as axes for backward compatibility
        self.xgrids = self.axes
 
-        #Check that the axes are regular and proper powers of two
+        # Check that the axes are regular and proper powers of two
        for v in range(self.num_variables):
            xg = self.axes[v]
-            dx = (xg[1:]-self.dataMid[v])/self.dataNorm[v] - (xg[:-1] - self.dataMid[v])/self.dataNorm[v]
+            dx = (xg[1:]-self.dataMid[v])/self.dataNorm[v] - \
+                 (xg[:-1] - self.dataMid[v])/self.dataNorm[v]
            dxdiff = dx - self.deltaX[v]/self.dataNorm[v]
            fTolerance = self.deltaX[v]/(1e4*self.dataNorm[v])
-            #Check that these differences are less than 1/1e6
+            # Check that these differences are less than 1/1e4
            if(not all(npy.abs(dxdiff) < fTolerance)):
                raise ValueError("All grids in axes must be regularly spaced")
 
@@ -301,108 +342,124 @@ def vprint(msg):
            else:
                extraStr = ""
 
-            raise ValueError("All grids in axes must be powers of 2" + extraStr + ", but got {}".format(len(xg)))
+            raise ValueError(\
+                "All grids in axes must be powers of 2" + extraStr +
+                ", but got {}".format(len(xg))
+            )
 
-        #Calculate the frequency point grids (for 0-centered data)
-        self.tgrids = [ calc_t_from_x((xg-av)/sd) for xg,av,sd in zip(self.axes,self.dataMid,self.dataNorm) ]
+        # Calculate the frequency point grids (for 0-centered data)
+        self.tgrids = [\
+            calc_t_from_x((xg-av)/sd)
+            for xg,av,sd in zip(self.axes,self.dataMid,self.dataNorm)
+        ]
        self.numTPoints = npy.array([len(tg) for tg in self.tgrids])
        self.deltaT = npy.array([tg[2] - tg[1] for tg in self.tgrids])
 
        self.phiSC = (0.0+0.0j)*npy.zeros(self.numTPoints)
        self.ECF = (0.0+0.0j)*npy.zeros(self.numTPoints)
 
-        #Initialize the good distribution index
+        # Initialize the good distribution index
        self.goodDistributionInds = []
 
-        #Set the verbosity flag
+        # Set the verbosity flag
        self.be_verbose = be_verbose
 
        self.convolvedData = None
 
-        #Initialize the marginals
+        # Initialize the marginals
        self.marginalObjects = None
 
        if(data is not None):
-            #*************************************************
-            # Calculate the Empirical Characteristic Function
-            #*************************************************
-            #Note that this routine also standardizes the data on-the-fly
+            # *************************************************
+            # Calculate the Empirical Characteristic Function
+            # *************************************************
+            # Note that this routine also standardizes the data on-the-fly
            vprint("Calculating the ECF")
 
-            #Transfrom the data to 0-centered coordinates
+            # Transform the data to 0-centered coordinates
            for v in range(self.num_variables):
                data[v,:] = (data[v,:] - self.dataMid[v])/self.dataNorm[v]
 
-            #Calculate the ECF (see empiricalCharacteristicFunction.py)
+            # Calculate the ECF (see empiricalCharacteristicFunction.py)
            ecfObj = ecf.ECF( input_data = data, \
                              tgrids = self.tgrids, \
                              use_fft_approximation = self.do_approximate_ecf, \
                              precision = self.ecf_precision, \
                              be_verbose = self.be_verbose)
 
-            #Extract the ECF from the ECF object
+            # Extract the ECF from the ECF object
            self.ECF = ecfObj.ECF
 
            if(self.do_fft):
-                #*************************************************
-                # Apply the filter
-                #*************************************************
-                #Apply the Bernacchia and Pigolotti (2011) filter to the ECF to obtain
-                #the fourier representation of the self-consistent density
+                # *************************************************
+                # Apply the filter
+                # *************************************************
+                # Apply the Bernacchia and Pigolotti (2011) filter to the ECF to obtain
+                # the fourier representation of the self-consistent density
                vprint("Applying the filter")
                self.applyBernacchiaFilter()
 
-                #*************************************************
-                # Transform to real space
-                #*************************************************
-                #Transform the optimal distribution to real space
+                # *************************************************
+                # Transform to real space
+                # *************************************************
+                # Transform the optimal distribution to real space
                vprint("Transforming to real space")
                self.__transformphiSC__()
 
-                #Calculate and save the marginal distribution objects
+                # Calculate and save the marginal distribution objects
                if(self.do_save_marginals):
                    self.marginalObjects = []
                    for i in range(self.num_variables):
-                        self.marginalObjects.append(fastKDE(original_data[i,:], \
-                                                    axes = [self.originalAxes[i]], \
-                                                    positive_shift = self.positive_shift, \
-                                                    frac_contiguous_hyper_volumes = self.frac_contiguous_hyper_volumes, \
-                                                    log_axes = self.log_axes[i], \
-                                                    do_save_marginals = False) )
+                        self.marginalObjects.append(
+                            fastKDE(
+                                original_data[i,:], \
+                                axes = [self.originalAxes[i]], \
+                                positive_shift = self.positive_shift, \
+                                frac_contiguous_hyper_volumes = \
+                                    self.frac_contiguous_hyper_volumes, \
+                                log_axes = self.log_axes[i], \
+                                do_save_marginals = False
+                            )
+                        )
 
        return
 
-    #*****************************************************************************
-    #** fastKDE: ***********************************************
-    #******************* applyBernacchiaFilter() *********************************
-    #*****************************************************************************
-    #*****************************************************************************
+    # *****************************************************************************
+    # ** fastKDE: ***********************************************
+    # ******************* applyBernacchiaFilter() *********************************
+    # *****************************************************************************
+    # *****************************************************************************
    def applyBernacchiaFilter(self,doFlushArrays=True):
        """ Given an ECF, calculate the self-consistent density in fourier-space by
        applying the BP11 filter."""
 
-        #Make an easy-to-read and float version of self.num_data_points
+        # Make an easy-to-read and float version of self.num_data_points
        N = float(self.num_data_points)
 
-        #Calculate the stability threshold for the ECF
+        # Calculate the stability threshold for the ECF
        ecfThresh = 4.*(N-1.)/(N*N)
        self.ecfThreshold = ecfThresh
 
-        #Calculate the squared magnitude of the ECF
+        # Calculate the squared magnitude of the ECF
        ecfSq = abs(self.ECF)**2
 
-        #Find all hypervolumes where ecfSq is greater than the stability threshold
+        # Find all hypervolumes where ecfSq is greater than the stability threshold
        contiguousInds = flood.flood_fill_search(ecfSq,search_threshold = self.ecfThreshold)
 
        if contiguousInds == []:
-            raise RuntimeError("No ECF values found above the ECF threshold. max(ecfSq) = {}, ecfThresh = {}".format(npy.amax(ecfSq),ecfThresh))
-
-        #Sort them by distance from the center
+            raise RuntimeError(\
+                (
+                    "No ECF values found above the ECF threshold. " +
+                    "max(ecfSq) = {}, ecfThresh = {}"
+                ).format(npy.amax(ecfSq),ecfThresh)
+            )
+
+        # Sort them by distance from the center
        sortedInds = flood.sort_by_distance_from_center(contiguousInds,npy.shape(ecfSq))
 
-        #Convert the fraction of hypervolumes to a number if needbe
+        # Convert the fraction of hypervolumes to a number if need be
        numVolumes = len(sortedInds)
        if self.frac_contiguous_hyper_volumes >= 1:
            numVolumesToUse = int(self.frac_contiguous_hyper_volumes)
@@ -411,195 +468,216 @@ def applyBernacchiaFilter(self,doFlushArrays=True):
        if numVolumesToUse < 1:
            numVolumesToUse = 1
 
-        #Check that we don't exceed the number of provided volumes
+        # Check that we don't exceed the number of provided volumes
        if numVolumesToUse > numVolumes:
            numVolumesToUse = numVolumes
 
-        #Initialize the filtered value list
+        # Initialize the filtered value list
        iCalcPhi = self.num_variables*[npy.array([],dtype='int')]
 
-        #Pull out frac_contiguous_hyper_volumes of contiguous hyper volumes, in order of distance from
-        #the origin
+        # Pull out frac_contiguous_hyper_volumes of contiguous hyper volumes, in
+        # order of distance from the origin
        for i in range(numVolumesToUse):
            for n in range(self.num_variables):
                iCalcPhi[n] = npy.concatenate( (iCalcPhi[n],sortedInds[i][n]) )
 
-        #Convert iCalcPhi to a list of tuples, such that it is compatible with the output of where()
+        # Convert iCalcPhi to a list of tuples, such that it is compatible with the
+        # output of where()
        if self.num_variables != 1:
            iCalcPhi = [ tuple(ii) for ii in iCalcPhi ]
 
-        # convert to a tuple, to avoid a numpy warning
+        # convert to a tuple, to avoid a numpy warning
        iCalcPhi = tuple(iCalcPhi)
 
-        #Save the filter
+        # Save the filter
        self.iCalcPhi = iCalcPhi
 
-        #If flagged, clear the phiSC array. This is needed if the same fastKDE object
-        #is reused for multiple data.
+        # If flagged, clear the phiSC array. This is needed if the same fastKDE object
+        # is reused for multiple data.
        if(doFlushArrays):
            self.phiSC[:] = (0.0+0.0j)
 
-        #Calculate the transform of the self-consistent Kernel (and only calculate it at
-        # points where ecfSq is above ecfThresh)
+        # Calculate the transform of the self-consistent Kernel (and only calculate it at
+        # points where ecfSq is above ecfThresh)
        kappaSC = (1.0+0.0j)*npy.zeros(npy.shape(self.ECF))
        kappaSC[iCalcPhi] = (N/(2*(N-1)))\
                *(1+npy.sqrt(1-ecfThresh/ecfSq[iCalcPhi]))
 
-        #Store the fourier kernel if we are going to save the transformed kernel
+        # Store the fourier kernel if we are going to save the transformed kernel
        if(self.do_save_transformed_kernel):
            self.kappaSC = kappaSC
 
        midPointAccessor = tuple([(tp-1)//2 for tp in self.numTPoints])
-        #Calculate the transform of the self-consistent density estimate
+        # Calculate the transform of the self-consistent density estimate
        self.phiSC[iCalcPhi] = self.ECF[iCalcPhi]*kappaSC[iCalcPhi]
 
        if(self.be_verbose):
-            print("Normalization of kappaSC, ECF, and phiSC: {}, {}, {}".format(kappaSC[midPointAccessor],self.ECF[midPointAccessor],self.phiSC[midPointAccessor]))
-
-    #*****************************************************************************
-    #** fastKDE: ***********************************************
-    #******************* findGoodDistributionInds() ******************************
-    #*****************************************************************************
-    #*****************************************************************************
+            print(
+                (
+                    "Normalization of kappaSC, ECF, and phiSC: " +
+                    "{}, {}, {}"
+                ).format(\
+                    kappaSC[midPointAccessor],
+                    self.ECF[midPointAccessor],
+                    self.phiSC[midPointAccessor]
+                )
+            )
+
+    # *****************************************************************************
+    # ** fastKDE: ***********************************************
+    # ******************* findGoodDistributionInds() ******************************
+    # *****************************************************************************
+    # *****************************************************************************
    def findGoodDistributionInds(self):
        """Find indices of the optimal distribution that are above 0.0"""
        return npy.where(self.pdf >= 0.0)
 
-    #*****************************************************************************
-    #** fastKDE: ***********************************************
-    #******************* findBadDistributionInds() *******************************
-    #*****************************************************************************
-    #*****************************************************************************
+    # *****************************************************************************
+    # ** fastKDE: ***********************************************
+    # ******************* findBadDistributionInds() *******************************
+    # *****************************************************************************
+    # *****************************************************************************
    def findBadDistributionInds(self):
        """Find indices of the optimal distribution that are below 0.0"""
        return npy.where(self.pdf < 0.0)
 
-    #*****************************************************************************
-    #** fastKDE: ***********************************************
-    #******************* __transformphiSC__() ************************************
-    #*****************************************************************************
-    #*****************************************************************************
+    # *****************************************************************************
+    # ** fastKDE: ***********************************************
+    # ******************* __transformphiSC__() ************************************
+    # *****************************************************************************
+    # *****************************************************************************
    def __transformphiSC__(self):
        """ Transform the self-consistent estimate of the distribution from
        frequency space to real space"""
 
-        #Transform the PDF estimate to real space
-        pdf = npy.fft.fftshift(npy.real(npy.fft.fftn(npy.fft.ifftshift(self.phiSC))))*npy.prod(self.deltaT)*(1./(2*npy.pi))**self.num_variables
+        # Transform the PDF estimate to real space
+        pdf = npy.fft.fftshift(
+            npy.real(npy.fft.fftn(npy.fft.ifftshift(self.phiSC)))) \
+            *npy.prod(self.deltaT)*(1./(2*npy.pi))**self.num_variables
 
-        #Unnormalize it
+        # Unnormalize it
        pdf /= npy.prod(self.dataNorm)
 
-        #transpose the self-consistent density estimate
+        # transpose the self-consistent density estimate
        self.pdf = pdf.transpose()
 
-        #Shift the PDF such that the negative areas can be set to 0, while the positive area is
-        #still normalized to 1
+        # Shift the PDF such that the negative areas can be set to 0, while the
+        # positive area is still normalized to 1
        if self.positive_shift:
            if len(npy.where(self.pdf < 0)[0]) != 0:
 
-                #Define a function f(delta), such that f(delta) is how far off self.pdf-delta is
-                #from being normalized; hence, we want to find the zero of this function
+                # Define a function f(delta), such that f(delta) is how far off
+                # self.pdf-delta is from being normalized; hence, we want to find
+                # the zero of this function
                def normFunc(delta):
                    """Calculate how far off from normal is the shifted PDF"""
                    ipos = npy.where((self.pdf-delta) >= 0.0)
                    return 1 - sum((self.pdf[ipos]-delta)*npy.prod(self.deltaX))
 
-                #Set the initial guess for the newton-raphson search
-                #a = -normFunc(0)
+                # Set the initial guess for the newton-raphson search
+                # a = -normFunc(0)
                a = 0.0
 
-                #Find the zero of the above function; i.e., find delta, such that the shifted PDF is
-                #normalized
+                # Find the zero of the above function; i.e., find delta, such that
+                # the shifted PDF is normalized
                try:
                    delta = scipy.optimize.newton(normFunc,a,maxiter=10000)
-                except:
+                except RuntimeError:
                    delta = 0.0
 
-                #Check if the positive shift method failed
+                # Check if the positive shift method failed
                if not npy.isfinite(delta) or delta < 0 or delta >= npy.amax(self.pdf):
                    if self.be_verbose:
                        print('positive_shift algorithm failure: defaulting to no shift')
                    delta = 0.0
 
-                #If a shift is provided, do the shift
+                # If a shift is provided, do the shift
                if delta != 0.0:
-                    #Shift the PDF
+                    # Shift the PDF
                    self.pdf -= delta
 
-                    #And set the negative values to 0
+                    # And set the negative values to 0
                    self.pdf[npy.where(self.pdf < 0)] = 0.0
 
        if(self.be_verbose):
            normConst = sum(pdf*npy.prod(self.deltaX))
            midPointAccessor = tuple([(tp-1)//2 for tp in self.numTPoints])
-            print("Normalization of pdf = {}. phiSC[0] = {}".format(normConst,self.phiSC[midPointAccessor]))
+            print(\
+                (
+                    "Normalization of pdf = {}. " +
+                    "phiSC[0] = {}"
+                ).format(normConst,self.phiSC[midPointAccessor])
+            )
 
-        #Save the original PDF and axes
+        # Save the original PDF and axes
        self.originalPDF = npy.array(self.pdf)
        self.originalAxes = list(self.axes)
 
-        #Check if any variables need to be transformed due to use
-        #of logarithmic axes
+        # Check if any variables need to be transformed due to use
+        # of logarithmic axes
        for v in range(self.num_variables):
            if self.log_axes[v]:
-                #Transform the axis back to data space
+                # Transform the axis back to data space
                self.axes[v] = npy.exp(self.axes[v])
 
-                #Generate a slice to help the axis conform in shape to the PDF
+                # Generate a slice to help the axis conform in shape to the PDF
                conformSlice = self.num_variables*[npy.newaxis]
                conformSlice[v] = slice(None,None,None)
                conformSlice = tuple(conformSlice)
 
-                #Transform the PDF
+                # Transform the PDF
                self.pdf /= self.axes[v][conformSlice[::-1]]
 
-        #Set self.fSC for backward compatibility
+        # Set self.fSC for backward compatibility
        self.fSC = self.pdf
 
-        #Take the transform of the self-consistent kernel if flagged
+        # Take the transform of the self-consistent kernel if flagged
        if(self.do_save_transformed_kernel):
-            kSC = npy.fft.fftshift(npy.real(npy.fft.fftn(npy.fft.ifftshift(self.kappaSC))))*npy.prod(self.deltaT)*(1./(2*npy.pi))**self.num_variables
+            kSC = npy.fft.fftshift(\
+                npy.real(npy.fft.fftn(npy.fft.ifftshift(self.kappaSC))))\
+                *npy.prod(self.deltaT)*(1./(2*npy.pi))**self.num_variables
            kSC /= npy.prod(self.dataNorm)
            self.kSC = kSC.transpose()
 
-    #*****************************************************************************
-    #** fastKDE: ***********************************************
-    #******************* __transformphiSC_points__() ************************************
-    #*****************************************************************************
-    #*****************************************************************************
+    # *****************************************************************************
+    # ** fastKDE: ***********************************************
+    # ******************* __transformphiSC_points__() ************************************
+    # *****************************************************************************
+    # *****************************************************************************
    def __transformphiSC_points__(self,list_of_points):
        """ Transform the self-consistent estimate of the distribution from
        frequency space to a set of points in real space"""
 
-        # transform the PDF estimate to real space
-        #pdf = dft_points(array(self.tgrids,dtype=float),self.phiSC,list_of_points)*prod(self.deltaT)*(1./(2*pi))**self.num_variables
-
-        #Transfrom the point to 0-centered coordinates
+        # Transform the points to 0-centered coordinates
        list_of_points = npy.array(list_of_points,copy=True)
        for v in range(self.num_variables):
            list_of_points[v,:] = (list_of_points[v,:] - self.dataMid[v])/self.dataNorm[v]
 
-        #Set the fill value for the frequency grids
+        # Set the fill value for the frequency grids
        fillValue = -1e20
 
-        #Get the maximum frequency grid length
+        # Get the maximum frequency grid length
        tgrids = self.tgrids
        ntmax = npy.amax([len(tgrid) for tgrid in tgrids])
 
-        #Create the frequency grids array
+
+        # Create the frequency grids array
        frequencyGrids = fillValue*npy.ones([self.num_variables,ntmax])
 
-        #Fill the frequency grids array
+        # Fill the frequency grids array
        for v in range(self.num_variables):
            frequencyGrids[v,:len(tgrids[v])] = tgrids[v]
 
-        # do the inverse direct Fourier transform
+        # do the inverse direct Fourier transform
        pdf = dft_points(frequencyGrids,
                         self.phiSC.ravel(),
                         list_of_points,
                         missingFreqVal = fillValue) * \
             npy.prod(self.deltaT)*(1./(2*npy.pi))**self.num_variables
 
-        # unstandarize the PDF
+        # unstandardize the PDF
        pdf /= npy.prod(self.dataNorm)
 
-        # TODO: implement positive_shift for point-based PDF estimates
-        # TODO: implement log_axes for point-based PDF estimates
+        # TODO: implement positive_shift for point-based PDF estimates
+        # TODO: implement log_axes for point-based PDF estimates
 
        return pdf
 
@@ -622,33 +700,32 @@ def estimateConditionals(self,variables,data,peakFrac = 0.0,reApplyFilter=False)
 
    input:
    ------
 
-        variables : A integer or tuple of array indicies indicating the variables
-                    on which to condition e.g., For a 2D PDF,
-
-                    obj.estimateConditionals(1) estimates
-                    P(x_0 | x_1) from the joint PDF P(x_0,x_1) that is
-                    the result of the self-consistent density estimate.
-
-                    For a 3D PDF:
-
-                    obj.estimateConditionals( (0,2) ) estimates
-                    P( x_0, x_2 | x_1) from P(x_0,x_1,x_2)
-
-                    If all possible variables are listed, the copula
-                    is returned instead.
-
-                    If negative values are provided, variables are wrapped
-                    (i.e., index -1 indicates the last variable)
-
-        data : The data original used to create the
-               fastKDE object. This is
-               needed to calculated the various marginals required
-               in the conditional computation.
-
-        peakFrac : The fractional threshold below which to truncate the
-                   marginal PDF (to avoid divding by small numbers);
-                   this is the fraction of the height of the mode.
-
+        variables : An integer or tuple of array indices indicating the
+                    variables on which to condition e.g., For a 2D PDF,
+
+                    obj.estimateConditionals(1) estimates P(x_0 | x_1)
+                    from the joint PDF P(x_0,x_1) that is the result of
+                    the self-consistent density estimate.
+
+                    For a 3D PDF:
+
+                    obj.estimateConditionals( (0,2) ) estimates P( x_0,
+                    x_2 | x_1) from P(x_0,x_1,x_2)
+
+                    If all possible variables are listed, the copula is
+                    returned instead.
+
+                    If negative values are provided, variables are wrapped
+                    (i.e., index -1 indicates the last variable)
+
+        data : The data originally used to create the fastKDE object.
+               This is needed to calculate the various marginals
+               required in the conditional computation.
+
+        peakFrac : The fractional threshold below which to truncate the
+                   marginal PDF (to avoid dividing by small numbers); this
+                   is the fraction of the height of the mode.
+
        reapplyFilter : Flags whether to reapply the ECF filter to the conditional
 
    output:
 
@@ -661,22 +738,22 @@ def estimateConditionals(self,variables,data,peakFrac = 0.0,reApplyFilter=False)
 
        data = npy.array(data)
 
-        #If the data are univariate, simply return the PDF itself
+        # If the data are univariate, simply return the PDF itself
        if(self.num_variables == 1):
            return self.pdf
 
-        #Check that we can interpret the variables tuple
+        # Check that we can interpret the variables tuple
        try:
            len(variables)
-        except:
+        except TypeError:
            try:
                range(variables)
                variables = (variables,)
-            except:
+            except TypeError:
                raise ValueError("variables appears to be neither a tuple or an integer")
 
-        #Check that the variable indices are sane
+        # Check that the variable indices are sane
        rightSideVariableIndices = []
        for ind in tuple(variables):
            if ind > self.num_variables-1:
@@ -689,58 +766,67 @@ def estimateConditionals(self,variables,data,peakFrac = 0.0,reApplyFilter=False)
                dum = ind
            rightSideVariableIndices.append(dum)
 
-        #Pull the unique indices and make sure they are sorted
+        # Pull the unique indices and make sure they are sorted
        rightSideVariableIndices = tuple(sorted(list(set(rightSideVariableIndices))))
        if len(rightSideVariableIndices) > self.num_variables:
-            raise ValueError("More indices were provided in 'variables' than there are variables.")
+            raise ValueError(
+                "More indices were provided in 'variables' than there are "
+                "variables."
+            )
 
-        #Check if all variables were provided
+        # Check if all variables were provided
        if len(rightSideVariableIndices) == self.num_variables:
            return self.getCopula(data)
 
-        #If there are no right side variables, return the PDF
+        # If there are no right side variables, return the PDF
        if len(rightSideVariableIndices) == 0:
            return self.pdf
 
-        #Create the list of left-side variable indices
+        # Create the list of left-side variable indices
        leftSideVariableIndices = list(range(self.num_variables))
        for ind in sorted(rightSideVariableIndices)[::-1]:
            leftSideVariableIndices.pop(ind)
 
-        #Calculate the marginal PDF
-        marginalObject = fastKDE( data[rightSideVariableIndices,:], \
-                                  axes = [self.originalAxes[i] for i in rightSideVariableIndices], \
-                                  positive_shift = self.positive_shift, \
-                                  frac_contiguous_hyper_volumes = self.frac_contiguous_hyper_volumes, \
-                                  log_axes = [ self.log_axes[i] for i in rightSideVariableIndices], \
-                                  do_save_marginals = False)
-
-        #Make the shape of the new marginal object match that of the original PDF
-        #(using the magic of the numpy newaxis)
+        # Calculate the marginal PDF
+        marginalObject = fastKDE(\
+            data[rightSideVariableIndices,:], \
+            axes = [self.originalAxes[i] for i in rightSideVariableIndices], \
+            positive_shift = self.positive_shift, \
+            frac_contiguous_hyper_volumes = self.frac_contiguous_hyper_volumes, \
+            log_axes = [ self.log_axes[i] for i in rightSideVariableIndices], \
+            do_save_marginals = False
+        )
+
+        # Make the shape of the new marginal object match that of the original PDF
+        # (using the magic of the numpy newaxis)
        conformantSlice = list(self.num_variables*(slice(None,None,None),))
-        #Insert a newaxis for each of the left-side indices
+        # Insert a newaxis for each of the left-side indices
        sumAxes = []
        for ind in leftSideVariableIndices:
-            #The PDF object has var0 in its rightmost axis, so transform ind
-            #accordingly (it references as though var0 is the leftmost axis)
+            # The PDF object has var0 in its rightmost axis, so transform ind
+            # accordingly (it references as though var0 is the leftmost axis)
            ip = self.num_variables - ind - 1
            conformantSlice[ip] = npy.newaxis
-            #Add this index to the list of axes over which to sum for normalization
+            # Add this index to the list of axes over which to sum for normalization
            sumAxes.append(ip)
        conformantSlice = tuple(conformantSlice)
 
        marginalThreshold = peakFrac*npy.amax(marginalObject.pdf)
-        #Create and mask the marginal PDF
-        marginalPDF = npy.ma.masked_less_equal(marginalObject.pdf[conformantSlice],marginalThreshold)
+        # Create and mask the marginal PDF
+        marginalPDF = npy.ma.masked_less_equal(\
+            marginalObject.pdf[conformantSlice],marginalThreshold
+        )
 
-        #Calculate the conditional PDF
+        # Calculate the conditional PDF
        conditionalPDF = npy.ma.array(self.pdf)/marginalPDF
 
-        #Refilter the conditional
+        # Refilter the conditional
        if(reApplyFilter):
-            conditionalPDF = npy.ma.masked_less_equal(self.reApplyFilter(conditionalPDF),0.0)
+            conditionalPDF = npy.ma.masked_less_equal(\
+                self.reApplyFilter(conditionalPDF),0.0
+            )
 
-        #Calculate the normalization matrix
+        # Calculate the normalization matrix
        dxs = [npy.diff(self.axes[i]) for i in leftSideVariableIndices]
        dxs = [npy.concatenate((dx,[dx[-1]])) for dx in dxs]
 
        if len(dxs) == 1:
@@ -753,57 +839,67 @@ def estimateConditionals(self,variables,data,peakFrac = 0.0,reApplyFilter=False)
            cslice = tuple(cslice)
            dxProd = dxProd[cslice[::-1]]
 
-        normFactor = npy.ma.masked_equal(sum(conditionalPDF*dxProd,axis=tuple(sumAxes)),0.0)
+        normFactor = npy.ma.masked_equal(\
+            sum(conditionalPDF*dxProd,axis=tuple(sumAxes)),0.0
+        )
 
-        #Normalize the conditional PDF for the leftside variables
+        # Normalize the conditional PDF for the leftside variables
        conditionalPDF /= normFactor[conformantSlice]
 
        return conditionalPDF
 
-    #*****************************************************************************
-    #** fastKDE: *******************************************
-    #******************* getCopula ******************************************
-    #*****************************************************************************
-    #*****************************************************************************
+    # *****************************************************************************
+    # ** fastKDE: *******************************************
+    # ******************* getCopula ******************************************
+    # *****************************************************************************
+    # *****************************************************************************
    def getCopula(self,data=None,peakFrac = 0.0):
        """Estimates the copula of the underlying PDF"""
 
-        #If the data are univariate, simply return the PDF itself
+        # If the data are univariate, simply return the PDF itself
        if(self.num_variables == 1):
            return self.pdf
 
-        #Check if we need to calculate the marginal distributions
+        # Check if we need to calculate the marginal distributions
        if(not self.do_save_marginals):
            if(data is None):
-                raise ValueError("the data must be provided as argument 'data', if do_save_marginals=False when the original PDF was calculated")
+                raise ValueError(\
+                    "the data must be provided as argument 'data', if "
+                    "do_save_marginals=False when the original PDF was calculated"
+                )
            else:
-                #Estimate the marginal distributions
+                # Estimate the marginal distributions
                marginalObjects = []
                for i in range(self.num_variables):
-                    marginalObjects.append(fastKDE(data[i,:], \
-                                           axes = [self.originalAxes[i]], \
-                                           positive_shift = self.positive_shift, \
-                                           frac_contiguous_hyper_volumes = self.frac_contiguous_hyper_volumes, \
-                                           log_axes = self.log_axes[i], \
-                                           do_save_marginals = False))
+                    marginalObjects.append(
+                        fastKDE( \
+                            data[i,:], \
+                            axes = [self.originalAxes[i]], \
+                            positive_shift = self.positive_shift, \
+                            frac_contiguous_hyper_volumes = \
+                                self.frac_contiguous_hyper_volumes, \
+                            log_axes = self.log_axes[i], \
+                            do_save_marginals = False
+                        )
+                    )
        else:
-            #If not, just use the saved marginals
+            # If not, just use the saved marginals
            marginalObjects = self.marginalObjects
 
-        #Calculate the marginal distributions and mask bad (or zero) values
+        # Calculate the marginal distributions and mask bad (or zero) values
        marginals = []
        for obj in marginalObjects:
-            #Add the marginal to the list while masking <0 values
+            # Add the marginal to the list while masking <0 values
            marginalThreshold = peakFrac*npy.amax(obj.pdf)
-            #Create and mask the marginal PDF
+            # Create and mask the marginal PDF
            marginals.append(npy.ma.masked_less_equal(obj.pdf,marginalThreshold))
 
-        #Calculate the PDF assuming independent marginals
+        # Calculate the PDF assuming independent marginals
        independencePDF = npy.ma.prod(npy.meshgrid(*tuple(marginals)),axis=0)
 
-        #Divide off the indepdencnce PDF to calculate the copula
-        #actualPDF = ma.array(self.pdf)
-        #actualPDF[self.findBadDistributionInds()] = ma.masked
+        # Divide off the independence PDF to calculate the copula
+        # actualPDF = ma.array(self.pdf)
+        # actualPDF[self.findBadDistributionInds()] = ma.masked
        actualPDF = npy.ma.array(self.pdf)
        copulaPDF = actualPDF/independencePDF
 
@@ -812,55 +908,66 @@ def getCopula(self,data=None,peakFrac = 0.0):
    def reApplyFilter(self,pdf):
        """Reapplies the filter to a PDF estimate.
 
-        This is used, e.g., to remove high-frequency noise that results from calculting the conditionals.
+        This is used, e.g., to remove high-frequency noise that results from
+        calculating the conditionals.
""" - - #Transform the PDF to fourier space - phiTilde_tmp = npy.fft.fftshift(npy.fft.ifftn(npy.fft.ifftshift(npy.ma.filled(pdf,0.0)))) - #Normalize the transform + + # Transform the PDF to fourier space + phiTilde_tmp = npy.fft.fftshift(\ + npy.fft.ifftn(npy.fft.ifftshift(npy.ma.filled(pdf,0.0))) + ) + # Normalize the transform midPointAccessor = tuple([(tp-1)//2 for tp in self.numTPoints]) phiTilde_tmp /= phiTilde_tmp[midPointAccessor] - #Reapply the filter + # Reapply the filter phiTilde = (0.0+0.0j)*npy.zeros(npy.shape(phiTilde_tmp)) phiTilde[self.iCalcPhi] = phiTilde_tmp[self.iCalcPhi] - #Transform back to real space - #Transform the PDF estimate to real space - pdf = npy.fft.fftshift(npy.real(npy.fft.fftn(npy.fft.ifftshift(phiTilde))))*npy.prod(self.deltaT)*(1./(2*npy.pi))**self.num_variables + # Transform back to real space + # Transform the PDF estimate to real space + pdf = npy.fft.fftshift(\ + npy.real(npy.fft.fftn(npy.fft.ifftshift(phiTilde)))) \ + *npy.prod(self.deltaT)*(1./(2*npy.pi))**self.num_variables - #Return the transpose of the PDF + # Return the transpose of the PDF return pdf.transpose() - #***************************************************************************** - #** fastKDE: *********************************************** - #******************* Addition operator __add__ ******************************* - #***************************************************************************** - #***************************************************************************** + # ***************************************************************************** + # ** fastKDE: *********************************************** + # ******************* Addition operator __add__ ******************************* + # ***************************************************************************** + # ***************************************************************************** def __add__(self,rhs): """ Addition operator for the fastKDE object. Adds the empirical characteristic functions of the two estimates, reapplies the BP11 filter, and transforms back to real space. This is useful for parallelized calculation of densities. Note that this only works if the axes are the same for both operands.""" - #Check for proper typing + # Check for proper typing if(not isinstance(rhs,fastKDE)): - raise TypeError("unsupported operand type(s) for +: {} and {}".format(type(self),type(rhs))) + raise TypeError(\ + "unsupported operand type(s) for +: " + "{} and {}".format(type(self),type(rhs)) + ) - #Check that the axes are the same for both objects + # Check that the axes are the same for both objects for sxg,rxg in zip(self.axes,rhs.axes): if not all(npy.isclose(sxg,rxg)): - raise NotImplementedError("addition for operands with different axes is not yet implemented.") + raise NotImplementedError(\ + "addition for operands with different axes is not yet " + "implemented." 
+ ) retObj = copy.deepcopy(self) retObj.phiSC = (0.0+0.0j)*npy.zeros(self.numTPoints) retObj.num_data_points += rhs.num_data_points - #Convert the returned variance back into standard deviation + # Convert the returned variance back into standard deviation retObj.dataStandardDeviation = npy.sqrt(retObj.dataStandardDeviation) - #Average the Empirical Characteristic Function of the two objects + # Average the Empirical Characteristic Function of the two objects retObj.ECF = (self.num_data_points*self.ECF + rhs.num_data_points*rhs.ECF) \ /retObj.num_data_points @@ -868,7 +975,7 @@ def __add__(self,rhs): retObj.applyBernacchiaFilter() retObj.__transformphiSC__() - #Return the new object + # Return the new object return retObj def pdf(*args,**kwargs): @@ -877,18 +984,18 @@ def pdf(*args,**kwargs): input: ------ - var1 : An input variable. - - var2, var3... : Additional input varibles whose length - corresponds to the length of var1. As input - variables are added, the dimensionality of the - resulting PDF increases (e.g., supplying var1 - and var2 results in a 2D PDF). - - **kwargs : Any additional keyword arguments get passed directly to - fastKDE.fastKDE(); see the docstring of fastKDE.fastKDE() for - details of kwargs. + var1 : An input variable. + var2, var3... : Additional input varibles whose length corresponds + to the length of var1. As input variables are + added, the dimensionality of the resulting PDF + increases (e.g., supplying var1 and var2 results + in a 2D PDF). + + **kwargs : Any additional keyword arguments get passed + directly to fastKDE.fastKDE(); see the docstring + of fastKDE.fastKDE() for details of kwargs. + returns: -------- @@ -904,64 +1011,74 @@ def pdf(*args,**kwargs): method grows exponentially with the number of input variables. """ - #Try to get var1 from the args or kwargs + # Try to get var1 from the args or kwargs try: var1 = args[0] - except: + except IndexError: try: var1 = kwargs['var1'] - except: + except KeyError: raise ValueError("No input data were provided.") - #Check that var1 is arraylike + # Check that var1 is arraylike try: var1Shape = npy.shape(var1) - except BaseException as e: - print(e) - raise ValueError("Could not get shape of var1; it does not appear to be array-like.") + assert len(var1Shape) != 0, "var1 is not an array" + except TypeError or AssertionError: + raise ValueError( + "Could not get shape of var1; it does not appear to be array-like." + ) - #Check that var1 is a vector + # Check that var1 is a vector if len(var1Shape) != 1: - raise ValueError("var1 should be a vector. If multiple variables are combined in a single array, please use the fastKDE class interface instead.") + raise ValueError(\ + "var1 should be a vector. If multiple variables are combined in a " + "single array, please use the fastKDE class interface instead." 
+ ) - #Get the length of var1 + # Get the length of var1 N = var1Shape[0] - #Check for input varibles provided as key word arguments + # Check for input varibles provided as key word arguments varArgs = [] varKeys = sorted([ v for v in kwargs if "var" in v ]) for key in varKeys: - #Ignore var1 since this was either provided as an argument - #or was read as a keyword argument above + # Ignore var1 since this was either provided as an argument + # or was read as a keyword argument above if key != "var1": try: - varNum = int(key[3:]) - except BaseException as e: - print(e) - raise ValueError("Incomprehensible variable-like keyword provided: {}".format(key)) - - #Append this variable + int(key[3:]) + except ValueError: + raise ValueError(\ + "Incomprehensible variable-like keyword provided: " + "{}".format(key) + ) + + # Append this variable varArgs.append(kwargs[key]) - #Check if a mixture of keyword and arguments were provided for additional variables + # Check if a mixture of keyword and arguments were provided for additional variables if len(varArgs) != 0 and len(args) > 1: - raise ValueError("additional variables were provided as a mixture of arguments and keyword arguments. They all must be one or the other.") + raise ValueError(\ + "additional variables were provided as a mixture of arguments and " + "keyword arguments. They all must be one or the other." + ) - #Set the additional variables to be the rest of the input arguments - #if none were provided as key word arguments + # Set the additional variables to be the rest of the input arguments + # if none were provided as key word arguments if len(args) > 1: varArgs = args[1:] - #Remove the variables from kwargs + # Remove the variables from kwargs for key in list(varKeys): del(kwargs[key]) - #Start preparing the input data for - #concatenation + # Start preparing the input data for + # concatenation inputVariables = npy.array(var1[npy.newaxis,:]) - #Attempt to read additional variables - #and concatenate them to the input variable + # Attempt to read additional variables + # and concatenate them to the input variable for i in range(len(varArgs)): try: varn = npy.array(varArgs[i][npy.newaxis,:]) @@ -971,20 +1088,25 @@ def pdf(*args,**kwargs): lenN = npy.shape(varn)[1] if lenN != N: - raise ValueError("len(var{}) is {}, but it should be the same of len(var1) = {}".format(i+1,lenN,N)) + raise ValueError(\ + ( + "len(var{}) is {}, but it should be the same of len(var1) "+ + "= {}" + ).format(i+1,lenN,N) + ) inputVariables = npy.concatenate((inputVariables,varn)) - #Remove the do_save_marginals keyword argument + # Remove the do_save_marginals keyword argument try: - do_save_marginals = kwargs['do_save_marginals'] + kwargs['do_save_marginals'] del(kwargs['do_save_marginals']) - except: + except KeyError: pass - #Calculate the PDF + # Calculate the PDF _pdfobj = fastKDE(inputVariables, \ do_save_marginals = False, \ **kwargs @@ -1006,12 +1128,15 @@ def conditional( \ conditioningVars : A vector of conditioning values, or a list of such vectors - **kwargs : Any additional keyword arguments get passed directly to - fastKDE.fastKDE() or fastKDE.estimateConditionals(); see - the docstrings of fastKDE.fastKDE() and fastKDE.estimateConditionals() - for details of kwargs. + **kwargs : Any additional keyword arguments get passed + directly to fastKDE.fastKDE() or + fastKDE.estimateConditionals(); see the + docstrings of fastKDE.fastKDE() and + fastKDE.estimateConditionals() for details of + kwargs. 
@@ -1006,12 +1128,15 @@ def conditional( \

         conditioningVars : A vector of conditioning values, or a list of such
                            vectors

-        **kwargs : Any additional keyword arguments get passed directly to
-                   fastKDE.fastKDE() or fastKDE.estimateConditionals(); see
-                   the docstrings of fastKDE.fastKDE() and fastKDE.estimateConditionals()
-                   for details of kwargs.
+        **kwargs : Any additional keyword arguments get passed
+                   directly to fastKDE.fastKDE() or
+                   fastKDE.estimateConditionals(); see the
+                   docstrings of fastKDE.fastKDE() and
+                   fastKDE.estimateConditionals() for details of
+                   kwargs.

-                   Note the following two arguments have different default values here:
+                   Note the following two arguments have different
+                   default values here:
                    positive_shift=True by default, and
                    peakFrac = 0.01 by default.

@@ -1031,112 +1156,120 @@ def conditional( \

     import pylab as PP
     from numpy import *

-    #***************************
-    # Generate random samples
-    #***************************
-    # Stochastically sample from the function underlyingFunction() (a sigmoid):
-    # sample the absicissa values from a gamma distribution
-    # relate the ordinate values to the sample absicissa values and add
-    # noise from a normal distribution
+    # ***************************
+    # Generate random samples
+    # ***************************
+    # Stochastically sample from the function underlyingFunction() (a sigmoid):
+    # sample the abscissa values from a gamma distribution
+    # relate the ordinate values to the sample abscissa values and add
+    # noise from a normal distribution

-    #Set the number of samples
+    # Set the number of samples
     numSamples = int(1e6)

-    #Define a sigmoid function
+    # Define a sigmoid function
     def underlyingFunction(x,x0=305,y0=200,yrange=4):
         return (yrange/2)*tanh(x-x0) + y0

-    xp1,xp2,xmid = 5,2,305   #Set gamma distribution parameters
-    yp1,yp2 = 0,12           #Set normal distribution parameters (mean and std)
+    xp1,xp2,xmid = 5,2,305   # Set gamma distribution parameters
+    yp1,yp2 = 0,12           # Set normal distribution parameters (mean and std)

-    #Generate random samples of X from the gamma distribution
+    # Generate random samples of X from the gamma distribution
     x = -(random.gamma(xp1,xp2,int(numSamples))-xp1*xp2) + xmid

-    #Generate random samples of y from x and add normally distributed noise
+    # Generate random samples of y from x and add normally distributed noise
     y = underlyingFunction(x) + random.normal(loc=yp1,scale=yp2,size=numSamples)

-    #***************************
-    # Calculate the conditional
-    #***************************
+    # ***************************
+    # Calculate the conditional
+    # ***************************
     pOfYGivenX,axes = fastKDE.conditional(y,x)

    ```
    """

-    #Check whether inputVars is an iterable of vectors or a single vector;
-    #ensure it is an iterable
+    # Check whether inputVars is an iterable of vectors or a single vector;
+    # ensure it is an iterable
     try:
         ivarLengths = [len(v) for v in inputVars]
-    except:
+    except TypeError:
         inputVars = [inputVars]
         ivarLengths = [len(v) for v in inputVars]

-    #Check whether conditioningVars is an iterable of vectors or a single vector;
-    #ensure it is an iterable
+    # Check whether conditioningVars is an iterable of vectors or a single vector;
+    # ensure it is an iterable
     try:
         cvarLengths = [len(v) for v in conditioningVars]
-    except:
+    except TypeError:
         conditioningVars = [conditioningVars]
         cvarLengths = [len(v) for v in conditioningVars]

-    #Create a list of all variables
+    # Create a list of all variables
     fullVarList = conditioningVars + inputVars

-    #Check that all input variable lengths are the same
+    # Check that all input variable lengths are the same
     if not all(npy.array([len(v) for v in fullVarList]) == ivarLengths[0]):
-        raise ValueError("inputVars and conditioningVars all must be the same length. Got {} for inputVars and {} for conditioningVars".format(ivarLengths,cvarLengths))
+        raise ValueError(\
+            (
+                "inputVars and conditioningVars all must be the same length. " +
+                "Got {} for inputVars and {} for conditioningVars"
+            ).format(ivarLengths,cvarLengths)
+        )

+    # Extract the peakFrac argument
     if 'peakFrac' in kwargs:
         peakFrac = kwargs['peakFrac']
         del(kwargs['peakFrac'])
     else:
         peakFrac = 0.01

-    #Default to positive_shift=True
+    # Default to positive_shift=True
     if 'positive_shift' in kwargs:
         positive_shift = kwargs['positive_shift']
         del(kwargs['positive_shift'])
     else:
         positive_shift = True

-    #Estimate the full joint PDF
+    # Estimate the full joint PDF
     _pdf = fastKDE(npy.array(fullVarList),positive_shift=positive_shift,**kwargs)

-    #Set the indices of the conditional variables
+    # Set the indices of the conditional variables
     cvarInds = list(range(len(conditioningVars)))

-    #Estimate the conditional
-    cpdf = _pdf.estimateConditionals(cvarInds,npy.array(fullVarList),peakFrac = peakFrac)
+    # Estimate the conditional
+    cpdf = _pdf.estimateConditionals( \
+        cvarInds,npy.array(fullVarList),peakFrac = peakFrac
+    )

-    #Return the conditional and the axes
+    # Return the conditional and the axes
     return cpdf,_pdf.axes
def pdf_at_points(*args,**kwargs):
-    """Estimate the self-consistent kernel density estimate of the input data at a fixed set of points.
-
+    """ Estimate the self-consistent kernel density estimate of the input data
+    at a fixed set of points.
+
     input:
     ------

-        var1            : An input variable.
-
-        var2, var3...   : Additional input varibles whose length
-                          corresponds to the length of var1.  As input
-                          variables are added, the dimensionality of the
-                          resulting PDF increases (e.g., supplying var1
-                          and var2 results in a 2D PDF).
-
-        list_of_points  : Points at which the PDF should be estimated.
-                          Points should be provided as a list of tuples,
-                          where each tuple contains a point at which the
-                          PDF should be estimated.  If not provided, the
-                          input data points will be used by default.
-
-        **kwargs        : Any additional keyword arguments get passed directly to
-                          fastKDE.fastKDE(); see the docstring of fastKDE.fastKDE() for
-                          details of kwargs.
-
+        var1            : An input variable.
+
+        var2, var3...   : Additional input variables whose length
+                          corresponds to the length of var1.  As input
+                          variables are added, the dimensionality of the
+                          resulting PDF increases (e.g., supplying var1
+                          and var2 results in a 2D PDF).
+
+        list_of_points  : Points at which the PDF should be estimated.
+                          Points should be provided as a list of tuples,
+                          where each tuple contains a point at which the
+                          PDF should be estimated.  If not provided, the
+                          input data points will be used by default.
+
+        **kwargs        : Any additional keyword arguments get passed
+                          directly to fastKDE.fastKDE(); see the docstring
+                          of fastKDE.fastKDE() for details of kwargs.
+
     returns:
     --------

@@ -1156,64 +1289,74 @@ def pdf_at_points(*args,**kwargs):

     """

-    #Try to get var1 from the args or kwargs
+    # Try to get var1 from the args or kwargs
     try:
         var1 = args[0]
-    except:
+    except IndexError:
         try:
             var1 = kwargs['var1']
-        except:
+        except KeyError:
             raise ValueError("No input data were provided.")

-    #Check that var1 is arraylike
+    # Check that var1 is arraylike
     try:
         var1Shape = npy.shape(var1)
-    except BaseException as e:
-        print(e)
-        raise ValueError("Could not get shape of var1; it does not appear to be array-like.")
+        assert len(var1Shape) != 0, "var1 is not an array"
+    except (ValueError, AssertionError):
+        raise ValueError(\
+            "Could not get shape of var1; it does not appear to be array-like."
+        )

-    #Check that var1 is a vector
+    # Check that var1 is a vector
     if len(var1Shape) != 1:
-        raise ValueError("var1 should be a vector. If multiple variables are combined in a single array, please use the fastKDE class interface instead.")
+        raise ValueError(
+            "var1 should be a vector. If multiple variables are combined in a "
+            "single array, please use the fastKDE class interface instead."
+        )

-    #Get the length of var1
+    # Get the length of var1
     N = var1Shape[0]

-    #Check for input varibles provided as key word arguments
+    # Check for input variables provided as keyword arguments
     varArgs = []
     varKeys = sorted([ v for v in kwargs if "var" in v ])
     for key in varKeys:
-        #Ignore var1 since this was either provided as an argument
-        #or was read as a keyword argument above
+        # Ignore var1 since this was either provided as an argument
+        # or was read as a keyword argument above
         if key != "var1":
             try:
-                varNum = int(key[3:])
-            except BaseException as e:
-                print(e)
-                raise ValueError("Incomprehensible variable-like keyword provided: {}".format(key))
-
-            #Append this variable
+                int(key[3:])
+            except ValueError:
+                raise ValueError(\
+                    "Incomprehensible variable-like keyword provided: "
+                    "{}".format(key)
+                )
+
+            # Append this variable
             varArgs.append(kwargs[key])

-    #Check if a mixture of keyword and arguments were provided for additional variables
+    # Check if a mixture of keyword and arguments were provided for additional variables
     if len(varArgs) != 0 and len(args) > 1:
-        raise ValueError("additional variables were provided as a mixture of arguments and keyword arguments. They all must be one or the other.")
+        raise ValueError(\
+            "additional variables were provided as a mixture of arguments and "
+            "keyword arguments. They all must be one or the other."
+        )

-    #Set the additional variables to be the rest of the input arguments
-    #if none were provided as key word arguments
+    # Set the additional variables to be the rest of the input arguments
+    # if none were provided as keyword arguments
     if len(args) > 1:
         varArgs = args[1:]

-    #Remove the variables from kwargs
+    # Remove the variables from kwargs
     for key in list(varKeys):
         del(kwargs[key])

-    #Start preparing the input data for
-    #concatenation
+    # Start preparing the input data for
+    # concatenation
     inputVariables = npy.array(var1[npy.newaxis,:])

-    #Attempt to read additional variables
-    #and concatenate them to the input variable
+    # Attempt to read additional variables
+    # and concatenate them to the input variable
     for i in range(len(varArgs)):
         try:
             varn = npy.array(varArgs[i][npy.newaxis,:])
@@ -1223,65 +1366,81 @@ def pdf_at_points(*args,**kwargs):
         lenN = npy.shape(varn)[1]
         if lenN != N:
-            raise ValueError("len(var{}) is {}, but it should be the same of len(var1) = {}".format(i+1,lenN,N))
+            raise ValueError(
+                (
+                    "len(var{}) is {}, but it should be the same as len(var1) " +
+                    "= {}"
+                ).format(i+2,lenN,N)
+            )

         inputVariables = npy.concatenate((inputVariables,varn))

-    # check if list_of_points was provided
+    # check if list_of_points was provided
     list_of_points_provided_in_kwargs = False
     try:
-        # extract the list of points
+        # extract the list of points
         list_of_points = kwargs["list_of_points"]
-        # delete it from the keyword argument dictionary
+        # delete it from the keyword argument dictionary
         del(kwargs["list_of_points"])
-        # flag that this argument was provided
+        # flag that this argument was provided
         list_of_points_provided_in_kwargs = True
-    except:
-        # default to using the input points as the list of points
+    except KeyError:
+        # default to using the input points as the list of points
         list_of_points = inputVariables

-    # make sure list_of_points is in the expected format
+    # make sure list_of_points is in the expected format
     if list_of_points_provided_in_kwargs:
         try:
             list_of_points = npy.array(list_of_points,copy=True,dtype=npy.float_).T
-        except:
+        except ValueError:
             raise RuntimeError("Could not convert list_of_points to a numpy array.")

-        # check the rank of the input data points
+        # check the rank of the input data points
         data_rank = len(npy.shape(list_of_points))

-        #If the data are a vector, promote the data to a rank-1 array with only 1 column
+
+        # If the data are a vector, promote the data to a rank-2 array with a
+        # single row
         if(data_rank == 1):
             list_of_points = npy.array(list_of_points[npy.newaxis,:],dtype=npy.float_)
         if(data_rank > 2):
-            # raise an error indicating the proper shape for list_of_points
-            raise ValueError("list_of_points must be able to be broadcast to a rank-2 array of shape [num_data_points,num_variables]")
+            # raise an error indicating the proper shape for list_of_points
+            raise ValueError(
+                "list_of_points must be able to be broadcast to a rank-2 array "
+                "of shape [num_data_points,num_variables]"
+            )

-    #Remove the do_save_marginals keyword argument
+    # Remove the do_save_marginals keyword argument
     try:
         _ = kwargs['do_save_marginals']
         del(kwargs['do_save_marginals'])
-    except:
+    except KeyError:
         pass

-    #Remove the log_axes argument
+    # Remove the log_axes argument
     try:
         _ = kwargs['log_axes']
         del(kwargs['log_axes'])
-        warnings.warn("fastKDE.pdf_at_points() does not currently support the log_axes option; it will be ignored.")
-    except:
+        warnings.warn(\
+            "fastKDE.pdf_at_points() does not currently support the log_axes "
+            "option; it will be ignored."
+        )
+    except KeyError:
         pass

-    #Remove the positive_shift argument
+    # Remove the positive_shift argument
     try:
         _ = kwargs['positive_shift']
         del(kwargs['positive_shift'])
-        warnings.warn("fastKDE.pdf_at_points() does not currently support the positive_shift option; it will be ignored.")
-    except:
+        warnings.warn(\
+            "fastKDE.pdf_at_points() does not currently support the "
+            "positive_shift option; it will be ignored."
+        )
+    except KeyError:
         pass

-    #Calculate the PDF in Fourier space
+    # Calculate the PDF in Fourier space
     _pdfobj = fastKDE(inputVariables, \
                       do_save_marginals = False, \
                       do_fft = False, \
@@ -1290,122 +1449,124 @@ def pdf_at_points(*args,**kwargs):
                       **kwargs, \
                      )

-    # complete the Fourier-space calculation of the PDF
+    # complete the Fourier-space calculation of the PDF
     _pdfobj.applyBernacchiaFilter()

-    # calculate the PDF at the requested points
+    # calculate the PDF at the requested points
     pdf = _pdfobj.__transformphiSC_points__(list_of_points)

-    # return the pdf
+    # return the pdf
     return pdf
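A minimal sketch of the point-wise interface (test_points is an illustrative name; the call pattern follows the docstring above):

```python
import numpy as np
from fastkde import fastKDE

# Two correlated variables
N = int(1e4)
x = np.random.normal(size=N)
y = 0.5 * x + np.random.normal(size=N)

# Evaluate the density at a few chosen points rather than on a grid;
# each tuple is one (x, y) evaluation point
test_points = [(0.0, 0.0), (1.0, 0.5), (-2.0, -1.0)]
pdf_at = fastKDE.pdf_at_points(x, y, list_of_points=test_points)

# With list_of_points omitted, the density is evaluated at the input
# data points themselves
pdf_at_data = fastKDE.pdf_at_points(x, y)
```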
-#*******************************************************************************
-#*******************************************************************************
-#***************************** Unit testing code *******************************
-#*******************************************************************************
-#*******************************************************************************
-# Test this implementation of the BP11 density estimate against a normal
-# distribution.  Calculate the estimate for a variety of sample sizes and show
-# how the distribution error decreases as sample size increases.  As of revision
-# 9 of the code, this unit testing shows that this implementation of the BP11
-# estimate converges on the true normal distribution like N**-1, which agrees
-# the theoretical and empirical convergence rate given in BP11.
+# *******************************************************************************
+# *******************************************************************************
+# ***************************** Unit testing code *******************************
+# *******************************************************************************
+# *******************************************************************************
+# Test this implementation of the BP11 density estimate against a normal
+# distribution.  Calculate the estimate for a variety of sample sizes and show
+# how the distribution error decreases as sample size increases.  As of revision
+# 9 of the code, this unit testing shows that this implementation of the BP11
+# estimate converges on the true normal distribution like N**-1, which agrees
+# with the theoretical and empirical convergence rate given in BP11.
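The N**-1 convergence claimed in this banner can be spot-checked in a few lines, independently of the full harness below; a hedged sketch (it assumes pdf() returns the estimate and its grid, as documented earlier):

```python
import numpy as np
from fastkde import fastKDE

def mse_vs_normal(n):
    """Mean squared error of the estimate against a standard normal."""
    x = np.random.normal(size=n)
    pdf, axes = fastKDE.pdf(x)
    truth = np.exp(-axes**2 / 2.0) / np.sqrt(2.0 * np.pi)
    return np.average((pdf - truth)**2)

# If the error scales like N**-1, quadrupling the sample size should
# cut the mean squared error by roughly a factor of four
print(mse_vs_normal(2**12) / mse_vs_normal(2**14))
```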
if(__name__ == "__main__"):

-    #set a seed so that results are repeatable
+    # set a seed so that results are repeatable
    npy.random.seed(0)

    doOneDimensionalTests = True
    if(doOneDimensionalTests):
-        import pylab as P
+        import matplotlib.pyplot as plt
        import scipy.stats as stats

        mu = -1e3
        sig = 1e3
-        #Define a gaussian function for evaluation purposes
+        # Define a gaussian function for evaluation purposes
        def mygaus(x):
            return (1./(sig*npy.sqrt(2*npy.pi)))*npy.exp(-(x-mu)**2/(2.*sig**2))

-        #Set the size of the sample to calculate
+        # Set the size of the sample to calculate
        powmax = 19
        npow = npy.asarray(range(powmax)) + 1.0

-        #Set the maximum sample size
+        # Set the maximum sample size
        nmax = 2**powmax
-        #Create a random normal sample of this size
+        # Create a random normal sample of this size
        randsample = sig*npy.random.normal(size = nmax) + mu

-        #Pre-define sample size and error-squared arrays
+        # Pre-define sample size and error-squared arrays
        nsample = npy.zeros([len(npow)])
        esq = npy.zeros([len(npow)])
        epct = npy.zeros([len(npow)])

        evaluateError = True
        if evaluateError:
-            #Do the optimal calculation on a number of different random draws
+            # Do the optimal calculation on a number of different random draws
            for i,n in zip(range(len(npow)),npow):
-                #Extract a sample of length 2**n + 1 from the previously-created
-                #random sample
+                # Extract a sample of length 2**n + 1 from the previously-created
+                # random sample
                randgauss = randsample[:(2**n + 1)]

-                #Set the sample size
+                # Set the sample size
                nsample[i] = len(randgauss)

                with Timer(nsample[i]):
-                    #Do the BP11 density estimate
+                    # Do the BP11 density estimate
                    bkernel = fastKDE(randgauss,do_approximate_ecf=True,num_points=513)

-                #Calculate the mean squared error between the estimated density
-                #And the gaussian
-                #esq[i] = average(abs(mygaus(bkernel.x)-bkernel.pdf)**2 *bkernel.deltaX)
-                esq[i] = npy.average(abs(mygaus(bkernel.axes[0])-bkernel.pdf[:])**2 *bkernel.deltaX[0])
-                epct[i] = 100*sum(abs(mygaus(bkernel.axes[0])-bkernel.pdf[:])*bkernel.deltaX[0])
-                #Print the sample size and the error to show that the code is proceeeding
-                #print "{}, {}%".format(nsample[i],epct[i])
-
-                #Plot the optimal distribution
-                P.subplot(2,2,1)#,yscale="log")
-                #pdfmask = ma.masked_less(bkernel.pdf,bkernel.distributionThreshold)
+                # Calculate the mean squared error between the estimated
+                # density and the gaussian
+                # esq[i] = average(abs(mygaus(bkernel.x)-bkernel.pdf)**2 *bkernel.deltaX)
+                esq[i] = npy.average(\
+                    abs(mygaus(bkernel.axes[0])-bkernel.pdf[:])**2 *bkernel.deltaX[0]
+                )
+                epct[i] = 100*sum(
+                    abs(mygaus(bkernel.axes[0])-bkernel.pdf[:])*bkernel.deltaX[0]
+                )

+                # Plot the optimal distribution
+                plt.subplot(2,2,1)# ,yscale="log")
+                # pdfmask = ma.masked_less(bkernel.pdf,bkernel.distributionThreshold)
                pdfmask = bkernel.pdf
-                P.plot(bkernel.axes[0],pdfmask,'b-')
+                plt.plot(bkernel.axes[0],pdfmask,'b-')

-                #Plot the empirical characteristic function
-                P.subplot(2,2,2,xscale="log",yscale="log")
-                P.plot(bkernel.tgrids[0][1:],abs(bkernel.ECF[1:])**2,'b-')
+                # Plot the empirical characteristic function
+                plt.subplot(2,2,2,xscale="log",yscale="log")
+                plt.plot(bkernel.tgrids[0][1:],abs(bkernel.ECF[1:])**2,'b-')

-            #Plot the sample gaussian
-            P.subplot(2,2,1)#,yscale="log")
-            P.plot(bkernel.axes[0],mygaus(bkernel.axes[0]),'r-')
+            # Plot the sample gaussian
+            plt.subplot(2,2,1)# ,yscale="log")
+            plt.plot(bkernel.axes[0],mygaus(bkernel.axes[0]),'r-')

-            #Do a simple power law fit to the scaling
+            # Do a simple power law fit to the scaling
            [m,b,_,_,_] = stats.linregress(npy.log(nsample),npy.log(esq))

-            #Print the error scaling (following BP11, this is expected to be m ~ -1)
+            # Print the error scaling (following BP11, this is expected to be m ~ -1)
            print("Error scales ~ N**{}".format(m))

-            #Plot the error vs sample size on a log-log curve
-            P.subplot(2,2,3)
-            P.loglog(nsample,esq)
-            P.plot(nsample,npy.exp(b)*nsample**m,'r-')
+            # Plot the error vs sample size on a log-log curve
+            plt.subplot(2,2,3)
+            plt.loglog(nsample,esq)
+            plt.plot(nsample,npy.exp(b)*nsample**m,'r-')

            print("")

            bDemoSum = False
            if(not bDemoSum):
-                P.show()
+                plt.show()
            else:
-                #*********************************************************************
-                # Demonstrate the capability to sum fastKDE objects
-                #*********************************************************************
+                # *********************************************************************
+                # Demonstrate the capability to sum fastKDE objects
+                # *********************************************************************

                nsamp = 512
                nloop = nmax/nsamp

-                #Pre-define sample size and error-squared arrays
+                # Pre-define sample size and error-squared arrays
                nsample2 = npy.zeros([nloop])
                esq2 = npy.zeros([nloop])

@@ -1418,94 +1579,98 @@ def mygaus(x):
                        bkernel2 += fastKDE(randgauss)
                        nsample2[i] = nsample2[i-1] + len(randgauss)

-                    #Calculate the mean squared error between the estimated density
-                    #And the gaussian
-                    esq2[i] = npy.average(npy.abs(mygaus(bkernel2.axes[0])-bkernel2.pdf)**2 * bkernel2.deltaX[0])
-                    #Print the sample size and the error to show that the code is proceeeding
+                    # Calculate the mean squared error between the estimated density
+                    # and the gaussian
+                    esq2[i] = npy.average(\
+                        npy.abs(\
+                            mygaus(bkernel2.axes[0])-bkernel2.pdf)**2 \
+                            * bkernel2.deltaX[0]
+                    )
+                    # Print the sample size and the error to show that the code is proceeding
                    print("{}, {}".format(nsample2[i],esq2[i]))

-                #Plot the distribution
-                P.subplot(2,2,1)
-                P.plot(bkernel2.axes[0],bkernel2.pdf,'g-')
+                # Plot the distribution
+                plt.subplot(2,2,1)
+                plt.plot(bkernel2.axes[0],bkernel2.pdf,'g-')

-                #Plot the ECF
-                P.subplot(2,2,2,xscale="log",yscale="log")
-                P.plot(bkernel2.tgrids[0][1:],abs(bkernel2.ECF[0,1:])**2,'b-')
+                # Plot the ECF
+                plt.subplot(2,2,2,xscale="log",yscale="log")
+                plt.plot(bkernel2.tgrids[0][1:],abs(bkernel2.ECF[0,1:])**2,'b-')

-                #Plot the error-rate change
-                P.subplot(2,2,3)
-                P.loglog(nsample2,esq2,'g-')
+                # Plot the error-rate change
+                plt.subplot(2,2,3)
+                plt.loglog(nsample2,esq2,'g-')

-                #Plot the difference between the two distributions
-                P.subplot(2,2,4)
-                P.plot(bkernel2.axes[0], abs(bkernel.pdf - bkernel2.pdf)*bkernel.deltaX[0])
+                # Plot the difference between the two distributions
+                plt.subplot(2,2,4)
+                plt.plot(bkernel2.axes[0], abs(bkernel.pdf - bkernel2.pdf)*bkernel.deltaX[0])

-                #Show the plots
-                P.show()
+                # Show the plots
+                plt.show()

        else:
            print(randsample)
-            #Simply do the BP11 density estimate and plot it
+            # Simply do the BP11 density estimate and plot it
            bkernel = fastKDE(randsample,\
                              do_approximate_ecf=True, \
                              be_verbose = True, \
                              num_points = 513)

-            #Plot the optimal distribution
-            P.subplot(2,1,1)
-            #pdfmask = ma.masked_less(bkernel.pdf,bkernel.distributionThreshold)
+            # Plot the optimal distribution
+            plt.subplot(2,1,1)
+            # pdfmask = ma.masked_less(bkernel.pdf,bkernel.distributionThreshold)
            pdfmask = bkernel.pdf
-            P.plot(bkernel.axes[0],pdfmask,'b-')
-            #Plot the sample gaussian
-            P.plot(bkernel.axes[0],mygaus(bkernel.axes[0]),'r-')
+            plt.plot(bkernel.axes[0],pdfmask,'b-')
+            # Plot the sample gaussian
+            plt.plot(bkernel.axes[0],mygaus(bkernel.axes[0]),'r-')

-            #for d in randsample:
-            #    P.plot([d,d],[0,1./len(randsample)],'k-',alpha=0.5)
+            # for d in randsample:
+            #    plt.plot([d,d],[0,1./len(randsample)],'k-',alpha=0.5)

-            #Plot the transforms
-            P.subplot(2,1,2)
-            P.plot(bkernel.tgrids[0],abs(bkernel.phiSC),'b-')
+            # Plot the transforms
+            plt.subplot(2,1,2)
+            plt.plot(bkernel.tgrids[0],abs(bkernel.phiSC),'b-')

            ecfStandard = npy.fft.ifft(mygaus(bkernel.axes[0]))
            ecfStandard /= ecfStandard[0]
            ecfStandard = npy.fft.fftshift(ecfStandard)
-            P.plot(bkernel.tgrids[0],abs(ecfStandard),'r-')
+            plt.plot(bkernel.tgrids[0],abs(ecfStandard),'r-')

            mean = sum(bkernel.axes[0]*bkernel.pdf*bkernel.deltaX[0])

-            P.show()
+            plt.show()

    doTwoDimensionalTests = True
    if(doTwoDimensionalTests):
-        from mpl_toolkits.mplot3d import Axes3D
        import matplotlib.pyplot as plt
        import scipy.stats as stats

        nvariables = 2

-        #Seed with 0 so results are reproducable
+        # Seed with 0 so results are reproducible
        npy.random.seed(0)

-        #Define a bivariate normal function
+        # Define a bivariate normal function
        def norm2d(x,y,mux=0,muy=0,sx=1,sy=1,r=0):
            coef = 1./(2*npy.pi*sx*sy*npy.sqrt(1.-r**2))
-            expArg = -(1./(2*(1-r**2)))*( (x-mux)**2/sx**2 + (y-muy)**2/sy**2 - 2*r*(x-mux)*(y-muy)/(sx*sy))
+            expArg = -(1./(2*(1-r**2)))* \
+                ( (x-mux)**2/sx**2 + (y-muy)**2/sy**2 - 2*r*(x-mux)*(y-muy)/(sx*sy))
            return coef*npy.exp(expArg)

-        #Set the size of the sample to calculate
+        # Set the size of the sample to calculate
        powmax = 16
        npow = npy.asarray(range(1,powmax)) + 1.0

-        #Set the maximum sample size
+        # Set the maximum sample size
        nmax = 2**powmax

        def covMat(sx,sy,r):
            return [[sx**2,r*sx*sy],[r*sx*sy,sy**2]]

        gausParams = []
-        gausParams.append([0.0,0.0,1.0,1.0,0.0]) #Standard, uncorrelated bivariate
-        gausParams.append([2.0,0.0,1.0,1.0,0.7]) #correlation 0.7, mean x+2
-        gausParams.append([0.0,2.0,1.0,0.5,0.0]) #Flat in y-direction, mean y+2
-        gausParams.append([2.0,2.0,0.5,1.0,0.0]) #Flat in x-direction, mean xy+2
+        gausParams.append([0.0,0.0,1.0,1.0,0.0]) # Standard, uncorrelated bivariate
+        gausParams.append([2.0,0.0,1.0,1.0,0.7]) # correlation 0.7, mean x+2
+        gausParams.append([0.0,2.0,1.0,0.5,0.0]) # Flat in y-direction, mean y+2
+        gausParams.append([2.0,2.0,0.5,1.0,0.0]) # Flat in x-direction, mean xy+2

-        #Define the corresponding standard function
+        # Define the corresponding standard function
        def pdfStandard(x,y):
            pdfStandard = npy.zeros(npy.shape(x))
            for gg in gausParams:
@@ -1514,21 +1679,21 @@ def pdfStandard(x,y):

            return pdfStandard

-        #Generate samples from this distribution
+        # Generate samples from this distribution
        randsamples = []
        ngg = len(gausParams)
        for gg in gausParams:
            mu = gg[:2]
            gCovMat = covMat(*tuple(gg[2:]))
            size = tuple([2,nmax/ngg])
-            #Append a 2D gaussian to the list
+            # Append a 2D gaussian to the list
            randsamples.append(npy.random.multivariate_normal(mu,gCovMat,(nmax/ngg,)).transpose())

-        #Concatenate the gaussian samples
+        # Concatenate the gaussian samples
        randsample = npy.concatenate(tuple(randsamples),axis=1)

-        #Shuffle the samples along the long axis so that we
-        #can draw successively larger samples
+        # Shuffle the samples along the long axis so that we
+        # can draw successively larger samples
        ishuffle = npy.asarray(range(nmax))
        npy.random.shuffle(ishuffle)
        randsample = randsample[:,ishuffle]

@@ -1537,23 +1702,23 @@ def pdfStandard(x,y):
        if(doSaveCSV):
            npy.savetxt("bp11_2d_samples.csv",randsample.transpose(),delimiter=",")

-        #Pre-define sample size and error-squared arrays
+        # Pre-define sample size and error-squared arrays
        nsample = npy.zeros([len(npow)])
        esq = npy.zeros([len(npow)])
        epct = npy.zeros([len(npow)])

        evaluateError = True
        if(evaluateError):
-            #Do the optimal calculation on a number of different random draws
+            # Do the optimal calculation on a number of different random draws
            for z,n in zip(range(len(npow)),npow):
-                #Extract a sample of length 2**n + 1 from the previously-created
-                #random sample
+                # Extract a sample of length 2**n from the previously-created
+                # random sample
                randsub = randsample[:,:(2**n)]

-                #Set the sample size
+                # Set the sample size
                nsample[z] = npy.shape(randsub)[1]

                with Timer(nsample[z]):
-                    #Do the BP11 density estimate
+                    # Do the BP11 density estimate
                    bkernel = fastKDE( randsub, \
                                       be_verbose=False, \
                                       do_save_marginals = False, \
@@ -1562,20 +1727,18 @@ def pdfStandard(x,y):
                x,y = tuple(bkernel.axes)
                x2d,y2d = npy.meshgrid(x,y)

-                #Calculate the mean squared error between the estimated density
-                #And the gaussian
-                #esq[z] = average(abs(mygaus(bkernel.x)-bkernel.pdf)**2 *bkernel.deltaX)
-                #esq[z] = average(abs(pdfStandard(x2d,y2d)-bkernel.getTransformedPDF())**2 *bkernel.deltaX**2)
+                # Calculate the mean squared error between the estimated density
+                # and the gaussian
                absdiffsq = abs(pdfStandard(x2d,y2d)-bkernel.pdf)**2
                dx = x[1] - x[0]
                dy = y[1] - y[0]
                esq[z] = sum(dy*sum(absdiffsq*dx,axis=0))/(len(x)*len(y))
-                #Print the sample size and the error to show that the code is proceeeding
-                #print "{}: {}, {}".format(n,nsample[z],esq[z])
+                # Print the sample size and the error to show that the code is proceeding
+                # print "{}: {}, {}".format(n,nsample[z],esq[z])

-            #Do a simple power law fit to the scaling
+            # Do a simple power law fit to the scaling
            [m,b,_,_,_] = stats.linregress(npy.log(nsample),npy.log(esq))

-            #Print the error scaling (following BP11, this is expected to be m ~ -1)
+            # Print the error scaling (following BP11, this is expected to be m ~ -1)
            print("Error scales ~ N**{}".format(m))
        else:
            with Timer(npy.shape(randsample)[1]):
@@ -1598,19 +1761,19 @@ def pdfStandard(x,y):
                clevs = npy.asarray(range(2,10))/100.
                ax1.contour(x2d,y2d,bkernel.pdf,levels = clevs)
                ax1.contour(x2d,y2d,pdfStandard(x2d,y2d),levels=clevs,colors='k')
-                #ax1.plot(randsample[0,:],randsample[1,:],'k.',markersize=1)
+                # ax1.plot(randsample[0,:],randsample[1,:],'k.',markersize=1)
                plt.xlim([-4,6])
                plt.ylim([-4,6])

        if(evaluateError):
-            #Plot the error vs sample size on a log-log curve
+            # Plot the error vs sample size on a log-log curve
            ax3 = fig.add_subplot(122,xscale="log",yscale="log")
            ax3.plot(nsample,esq)
            ax3.plot(nsample,npy.exp(b)*nsample**m,'r-')
-            #ax3 = fig.add_subplot(223)
-            #ax3.plot(randsample[0,::16],randsample[1,::16],'k.',markersize=1)
-            #plt.xlim([-4,6])
-            #plt.ylim([-4,6])
+            # ax3 = fig.add_subplot(223)
+            # ax3.plot(randsample[0,::16],randsample[1,::16],'k.',markersize=1)
+            # plt.xlim([-4,6])
+            # plt.ylim([-4,6])
        else:
            ax3 = fig.add_subplot(122)
            errorStandardSum= sum(abs(pdfStandard(x2d,y2d)-bkernel.pdf)**2,axis=0)

diff --git a/src/fastkde/plot.py b/src/fastkde/plot.py
index aa9b0b1..2690c7a 100644
--- a/src/fastkde/plot.py
+++ b/src/fastkde/plot.py
@@ -9,79 +9,91 @@ def cumulative_integral(pdf, axes, integration_axes=None, reverse_axes=None):

    input:
    ------

-        pdf               : a PDF (presumably from the .pdf member of a fastKDE object)
-        axes              : a list of PDF axes (presumably from the .axes member of a fastKDE object)
-        integration_axes  : the axes along which to integrate (default is all). These have the same
-                            ordering as axes in the axes input variable.
-        reverse_axes      : axes along which to reverse the direction of the cumulative calculation
-
+        pdf               : a PDF (presumably from the .pdf member of a
+                            fastKDE object)
+
+        axes              : a list of PDF axes (presumably from the .axes
+                            member of a fastKDE object)
+
+        integration_axes  : the axes along which to integrate (default is
+                            all). These have the same ordering as axes in the
+                            axes input variable.
+
+        reverse_axes      : axes along which to reverse the direction of the
+                            cumulative calculation
+
    output:
    -------

-        returns an array with the same shape as PDF, but with the integral calculated cumulatively
+
+        returns an array with the same shape as PDF, but with the integral
+        calculated cumulatively
+
    """

-    #Set the default value for integration_axes
+    # Set the default value for integration_axes
    if integration_axes is None:
        integration_axes = range(len(np.shape(pdf)))

-    #Set the rank of the pdf
+    # Set the rank of the pdf
    rank = len(np.shape(pdf))

-    #Set the default value for reverse_axes
+    # Set the default value for reverse_axes
    if reverse_axes is None:
        reverse_axes = []

-    #Make sure that axes is an iterable of iterables
+    # Make sure that axes is an iterable of iterables
    try:
        axes[0][0]
-    except:
+    except (IndexError, TypeError):
        axes = [axes]

-    #Make sure that reverse_axes is an iterable
+    # Make sure that reverse_axes is an iterable
    try:
        len(reverse_axes)
-    except:
+    except TypeError:
        reverse_axes = [reverse_axes]

-    #Make sure that integration_axes is an iterable
+    # Make sure that integration_axes is an iterable
    try:
        len(integration_axes)
-    except:
+    except TypeError:
        integration_axes = [integration_axes]

-    #Copy the pdf array
+    # Copy the pdf array
    cpdf = np.array(pdf)

-    #Loop over the axes for which we are calculating the cumulative
+    # Loop over the axes for which we are calculating the cumulative
    for i in integration_axes:
-        #Calculate the grid spacing; assume it is uniform
+        # Calculate the grid spacing; assume it is uniform
        dx = np.diff(axes[i])
        dx = np.concatenate([dx,[dx[-1]]])

-        #Calculate the index of pdf corresponding to axis i
+        # Calculate the index of pdf corresponding to axis i
        iprime = rank - i - 1

-        # broadcast dx to the ~same shape as cpdf
+        # broadcast dx to the ~same shape as cpdf
        broadcast_tuple = len(cpdf.shape)*[np.newaxis]
        broadcast_tuple[iprime] = slice(None,None,None)
        broadcast_tuple = tuple(broadcast_tuple)
        dx = dx[broadcast_tuple]

-        #Determine if we are reversing cumulative orientation
+        # Determine if we are reversing cumulative orientation
        if i in reverse_axes:
-            #Set a slice that reversese only axis i in the PDF
+            # Set a slice that reverses only axis i in the PDF
            reverseSlice = rank*[slice(None,None,None)]
            reverseSlice[iprime] = slice(None,None,-1)
            reverseSlice = tuple(reverseSlice)
-            #Do the cumulative calculation on the reversed PDF (and reverse back to the original orientation)
+
+            # Do the cumulative calculation on the reversed PDF (and reverse
+            # back to the original orientation)
            cpdf = np.cumsum(cpdf[reverseSlice]*dx,axis=iprime)[reverseSlice]
        else:
-            #Do the cumulative calculation
+            # Do the cumulative calculation
            cpdf = np.cumsum(cpdf*dx,axis=iprime)

-    #Return the cumulative PDF
+    # Return the cumulative PDF
    return cpdf
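A minimal usage sketch for this helper (it assumes plot.py is importable as fastkde.plot and that pdf() returns the estimate and its grid):

```python
import numpy as np
from fastkde import fastKDE
from fastkde.plot import cumulative_integral

# A 1-D PDF from fastKDE
x = np.random.normal(size=int(1e5))
pdf, axis = fastKDE.pdf(x)

# Cumulatively integrate the PDF to get an approximate CDF; it should
# rise from ~0 to ~1 across the axis
cdf = cumulative_integral(pdf, axis)

# reverse_axes flips the direction of accumulation, yielding the
# complementary CDF in the 1-D case
ccdf = cumulative_integral(pdf, axis, reverse_axes=0)
```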
@@ -90,24 +102,26 @@ def calculate_probability_contour(pdf,axes,pvals,axis=0):

    input:
    ------

-        pdf   : a conditional PDF from fastKDE
-
-        axes  : a list of axis values returned from fastKDE
+        pdf   : a conditional PDF from fastKDE

-        pvals : the integrated amount of PDF that should be outside a contour of constant PDF (scalar or array-like)
+        axes  : a list of axis values returned from fastKDE

+        pvals : the integrated amount of PDF that should be outside a
+                contour of constant PDF (scalar or array-like)
+
    output:
    -------

-
-        an array of PDF values for which the integral of the PDF outside the PDF value should integrate to pval.
-        Each value in the array corresponds to a value within axes[axis] (nominally, axis should be the variable
-        on which the PDF is conditioned).
+        an array of PDF values for which the integral of the PDF outside the
+        PDF value should integrate to pval.
+
+        Each value in the array corresponds to a value within axes[axis]
+        (nominally, axis should be the variable on which the PDF is
+        conditioned).

-
    """

-    # calculate the dx values
+    # calculate the dx values
    diff_axes = []
    for i in range(len(axes)):
        if i != axis:
@@ -115,40 +129,44 @@ def calculate_probability_contour(pdf,axes,pvals,axis=0):
            dx = np.concatenate((dx,[dx[-1]]))
            diff_axes.append(dx)

-    # create a meshgrid of dx values
+    # create a meshgrid of dx values
    dxmesh = np.meshgrid(*diff_axes)

-    # create the dx product
+    # create the dx product
    dxgrid = np.prod(dxmesh,axis=0)

-    # broadcast the dxgrid to the shape of the PDF
+    # broadcast the dxgrid to the shape of the PDF
    broadcast_shape = len(axes)*[slice(None,None,None)]
    broadcast_shape[axis] = np.newaxis
    dxgrid = dxgrid[tuple(broadcast_shape[::-1])]

-    # multiply the pdf by dx
-    pdf_dx = dxgrid*pdf
+    # (pdf*dxgrid is formed inline below, where it is needed)

-    # reshape the PDF to unravel the summation dimensions
+    # reshape the PDF to unravel the summation dimensions
    new_shape = [-1,len(axes[axis])]
    pdf_unraveled = np.reshape(pdf,new_shape)

-    # get the indices to sort by probability
+    # get the indices to sort by probability
    isort_inds = np.ma.argsort(pdf_unraveled,axis=0)

-    # generate an index array suitable for use in a numpy array
-    isort = ([ np.array(i) for i in zip(*isort_inds.T)], np.arange(len(axes[axis]),dtype=int)[np.newaxis,:])
-    # sort by probability
+    # generate an index array suitable for use in a numpy array
+    isort = (\
+        [ np.array(i) for i in zip(*isort_inds.T) ], \
+        np.arange(len(axes[axis]),dtype=int)[np.newaxis,:]
+    )
+
+    # sort by probability
    pdf_dx_unraveled = np.reshape(pdf*dxgrid,new_shape)[isort]

-    # take the cumulative sum
+    # take the cumulative sum
    pdf_csum = np.ma.cumsum(pdf_dx_unraveled,axis=0)

-    # find the location along each axis nearest to the given PDF value
+    # find the location along each axis nearest to the given PDF value
    ind_closest = np.ma.argmin(abs(pdf_csum-pvals),axis=0)
    ind_closest = (ind_closest, np.arange(len(axes[axis]),dtype=int))

-    # get the probability value at that location
+    # get the probability value at that location
    pdf_vals = np.reshape(pdf,new_shape)[isort][ind_closest]

-    # return the value
+    # return the value
    return pdf_vals
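To make the indexing above concrete, a hedged sketch of the call pattern pair_plot() uses below (the trailing dummy axis plus the length-1 dummy grid treat the whole joint PDF as a single conditional slice):

```python
import numpy as np
from fastkde import fastKDE
from fastkde.plot import calculate_probability_contour

# A joint 2-D PDF
x = np.random.normal(size=int(1e5))
y = np.random.normal(size=int(1e5))
pdf, axes = fastKDE.pdf(x, y)

# Ask for the PDF value whose exterior integrates to ~50% of the
# probability mass, mirroring the pair_plot() call
level = calculate_probability_contour(
    pdf[..., np.newaxis], [[0], axes[0], axes[1]], 0.5
).squeeze()
```

Contouring the PDF at this level then draws the boundary outside of which roughly half of the probability mass lies.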
def pair_plot(var_list, \
@@ -166,32 +184,41 @@ def pair_plot(var_list, \

    input:
    ------

-        var_list      : a list of input variables (all must be vectors of the same length) with at least two variables
-
+        var_list      : a list of input variables (all must be vectors of the
+                        same length) with at least two variables
+
        conditional   : flags whether to plot the bivariate PDFs as conditionals.

        var_names     : a list of strings corresponding to variable names

        fig_size      : The size of the figure (if none, it is set to 10,10)

-        draw_scatter  : flag whether to draw scatter plots on top of the PDFs. If None, this turns on/off
-                        automatically, depending on the number of points (1000 is the upper cutoff)
-
+        draw_scatter  : flag whether to draw scatter plots on top of the
+                        PDFs. If None, this turns on/off automatically,
+                        depending on the number of points (1000 is the upper
+                        cutoff)
+
        axis_limits   : limits for all variables (applies to all axes)

-        auto_show     : flags whether to automatically call PP.show() (no values are returned if this flag is on)
-
-        cdf_levels    : a set of CDF levels (betwen 0 and 1 exclusive) for which to plot PDF contours
-                        (i.e., a value of 0.5 means that 50% of the PDF falls outside the corresponding contour
-                        of constant PDF that will be plotted). If None is given, matplotlib will choose PDF levels.
-
-        log_scale     : flags whether to use the logAxes argument for fastKDE. If a single bool value, it applies
-                        to all variables; if a list, each item in the list corresponds to a variable
-
-        axis_lengths  : The length of axis variables. If axis_lengths is None, then fastKDE automatically sets
-                        axis lengths
+        auto_show     : flags whether to automatically call PP.show() (no
+                        values are returned if this flag is on)
+
+        cdf_levels    : a set of CDF levels (between 0 and 1 exclusive) for
+                        which to plot PDF contours (i.e., a value of 0.5
+                        means that 50% of the PDF falls outside the
+                        corresponding contour of constant PDF that will be
+                        plotted). If None is given, matplotlib will choose
+                        PDF levels.
+
+        log_scale     : flags whether to use the logAxes argument for
+                        fastKDE. If a single bool value, it applies to all
+                        variables; if a list, each item in the list
+                        corresponds to a variable
+
+        axis_lengths  : The length of axis variables. If axis_lengths is
+                        None, then fastKDE automatically sets axis lengths
+

    output:
    -------

        fig, axs        : a matplotlib figure and axis matrix object

-        marginal_vals   : the values at which the PDFs (both marginal and bivariate) are defined
+        marginal_vals   : the values at which the PDFs (both marginal and
+                          bivariate) are defined

        marginal_pdfs   : the marginal PDFs of the input variables

        bivariate_pdfs  : the bivariate PDFs

    """

-    # convert the list to an array
+    # convert the list to an array
    try:
        input_var_array = np.array(var_list)
-    except:
-        raise ValueError("var_list could not be converted to a numpy array; are all vectors of the same length?")
+        assert len(input_var_array) != 0, "var_list must be a non-empty list"
+    except (TypeError, AssertionError):
+        raise ValueError(
+            "var_list could not be converted to a numpy array; are all vectors "
+            "of the same length?"
+        )

-    # get the array rank
+    # get the array rank
    rank = len(input_var_array.shape)

    if rank != 2:
        raise ValueError("input variables in var_list must be vectors")

-    # get the array shape
+    # get the array shape
    num_vars = input_var_array.shape[0]

    if num_vars < 2:
-        raise ValueError("pair_plot requires at least two variables in var_list")
+        raise ValueError(
+            "pair_plot requires at least two variables in var_list"
+        )

    if log_scale is False or log_scale is True:
        log_scale = num_vars*[log_scale]

@@ -238,45 +272,66 @@ def pair_plot(var_list, \
    else:
        draw_scatter = True

-    # calculate the marginals and define limits
+    # calculate the marginals and define limits
    for n in range(num_vars):
-        # calculate PDFs
-        marginal_pdfs[n], marginal_vals[n] = fastKDE.pdf(input_var_array[n,:],logAxes = log_scale[n])
+        # calculate PDFs
+        marginal_pdfs[n], marginal_vals[n] = \
+            fastKDE.pdf(\
+                input_var_array[n,:],
+                logAxes = log_scale[n]
+            )

-        # define axis limits
+        # define axis limits
        if axis_limits is None:
-            var_limits[n] = [input_var_array[n,:].min(),input_var_array[n,:].max()]
+            var_limits[n] = [\
+                input_var_array[n,:].min(),input_var_array[n,:].max()
+            ]
        else:
            var_limits[n] = axis_limits

-    #bivariate_pdfs = num_vars*[num_vars*[None]]
    bivariate_pdfs = {}

-    # calculate the bivariate PDFs
+    # calculate the bivariate PDFs
    for n1 in range(num_vars):
        for n2 in range(n1, num_vars):
            if n1 != n2:
                if not conditional:
-                    bivariate_pdfs[n1,n2], _ = fastKDE.pdf(input_var_array[n1,:], input_var_array[n2,:],logAxes = [log_scale[n1],log_scale[n2]])
+                    bivariate_pdfs[n1,n2], _ = \
+                        fastKDE.pdf(\
+                            input_var_array[n1,:],
+                            input_var_array[n2,:],
+                            logAxes = [log_scale[n1],log_scale[n2]]
+                        )
                else:
-                    bivariate_pdfs[n1,n2], _ = fastKDE.conditional(input_var_array[n2,:], input_var_array[n1,:],logAxes = [log_scale[n1],log_scale[n2]])
-                    bivariate_pdfs[n2,n1], _ = fastKDE.conditional(input_var_array[n1,:], input_var_array[n2,:],logAxes = [log_scale[n2],log_scale[n1]])
+                    bivariate_pdfs[n1,n2], _ = \
+                        fastKDE.conditional(
+                            input_var_array[n2,:],
+                            input_var_array[n1,:],
+                            logAxes = [log_scale[n1],log_scale[n2]]
+                        )
+
+                    bivariate_pdfs[n2,n1], _ = \
+                        fastKDE.conditional(
+                            input_var_array[n1,:],
+                            input_var_array[n2,:],
+                            logAxes = [log_scale[n2],log_scale[n1]]
+                        )

-    # set variable labels if needed
+    # set variable labels if needed
    if var_names is None:
        var_names = ['var {}'.format(n) for n in range(num_vars)]

-    # set the figure size if needed
+    # set the figure size if needed
    if fig_size is None:
        fig_size = (10,10)

-    # create the figure
+    # create the figure
    fig, axs = PP.subplots(num_vars,num_vars,figsize=fig_size)

-    # plot the marginals
+    # plot the marginals
    for n in range(num_vars):
        axs[n,n].plot(marginal_vals[n],marginal_pdfs[n])
        axs[n,n].set_xlim(var_limits[n])
@@ -290,31 +345,49 @@ def pair_plot(var_list, \

            axs[n,n].set_xscale('log')

-    # plot the bivariate PDFs
+    # plot the bivariate PDFs
    for n1 in range(num_vars):
        for n2 in range(n1, num_vars):
            if n1 != n2:
                if not conditional:
-                    # get the PDF levels
+                    # get the PDF levels
                    if cdf_levels is None:
                        levels = None
                    else:
-                        levels = np.sort(np.array([calculate_probability_contour( bivariate_pdfs[n1,n2][...,np.newaxis],\
-                                                                                  [[0],marginal_vals[n1],marginal_vals[n2]],\
-                                                                                  c)
-                                         for c in cdf_levels])).squeeze()
+                        levels = np.sort(\
+                            np.array([
+                                calculate_probability_contour(\
+                                    bivariate_pdfs[n1,n2][...,np.newaxis],
+                                    [[0], marginal_vals[n1],marginal_vals[n2]],
+                                    c,
+                                )
+                                for c in cdf_levels])).squeeze()

-                    # plot the PDF
+                    # plot the PDF
                    pdf_to_plot = bivariate_pdfs[n1,n2]
                    if any([log_scale[n1],log_scale[n2]]):
-                        pdf_to_plot = np.ma.log(np.ma.masked_less_equal(bivariate_pdfs[n1,n2],0))
+                        pdf_to_plot = np.ma.log(\
+                            np.ma.masked_less_equal(bivariate_pdfs[n1,n2],0)
+                        )
                        levels = np.log(levels)
                else:
                    levels = cdf_levels
-                    pdf_to_plot = cumulative_integral(bivariate_pdfs[n1,n2],[marginal_vals[n1],marginal_vals[n2]],integration_axes=1)
-                    # mask CDF parts that don't normalize to something close to 1
-                    pdf_to_plot = np.ma.masked_where(np.logical_not(np.isclose(np.ma.ones(pdf_to_plot.shape)*pdf_to_plot[-1,:][np.newaxis,:],1.0)),pdf_to_plot)
+                    pdf_to_plot = cumulative_integral(
+                        bivariate_pdfs[n1,n2],
+                        [marginal_vals[n1],marginal_vals[n2]],
+                        integration_axes=1
+                    )
+                    # mask CDF parts that don't normalize to something close to 1
+                    pdf_to_plot = np.ma.masked_where(\
+                        np.logical_not(\
+                            np.isclose(\
+                                np.ma.ones(\
+                                    pdf_to_plot.shape)*\
+                                    pdf_to_plot[-1,:][np.newaxis,:],
+                                1.0)),
+                        pdf_to_plot
+                    )

                axs[n2,n1].contour(marginal_vals[n1],marginal_vals[n2],pdf_to_plot,levels=levels)

@@ -324,23 +397,43 @@ def pair_plot(var_list, \
                if log_scale[n2]:
                    axs[n2,n1].set_yscale('log')

-                # plot the scatter
+                # plot the scatter
                if draw_scatter:
-                    axs[n2,n1].plot(input_var_array[n1,:],input_var_array[n2,:],'k.',alpha = 0.3)
+                    axs[n2,n1].plot(\
+                        input_var_array[n1,:],
+                        input_var_array[n2,:],
+                        'k.',
+                        alpha = 0.3
+                    )

-                # set axis limits
+                # set axis limits
                axs[n2,n1].set_xlim(var_limits[n1])
                axs[n2,n1].set_ylim(var_limits[n2])

                if not conditional:
-                    # turn of axes for the other part of the triangle
+                    # turn off axes for the other part of the triangle
                    axs[n1,n2].axis('off')
                else:
                    levels = cdf_levels
-                    pdf_to_plot = cumulative_integral(bivariate_pdfs[n2,n1],[marginal_vals[n2],marginal_vals[n1]],integration_axes=1)
-                    # mask CDF parts that don't normalize to something close to 1
-                    pdf_to_plot = np.ma.masked_where(np.logical_not(np.isclose(np.ma.ones(pdf_to_plot.shape)*pdf_to_plot[-1,:][np.newaxis,:],1.0)),pdf_to_plot)
+                    pdf_to_plot = \
+                        cumulative_integral(\
+                            bivariate_pdfs[n2,n1],
+                            [marginal_vals[n2],marginal_vals[n1]],integration_axes=1
+                        )
+
+                    # mask CDF parts that don't normalize to something close to
+                    # 1
+                    pdf_to_plot = \
+                        np.ma.masked_where(\
+                            np.logical_not(\
+                                np.isclose(\
+                                    np.ma.ones(pdf_to_plot.shape)*\
+                                    pdf_to_plot[-1,:][np.newaxis,:],
+                                    1.0)
+                            ),
+                            pdf_to_plot
+                        )

                    axs[n1,n2].contour(marginal_vals[n2],marginal_vals[n1],pdf_to_plot,levels=levels)

@@ -349,11 +442,16 @@ def pair_plot(var_list, \
                    if log_scale[n1]:
                        axs[n1,n2].set_yscale('log')

-                    # plot the scatter
+                    # plot the scatter
                    if draw_scatter:
-                        axs[n1,n2].plot(input_var_array[n2,:],input_var_array[n1,:],'k.',alpha = 0.3)
+                        axs[n1,n2].plot(
+                            input_var_array[n2,:],
+                            input_var_array[n1,:],
+                            'k.',
+                            alpha = 0.3
+                        )

-                    # set axis limits
+                    # set axis limits
                    axs[n1,n2].set_xlim(var_limits[n2])
                    axs[n1,n2].set_ylim(var_limits[n1])

-    # turn off axis limits and set axis labels
+    # turn off axis limits and set axis labels
    for n1 in range(num_vars):
        for n2 in range(n1, num_vars):
            if n1 != n2: