From 206e37a19275865e75918c3ade732f2f2ffe3455 Mon Sep 17 00:00:00 2001
From: Pablo Villanueva Domingo
Date: Tue, 26 Apr 2022 02:04:47 +0200
Subject: [PATCH] general update pre paper

---
 .gitignore                  |   3 +-
 .gitignore~                 |   5 +
 README.md                   |   9 +-
 Source/constants.py         |   7 +-
 Source/load_data.py         | 186 +++++++++-----------------
 Source/metalayer.py         |  71 +++++-------
 Source/networks.py          | 208 ------------------------------------
 Source/plotting.py          | 141 +++++-----------------
 Source/training.py          |  39 +------
 crosstest.py                |   2 +-
 hyperparameters.py          |  33 +++++-
 hyperparams_optimization.py |  14 +--
 main.py                     |  10 +-
 ps_test.py                  | 157 +++++++++----------------
 visualize_graphs.py         |  20 ++--
 15 files changed, 241 insertions(+), 664 deletions(-)
 create mode 100644 .gitignore~
 delete mode 100644 Source/networks.py

diff --git a/.gitignore b/.gitignore
index 37afa5d..814be2e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,4 +2,5 @@
 batchfile.slurm
 crosstest_batchfile.slurm
 tunning_batchfile.slurm
-__pycache__/
\ No newline at end of file
+__pycache__/
+plotresults.py
diff --git a/.gitignore~ b/.gitignore~
new file mode 100644
index 0000000..37afa5d
--- /dev/null
+++ b/.gitignore~
@@ -0,0 +1,5 @@
+.DS_Store
+batchfile.slurm
+crosstest_batchfile.slurm
+tunning_batchfile.slurm
+__pycache__/
\ No newline at end of file
diff --git a/README.md b/README.md
index 57ecfee..5145bb5 100644
--- a/README.md
+++ b/README.md
@@ -11,7 +11,7 @@ See the paper [arXiv:2204.xxxxx](https://arxiv.org/abs/2204.xxxxx) for more details.
 
 
-## Scripts
+## Codes
 
 Here is a brief description of the codes included:
 
@@ -51,13 +51,18 @@ The libraries required for training the models and computing some statistics are:
 * `scipy`
 * `sklearn`
 * `optuna` (only for optimization in `hyperparams_optimization.py`)
+* [`Pylians`](https://pylians3.readthedocs.io/en/master/) (only for computing power spectra in `ps_test.py`)
 
 ## Usage
 
+The codes implemented here are designed to train GNNs for two tasks. The desired task is chosen in `hyperparameters.py` with the `outmode` flag (see the sketch after this list):
+1. Infer cosmological parameters from galaxy catalogues. Set `outmode = "cosmo"`.
+2. Predict the power spectrum from galaxy catalogues. Set `outmode = "ps"`.
+
 Here is some advice on how to employ the scripts described above:
 
 1. To perform a search of the optimal hyperparameters, run `hyperparams_optimization.py`.
-2. To train a model with a given set of parameters defined in `hyperparameters.py`, run `main.py`. The hyperparameters currently present in `hyperparameters.py` correspond to the optimal values for each suite when all galactic features are employed (see the paper).
+2. To train a model with a given set of parameters defined in `hyperparameters.py`, run `main.py`. The hyperparameters currently present in `hyperparameters.py` correspond to the optimal values for each suite when all galactic features are employed (see the paper). Modify them according to the task.
 3. Once a model is trained, run `crosstest.py` to test it on the training simulation suite and to cross-test it on the other suite included in CAMELS (IllustrisTNG and SIMBA).
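For illustration, here is a minimal sketch of the relevant settings (a hypothetical excerpt, not the literal contents of `hyperparameters.py`; the attribute names follow those used in `Source/load_data.py`):

```python
# Hypothetical sketch of the task-related flags in hyperparameters.py
outmode = "cosmo"          # "cosmo": infer cosmological parameters; "ps": predict the power spectrum
simsuite = "IllustrisTNG"  # training suite; crosstest.py then tests on the other one (SIMBA)
simset = "LH"              # "LH": latin-hypercube set; "CV": fiducial parameters, different seeds
only_positions = False     # if True, use only galaxy positions as input, discarding galactic features
```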
diff --git a/Source/constants.py b/Source/constants.py
index 9ad75b2..cfb6cbf 100644
--- a/Source/constants.py
+++ b/Source/constants.py
@@ -1,7 +1,7 @@
 #----------------------------------------------------------------------
 # List of constants and some common functions
 # Author: Pablo Villanueva Domingo
-# Last update: 10/11/21
+# Last update: 4/22
 #----------------------------------------------------------------------
 
 import numpy as np
@@ -39,15 +39,12 @@
 # Batch size
 batch_size = 25
 
-
 # Number of k bins in the power spectrum
 ps_size = 79
 
 #--- FUNCTIONS ---#
 
-
 # Choose color depending on the CAMELS simulation suite
 def colorsuite(suite):
     if suite=="IllustrisTNG":   return "purple"
-    elif suite=="SIMBA":        return "deepskyblue"
+    elif suite=="SIMBA":        return "dodgerblue"
diff --git a/Source/load_data.py b/Source/load_data.py
index 6d667cf..f272989 100644
--- a/Source/load_data.py
+++ b/Source/load_data.py
@@ -1,3 +1,9 @@
+#----------------------------------------------------
+# Routine for loading the CAMELS galaxy catalogues
+# Author: Pablo Villanueva Domingo
+# Last update: 4/22
+#----------------------------------------------------
+
 import h5py
 from torch_geometric.data import Data, DataLoader
 from Source.constants import *
@@ -6,6 +12,7 @@
 
 Nstar_th = 20   # Minimum number of stellar particles required to consider a galaxy
 
+# Normalize the CAMELS parameters
 def normalize_params(params):
 
     minimum = np.array([0.1, 0.6, 0.25, 0.25, 0.5, 0.5])
@@ -13,13 +20,17 @@ def normalize_params(params):
     params = (params - minimum)/(maximum - minimum)
     return params
 
+# Normalize the power spectrum
 def normalize_ps(ps):
     mean, std = ps.mean(axis=0), ps.std(axis=0)
     normps = (ps - mean)/std
     return normps
 
+# Compute the KDTree and get the edges and edge features
 def get_edges(pos, r_link, use_loops):
 
+    # 1. Get edges
+
     # Create the KDTree and look for pairs within a distance r_link
     # Box size normalized to 1
     kd_tree = SS.KDTree(pos, leafsize=16, boxsize=1.0001)
@@ -37,35 +48,36 @@
     edge_index = edge_index.reshape((2,-1))
     num_pairs = edge_index.shape[1]
 
-    # Edge attributes
+    # 2. Get edge attributes
+
     row, col = edge_index
     diff = pos[row]-pos[col]
 
-    # Correct boundaries in distances
+    # Take into account periodic boundary conditions, correcting the distances
     for i, pos_i in enumerate(diff):
-        #outbound=False
        for j, coord in enumerate(pos_i):
            if coord > r_link:
-                #outbound=True
                diff[i,j] -= 1.  # Box size normalized to 1
            elif -coord > r_link:
-                #outbound=True
                diff[i,j] += 1.  # Box size normalized to 1
-        #if outbound: numbounds+=1
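    # Equivalent vectorized form (an illustrative sketch, not part of the original
    # code): every pair returned by the KDTree lies within r_link across the
    # periodic box, so any raw component exceeding r_link must be a wraparound,
    # and the whole loop above could be replaced by
    #   diff = np.where(diff > r_link, diff - 1., diff)
    #   diff = np.where(diff < -r_link, diff + 1., diff)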
 
+    # Get translationally and rotationally invariant features
+
+    # Distance
     dist = np.linalg.norm(diff, axis=1)
+
+    # Centroid of the galaxy catalogue
     centroid = np.mean(pos,axis=0)
+
+    # Unit vectors of the node, its neighbor, and their difference vector
     unitrow = (pos[row]-centroid)/np.linalg.norm((pos[row]-centroid), axis=1).reshape(-1,1)
     unitcol = (pos[col]-centroid)/np.linalg.norm((pos[col]-centroid), axis=1).reshape(-1,1)
     unitdiff = diff/dist.reshape(-1,1)
+
+    # Dot products between the unit vectors
     cos1 = np.array([np.dot(unitrow[i,:].T,unitcol[i,:]) for i in range(num_pairs)])
     cos2 = np.array([np.dot(unitrow[i,:].T,unitdiff[i,:]) for i in range(num_pairs)])
-
-    #print(edge_index.shape, cos1.shape, cos2.shape, dist.shape)
 
+    # Normalize the distance by the linking radius
     dist /= r_link
 
-    edge_attr = np.concatenate([dist.reshape(-1,1), cos1.reshape(-1,1), cos2.reshape(-1,1)], axis=1)
-    #print(pos.shape, edge_index.shape, edge_attr.shape)
+    # Concatenate to get all edge attributes
+    edge_attr = np.concatenate([dist.reshape(-1,1), cos1.reshape(-1,1), cos2.reshape(-1,1)], axis=1)
 
     # Add loops
     if use_loops:
         edge_attr = np.append(edge_attr, atrloops, 0)
         edge_index = edge_index.astype(int)
 
-    #print(pos.shape, edge_index.shape, edge_attr.shape)
-
-
-
-    #print(edge_index.shape, edge_attr.shape)
-
-
-    """
-    diff = (pos[row]-pos[col])/r_link
-
-    #print(diff.shape, edge_index.shape, pos.shape)
-    #numbounds = 0
-
-    # Correct boundaries in distances
-    for i, pos_i in enumerate(diff):
-        #outbound=False
-        for j, coord in enumerate(pos_i):
-            if coord > 1.:
-                #outbound=True
-                diff[i,j] -= 1./r_link  # Boxsize normalize to 1
-            elif -coord > 1.:
-                #outbound=True
-                diff[i,j] += 1./r_link  # Boxsize normalize to 1
-        #if outbound: numbounds+=1
-
-    edge_attr = np.concatenate([diff, np.linalg.norm(diff, axis=1, keepdims=True)], axis=1)
-    #print(edge_attr[:,3].min(), edge_attr[:,3].max())
-    #print(diff.shape[0], numbounds)
-    """
 
     return edge_index, edge_attr
 
-######################################################################################
-# This routine reads the galaxies from a simulation and
-# root ------> folder containing all simulations with their galaxy catalogues
-# sim -------> 'IllustrisTNG' or 'SIMBA'
-# suite -----> 'LH' or 'CV'
-# number ----> number of the simulation
-# snapnum ---> snapshot number (choose depending of the desired redshift)
-# BoxSize ---> size of the simulation box in Mpc/h
-# Nstar_th -----> galaxies need to contain at least Nstar_th stars
-# k ---------> number of neighbors
-# param_file -> file with the value of the cosmological + astrophysical parameters
-def sim_graph(simnumber,param_file,hparams):
+# Routine to create a cosmic graph from a galaxy catalogue
+# simnumber: number of the simulation
+# param_file: file with the values of the cosmological + astrophysical parameters
+# hparams: hyperparameters class
+def sim_graph(simnumber, param_file, hparams):
+
+    # Get some hyperparameters
     simsuite,simset,r_link,only_positions,outmode,pred_params = hparams.simsuite,hparams.simset,hparams.r_link,hparams.only_positions,hparams.outmode,hparams.pred_params
 
-    # get the name of the galaxy catalogue
+    # Name of the galaxy catalogue
     simpath = simpathroot + simsuite + "/"+simset+"_"
     catalogue = simpath + str(simnumber)+"/fof_subhalo_tab_0"+hparams.snap+".hdf5"
 
-    # read the catalogue
+    # Read the catalogue
     f = h5py.File(catalogue, 'r')
     pos = f['/Subhalo/SubhaloPos'][:]/boxsize
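    # Illustrative note (not in the original source): dividing by the box size puts
    # all coordinates in [0,1], consistent with the unit box assumed by the KDTree
    # (boxsize=1.0001) and by the periodic-boundary corrections in get_edges().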
    Mstar = f['/Subhalo/SubhaloMassType'][:,4]  # Msun/h
-    SubhaloVel = f["Subhalo/SubhaloVel"][:]
    Rstar = f["Subhalo/SubhaloHalfmassRadType"][:,4]
    Metal = f["Subhalo/SubhaloStarMetallicity"][:]
    Vmax = f["Subhalo/SubhaloVmax"][:]
    Nstar = f['/Subhalo/SubhaloLenType'][:,4]   # Number of stars
    f.close()

-    # some simulations are slightly outside the box
+    # Some galaxies lie slightly outside the box; correct their positions
    pos[np.where(pos<0.0)]+=1.0
    pos[np.where(pos>1.0)]-=1.0

-    # select only galaxies with more than 10 star particles
+    # Select only galaxies with more than Nstar_th star particles
    indexes = np.where(Nstar>Nstar_th)[0]
    pos     = pos[indexes]
    Mstar   = Mstar[indexes]
-    SubhaloVel = SubhaloVel[indexes]
    Rstar   = Rstar[indexes]
    Metal   = Metal[indexes]
    Vmax    = Vmax[indexes]

    # Get the output to be predicted by the GNN, either the cosmo parameters or the power spectrum
    if outmode=="cosmo":

-        # read the value of the cosmological & astrophysical parameters
+        # Read the value of the cosmological & astrophysical parameters
        paramsfile = np.loadtxt(param_file, dtype=str)
        params = np.array(paramsfile[simnumber,1:-1],dtype=np.float32)
        params = normalize_params(params)
        params = params[:pred_params]   # Keep only the first pred_params parameters
        y = np.reshape(params, (1,params.shape[0]))

+    # Read the power spectrum
    elif outmode=="ps":

        ps = np.load(param_file)
@@ -170,42 +146,30 @@
        #ps = normalize_ps(ps)
        y = np.reshape(ps, (1,ps_size))

-
-    """
-    # compute the number of pairs
-    nodes = pos.shape[0]
-    u = np.zeros((1,2), dtype=np.float32)
-    u[0,0] = np.log10(np.sum(Mstar))
-    u[0,1] = np.log10(nodes)
-    """
+    # Number of galaxies as a global feature
    u = np.log10(pos.shape[0]).reshape(1,1)

    Mstar = np.log10(1.+ Mstar)
-    #SubhaloVel = np.log10(1.+SubhaloVel)
-    SubhaloVel/=100.
    Rstar = np.log10(1.+ Rstar)
    Metal = np.log10(1.+ Metal)
    Vmax = np.log10(1.+ Vmax)
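    # Illustrative note (not in the original source): the log10(1+x) transformation
    # above compresses the large dynamic range of the galactic features while
    # mapping zero-valued entries to zero.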
+
+    # Node features
    tab = np.column_stack((Mstar, Rstar, Metal, Vmax))
-    #tab = Vmax.reshape(-1,1)
+    #tab = Vmax.reshape(-1,1)   # For using only Vmax
+    x = torch.tensor(tab, dtype=torch.float32)

+    # Use self-loops only if node features are considered
    if only_positions:
-        #u = np.zeros((1,2), dtype=np.float32) # not used
-        tab = np.zeros_like(pos[:,:1])   # not really used
+        tab = np.zeros_like(pos[:,:1])   # Node features not really used
        use_loops = False
    else:
        use_loops = True#"""

-    #use_loops = False
-
-    x = torch.tensor(tab, dtype=torch.float32)
-
-    #use_loops = False
+    # Get edges and edge features
    edge_index, edge_attr = get_edges(pos, r_link, use_loops)
-    #edge_index = get_edges(pos, r_link)
-    #edge_index = None

-    # get the graph
+    # Construct the graph
    graph = Data(x=x,
                 y=torch.tensor(y, dtype=torch.float32),
                 u=torch.tensor(u, dtype=torch.float32),
@@ -213,54 +177,8 @@
                 edge_index=torch.tensor(edge_index, dtype=torch.long),
                 edge_attr=torch.tensor(edge_attr, dtype=torch.float32))

    return graph

-######################################################################################
-"""
-######################################################################################
-# This routine creates the dataset for the considered mode
-# mode -------------> 'train', 'valid', 'test' or 'all'
-# seed -------------> random seed to split simulations among train/valid/test
-# sims -------------> total number of simulations
-# root -------------> folder containing all simulations with their galaxy catalogues
-# sim --------------> 'IllustrisTNG' or 'SIMBA'
-# suite ------------> 'LH' or 'CV'
-# number -----------> number of the simulation
-# snapnum ----------> snapshot number (choose depending of the desired redshift)
-# BoxSize ----------> size of the simulation box in Mpc/h
-# Nstar_th --> galaxies need to contain at least Nstar_th stars
-# k ----------------> number of neighbors
-# param_file -------> file with the value of the cosmo & astro parameters
-# batch_size -------> batch size
-# num_workers ------> number of workers to load the data
-# shuffle ----------> whether randomly shuffle the data in the data loader
-def create_dataset(mode, seed, sims, root, sim, suite, snapnum, BoxSize,
-                   Nstar_th, k, param_file, batch_size, num_workers=1,
-                   shuffle=True):
-
-
-
-    # get the offset and size of the considered mode
-    if   mode=='train':  offset, size = int(0.0*sims), int(0.8*sims)
-    elif mode=='valid':  offset, size = int(0.8*sims), int(0.1*sims)
-    elif mode=='test':   offset, size = int(0.9*sims), int(0.1*sims)
-    elif mode=='all':    offset, size = int(0.0*sims), int(1.0*sims)
-    else:                raise Exception('wrong mode!')
-
-    # randomly shuffle the simulations. Instead of 0 1 2 3...999 have a
-    # random permutation. E.g. 5 9 0 29...342
-    np.random.seed(seed)
-    numbers = np.arange(sims) #shuffle sims not maps
-    np.random.shuffle(numbers)
-    numbers = numbers[offset:offset+size] #select indexes of mode
-
-    # get the dataset
-    dataset = []
-    for i in numbers:
-        dataset.append(sim_graph(root,sim,suite,i,snapnum,BoxSize,
-                                 Nstar_th,k,param_file))
-    return DataLoader(dataset, batch_size=batch_size, shuffle=shuffle,
-                      num_workers=num_workers)
-"""
+

# Split training and validation sets
def split_datasets(dataset):
@@ -283,13 +201,9 @@

######################################################################################
# Main routine to load data and create the dataset
-# simsuite: simulation suite, either "IllustrisTNG" or "SIMBA"
-# simset: set of simulations:
-#           CV: Use simulations with fiducial cosmological and astrophysical parameters, but different random seeds (27 simulations total)
-#           LH: Use simulations over latin-hypercube, varying over cosmological and astrophysical parameters, and different random seeds (1000 simulations total)
-# n_sims: number of simulations, maximum 27 for CV and 1000 for LH
def create_dataset(hparams):

+    # Target file depending on the task: inferring cosmo parameters or predicting the power spectrum
    if hparams.outmode == "cosmo":
        param_file = "/projects/QUIJOTE/CAMELS/Sims/CosmoAstroSeed_params_"+hparams.simsuite+".txt"
    elif hparams.outmode == "ps":
@@ -311,8 +225,8 @@

    # Add other snapshots from other redshifts
    # Snapshot redshift
    # 004: z=3, 010: z=2, 014: z=1.5, 018: z=1, 024: z=0.5, 033: z=0
-    for snap in [24,18,14,10]:
-    #for snap in [18,10]:
+    #for snap in [24,18,14,10]:
+    for snap in [18,10]:

        hparams.snap = str(snap)

diff --git a/Source/metalayer.py b/Source/metalayer.py
index d1bb669..fc50497 100644
--- a/Source/metalayer.py
+++ b/Source/metalayer.py
@@ -1,3 +1,9 @@
+#----------------------------------------------------
+# Graph Neural Network architecture
+# Author: Pablo Villanueva Domingo
+# Last update: 4/22
+#----------------------------------------------------
+
 import torch
 import torch.nn.functional as F
 from torch_cluster import knn_graph, radius_graph
@@ -5,11 +11,9 @@
 from torch_geometric.nn import MessagePassing, MetaLayer, LayerNorm
 from torch_scatter import scatter_mean, scatter_sum, scatter_max, scatter_min, scatter_add
 from torch_geometric.nn import global_mean_pool, global_max_pool, global_add_pool
-#import torch.utils.checkpoint
 from Source.constants import *
 
-#use_checkpoints = False
-
+# Model for updating the edge attributes
 class EdgeModel(torch.nn.Module):
     def __init__(self, node_in, node_out, edge_in, edge_out, hid_channels, residuals=True, norm=False):
         super().__init__()
@@ -34,11 +38,11 @@
         out = torch.cat([src, dest, edge_attr], dim=1)
         #out = torch.cat([src, dest, edge_attr, u[batch]], 1)
         out = self.edge_mlp(out)
-        #out = torch.utils.checkpoint.checkpoint_sequential(self.edge_mlp, 2, out)
         if self.residuals:
             out = out + edge_attr
         return out
 
+# Model for updating the node attributes
 class NodeModel(torch.nn.Module):
     def __init__(self, node_in, node_out, edge_in, edge_out, hid_channels, residuals=True, norm=False):
         super().__init__()
@@ -59,24 +63,22 @@
     def forward(self, x, edge_index, edge_attr, u, batch):
         # x: [N, F_x], where N is the number of nodes.
         # edge_index: [2, E] with max entry N - 1.
         # edge_attr: [E, F_e]
         # u: [B, F_u]
         # batch: [N] with max entry B - 1.
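        # Toy illustration of the multipooling below (a sketch, not from the original
        # code): with col = [0, 0, 1], scatter_add sums the messages arriving at each
        # receiving node (node 0 gets edges 0 and 1, node 1 gets edge 2), while
        # scatter_max and scatter_mean take their element-wise maximum and mean.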
+
         row, col = edge_index
-        #out = torch.cat([x[row], edge_attr], dim=1)
-        #out = self.node_mlp_1(out)
         out = edge_attr
+
+        # Multipooling layer
         out1 = scatter_add(out, col, dim=0, dim_size=x.size(0))
         out2 = scatter_max(out, col, dim=0, dim_size=x.size(0))[0]
         out3 = scatter_mean(out, col, dim=0, dim_size=x.size(0))
         out = torch.cat([x, out1, out2, out3, u[batch]], dim=1)
-        #out = torch.cat([x, out], dim=1)
-        #out = self.node_mlp(out)
-        #if use_checkpoints:
-        #    out = torch.utils.checkpoint.checkpoint_sequential(self.node_mlp, 2, out)
-        #else:
+
         out = self.node_mlp(out)
         if self.residuals:
             out = out + x
         return out
 
+# First edge model, for updating the edge attributes when no initial node features are provided
 class EdgeModelIn(torch.nn.Module):
     def __init__(self, node_in, node_out, edge_in, edge_out, hid_channels, norm=False):
         super().__init__()
@@ -97,6 +99,7 @@ def forward(self, src, dest, edge_attr, u, batch):
 
         return out
 
+# First node model, for updating the node attributes when no initial node features are provided
 class NodeModelIn(torch.nn.Module):
     def __init__(self, node_in, node_out, edge_in, edge_out, hid_channels, norm=False):
         super().__init__()
@@ -113,31 +116,30 @@ def __init__(self, node_in, node_out, edge_in, edge_out, hid_channels, norm=Fals
 
     def forward(self, x, edge_index, edge_attr, u, batch):
         row, col = edge_index
-        out = edge_attr
-        #out = scatter_add(out, col, dim=0, dim_size=x.size(0))
+
+        out = edge_attr
+
+        # Multipooling layer
         out1 = scatter_add(out, col, dim=0, dim_size=x.size(0))
         out2 = scatter_max(out, col, dim=0, dim_size=x.size(0))[0]
         out3 = scatter_mean(out, col, dim=0, dim_size=x.size(0))
         out = torch.cat([out1, out2, out3, u[batch]], dim=1)
-        #out = torch.cat([out, u[batch]], dim=1)
         out = self.node_mlp(out)
 
         return out
 
-# Graph Neural Network architecture, based on the Interaction Network (arXiv:1612.00222, arXiv:2002.09405)
+# Graph Neural Network architecture, based on the Graph Network (arXiv:1806.01261)
+# Employing the MetaLayer implementation of PyTorch Geometric
 class GNN(torch.nn.Module):
     def __init__(self, node_features, n_layers, hidden_channels, linkradius, dim_out, only_positions, residuals=True):
         super().__init__()
 
         self.n_layers = n_layers
-        #self.loop = False
         self.link_r = linkradius
         self.dim_out = dim_out
         self.only_positions = only_positions
 
-        # Input node features (0 if only_positions is used)
+        # Number of input node features (0 if only_positions is used)
         node_in = node_features
         # Input edge features: |p_i-p_j|, p_i*p_j, p_i*(p_i-p_j)
         edge_in = 3
@@ -147,7 +149,7 @@
 
         layers = []
 
-        # Encoder layer
+        # Encoder graph block
         # If only positions are used, node features are created from the aggregation of the edge attributes of the neighbors
         if self.only_positions:
             inlayer = MetaLayer(node_model=NodeModelIn(node_in, node_out, edge_in, edge_out, hid_channels),
@@ -163,7 +165,7 @@
             node_in = node_out
             edge_in = edge_out
 
-        # Hidden graph layers
+        # Hidden graph blocks
         for i in range(n_layers-1):
 
             lay = MetaLayer(node_model=NodeModel(node_in, node_out, edge_in, edge_out, hid_channels, residuals=residuals),
@@ -172,6 +174,7 @@
 
         self.layers = ModuleList(layers)
 
+        # Final aggregation layer
         self.outlayer = Sequential(Linear(3*node_out+1, hid_channels),
                                    ReLU(),
                                    Linear(hid_channels, hid_channels),
@@ -180,41 +183,15 @@ def __init__(self, node_features, n_layers, 
hidden_channels, linkradius, dim_out ReLU(), Linear(hid_channels, self.dim_out)) - - def get_graph(self, data): - - """pos = data.x[:,:3] - - # Get edges - edge_index = radius_graph(pos, r=self.link_r, batch=data.batch, loop=self.loop) - #edge_index = data.edge_index - - row, col = edge_index - - # Edge features - diff = (pos[row]-pos[col])/self.link_r - edge_attr = torch.cat([diff, torch.norm(diff, dim=1, keepdim=True)], dim=1)""" - - edge_index, edge_attr = data.edge_index, data.edge_attr - - # Node features - if self.only_positions: - h = torch.zeros_like(data.x[:,0]) - else: - h = data.x - - return h, edge_index, edge_attr - - def forward(self, data): - #def forward(self, x, batch): - #h, edge_index, edge_attr = self.get_graph(data) h, edge_index, edge_attr, u = data.x, data.edge_index, data.edge_attr, data.u + # Message passing layers for layer in self.layers: h, edge_attr, _ = layer(h, edge_index, edge_attr, u, data.batch) + # Multipooling layer addpool = global_add_pool(h, data.batch) meanpool = global_mean_pool(h, data.batch) maxpool = global_max_pool(h, data.batch) diff --git a/Source/networks.py b/Source/networks.py deleted file mode 100644 index 14a6084..0000000 --- a/Source/networks.py +++ /dev/null @@ -1,208 +0,0 @@ -#---------------------------------------------------------------------- -# Definition of the neural network architectures -# Author: Pablo Villanueva Domingo -# Last update: 10/11/21 -#---------------------------------------------------------------------- - -import torch -from torch.nn import Sequential, Linear, ReLU, ModuleList -from torch_geometric.nn import MessagePassing, GCNConv, PPFConv, MetaLayer, EdgeConv -import torch.nn.functional as F -from torch_geometric.nn import global_mean_pool, global_max_pool, global_add_pool -from torch_cluster import knn_graph, radius_graph -from torch_scatter import scatter_mean, scatter_sum, scatter_add, scatter_max, scatter_min -import numpy as np -from Source.constants import * - - -#------------------------------ -# Architectures considered: -# DeepSet -# Metalayer (graph network) -# -#----------------------------- - -#-------------------------------------------- -# Message passing architecture -# See pytorch-geometric documentation for more info -# pytorch-geometric.readthedocs.io/ -#-------------------------------------------- - - -# Node model for the MetaLayer -class NodeModel(torch.nn.Module): - def __init__(self, in_channels, hidden_channels, latent_channels, link_r): - super(NodeModel, self).__init__() - #self.node_mlp_1 = Sequential(Linear(in_channels,hidden_channels), LeakyReLU(0.2), Linear(hidden_channels,hidden_channels),LeakyReLU(0.2), Linear(mid_channels,out_channels)) - #self.node_mlp_2 = Sequential(Linear(303,500), LeakyReLU(0.2), Linear(500,500),LeakyReLU(0.2), Linear(500,1)) - - if only_positions: - inchan = in_channels + 3 - secchan = 3*latent_channels+in_channels - else: - inchan = (in_channels-3)*2+3 - secchan = 3*latent_channels+2+in_channels-3 - - - self.mlp1 = Sequential(Linear(inchan, hidden_channels), - ReLU(), - Linear(hidden_channels, hidden_channels), - ReLU(), - Linear(hidden_channels, latent_channels)) - - self.mlp2 = Sequential(Linear(secchan, hidden_channels), - ReLU(), - Linear(hidden_channels, hidden_channels), - ReLU(), - Linear(hidden_channels, latent_channels)) - - self.link_r = link_r - - def forward(self, x, edge_index, edge_attr, u, batch): - # x: [N, F_x], where N is the number of nodes. - # edge_index: [2, E] with max entry N - 1. 
- # edge_attr: [E, F_e] - # u: [B, F_u] - # batch: [N] with max entry B - 1. - row, col = edge_index - #x_node, x_neigh = x[row], x[col] - - #edge_attr = torch.sqrt(torch.sum((x[row,:3]-x[col,:3])**2.,axis=1)) - #edge_attr = edge_attr.view(-1,1) - edge_attr = x[row,:3]-x[col,:3] - - # correct distance for boundaries - for coord in range(3): - edge_attr[edge_attr[:,coord]>self.link_r,coord]-=1. - edge_attr[-edge_attr[:,coord]>self.link_r,coord]+=1. - - # define interaction tensor; every pair contains features from input and - # output node together with - if only_positions: - out = torch.cat([x[row], edge_attr], dim=1) - else: - out = torch.cat([x[row,3:], x[col,3:], edge_attr], dim=1) - - # take interaction feature tensor and embedd it into another tensor - #out = self.node_mlp_1(out) - out = self.mlp1(out) - - # compute the mean,sum and max of each embed feature tensor for each node - out1 = scatter_mean(out, col, dim=0, dim_size=x.size(0)) - out2 = scatter_max(out, col, dim=0, dim_size=x.size(0))[0] - out3 = scatter_add(out, col, dim=0, dim_size=x.size(0)) - #out3 = scatter_min(out, col, dim=0, dim_size=x.size(0))[0] - - # every node contains a feature tensor with the pooling of the messages from - # neighbors, its own state, and a global feature - if only_positions: - out = torch.cat([x, out1, out2, out3], dim=1) - else: - out = torch.cat([x[:,3:], out1, out2, out3, u[batch]], dim=1) - #print("node post", out.shape) - - out = self.mlp2(out) - - # In this way, it is traslational equivariant - out = torch.cat([x[:,:3], out], dim=1) - - return out - - - -#-------------------------------------------- -# General Graph Neural Network architecture -#-------------------------------------------- -class ModelGNN(torch.nn.Module): - def __init__(self, use_model, node_features, n_layers, link_r, hidden_channels=300, latent_channels=100, loop=True): - super(ModelGNN, self).__init__() - - # Graph layers - layers = [] - in_channels = node_features - for i in range(n_layers): - - # Choose the model - if use_model=="DeepSet": - lay = Sequential( - Linear(in_channels, hidden_channels), - ReLU(), - Linear(hidden_channels, hidden_channels), - ReLU(), - Linear(hidden_channels, latent_channels)) - - - elif use_model=="MetaNet": - lay = MetaLayer(node_model=NodeModel(in_channels, hidden_channels, latent_channels, link_r)) - - else: - print("Model not known...") - - layers.append(lay) - in_channels = latent_channels+3 - - - self.layers = ModuleList(layers) - - - if only_positions: - lin_in = (in_channels-3)*3 - else: - lin_in = (in_channels-3)*3+2 - - self.lin = Sequential(Linear(lin_in, 2*latent_channels), - ReLU(), - Linear(2*latent_channels, latent_channels), - ReLU(), - Linear(latent_channels, latent_channels), - ReLU(), - Linear(latent_channels, 4)) - - self.link_r = link_r - self.pooled = 0. 
- self.loop = loop - self.namemodel = use_model - - def forward(self, data): - - x, pos, batch, u = data.x, data.x[:,:3], data.batch, data.u - - # Get edges using positions by computing the kNNs or the neighbors within a radius - #edge_index = knn_graph(pos, k=self.link_r, batch=batch, loop=self.loop) - #edge_index = radius_graph(pos, r=self.link_r, batch=batch, loop=self.loop) - edge_index = data.edge_index - - #if self.namemodel=="MetaNet": - # edge_attr = get_edge_attr(edge_index,pos) - - # Start message passing - for layer in self.layers: - if self.namemodel=="DeepSet": - x = layer(x) - elif self.namemodel=="MetaNet": - x, dumb, u = layer(x, edge_index, None, u, batch) - x = x.relu() - - - # Mix different global pooling layers - addpool = global_add_pool(x[:,3:], batch) # [num_examples, hidden_channels] - meanpool = global_mean_pool(x[:,3:], batch) - maxpool = global_max_pool(x[:,3:], batch) - - if only_positions: - self.pooled = torch.cat([addpool, meanpool, maxpool], dim=1) - else: - self.pooled = torch.cat([addpool, meanpool, maxpool, u], dim=1) - - # Final linear layer - out = self.lin(self.pooled) - - return out - -def get_edge_attr(edge_index,pos): - - edge_attr = torch.zeros((edge_index.shape[1],1), dtype=torch.float32, device=device) - for i, nodein in enumerate(edge_index[0,:]): - nodeout = edge_index[1,i] - edge_attr[i,0]=torch.sqrt(torch.sum((pos[nodein]-pos[nodeout])**2.,axis=0)) - return edge_attr diff --git a/Source/plotting.py b/Source/plotting.py index ec1221b..d6d75c3 100644 --- a/Source/plotting.py +++ b/Source/plotting.py @@ -1,7 +1,7 @@ #---------------------------------------------------------------------- # Script for plotting some statistics # Author: Pablo Villanueva Domingo -# Last update: 10/11/21 +# Last update: 4/22 #---------------------------------------------------------------------- import matplotlib.pyplot as plt @@ -25,6 +25,7 @@ def plot_losses(train_losses, valid_losses, test_loss, err_min, hparams): plt.savefig("Plots/loss_"+hparams.name_model()+".png", bbox_inches='tight', dpi=300) plt.close() +# Remove normalization of cosmo parameters def denormalize(trues, outputs, errors, minpar, maxpar): trues = minpar + trues*(maxpar - minpar) @@ -32,13 +33,12 @@ def denormalize(trues, outputs, errors, minpar, maxpar): errors = errors*(maxpar - minpar) return trues, outputs, errors -# Scatter plot of true vs predicted properties +# Scatter plot of true vs predicted cosmological parameter def plot_out_true_scatter(hparams, cosmoparam, testsuite = False): - figscat, axscat = plt.subplots() + figscat, axscat = plt.subplots(figsize=(6,5)) suite, simset = hparams.simsuite, hparams.simset col = colorsuite(suite) - #if testsuite: col = colorsuite(hparams.flip_suite()) # Load true values and predicted means and standard deviations outputs = np.load("Outputs/outputs_"+hparams.name_model()+".npy") @@ -113,163 +113,105 @@ def plot_out_true_scatter(hparams, cosmoparam, testsuite = False): axscat.set_xlim([truemin, truemax]) axscat.set_ylim([truemin, truemax]) if testsuite: - axscat.set_ylim([truemin, truemax+0.3]) + axscat.set_ylim([truemin, truemax+0.1]) axscat.set_ylabel(r"Prediction") axscat.set_xlabel(r"Truth") axscat.grid() # Title, indicating which are the training and testing suites if hparams.only_positions: - endtitle = ", only positions" + usefeatures = "Only positions" else: - endtitle = ", galactic features" + usefeatures = r"Positions + $V_{\rm max}, M_*, R_*, Z_*$" + props = dict(boxstyle='round', facecolor='white')#, alpha=0.5) + axscat.text((minpar + 
maxpar)/2-0.02, minpar, usefeatures, color="k", bbox=props )
+
+
    if testsuite:
-        #titlefig = "Training in "+hparams.flip_suite()+" "+simset+", testing in "+suite+" "+simset
-        titlefig = "Cross test in "+suite+endtitle
+        titlefig = "Training in "+hparams.flip_suite()+", testing in "+suite
+        #titlefig = "Cross test in "+suite+usefeatures
        namefig = "out_true_testsuite_"+cosmoparam+"_"+hparams.name_model()
    else:
-        #titlefig = "Training in "+suite+" "+simset+", testing in "+suite+" "+simset
-        titlefig = "Train in "+suite+endtitle
+        titlefig = "Training in "+suite+", testing in "+suite
+        #titlefig = "Train in "+suite+usefeatures
        namefig = "out_true_"+cosmoparam+"_"+hparams.name_model()
    axscat.set_title(titlefig)

    figscat.savefig("Plots/"+namefig+".png", bbox_inches='tight', dpi=300)
    plt.close(figscat)

-# Scatter plot of true vs predicted properties
-def plot_ps(hparams, testsuite = False):
+# Plot the predicted and target power spectra
+def plot_ps(hparams):
+
+    figscat = plt.figure(figsize=(6,6))
+    gs = gridspec.GridSpec(2,1,height_ratios=[6,2])
+    gs.update(hspace=0.0)#,wspace=0.4,bottom=0.6,top=1.05)
+    axscat=plt.subplot(gs[0])
+    axerr=plt.subplot(gs[1])

-    figscat, axscat = plt.subplots(figsize=(8,8))
    suite, simset = hparams.simsuite, hparams.simset
    col = colorsuite(suite)
-    if testsuite: col = colorsuite(hparams.flip_suite())

    # Load true values and predicted means and standard deviations
    outputs = np.load("Outputs/outputsPS_"+hparams.name_model()+".npy")
    trues = np.load("Outputs/truesPS_"+hparams.name_model()+".npy")
-    #errors = np.load("Outputs/errors"+cosmoparam+"_"+hparams.name_model()+".npy")

    # There is a (0,0) initial point, fix it
    outputs = outputs[1:]
    trues = trues[1:]
-    #errors = errors[1:]

    trues, outputs = 10.**trues, 10.**outputs

-    """if cosmoparam=="Om":
-        minpar, maxpar = 0.1, 0.5
-    elif cosmoparam=="Sig":
-        minpar, maxpar = 0.6, 1.0
-    trues, outputs, errors = denormalize(trues, outputs, errors, minpar, maxpar)"""
-
-    # Compute the number of points lying within 1 or 2 sigma regions from their uncertainties
-    """cond_success_1sig, cond_success_2sig = np.abs(outputs-trues)<=np.abs(errors), np.abs(outputs-trues)<=2.*np.abs(errors)
-    tot_points = outputs.shape[0]
-    successes1sig, successes2sig = outputs[cond_success_1sig].shape[0], outputs[cond_success_2sig].shape[0]"""
-
    # Compute the coefficient of determination R^2
    r2 = r2_score(trues,outputs)
    err_rel = np.mean(np.abs((trues - outputs)/(trues)))
-    #err_rel = np.mean(np.abs((trues - outputs)/(trues)), axis=0)
-    #chi2s = (outputs-trues)**2./errors**2.
-    #chi2 = chi2s[np.abs(errors)>0.01].mean() # Remove some outliers which make explode the chi2
-    #chi2 = chi2s[chi2s<1.e4].mean() # Remove some outliers which make explode the chi2
    print("R^2={:.2f}, Relative error={:.2e}".format(r2, err_rel))
-    #print("A fraction of succeses of", successes1sig/tot_points, "at 1 sigma,", successes2sig/tot_points, "at 2 sigmas")
-
-    # Sort by true value
-    #indsort = trues.argsort()
-    #print(indsort.shape)
-    #outputs, trues = outputs[indsort,:], trues[indsort,:]

-    # Compute mean and std region within several bins
-    """truebins, binsize = np.linspace(trues[0], trues[-1], num=10, retstep=True)
-    means, stds = [], []
-    for i, bin in enumerate(truebins[:-1]):
-        cond = (trues>=bin) & (trues