diff --git a/.gitignore b/.gitignore
index 37afa5d..814be2e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,4 +2,5 @@
batchfile.slurm
crosstest_batchfile.slurm
tunning_batchfile.slurm
-__pycache__/
\ No newline at end of file
+__pycache__/
+plotresults.py
diff --git a/.gitignore~ b/.gitignore~
new file mode 100644
index 0000000..37afa5d
--- /dev/null
+++ b/.gitignore~
@@ -0,0 +1,5 @@
+.DS_Store
+batchfile.slurm
+crosstest_batchfile.slurm
+tunning_batchfile.slurm
+__pycache__/
\ No newline at end of file
diff --git a/README.md b/README.md
index 57ecfee..5145bb5 100644
--- a/README.md
+++ b/README.md
@@ -11,7 +11,7 @@ See the paper [arXiv:2204.xxxxx](https://arxiv.org/abs/2204.xxxxx) for more deta
-## Scripts
+## Codes
Here is a brief description of the codes included:
@@ -51,13 +51,18 @@ The libraries required for training the models and compute some statistics are:
* `scipy`
* `sklearn`
* `optuna` (only for optimization in `hyperparams_optimization.py`)
+* [`Pylians`](https://pylians3.readthedocs.io/en/master/) (only for computing power spectra in `ps_test.py`)
## Usage
+The codes implemented here are designed to train GNNs for two tasks. The desired task is chosen in `hyperparameters.py` with the `outmode` flag:
+1. Infer cosmological parameters from galaxy catalogues. Set `outmode = "cosmo"`.
+2. Predict the power spectrum from galaxy catalogues. Set `outmode = "ps"`.
+
Here is some advice on how to employ the scripts described above:
1. To perform a search of the optimal hyperparameters, run `hyperparams_optimization.py`.
-2. To train a model with a given set of parameters defined in `hyperparameters.py`, run `main.py`. The hyperparameters currently present in `hyperparameters.py` correspond to the best optimal values for each suite when all galactic features are employed (see the paper).
+2. To train a model with a given set of parameters defined in `hyperparameters.py`, run `main.py`. The hyperparameters currently present in `hyperparameters.py` correspond to the optimal values for each suite when all galactic features are employed (see the paper). Modify it according to the chosen task, as in the sketch below.
3. Once a model is trained, run `crosstest.py` to test it on the training simulation suite and cross-test it on the other suite included in CAMELS (IllustrisTNG and SIMBA).
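
For orientation, here is a minimal sketch of the task selection in `hyperparameters.py`; the values below are illustrative, not the tuned defaults shipped with the repository:

```python
# Hypothetical sketch: only `outmode` is documented above; the remaining
# attribute names follow their usage in Source/load_data.py.
outmode = "cosmo"          # "cosmo": infer cosmological parameters
# outmode = "ps"           # "ps": predict the power spectrum

simsuite = "IllustrisTNG"  # training suite: "IllustrisTNG" or "SIMBA"
simset = "LH"              # simulation set
only_positions = False     # if True, use galaxy positions only
pred_params = 2            # number of parameters to infer ("cosmo" task)
```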
diff --git a/Source/constants.py b/Source/constants.py
index 9ad75b2..cfb6cbf 100644
--- a/Source/constants.py
+++ b/Source/constants.py
@@ -1,7 +1,7 @@
#----------------------------------------------------------------------
# List of constants and some common functions
# Author: Pablo Villanueva Domingo
-# Last update: 10/11/21
+# Last update: 4/22
#----------------------------------------------------------------------
import numpy as np
@@ -39,15 +39,12 @@
# Batch size
batch_size = 25
-
-
# Number of k bins in the power spectrum
ps_size = 79
#--- FUNCTIONS ---#
-
# Choose color depending on the CAMELS simulation suite
def colorsuite(suite):
if suite=="IllustrisTNG": return "purple"
- elif suite=="SIMBA": return "deepskyblue"
+ elif suite=="SIMBA": return "dodgerblue"
diff --git a/Source/load_data.py b/Source/load_data.py
index 6d667cf..f272989 100644
--- a/Source/load_data.py
+++ b/Source/load_data.py
@@ -1,3 +1,9 @@
+#----------------------------------------------------
+# Routine for loading the CAMELS galaxy catalogues
+# Author: Pablo Villanueva Domingo
+# Last update: 4/22
+#----------------------------------------------------
+
import h5py
from torch_geometric.data import Data, DataLoader
from Source.constants import *
@@ -6,6 +12,7 @@
Nstar_th = 20 # Minimum number of stellar particles required to consider a galaxy
+# Normalize CAMELS parameters
def normalize_params(params):
minimum = np.array([0.1, 0.6, 0.25, 0.25, 0.5, 0.5])
@@ -13,13 +20,17 @@ def normalize_params(params):
params = (params - minimum)/(maximum - minimum)
return params
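
As a worked example of this min-max normalization (the range [0.1, 0.5] for Omega_m is the one also used in the plotting routines below):

```python
# Omega_m = 0.30 with prior range [0.1, 0.5]
Om_norm = (0.30 - 0.1) / (0.5 - 0.1)   # = 0.5
```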
+# Normalize power spectrum
def normalize_ps(ps):
mean, std = ps.mean(axis=0), ps.std(axis=0)
normps = (ps - mean)/std
return normps
+# Compute KDTree and get edges and edge features
def get_edges(pos, r_link, use_loops):
+ # 1. Get edges
+
# Create the KDTree and look for pairs within a distance r_link
# Boxsize normalize to 1
kd_tree = SS.KDTree(pos, leafsize=16, boxsize=1.0001)
@@ -37,35 +48,36 @@ def get_edges(pos, r_link, use_loops):
edge_index = edge_index.reshape((2,-1))
num_pairs = edge_index.shape[1]
- # Edge attributes
+ # 2. Get edge attributes
+
row, col = edge_index
diff = pos[row]-pos[col]
- # Correct boundaries in distances
+ # Take into account periodic boundary conditions, correcting the distances
for i, pos_i in enumerate(diff):
- #outbound=False
for j, coord in enumerate(pos_i):
if coord > r_link:
- #outbound=True
diff[i,j] -= 1. # Boxsize normalize to 1
elif -coord > r_link:
- #outbound=True
diff[i,j] += 1. # Boxsize normalize to 1
- #if outbound: numbounds+=1
+ # Get translation- and rotation-invariant features
+ # Distance
dist = np.linalg.norm(diff, axis=1)
+ # Centroid of galaxy catalogue
centroid = np.mean(pos,axis=0)
+ # Unit vectors of node, neighbor and difference vector
unitrow = (pos[row]-centroid)/np.linalg.norm((pos[row]-centroid), axis=1).reshape(-1,1)
unitcol = (pos[col]-centroid)/np.linalg.norm((pos[col]-centroid), axis=1).reshape(-1,1)
unitdiff = diff/dist.reshape(-1,1)
+ # Dot products between unit vectors
cos1 = np.array([np.dot(unitrow[i,:].T,unitcol[i,:]) for i in range(num_pairs)])
cos2 = np.array([np.dot(unitrow[i,:].T,unitdiff[i,:]) for i in range(num_pairs)])
-
- #print(edge_index.shape, cos1.shape, cos2.shape, dist.shape)
+ # Normalize distance by linking radius
dist /= r_link
- edge_attr = np.concatenate([dist.reshape(-1,1), cos1.reshape(-1,1), cos2.reshape(-1,1)], axis=1)
- #print(pos.shape, edge_index.shape, edge_attr.shape)
+ # Concatenate to get all edge attributes
+ edge_attr = np.concatenate([dist.reshape(-1,1), cos1.reshape(-1,1), cos2.reshape(-1,1)], axis=1)
# Add loops
if use_loops:
@@ -78,90 +90,54 @@ def get_edges(pos, r_link, use_loops):
edge_attr = np.append(edge_attr, atrloops, 0)
edge_index = edge_index.astype(int)
- #print(pos.shape, edge_index.shape, edge_attr.shape)
-
-
-
- #print(edge_index.shape, edge_attr.shape)
-
-
- """
- diff = (pos[row]-pos[col])/r_link
-
- #print(diff.shape, edge_index.shape, pos.shape)
- #numbounds = 0
-
- # Correct boundaries in distances
- for i, pos_i in enumerate(diff):
- #outbound=False
- for j, coord in enumerate(pos_i):
- if coord > 1.:
- #outbound=True
- diff[i,j] -= 1./r_link # Boxsize normalize to 1
- elif -coord > 1.:
- #outbound=True
- diff[i,j] += 1./r_link # Boxsize normalize to 1
- #if outbound: numbounds+=1
-
- edge_attr = np.concatenate([diff, np.linalg.norm(diff, axis=1, keepdims=True)], axis=1)
- #print(edge_attr[:,3].min(), edge_attr[:,3].max())
- #print(diff.shape[0], numbounds)
- """
-
return edge_index, edge_attr
-######################################################################################
-# This routine reads the galaxies from a simulation and
-# root ------> folder containing all simulations with their galaxy catalogues
-# sim -------> 'IllustrisTNG' or 'SIMBA'
-# suite -----> 'LH' or 'CV'
-# number ----> number of the simulation
-# snapnum ---> snapshot number (choose depending of the desired redshift)
-# BoxSize ---> size of the simulation box in Mpc/h
-# Nstar_th -----> galaxies need to contain at least Nstar_th stars
-# k ---------> number of neighbors
-# param_file -> file with the value of the cosmological + astrophysical parameters
-def sim_graph(simnumber,param_file,hparams):
+# Routine to create a cosmic graph from a galaxy catalogue
+# simnumber: number of simulation
+# param_file: file with the value of the cosmological + astrophysical parameters
+# hparams: hyperparameters class
+def sim_graph(simnumber, param_file, hparams):
+
+ # Get some hyperparameters
simsuite,simset,r_link,only_positions,outmode,pred_params = hparams.simsuite,hparams.simset,hparams.r_link,hparams.only_positions,hparams.outmode,hparams.pred_params
- # get the name of the galaxy catalogue
+ # Name of the galaxy catalogue
simpath = simpathroot + simsuite + "/"+simset+"_"
catalogue = simpath + str(simnumber)+"/fof_subhalo_tab_0"+hparams.snap+".hdf5"
- # read the catalogue
+ # Read the catalogue
f = h5py.File(catalogue, 'r')
pos = f['/Subhalo/SubhaloPos'][:]/boxsize
Mstar = f['/Subhalo/SubhaloMassType'][:,4] #Msun/h
- SubhaloVel = f["Subhalo/SubhaloVel"][:]
Rstar = f["Subhalo/SubhaloHalfmassRadType"][:,4]
Metal = f["Subhalo/SubhaloStarMetallicity"][:]
Vmax = f["Subhalo/SubhaloVmax"][:]
Nstar = f['/Subhalo/SubhaloLenType'][:,4] #number of stars
f.close()
- # some simulations are slightly outside the box
+ # Some galaxy positions lie slightly outside the box; correct them
pos[np.where(pos<0.0)]+=1.0
pos[np.where(pos>1.0)]-=1.0
- # select only galaxies with more than 10 star particles
+ # Select only galaxies with more than Nstar_th star particles
indexes = np.where(Nstar>Nstar_th)[0]
pos = pos[indexes]
Mstar = Mstar[indexes]
- SubhaloVel = SubhaloVel[indexes]
Rstar = Rstar[indexes]
Metal = Metal[indexes]
Vmax = Vmax[indexes]
# Get the output to be predicted by the GNN, either the cosmo parameters or the power spectrum
if outmode=="cosmo":
- # read the value of the cosmological & astrophysical parameters
+ # Read the value of the cosmological & astrophysical parameters
paramsfile = np.loadtxt(param_file, dtype=str)
params = np.array(paramsfile[simnumber,1:-1],dtype=np.float32)
params = normalize_params(params)
- params = params[:pred_params]
+ params = params[:pred_params] # Keep only the first pred_params parameters
y = np.reshape(params, (1,params.shape[0]))
+ # Read the power spectra
elif outmode=="ps":
ps = np.load(param_file)
@@ -170,42 +146,30 @@ def sim_graph(simnumber,param_file,hparams):
#ps = normalize_ps(ps)
y = np.reshape(ps, (1,ps_size))
-
- """
- # compute the number of pairs
- nodes = pos.shape[0]
- u = np.zeros((1,2), dtype=np.float32)
- u[0,0] = np.log10(np.sum(Mstar))
- u[0,1] = np.log10(nodes)
- """
+ # Number of galaxies as global feature
u = np.log10(pos.shape[0]).reshape(1,1)
Mstar = np.log10(1.+ Mstar)
- #SubhaloVel = np.log10(1.+SubhaloVel)
- SubhaloVel/=100.
Rstar = np.log10(1.+ Rstar)
Metal = np.log10(1.+ Metal)
Vmax = np.log10(1. + Vmax)
+
+ # Node features
tab = np.column_stack((Mstar, Rstar, Metal, Vmax))
- #tab = Vmax.reshape(-1,1)
+ #tab = Vmax.reshape(-1,1) # For using only Vmax
+ x = torch.tensor(tab, dtype=torch.float32)
+ # Use self-loops only when node features are employed
if only_positions:
- #u = np.zeros((1,2), dtype=np.float32) # not used
- tab = np.zeros_like(pos[:,:1]) # not really used
+ tab = np.zeros_like(pos[:,:1]) # Node features not really used
use_loops = False
else:
- use_loops = True#"""
+ use_loops = True
- #use_loops = False
-
- x = torch.tensor(tab, dtype=torch.float32)
-
- #use_loops = False
+ # Get edges and edge features
edge_index, edge_attr = get_edges(pos, r_link, use_loops)
- #edge_index = get_edges(pos, r_link)
- #edge_index = None
- # get the graph
+ # Construct the graph
graph = Data(x=x,
y=torch.tensor(y, dtype=torch.float32),
u=torch.tensor(u, dtype=torch.float32),
@@ -213,54 +177,8 @@ def sim_graph(simnumber,param_file,hparams):
edge_attr=torch.tensor(edge_attr, dtype=torch.float32))
return graph
-######################################################################################
-"""
-######################################################################################
-# This routine creates the dataset for the considered mode
-# mode -------------> 'train', 'valid', 'test' or 'all'
-# seed -------------> random seed to split simulations among train/valid/test
-# sims -------------> total number of simulations
-# root -------------> folder containing all simulations with their galaxy catalogues
-# sim --------------> 'IllustrisTNG' or 'SIMBA'
-# suite ------------> 'LH' or 'CV'
-# number -----------> number of the simulation
-# snapnum ----------> snapshot number (choose depending of the desired redshift)
-# BoxSize ----------> size of the simulation box in Mpc/h
-# Nstar_th --> galaxies need to contain at least Nstar_th stars
-# k ----------------> number of neighbors
-# param_file -------> file with the value of the cosmo & astro parameters
-# batch_size -------> batch size
-# num_workers ------> number of workers to load the data
-# shuffle ----------> whether randomly shuffle the data in the data loader
-def create_dataset(mode, seed, sims, root, sim, suite, snapnum, BoxSize,
- Nstar_th, k, param_file, batch_size, num_workers=1,
- shuffle=True):
-
-
-
- # get the offset and size of the considered mode
- if mode=='train': offset, size = int(0.0*sims), int(0.8*sims)
- elif mode=='valid': offset, size = int(0.8*sims), int(0.1*sims)
- elif mode=='test': offset, size = int(0.9*sims), int(0.1*sims)
- elif mode=='all': offset, size = int(0.0*sims), int(1.0*sims)
- else: raise Exception('wrong mode!')
-
- # randomly shuffle the simulations. Instead of 0 1 2 3...999 have a
- # random permutation. E.g. 5 9 0 29...342
- np.random.seed(seed)
- numbers = np.arange(sims) #shuffle sims not maps
- np.random.shuffle(numbers)
- numbers = numbers[offset:offset+size] #select indexes of mode
-
- # get the dataset
- dataset = []
- for i in numbers:
- dataset.append(sim_graph(root,sim,suite,i,snapnum,BoxSize,
- Nstar_th,k,param_file))
- return DataLoader(dataset, batch_size=batch_size, shuffle=shuffle,
- num_workers=num_workers)
-"""
+
# Split training and validation sets
def split_datasets(dataset):
@@ -283,13 +201,9 @@ def split_datasets(dataset):
######################################################################################
# Main routine to load data and create the dataset
-# simsuite: simulation suite, either "IllustrisTNG" or "SIMBA"
-# simset: set of simulations:
-# CV: Use simulations with fiducial cosmological and astrophysical parameters, but different random seeds (27 simulations total)
-# LH: Use simulations over latin-hypercube, varying over cosmological and astrophysical parameters, and different random seeds (1000 simulations total)
-# n_sims: number of simulations, maximum 27 for CV and 1000 for LH
def create_dataset(hparams):
+ # Target file depending on the task: inferring cosmo parameters or predicting the power spectrum
if hparams.outmode == "cosmo":
param_file = "/projects/QUIJOTE/CAMELS/Sims/CosmoAstroSeed_params_"+hparams.simsuite+".txt"
elif hparams.outmode == "ps":
@@ -311,8 +225,8 @@ def create_dataset(hparams):
# Add other snapshots from other redshifts
# Snapshot redshift
# 004: z=3, 010: z=2, 014: z=1.5, 018: z=1, 024: z=0.5, 033: z=0
- for snap in [24,18,14,10]:
- #for snap in [18,10]:
+ #for snap in [24,18,14,10]:
+ for snap in [18,10]:
hparams.snap = str(snap)
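
The edge construction above can be distilled into the following vectorized sketch; it is not a drop-in replacement for `get_edges` (each pair is kept once and the angular features are omitted), but it shows the periodic KDTree query and the boundary correction:

```python
import numpy as np
import scipy.spatial as SS

def edges_sketch(pos, r_link):
    # Periodic KDTree; boxsize slightly above 1 tolerates points at the edge
    tree = SS.KDTree(pos, leafsize=16, boxsize=1.0001)
    pairs = tree.query_pairs(r_link, output_type="ndarray")   # (num_pairs, 2)
    row, col = pairs[:, 0], pairs[:, 1]
    diff = pos[row] - pos[col]
    # Periodic boundary correction: any component longer than r_link is a wrap
    diff[diff > r_link] -= 1.0
    diff[diff < -r_link] += 1.0
    dist = np.linalg.norm(diff, axis=1) / r_link   # distance in units of r_link
    return np.stack([row, col]), dist
```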
diff --git a/Source/metalayer.py b/Source/metalayer.py
index d1bb669..fc50497 100644
--- a/Source/metalayer.py
+++ b/Source/metalayer.py
@@ -1,3 +1,9 @@
+#----------------------------------------------------
+# Graph Neural Network architecture
+# Author: Pablo Villanueva Domingo
+# Last update: 4/22
+#----------------------------------------------------
+
import torch
import torch.nn.functional as F
from torch_cluster import knn_graph, radius_graph
@@ -5,11 +11,9 @@
from torch_geometric.nn import MessagePassing, MetaLayer, LayerNorm
from torch_scatter import scatter_mean, scatter_sum, scatter_max, scatter_min, scatter_add
from torch_geometric.nn import global_mean_pool, global_max_pool, global_add_pool
-#import torch.utils.checkpoint
from Source.constants import *
-#use_checkpoints = False
-
+# Model for updating edge attributes
class EdgeModel(torch.nn.Module):
def __init__(self, node_in, node_out, edge_in, edge_out, hid_channels, residuals=True, norm=False):
super().__init__()
@@ -34,11 +38,11 @@ def forward(self, src, dest, edge_attr, u, batch):
out = torch.cat([src, dest, edge_attr], dim=1)
#out = torch.cat([src, dest, edge_attr, u[batch]], 1)
out = self.edge_mlp(out)
- #out = torch.utils.checkpoint.checkpoint_sequential(self.edge_mlp, 2, out)
if self.residuals:
out = out + edge_attr
return out
+# Model for updating node attributes
class NodeModel(torch.nn.Module):
def __init__(self, node_in, node_out, edge_in, edge_out, hid_channels, residuals=True, norm=False):
super().__init__()
@@ -59,24 +63,22 @@ def forward(self, x, edge_index, edge_attr, u, batch):
# edge_attr: [E, F_e]
# u: [B, F_u]
# batch: [N] with max entry B - 1.
+
row, col = edge_index
- #out = torch.cat([x[row], edge_attr], dim=1)
- #out = self.node_mlp_1(out)
out = edge_attr
+
+ # Multipooling layer
out1 = scatter_add(out, col, dim=0, dim_size=x.size(0))
out2 = scatter_max(out, col, dim=0, dim_size=x.size(0))[0]
out3 = scatter_mean(out, col, dim=0, dim_size=x.size(0))
out = torch.cat([x, out1, out2, out3, u[batch]], dim=1)
- #out = torch.cat([x, out], dim=1)
- #out = self.node_mlp(out)
- #if use_checkpoints:
- # out = torch.utils.checkpoint.checkpoint_sequential(self.node_mlp, 2, out)
- #else:
+
out = self.node_mlp(out)
if self.residuals:
out = out + x
return out
+# First edge model for updating edge attributes when no initial node features are provided
class EdgeModelIn(torch.nn.Module):
def __init__(self, node_in, node_out, edge_in, edge_out, hid_channels, norm=False):
super().__init__()
@@ -97,6 +99,7 @@ def forward(self, src, dest, edge_attr, u, batch):
return out
+# First node model for updating node attributes when no initial node features are provided
class NodeModelIn(torch.nn.Module):
def __init__(self, node_in, node_out, edge_in, edge_out, hid_channels, norm=False):
super().__init__()
@@ -113,31 +116,30 @@ def __init__(self, node_in, node_out, edge_in, edge_out, hid_channels, norm=Fals
def forward(self, x, edge_index, edge_attr, u, batch):
row, col = edge_index
-
out = edge_attr
- #out = scatter_add(out, col, dim=0, dim_size=x.size(0))
+
+ # Multipooling layer
out1 = scatter_add(out, col, dim=0, dim_size=x.size(0))
out2 = scatter_max(out, col, dim=0, dim_size=x.size(0))[0]
out3 = scatter_mean(out, col, dim=0, dim_size=x.size(0))
out = torch.cat([out1, out2, out3, u[batch]], dim=1)
- #out = torch.cat([out, u[batch]], dim=1)
out = self.node_mlp(out)
return out
-# Graph Neural Network architecture, based on the Interaction Network (arXiv:1612.00222, arXiv:2002.09405)
+# Graph Neural Network architecture, based on the Graph Network (arXiv:1806.01261)
+# Employing the MetaLayer implementation of PyTorch Geometric
class GNN(torch.nn.Module):
def __init__(self, node_features, n_layers, hidden_channels, linkradius, dim_out, only_positions, residuals=True):
super().__init__()
self.n_layers = n_layers
- #self.loop = False
self.link_r = linkradius
self.dim_out = dim_out
self.only_positions = only_positions
- # Input node features (0 if only_positions is used)
+ # Number of input node features (0 if only_positions is used)
node_in = node_features
# Input edge features: |p_i-p_j|, p_i*p_j, p_i*(p_i-p_j)
edge_in = 3
@@ -147,7 +149,7 @@ def __init__(self, node_features, n_layers, hidden_channels, linkradius, dim_out
layers = []
- # Encoder layer
+ # Encoder graph block
# If only positions are used, node features are created from the aggregation of edge attributes of neighbors
if self.only_positions:
inlayer = MetaLayer(node_model=NodeModelIn(node_in, node_out, edge_in, edge_out, hid_channels),
@@ -163,7 +165,7 @@ def __init__(self, node_features, n_layers, hidden_channels, linkradius, dim_out
node_in = node_out
edge_in = edge_out
- # Hidden graph layers
+ # Hidden graph blocks
for i in range(n_layers-1):
lay = MetaLayer(node_model=NodeModel(node_in, node_out, edge_in, edge_out, hid_channels, residuals=residuals),
@@ -172,6 +174,7 @@ def __init__(self, node_features, n_layers, hidden_channels, linkradius, dim_out
self.layers = ModuleList(layers)
+ # Final aggregation layer
self.outlayer = Sequential(Linear(3*node_out+1, hid_channels),
ReLU(),
Linear(hid_channels, hid_channels),
@@ -180,41 +183,15 @@ def __init__(self, node_features, n_layers, hidden_channels, linkradius, dim_out
ReLU(),
Linear(hid_channels, self.dim_out))
-
- def get_graph(self, data):
-
- """pos = data.x[:,:3]
-
- # Get edges
- edge_index = radius_graph(pos, r=self.link_r, batch=data.batch, loop=self.loop)
- #edge_index = data.edge_index
-
- row, col = edge_index
-
- # Edge features
- diff = (pos[row]-pos[col])/self.link_r
- edge_attr = torch.cat([diff, torch.norm(diff, dim=1, keepdim=True)], dim=1)"""
-
- edge_index, edge_attr = data.edge_index, data.edge_attr
-
- # Node features
- if self.only_positions:
- h = torch.zeros_like(data.x[:,0])
- else:
- h = data.x
-
- return h, edge_index, edge_attr
-
-
def forward(self, data):
- #def forward(self, x, batch):
- #h, edge_index, edge_attr = self.get_graph(data)
h, edge_index, edge_attr, u = data.x, data.edge_index, data.edge_attr, data.u
+ # Message passing layers
for layer in self.layers:
h, edge_attr, _ = layer(h, edge_index, edge_attr, u, data.batch)
+ # Multipooling layer
addpool = global_add_pool(h, data.batch)
meanpool = global_mean_pool(h, data.batch)
maxpool = global_max_pool(h, data.batch)
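
The MetaLayer calling convention used throughout this file, as a self-contained toy sketch (dimensions are illustrative, not the paper's settings):

```python
import torch
from torch.nn import Sequential, Linear, ReLU
from torch_geometric.nn import MetaLayer

class ToyEdgeModel(torch.nn.Module):
    def __init__(self):
        super().__init__()
        # src + dest node features (8 each) + 3 edge features -> 3 new edge features
        self.mlp = Sequential(Linear(2*8 + 3, 16), ReLU(), Linear(16, 3))

    def forward(self, src, dest, edge_attr, u, batch):
        return self.mlp(torch.cat([src, dest, edge_attr], dim=1))

x = torch.randn(10, 8)                       # 10 nodes, 8 features each
edge_index = torch.randint(0, 10, (2, 40))   # 40 random directed edges
edge_attr = torch.randn(40, 3)               # 3 edge features, as in the GNN above
layer = MetaLayer(edge_model=ToyEdgeModel())
x, edge_attr, u = layer(x, edge_index, edge_attr=edge_attr)
print(edge_attr.shape)                       # torch.Size([40, 3])
```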
diff --git a/Source/networks.py b/Source/networks.py
deleted file mode 100644
index 14a6084..0000000
--- a/Source/networks.py
+++ /dev/null
@@ -1,208 +0,0 @@
-#----------------------------------------------------------------------
-# Definition of the neural network architectures
-# Author: Pablo Villanueva Domingo
-# Last update: 10/11/21
-#----------------------------------------------------------------------
-
-import torch
-from torch.nn import Sequential, Linear, ReLU, ModuleList
-from torch_geometric.nn import MessagePassing, GCNConv, PPFConv, MetaLayer, EdgeConv
-import torch.nn.functional as F
-from torch_geometric.nn import global_mean_pool, global_max_pool, global_add_pool
-from torch_cluster import knn_graph, radius_graph
-from torch_scatter import scatter_mean, scatter_sum, scatter_add, scatter_max, scatter_min
-import numpy as np
-from Source.constants import *
-
-
-#------------------------------
-# Architectures considered:
-# DeepSet
-# Metalayer (graph network)
-#
-#-----------------------------
-
-#--------------------------------------------
-# Message passing architecture
-# See pytorch-geometric documentation for more info
-# pytorch-geometric.readthedocs.io/
-#--------------------------------------------
-
-
-# Node model for the MetaLayer
-class NodeModel(torch.nn.Module):
- def __init__(self, in_channels, hidden_channels, latent_channels, link_r):
- super(NodeModel, self).__init__()
- #self.node_mlp_1 = Sequential(Linear(in_channels,hidden_channels), LeakyReLU(0.2), Linear(hidden_channels,hidden_channels),LeakyReLU(0.2), Linear(mid_channels,out_channels))
- #self.node_mlp_2 = Sequential(Linear(303,500), LeakyReLU(0.2), Linear(500,500),LeakyReLU(0.2), Linear(500,1))
-
- if only_positions:
- inchan = in_channels + 3
- secchan = 3*latent_channels+in_channels
- else:
- inchan = (in_channels-3)*2+3
- secchan = 3*latent_channels+2+in_channels-3
-
-
- self.mlp1 = Sequential(Linear(inchan, hidden_channels),
- ReLU(),
- Linear(hidden_channels, hidden_channels),
- ReLU(),
- Linear(hidden_channels, latent_channels))
-
- self.mlp2 = Sequential(Linear(secchan, hidden_channels),
- ReLU(),
- Linear(hidden_channels, hidden_channels),
- ReLU(),
- Linear(hidden_channels, latent_channels))
-
- self.link_r = link_r
-
- def forward(self, x, edge_index, edge_attr, u, batch):
- # x: [N, F_x], where N is the number of nodes.
- # edge_index: [2, E] with max entry N - 1.
- # edge_attr: [E, F_e]
- # u: [B, F_u]
- # batch: [N] with max entry B - 1.
- row, col = edge_index
- #x_node, x_neigh = x[row], x[col]
-
- #edge_attr = torch.sqrt(torch.sum((x[row,:3]-x[col,:3])**2.,axis=1))
- #edge_attr = edge_attr.view(-1,1)
- edge_attr = x[row,:3]-x[col,:3]
-
- # correct distance for boundaries
- for coord in range(3):
- edge_attr[edge_attr[:,coord]>self.link_r,coord]-=1.
- edge_attr[-edge_attr[:,coord]>self.link_r,coord]+=1.
-
- # define interaction tensor; every pair contains features from input and
- # output node together with
- if only_positions:
- out = torch.cat([x[row], edge_attr], dim=1)
- else:
- out = torch.cat([x[row,3:], x[col,3:], edge_attr], dim=1)
-
- # take interaction feature tensor and embedd it into another tensor
- #out = self.node_mlp_1(out)
- out = self.mlp1(out)
-
- # compute the mean,sum and max of each embed feature tensor for each node
- out1 = scatter_mean(out, col, dim=0, dim_size=x.size(0))
- out2 = scatter_max(out, col, dim=0, dim_size=x.size(0))[0]
- out3 = scatter_add(out, col, dim=0, dim_size=x.size(0))
- #out3 = scatter_min(out, col, dim=0, dim_size=x.size(0))[0]
-
- # every node contains a feature tensor with the pooling of the messages from
- # neighbors, its own state, and a global feature
- if only_positions:
- out = torch.cat([x, out1, out2, out3], dim=1)
- else:
- out = torch.cat([x[:,3:], out1, out2, out3, u[batch]], dim=1)
- #print("node post", out.shape)
-
- out = self.mlp2(out)
-
- # In this way, it is traslational equivariant
- out = torch.cat([x[:,:3], out], dim=1)
-
- return out
-
-
-
-#--------------------------------------------
-# General Graph Neural Network architecture
-#--------------------------------------------
-class ModelGNN(torch.nn.Module):
- def __init__(self, use_model, node_features, n_layers, link_r, hidden_channels=300, latent_channels=100, loop=True):
- super(ModelGNN, self).__init__()
-
- # Graph layers
- layers = []
- in_channels = node_features
- for i in range(n_layers):
-
- # Choose the model
- if use_model=="DeepSet":
- lay = Sequential(
- Linear(in_channels, hidden_channels),
- ReLU(),
- Linear(hidden_channels, hidden_channels),
- ReLU(),
- Linear(hidden_channels, latent_channels))
-
-
- elif use_model=="MetaNet":
- lay = MetaLayer(node_model=NodeModel(in_channels, hidden_channels, latent_channels, link_r))
-
- else:
- print("Model not known...")
-
- layers.append(lay)
- in_channels = latent_channels+3
-
-
- self.layers = ModuleList(layers)
-
-
- if only_positions:
- lin_in = (in_channels-3)*3
- else:
- lin_in = (in_channels-3)*3+2
-
- self.lin = Sequential(Linear(lin_in, 2*latent_channels),
- ReLU(),
- Linear(2*latent_channels, latent_channels),
- ReLU(),
- Linear(latent_channels, latent_channels),
- ReLU(),
- Linear(latent_channels, 4))
-
- self.link_r = link_r
- self.pooled = 0.
- self.loop = loop
- self.namemodel = use_model
-
- def forward(self, data):
-
- x, pos, batch, u = data.x, data.x[:,:3], data.batch, data.u
-
- # Get edges using positions by computing the kNNs or the neighbors within a radius
- #edge_index = knn_graph(pos, k=self.link_r, batch=batch, loop=self.loop)
- #edge_index = radius_graph(pos, r=self.link_r, batch=batch, loop=self.loop)
- edge_index = data.edge_index
-
- #if self.namemodel=="MetaNet":
- # edge_attr = get_edge_attr(edge_index,pos)
-
- # Start message passing
- for layer in self.layers:
- if self.namemodel=="DeepSet":
- x = layer(x)
- elif self.namemodel=="MetaNet":
- x, dumb, u = layer(x, edge_index, None, u, batch)
- x = x.relu()
-
-
- # Mix different global pooling layers
- addpool = global_add_pool(x[:,3:], batch) # [num_examples, hidden_channels]
- meanpool = global_mean_pool(x[:,3:], batch)
- maxpool = global_max_pool(x[:,3:], batch)
-
- if only_positions:
- self.pooled = torch.cat([addpool, meanpool, maxpool], dim=1)
- else:
- self.pooled = torch.cat([addpool, meanpool, maxpool, u], dim=1)
-
- # Final linear layer
- out = self.lin(self.pooled)
-
- return out
-
-def get_edge_attr(edge_index,pos):
-
- edge_attr = torch.zeros((edge_index.shape[1],1), dtype=torch.float32, device=device)
- for i, nodein in enumerate(edge_index[0,:]):
- nodeout = edge_index[1,i]
- edge_attr[i,0]=torch.sqrt(torch.sum((pos[nodein]-pos[nodeout])**2.,axis=0))
- return edge_attr
diff --git a/Source/plotting.py b/Source/plotting.py
index ec1221b..d6d75c3 100644
--- a/Source/plotting.py
+++ b/Source/plotting.py
@@ -1,7 +1,7 @@
#----------------------------------------------------------------------
# Script for plotting some statistics
# Author: Pablo Villanueva Domingo
-# Last update: 10/11/21
+# Last update: 4/22
#----------------------------------------------------------------------
import matplotlib.pyplot as plt
@@ -25,6 +25,7 @@ def plot_losses(train_losses, valid_losses, test_loss, err_min, hparams):
plt.savefig("Plots/loss_"+hparams.name_model()+".png", bbox_inches='tight', dpi=300)
plt.close()
+# Remove normalization of cosmo parameters
def denormalize(trues, outputs, errors, minpar, maxpar):
trues = minpar + trues*(maxpar - minpar)
@@ -32,13 +33,12 @@ def denormalize(trues, outputs, errors, minpar, maxpar):
errors = errors*(maxpar - minpar)
return trues, outputs, errors
-# Scatter plot of true vs predicted properties
+# Scatter plot of true vs predicted cosmological parameter
def plot_out_true_scatter(hparams, cosmoparam, testsuite = False):
- figscat, axscat = plt.subplots()
+ figscat, axscat = plt.subplots(figsize=(6,5))
suite, simset = hparams.simsuite, hparams.simset
col = colorsuite(suite)
- #if testsuite: col = colorsuite(hparams.flip_suite())
# Load true values and predicted means and standard deviations
outputs = np.load("Outputs/outputs_"+hparams.name_model()+".npy")
@@ -113,163 +113,105 @@ def plot_out_true_scatter(hparams, cosmoparam, testsuite = False):
axscat.set_xlim([truemin, truemax])
axscat.set_ylim([truemin, truemax])
if testsuite:
- axscat.set_ylim([truemin, truemax+0.3])
+ axscat.set_ylim([truemin, truemax+0.1])
axscat.set_ylabel(r"Prediction")
axscat.set_xlabel(r"Truth")
axscat.grid()
# Title, indicating which are the training and testing suites
if hparams.only_positions:
- endtitle = ", only positions"
+ usefeatures = "Only positions"
else:
- endtitle = ", galactic features"
+ usefeatures = r"Positions + $V_{\rm max}, M_*, R_*, Z_*$"
+ props = dict(boxstyle='round', facecolor='white')#, alpha=0.5)
+ axscat.text((minpar + maxpar)/2-0.02, minpar, usefeatures, color="k", bbox=props)
+
+
if testsuite:
- #titlefig = "Training in "+hparams.flip_suite()+" "+simset+", testing in "+suite+" "+simset
- titlefig = "Cross test in "+suite+endtitle
+ titlefig = "Training in "+hparams.flip_suite()+", testing in "+suite
+ #titlefig = "Cross test in "+suite+usefeatures
namefig = "out_true_testsuite_"+cosmoparam+"_"+hparams.name_model()
else:
- #titlefig = "Training in "+suite+" "+simset+", testing in "+suite+" "+simset
- titlefig = "Train in "+suite+endtitle
+ titlefig = "Training in "+suite+", testing in "+suite
+ #titlefig = "Train in "+suite+usefeatures
namefig = "out_true_"+cosmoparam+"_"+hparams.name_model()
axscat.set_title(titlefig)
figscat.savefig("Plots/"+namefig+".png", bbox_inches='tight', dpi=300)
plt.close(figscat)
-# Scatter plot of true vs predicted properties
-def plot_ps(hparams, testsuite = False):
+# Plot predicted and target power spectra
+def plot_ps(hparams):
+
+ figscat = plt.figure(figsize=(6,6))
+ gs = gridspec.GridSpec(2,1,height_ratios=[6,2])
+ gs.update(hspace=0.0)#,wspace=0.4,bottom=0.6,top=1.05)
+ axscat=plt.subplot(gs[0])
+ axerr=plt.subplot(gs[1])
- figscat, axscat = plt.subplots(figsize=(8,8))
suite, simset = hparams.simsuite, hparams.simset
col = colorsuite(suite)
- if testsuite: col = colorsuite(hparams.flip_suite())
# Load true values and predicted means and standard deviations
outputs = np.load("Outputs/outputsPS_"+hparams.name_model()+".npy")
trues = np.load("Outputs/truesPS_"+hparams.name_model()+".npy")
- #errors = np.load("Outputs/errors"+cosmoparam+"_"+hparams.name_model()+".npy")
# There is a (0,0) initial point, fix it
outputs = outputs[1:]
trues = trues[1:]
- #errors = errors[1:]
trues, outputs = 10.**trues, 10.**outputs
- """if cosmoparam=="Om":
- minpar, maxpar = 0.1, 0.5
- elif cosmoparam=="Sig":
- minpar, maxpar = 0.6, 1.0
- trues, outputs, errors = denormalize(trues, outputs, errors, minpar, maxpar)"""
-
- # Compute the number of points lying within 1 or 2 sigma regions from their uncertainties
- """cond_success_1sig, cond_success_2sig = np.abs(outputs-trues)<=np.abs(errors), np.abs(outputs-trues)<=2.*np.abs(errors)
- tot_points = outputs.shape[0]
- successes1sig, successes2sig = outputs[cond_success_1sig].shape[0], outputs[cond_success_2sig].shape[0]"""
-
# Compute the linear correlation coefficient
r2 = r2_score(trues,outputs)
err_rel = np.mean(np.abs((trues - outputs)/(trues)))
- #err_rel = np.mean(np.abs((trues - outputs)/(trues)), axis=0)
- #chi2s = (outputs-trues)**2./errors**2.
- #chi2 = chi2s[np.abs(errors)>0.01].mean() # Remove some outliers which make explode the chi2
- #chi2 = chi2s[chi2s<1.e4].mean() # Remove some outliers which make explode the chi2
print("R^2={:.2f}, Relative error={:.2e}".format(r2, err_rel))
- #print("A fraction of succeses of", successes1sig/tot_points, "at 1 sigma,", successes2sig/tot_points, "at 2 sigmas")
-
- # Sort by true value
- #indsort = trues.argsort()
- #print(indsort.shape)
- #outputs, trues = outputs[indsort,:], trues[indsort,:]
- # Compute mean and std region within several bins
- """truebins, binsize = np.linspace(trues[0], trues[-1], num=10, retstep=True)
- means, stds = [], []
- for i, bin in enumerate(truebins[:-1]):
- cond = (trues>=bin) & (trues<bin+binsize)
booleInside=((xx[:,0]>=0)&(xx[:,0]<=1)&(xx[:,1]>=0)&(xx[:,1]<=1)&(xx[:,2]>=0)&(xx[:,2]<=1))
- # retain points inside simulation window
xx = xx[booleInside]
return xx
-# Soneira-Peebles point process
+# Soneira-Peebles point process (Soneira & Peebles 1977, 1978)
def soneira_peebles_model(lamb, eta, n_levels, R0):
+ # Radius for first level
Rparent = R0
# Generate parents
@@ -76,16 +69,14 @@ def soneira_peebles_model(lamb, eta, n_levels, R0):
num_parents = eta
xparents = poisson_process(num_parents)
- #plt.scatter(xparents[:,0], xparents[:,1],s=2.,color="r",alpha=0.5)
-
xtot = []
xtot.extend(xparents)
+ # Iterate over each level
for n in range(2,n_levels+1):
Rparent = Rparent/lamb
pointsx = []
- #for i, j, in zip(xparents, yparents):
for ipar in range(len(xparents)):
num_points = np.random.poisson(eta)
@@ -98,28 +89,31 @@ def soneira_peebles_model(lamb, eta, n_levels, R0):
xx = np.array(xtot)
- # retain points inside simulation window
+ # Retain only those points inside simulation window
booleInside=((xx[:,0]>=0)&(xx[:,0]<=1)&(xx[:,1]>=0)&(xx[:,1]<=1)&(xx[:,2]>=0)&(xx[:,2]<=1))
xx = xx[booleInside]
return xx
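
A usage sketch for the routine above; the parameter values are arbitrary choices for illustration:

```python
# Generate a clustered mock catalogue in the unit box
pos = soneira_peebles_model(lamb=1.8, eta=4, n_levels=5, R0=0.5)
print(pos.shape)   # (N, 3): points surviving the window cut
```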
+#--- OTHER ROUTINES ---#
+
+# Routine to compute the power spectrum using Pylians
def compute_ps(pos):
pos = pos.cpu().detach().numpy()
pos = pos*BoxLen
- # construct galaxy 3D density field
+ # Construct galaxy 3D density field
delta = np.zeros((grid,grid,grid), dtype=np.float32)
MASL.MA(pos, delta, BoxLen, MAS, verbose=False)
delta /= np.mean(delta, dtype=np.float64)
delta -= 1.0
- # compute Pk
+ # Compute the power spectrum
Pk = PKL.Pk(delta, BoxLen, axis, MAS, threads, verbose=False)
k = Pk.k3D
- Pk0 = Pk.Pk[:,0] #monopole
+ Pk0 = Pk.Pk[:,0] # Monopole
indexes = np.where(k