Skip to content

Commit

Permalink
add support for general graph datasets
Browse files Browse the repository at this point in the history
  • Loading branch information
lrjconan committed Apr 8, 2019
1 parent 540c97f commit 97a3aca
Show file tree
Hide file tree
Showing 10 changed files with 1,023 additions and 4 deletions.
23 changes: 22 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
This is the PyTorch implementation of [Lanczos Network](https://arxiv.org/abs/1901.01484) as described in the following ICLR 2019 paper:

```
@inproceedings{liao2018lanczos,
@inproceedings{liao2019lanczos,
title={LanczosNet: Multi-Scale Deep Graph Convolutional Networks},
author={Liao, Renjie and Zhao, Zhizhen and Urtasun, Raquel and Zemel, Richard},
booktitle={ICLR},
Expand Down Expand Up @@ -84,6 +84,27 @@ Python 3, PyTorch(1.0), numpy, scipy, sklearn
```python run_exp.py -c config/X.yaml -t```


## Run on General Graph Datasets

I provide example code for a synthetic graph regression problem: given multiple graphs, each accompanied by node embeddings, the task is to predict a real-valued graph embedding vector for each graph.

* To generate the synthetic dataset:

```cd dataset```

```PYTHONPATH=../ python get_graph_data.py```

* To run the training:

```python run_exp.py -c config/graph_lanczos_net.yaml```


**Note**:
* Please read ```dataset/get_graph_data.py``` for more information on how to adapt it to your own graph datasets.
* I only added support for LanczosNet, by slightly modifying the learnable node embedding into an input node embedding in ```model/lanczos_net_general.py```. It should be straightforward to add support for the other models if you are interested.



## Cite
Please cite our paper if you use this code in your research work.

Expand Down
46 changes: 46 additions & 0 deletions config/graph_lanczos_net.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
---
# Experiment config for LanczosNet on the synthetic general-graph
# regression dataset (see dataset/get_graph_data.py).
exp_name: graph_lanczos_net
exp_dir: exp/graph_lanczos_net
runner: GraphRunner
use_gpu: true
gpus: [0]
seed: 1234
dataset:
  loader_name: GraphData
  name: synthetic
  data_path: data/synthetic
  node_emb_dim: 10      # D: input node embedding dimension, must match X
  graph_emb_dim: 2      # O: target graph embedding dimension, must match Y
  num_edge_type: 1      # E: # edge types (3rd axis of A)
model:
  name: LanczosNetGeneral
  short_diffusion_dist: []
  long_diffusion_dist: [1, 2, 3, 5, 7, 10, 20, 30]
  num_eig_vec: 20       # # Ritz vectors kept by the Lanczos step
  spectral_filter_kind: MLP
  input_dim: 10         # must equal dataset.node_emb_dim
  hidden_dim: [128, 128, 128, 128, 128, 128, 128]
  output_dim: 2         # must equal dataset.graph_emb_dim
  num_layer: 7
  loss: MSE
  output_func: MLP
train:
  optimizer: Adam
  lr_decay: 0.1
  lr_decay_steps: [10000]
  num_workers: 0
  max_epoch: 200
  batch_size: 10
  display_iter: 10      # log every N iterations
  snapshot_epoch: 10000 # checkpoint interval (epochs)
  valid_epoch: 100      # validation interval (epochs)
  lr: 1.0e-4
  wd: 0.0e-4            # weight decay (0.0 here)
  momentum: 0.9
  shuffle: true
  is_resume: false
  resume_model: None    # NOTE(review): YAML parses this as the string "None", not null — presumably handled downstream; confirm
test:
  batch_size: 64
  num_workers: 0
  test_model:           # empty: fill in the checkpoint path to test
1 change: 1 addition & 0 deletions dataset/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
from dataset.qm8 import *
from dataset.graph_data import *
102 changes: 102 additions & 0 deletions dataset/get_graph_data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
import os
import glob
import pickle
import numpy as np
import networkx as nx
from utils.data_helper import get_multi_graph_laplacian_eigs, get_graph_laplacian_eigs, get_laplacian

# Output directory, relative to the dataset/ folder this script runs from.
save_dir = '../data/synthetic/'

if not os.path.exists(save_dir):
  # makedirs also creates the missing parent '../data' on a fresh checkout,
  # where a plain os.mkdir would raise FileNotFoundError.
  os.makedirs(save_dir)
  print('made directory {}'.format(save_dir))


def gen_data(min_num_nodes=20,
             max_num_nodes=100,
             num_graphs=10,
             node_emb_dim=10,
             graph_emb_dim=2,
             edge_prob=0.5,
             seed=123):
  """
  Generate synthetic graph data for graph regression, i.e., given node
  embedding and graph structure as input, predict a graph embedding
  as output.

  N.B.: modification to other tasks like node classification is
  straightforward.

  A single graph in your dataset should contain:
    X: Node embedding, numpy array, shape N X D where N is # nodes
    A: Graph structure, numpy array, shape N X N X E where E is # edge types
    Y: Graph embedding, numpy array, shape 1 X O where O is the embedding dim

  Args:
    min_num_nodes: int, minimum # nodes per graph (inclusive)
    max_num_nodes: int, maximum # nodes per graph (inclusive)
    num_graphs: int, # graphs to generate
    node_emb_dim: int, node embedding dimension D
    graph_emb_dim: int, target graph embedding dimension O
    edge_prob: float, edge probability of the Erdos-Renyi random graph
    seed: int, seed of a local RandomState so each split is reproducible

  Returns:
    data: list of dict, one dict with keys 'X', 'A', 'Y' per graph
  """
  npr = np.random.RandomState(seed)
  # high is exclusive in randint, hence +1 so max_num_nodes can be drawn
  num_nodes = npr.randint(min_num_nodes, high=max_num_nodes + 1, size=num_graphs)

  data = []
  for ii in range(num_graphs):
    data_dict = {}
    data_dict['X'] = npr.randn(num_nodes[ii], node_emb_dim)
    # nx.to_numpy_matrix was removed in networkx 3.0; to_numpy_array is the
    # drop-in replacement (returns an ndarray instead of np.matrix).
    # We assume # edge type = 1, but you can easily extend it to be more
    # than 1 by stacking adjacency matrices along axis 2.
    data_dict['A'] = np.expand_dims(
        nx.to_numpy_array(
            nx.fast_gnp_random_graph(
                num_nodes[ii], edge_prob, seed=npr.randint(1000))),
        axis=2)
    data_dict['Y'] = npr.randn(1, graph_emb_dim)
    data += [data_dict]

  return data


def dump_data(data_list, tag='train'):
  """
  Precompute Laplacians / eigendecompositions and pickle one file per graph.

  Args:
    data_list: list of dict as produced by gen_data, keys 'X', 'A', 'Y'
    tag: str, split name ('train'/'dev'/'test') used in the output filename

  Side effects:
    writes save_dir/synthetic_<tag>_<index>.p, one pickle per graph
  """
  print('Dump {} data!'.format(tag))
  for count, data in enumerate(data_list):
    data_dict = {}
    data_dict['node_feat'] = data['X']
    adjs = data['A']
    # collapse all edge types into one simple adjacency matrix
    adj_simple = np.sum(adjs, axis=2)

    # per-edge-type Laplacians and eigendecompositions
    D_list, V_list, L_list = get_multi_graph_laplacian_eigs(
        adjs, graph_laplacian_type='L4', use_eigen_decomp=True, is_sym=True)
    # Laplacian + eigendecomposition of the collapsed simple graph
    D, V, L4 = get_graph_laplacian_eigs(
        adj_simple,
        graph_laplacian_type='L4',
        use_eigen_decomp=True,
        is_sym=True)
    L6 = get_laplacian(adj_simple, graph_laplacian_type='L6')
    L7 = get_laplacian(adj_simple, graph_laplacian_type='L7')

    data_dict['L_multi'] = np.stack(L_list, axis=2)
    data_dict['L_simple_4'] = L4
    data_dict['L_simple_6'] = L6
    data_dict['L_simple_7'] = L7
    # N.B.: for some edge type, adjacency matrix may be diagonal, in which
    # case D/V come back as None; fall back to trivial spectra
    data_dict['D_simple'] = D if D is not None else np.ones(adjs.shape[0])
    data_dict['V_simple'] = V if V is not None else np.eye(adjs.shape[0])
    data_dict['D_multi'] = D_list
    data_dict['V_multi'] = V_list
    data_dict['label'] = data['Y']

    # context manager closes the file even on error; the original passed an
    # unclosed open() handle straight into pickle.dump
    out_path = os.path.join(save_dir,
                            'synthetic_{}_{:07d}.p'.format(tag, count))
    with open(out_path, 'wb') as f:
      pickle.dump(data_dict, f)

  # plain print does no %-formatting, so the original '100.0 %%' printed a
  # literal double percent sign
  print('100.0 %')


if __name__ == '__main__':
  # One (split, seed) pair per dataset; distinct seeds keep splits disjoint.
  for split_tag, split_seed in [('train', 123), ('dev', 456), ('test', 789)]:
    dump_data(gen_data(seed=split_seed), split_tag)
Loading

0 comments on commit 97a3aca

Please sign in to comment.