Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix dataset input bug & Logging graph indexing of patterns #18

Open
wants to merge 46 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
46 commits
Select commit Hold shift + click to select a range
6b9b804
Add reporting graph idx
martinakaduc Jun 1, 2021
1b7c725
Fix dataset bugs
martinakaduc Jun 1, 2021
33e1056
Turn off debugging & print out graph idx
martinakaduc Jun 1, 2021
43e3082
Update data
martinakaduc Jul 3, 2021
acf5af1
Fix code
martinakaduc Jul 3, 2021
e55e9b4
add synthetic 4
martinakaduc Jul 4, 2021
461bd73
Update dataset
martinakaduc Jul 4, 2021
e07a68b
Update dataset
martinakaduc Jul 11, 2021
8a4182b
Update data
martinakaduc Aug 7, 2021
eb2b58e
Add script
martinakaduc Aug 7, 2021
e0ec36b
Add data
martinakaduc Aug 7, 2021
36b1b6a
Update data
martinakaduc Aug 8, 2021
ecb6955
Update data
martinakaduc Aug 18, 2021
ceb2204
Add data
martinakaduc Jun 27, 2022
7508a6b
Merge branch 'master' of https://github.com/nguyenquangduc2000/neural…
martinakaduc Jun 27, 2022
a375978
fix bug
martinakaduc Jun 27, 2022
d78da1b
Fix bug
martinakaduc Jun 27, 2022
24bc982
Fix bug
martinakaduc Jun 27, 2022
6dc56fd
Add test time
martinakaduc Jun 28, 2022
d7c632e
Fix ds name
martinakaduc Jun 28, 2022
15c1098
Fix data
martinakaduc Jun 28, 2022
e6d5d73
Fix test
martinakaduc Jun 30, 2022
b9a7904
fix time
martinakaduc Jun 30, 2022
bee7049
a bug
martinakaduc Jun 30, 2022
cf3a45d
FIx load model
martinakaduc Jul 1, 2022
6f3d350
Update requirements
martinakaduc Dec 17, 2022
6fedf42
Update scripts
martinakaduc May 21, 2023
0c11b4a
Merge branch 'master' of https://github.com/nguyenquangduc2000/neural…
martinakaduc May 21, 2023
9511a18
Update pretrained models
martinakaduc May 21, 2023
3a549b5
Remove unnecessary logging
martinakaduc May 21, 2023
94d9b0f
Update data.py
martinakaduc Jun 22, 2024
6f712fb
Update data.py
martinakaduc Jun 22, 2024
0fae283
Fix bug
martinakaduc Jun 23, 2024
7309785
Merge branch 'master' of https://github.com/martinakaduc/neural-subgr…
martinakaduc Jun 23, 2024
26054e8
Update code
martinakaduc Jun 23, 2024
f247e38
Fix bug
martinakaduc Jun 23, 2024
bcecc7d
Fix bug
martinakaduc Jun 23, 2024
982b632
Comment unnecessary things
martinakaduc Jun 23, 2024
2996f56
Test bug
martinakaduc Jun 23, 2024
cb94b22
Fix bugs
martinakaduc Jun 23, 2024
006010d
Add KKI checkpoint
martinakaduc Jun 23, 2024
98a690f
Updating for best model saving & early stopping
martinakaduc Jun 24, 2024
6637a0a
Add setting seeds
martinakaduc Jun 24, 2024
20f5741
Update ckpts
martinakaduc Jun 25, 2024
f0334a5
Update results
martinakaduc Jun 25, 2024
bfe2370
Update models and results
martinakaduc Jun 25, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@ log-*/
*.p
*.png
plots/
data/
runs/
results/
*.sw?
Expand Down
30 changes: 30 additions & 0 deletions analyze.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
import sys


def parse_log(lines):
    """Parse a pattern-search log and collect pattern statistics.

    Scans an iterable of log lines for two kinds of records:
      * "Saving plots .../<size>-<name>.png" lines, which announce a new
        pattern and encode its size in the plot file name;
      * "{1, 2, 3}"-style lines, which list the node ids of the pattern
        announced most recently before them.

    Args:
        lines: iterable of log lines (an open file or a list of strings).

    Returns:
        A tuple ``(count_pattern_by_size, pattern_list)`` where
        ``count_pattern_by_size`` maps pattern size -> number of patterns of
        that size, and ``pattern_list`` maps a running pattern id ->
        ``(pattern_size, [node ids])``.
    """
    count_pattern_by_size = {}
    pattern_list = {}
    pid = 0
    # Size of the most recently announced pattern; node-set lines that
    # appear before any "Saving plots" line are attributed to size 0.
    pattern_size = 0

    for line in lines:
        if "Saving plots" in line:
            # Plot files are named "<pattern_size>-...", e.g. "plots/5-xyz.png".
            plot_name = line.split("/")[-1]
            pattern_size = int(plot_name.split("-")[0])
            count_pattern_by_size[pattern_size] = (
                count_pattern_by_size.get(pattern_size, 0) + 1)

        if "{" in line and "}" in line:
            # Slice between the actual brace positions. The previous
            # line[1:-2] assumed the line was exactly "{...}\n", which broke
            # on leading text or a final line without a trailing newline.
            inner = line[line.index("{") + 1:line.index("}")]
            # An empty set "{}" yields an empty node list instead of
            # crashing on int("").
            nodes = [int(x) for x in inner.split(", ")] if inner.strip() else []
            pattern_list[pid] = (pattern_size, nodes)
            pid += 1

    return count_pattern_by_size, pattern_list


if __name__ == "__main__":
    # Usage: python analyze.py <log-file>
    with open(sys.argv[1], "r", encoding="utf-8") as file:
        count_pattern_by_size, pattern_list = parse_log(file)

    print("Count pattern")
    for k, v in count_pattern_by_size.items():
        print(k, v)

    print("Pattern:")
    for k, v in pattern_list.items():
        # v is (pattern_size, node_ids); only the size is reported here.
        print(v[0])
Binary file added ckpt/COX2.pt
Binary file not shown.
Binary file added ckpt/COX2_MD.pt
Binary file not shown.
Binary file added ckpt/DBLP-v1.pt
Binary file not shown.
Binary file added ckpt/DHFR.pt
Binary file not shown.
Binary file added ckpt/KKI.pt
Binary file not shown.
Binary file added ckpt/MSRC-21.pt
Binary file not shown.
Binary file added ckpt/large_100_4_20.pt
Binary file not shown.
Binary file added ckpt/large_40_4_20.pt
Binary file not shown.
Binary file added ckpt/large_60_4_20.pt
Binary file not shown.
Binary file added ckpt/large_80_4_20.pt
Binary file not shown.
73 changes: 73 additions & 0 deletions common/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,8 @@ def load_dataset(name):
dataset = TUDataset(root="/tmp/FIRSTMM_DB", name="FIRSTMM_DB")
elif name == "dblp":
dataset = TUDataset(root="/tmp/DBLP_v1", name="DBLP_v1")
elif name == "msrc":
dataset = TUDataset(root="/tmp/MSRC-21", name="MSRC-21")
elif name == "ppi":
dataset = PPI(root="/tmp/PPI")
elif name == "qm9":
Expand Down Expand Up @@ -426,6 +428,77 @@ def add_anchor(g):
self.batch_idx += 1
return pos_a, pos_b, neg_a, neg_b

class PreloadedDataSource(DataSource):
    """Data source backed by pre-pickled graph pairs on disk.

    Expects ``dataset_name`` to be a directory containing ``train_keys.pkl``,
    ``test_keys.pkl`` (or ``test_keys_<tag>.pkl``), and one pickle file per
    key holding a (graph, graph[, label]) tuple. Keys containing "iso" are
    treated as positive (subgraph-isomorphic) pairs, all others as negatives.
    """

    def __init__(self, dataset_name, tag=None):
        self.dataset = dataset_name
        self.train_keys = []
        self.test_keys = []

        train_key_path = os.path.join(self.dataset, "train_keys.pkl")
        with open(train_key_path, 'rb') as fp:
            self.train_keys = pickle.load(fp)
        # Shuffle once up front so successive epochs see a fixed random order.
        random.shuffle(self.train_keys)

        # An optional tag selects an alternative held-out split.
        test_key_file = ("test_keys.pkl" if tag is None
                         else "_".join(["test_keys", tag]) + ".pkl")
        with open(os.path.join(self.dataset, test_key_file), 'rb') as fp:
            self.test_keys = pickle.load(fp)

        self.train_size = len(self.train_keys)
        self.test_size = len(self.test_keys)

    def gen_data_loaders(self, _, batch_size, train=True,
                         use_distributed_sampling=False):
        """Return [batch sizes, batch start indices, batch end indices]."""
        size = self.train_size if train else self.test_size
        # Chunk [0, size) into consecutive half-open ranges of batch_size;
        # the final range may be shorter.
        starts = list(range(0, size, batch_size))
        ends = [min(size, s + batch_size) for s in starts]
        return [[batch_size] * len(starts), starts, ends]

    def gen_batch(self, batch_size, start, end, train):
        """Load keys[start:end] from disk and batch them by label.

        Returns (pos_a, pos_b, neg_a, neg_b); a side is an empty list when
        no example of that polarity falls in the slice.
        """
        pos_a, pos_b, neg_a, neg_b = [], [], [], []
        keys = self.train_keys if train else self.test_keys

        for key in keys[start:end]:
            with open(os.path.join(self.dataset, key), 'rb') as f:
                record = pickle.load(f)
            # Records are (m1, m2) or (m1, m2, label); the label is unused.
            if len(record) == 3:
                m1, m2, _unused = record
            else:
                m1, m2 = record

            # "iso" in the key marks a positive pair. Note m2 goes to the
            # "a" (query) side and m1 to the "b" (target) side.
            bucket_a, bucket_b = (pos_a, pos_b) if "iso" in key else (neg_a, neg_b)
            bucket_a.append(m2)
            bucket_b.append(m1)

        if pos_a:
            pos_a = utils.batch_nx_graphs(pos_a)
            pos_b = utils.batch_nx_graphs(pos_b)
        if neg_a:
            neg_a = utils.batch_nx_graphs(neg_a)
            neg_b = utils.batch_nx_graphs(neg_b)

        return pos_a, pos_b, neg_a, neg_b

if __name__ == "__main__":
import matplotlib.pyplot as plt
plt.rcParams.update({"font.size": 14})
Expand Down
3 changes: 3 additions & 0 deletions common/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -230,6 +230,9 @@ def forward(self, x, edge_index, edge_weight=None, size=None,
"""
#edge_index, edge_weight = add_remaining_self_loops(
# edge_index, edge_weight, 1, x.size(self.node_dim))
if edge_weight is None:
edge_weight = torch.ones((edge_index.size(1),), dtype=edge_index.dtype,
device=edge_index.device)
edge_index, _ = pyg_utils.remove_self_loops(edge_index)

return self.propagate(edge_index, size=size, x=x,
Expand Down
13 changes: 11 additions & 2 deletions common/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,15 @@

from common import feature_preprocess

def set_seed(seed):
    """Seed every random number generator the project uses.

    Seeds Python's ``random``, NumPy, and PyTorch (CPU and all CUDA
    devices) so runs are reproducible.

    Args:
        seed: integer seed applied to all generators.
    """
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Seeds every visible CUDA device; a silent no-op without CUDA.
    # (The original called this twice — the duplicate was removed.)
    torch.cuda.manual_seed_all(seed)
    # NOTE(review): full cuDNN determinism additionally requires the
    # settings below, at a performance cost; left disabled as before.
    # torch.backends.cudnn.deterministic = True
    # torch.backends.cudnn.benchmark = False

def sample_neigh(graphs, size):
ps = np.array([len(g) for g in graphs], dtype=np.float)
ps /= np.sum(ps)
Expand Down Expand Up @@ -58,7 +67,7 @@ def wl_hash(g, dim=64, node_anchored=False):
vecs[v] = 1
break
for i in range(len(g)):
newvecs = np.zeros((len(g), dim), dtype=np.int)
newvecs = np.zeros((len(g), dim), dtype=np.int64)
for n in g.nodes:
newvecs[n] = vec_hash(np.sum(vecs[list(g.neighbors(n)) + [n]],
axis=0))
Expand Down Expand Up @@ -226,7 +235,7 @@ def batch_nx_graphs(graphs, anchors=None):
#loader = DataLoader(motifs_batch, batch_size=len(motifs_batch))
#for b in loader: batch = b
augmenter = feature_preprocess.FeatureAugment()

if anchors is not None:
for anchor, g in zip(anchors, graphs):
for v in g.nodes:
Expand Down
Loading