-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathk-HCSE.py
195 lines (184 loc) · 7.98 KB
/
k-HCSE.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
import math
import heapq
#import numba as nb
import numpy as np
import copy
from collections import deque
import stretch
import compress
import find_k_sparest
def get_id():
i = 0
while True:
yield i
i += 1
def graph_parse(adj_matrix):
num_of_nodes = adj_matrix.shape[0]
adj_table = {}
vol,node_vol = 0,[]
for v in range(num_of_nodes):
d_v = 0
adj = set()
for j in range(num_of_nodes):
if adj_matrix[v][j] != 0:
d_v += adj_matrix[v][j]
vol += adj_matrix[v][j]
adj.add(j)
adj_table[v] = adj
node_vol.append(d_v)
return num_of_nodes,vol,node_vol,adj_table
#@nb.jit(nopython=True)
def cut_volume(adj_matrix,S,T):
if len(adj_matrix) == 0 or len(S) == 0 or len(T) == 0:
return 0
cut_vol = 0
for u in S:
for v in T:
if adj_matrix[u][v] != 0:
cut_vol += adj_matrix[u][v]
return cut_vol
def get_entropy(tree_node,root,nodes,vol_all):
'''
:param tree_node: 树节点字典
:param root: 当前u-triangle的根节点
:param nodes: 当前u-triangle的结点
:return: 树结构熵
'''
entropy = 0.0
for node in nodes:
if node == root:
continue
u = tree_node[node]
parent = tree_node[u.parent]
if u.vol!=0:
entropy += (u.g/vol_all)*math.log2(parent.vol/u.vol)
return entropy
def cost_se(tree_node,adj_matrix,root):
se = 0.0
for i in range(len(adj_matrix)-1):
for j in range(i+1,len(adj_matrix)):
if adj_matrix[i][j]!=0:
parent = find_lca(tree_node,i,j,root)
se += adj_matrix[i][j]*math.log2(tree_node[parent].vol)
return se
def cost_das(tree_node,adj_matrix,root):
das = 0
for i in range(len(adj_matrix)-1):
for j in range(i+1,len(adj_matrix)):
if adj_matrix[i][j]!=0:
parent = find_lca(tree_node,i,j,root)
das += adj_matrix[i][j]*len(tree_node[parent].vertices)
return das
class PartitionTreeNode():
def __init__(self, ID, vertices, vol, g, children:set = None,parent = None,child_h = 0, child_cut = 0):
self.ID = ID
self.vertices = vertices
self.parent = parent
self.children = children
self.vol = vol
self.g = g
self.merged = False
self.child_h = child_h #不包括该节点的子树高度
self.child_cut = child_cut
def __str__(self):
return "{" + "{}:{}".format(self.__class__.__name__, self.gatherAttrs()) + "}"
def gatherAttrs(self):
return ",".join("{}={}"
.format(k, getattr(self, k))
for k in self.__dict__.keys())
class PartitionTree():
def __init__(self,adj_matrix,height=5,log_file=None):
self.adj_matrix = adj_matrix
self.tree_node = {}
self.g_num_nodes, self.VOL, self.node_vol, self.adj_table = graph_parse(adj_matrix)
self.id_g = get_id()
self.leaves = []
self.build_leaves()
self.height = height
self.log_file=log_file
def build_leaves(self):
vertices,children = [],set()
vol_all = 0
for vertex in range(self.g_num_nodes):
ID = next(self.id_g)
vol = self.node_vol[vertex]
vol_all += vol
leaf_node = PartitionTreeNode(ID=ID, vertices=[vertex], g = vol, vol=vol)
vertices.append(vertex)
children.add(ID)
self.tree_node[ID] = leaf_node
self.leaves.append(ID)
root_id = next(self.id_g)
root = PartitionTreeNode(ID=root_id,vertices=vertices,vol=vol_all,g=vol_all,children=children,child_h=1)
self.tree_node[root_id] = root
for leaf in self.leaves:
self.tree_node[leaf].parent = root_id
self.root_id = root_id
def build_tree(self):
h=1
if self.log_file:
with open(self.log_file, 'a') as f:
f.write('\n')
f.write('树高为' + str(h) + '\n')
f.write('第' + str(h) + '次entropy:'+str(get_entropy(self.tree_node, self.root_id, list(self.tree_node.keys()), self.VOL))+'\n')
f.write('第' + str(h) + '次entropy:' + str(cost_se(self.tree_node, self.adj_matrix, self.root_id))+'\n')
f.write('第' + str(h) + '次entropy:' + str(cost_das(self.tree_node, self.adj_matrix, self.root_id))+'\n')
f.close()
print('第', h, '次entropy:', get_entropy(self.tree_node, self.root_id, list(self.tree_node.keys()), self.VOL))
print('第', h, '次se:', cost_se(self.tree_node, self.adj_matrix, self.root_id))
print('第', h, '次das:', cost_das(self.tree_node, self.adj_matrix, self.root_id))
now_root, nodes = stretch.stretch(self.tree_node, self.adj_matrix,self.adj_table, self.leaves, self.id_g)
compress.compress(self.tree_node, now_root, nodes, self.VOL)
last_layer, l, h = -1, 2, 2
if self.log_file:
with open(self.log_file, 'a') as f:
f.write('\n')
f.write('树高为'+str(h)+'\n')
f.write('第' + str(h) + '次entropy:'+str(get_entropy(self.tree_node, self.root_id, list(self.tree_node.keys()), self.VOL))+'\n')
f.write('第' + str(h) + '次entropy:' + str(cost_se(self.tree_node, self.adj_matrix, self.root_id))+'\n')
f.write('第' + str(h) + '次entropy:' + str(cost_das(self.tree_node, self.adj_matrix, self.root_id))+'\n')
f.close()
print('第', l, '次entropy:', get_entropy(self.tree_node, self.root_id, list(self.tree_node.keys()), self.VOL))
print('第', l, '次se:', cost_se(self.tree_node, self.adj_matrix, self.root_id))
print('第', l, '次das:', cost_das(self.tree_node, self.adj_matrix, self.root_id))
print('第', l, '次树高', h)
while h != self.height:
max_layer,layer_nodes,sparsest = find_k_sparest.find_sparset_level(self.tree_node,self.adj_matrix,self.adj_table,self.root_id,self.id_g,self.VOL,last_layer)
print('sparse',sparsest,'for ',max_layer)
if sparsest<=0.0:
break
for i in layer_nodes[max_layer]:
u = self.tree_node[i]
if u.children:
now_root, nodes = stretch.stretch(self.tree_node, self.adj_matrix, self.adj_table, u.children, self.id_g)
compress.compress(self.tree_node, now_root, nodes, self.VOL)
last_layer = max_layer
nodes = list(self.tree_node.keys())
h = find_k_sparest.get_height_of_subtree(self.tree_node,self.root_id,nodes)
l+=1
if self.log_file:
with open(self.log_file, 'a') as f:
f.write('\n')
f.write('树高为' + str(h) + '\n')
f.write('第' + str(h) + '次entropy:' + str(get_entropy(self.tree_node, self.root_id, list(self.tree_node.keys()), self.VOL))+'\n')
f.write('第' + str(h) + '次entropy:' + str(cost_se(self.tree_node, self.adj_matrix, self.root_id))+'\n')
f.write('第' + str(h) + '次entropy:' + str(cost_das(self.tree_node, self.adj_matrix, self.root_id))+'\n')
f.close()
print('height of tree:', h,'the time:',l)
print('第', h, '次entropy:', get_entropy(self.tree_node, self.root_id, list(self.tree_node.keys()), self.VOL))
print('第',h,'次se:',cost_se(self.tree_node,self.adj_matrix,self.root_id))
print('第',h,'次das:',cost_das(self.tree_node,self.adj_matrix,self.root_id))
print(' ')
def find_lca(tree_node,i,j,root_id):
p= root_id
flag = True
while flag:
flag = False
for child in tree_node[p].children:
if (i in tree_node[child].vertices)==True and (j in tree_node[child].vertices)==True:
p = child
flag=True
continue
if flag==False:
break
return p