-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgraph_analyzer.py
108 lines (95 loc) · 3.58 KB
/
graph_analyzer.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
import networkx as nx
import matplotlib.pyplot as plt
from collections import Counter
import math
from networkx.algorithms.community import k_clique_communities
# Plot the degree distributions
def plot_dist(graph):
# Get all the degrees from the graph
degrees = [val for (node, val) in graph.degree()]
# Get the count of each degree
deg_counts = Counter(degrees)
k = range(1,max(degrees))
p = [0] * max(degrees)
# Get the probability distribution for a node to be <=k
for degree in k:
p[degree] = 0
for other_degree, count in deg_counts.items():
if other_degree > degree:
p[degree] += count
p = [val for val in p if val > 0]
# Create a linear plot
plt.plot(k, p)
plt.xlabel("k")
plt.ylabel("p(k)")
plt.title("Linear plot of degree distribution")
plt.show()
# Create the log-log plot
log_k = [math.log(val) for val in k]
log_p = [math.log(val) for val in p if val > 0]
plt.plot(log_k, log_p)
plt.xlabel("log(k)")
plt.ylabel("log(p(k))")
plt.title("Log-Log plot of degree distribution")
plt.show()
# Calculate betweenness centralities with networkX
def calc_betweenness(graph):
print("Calculating betweenness centralities")
bb = nx.betweenness_centrality(graph)
nx.set_node_attributes(graph, bb, 'betweenness')
# Find all the cliques in the graph
def calc_cliques(graph):
clique_dict = {}
print("Finding cliques\n")
# This returns a list of cliques, stored as lists
cliques = list(nx.find_cliques(graph))
counter = 1
# For every clique list
for clique in cliques:
# Ignore 2-cliques
if len(clique) > 2:
# Assign the nodes to a new clique value
for node in clique:
clique_dict[node] = counter
counter += 1
else:
# Assign 2-cliques and below to not be part of a clique
for node in clique:
clique_dict[node] = 0
nx.set_node_attributes(graph, clique_dict, name="clique")
return cliques
# Using cliques, do clique percolation to find communities
def find_communities(graph, cliques, graph_type):
print("Calculating for: ", graph_type)
print("Constructing communities")
# Can also be done for k=2 if we didn't ignore 2-cliques above
community_dict_3 = {}
communities_k3 = list(k_clique_communities(graph, 3, cliques=cliques))
label = 0
sizes = []
# Assign labels to each node based on their community
for community in communities_k3:
size = 0
for node in community:
community_dict_3[node] = label
size += 1
sizes.append(size)
label += 1
avg_size = sum(sizes) / len(sizes)
print("Number of communities: ", len(sizes))
print("Average community size: ", avg_size)
print("Number of songs in communities: ", sum(sizes), "\n")
nx.set_node_attributes(graph, community_dict_3, name="communitykthree")
'''Call a variety of graph analzying methods, some from networkx, and add the attributes to the graph object before
exporting'''
def analyze(graph):
#Done in gephi
#calc_betweenness(graph)
plot_dist(graph)
giant = max(nx.connected_component_subgraphs(graph), key=len)
all_cliques = calc_cliques(graph)
find_communities(graph, all_cliques, "Entire Graph")
giant_cliques = calc_cliques(giant)
find_communities(giant, giant_cliques, "Giant Component")
nx.write_gml(giant, "giant_component.gml", stringizer=nx.readwrite.gml.literal_stringizer)
nx.write_gml(graph, "test_graph.gml", stringizer=nx.readwrite.gml.literal_stringizer)