# RoleSim_role_detection_v1.py

"""RoleSim_role_detection_v1.py: Code for performing role detection using the similarity graph generated by RoleSim_v1.py."""

__author__ = "Abhishek Mahadevan Raju"
__credits__ = ["Himanshi Allahabadi"]
__license__ = "GPL"
__version__ = "1.0.0"
__maintainer__ = "Abhishek Mahadevan Raju"
__status__ = "Prototype"

import networkx as nx
import numpy as np
import time
import RoleSim_v1

# The code was originally meant to analyze a graph of movies from the IMDb dataset, and thus there might be a
# few variables defined for this purpose. However, the code should work independent of the dataset.

'''
In this code, an equi-width histogram is created; its bins define the roles that nodes can fall under. When a node's
edge weights fall into several bins, the most common bin is taken to be the node's role.
'''

# Build the similarity graph by running the RoleSim_v1 pipeline. This is a module-level
# statement, so it executes as soon as this file is run or imported.
# NOTE(review): presumably returns a networkx graph whose edges carry a 'weight'
# attribute (that is how bin_creator reads it) -- confirm against RoleSim_v1.
obtained_graph_from_file = RoleSim_v1.full_operation()

# print(obtained_graph_from_file.number_of_nodes())
# print(obtained_graph_from_file.number_of_edges())
# print(list(obtained_graph_from_file.nodes()))

# Histogram creation.

def histrogram_creator(number_of_bins = 60):
    """Build the bin edges of an equi-width histogram spanning [0, 1].

    Every edge is computed directly as ``k / number_of_bins`` rather than by
    repeatedly adding the bin width. Repeated addition accumulates
    floating-point error, so the running value could end up just below or
    just above 1.0 and the number of edges produced then depended on rounding.

    Args:
        number_of_bins: How many equal-width bins to create. Must be a
            positive whole number (``60`` and ``60.0`` are both accepted).

    Returns:
        A list of ``number_of_bins + 1`` increasing floats that starts at 0.0
        and ends at exactly 1.0.

    Raises:
        ValueError: If ``number_of_bins`` is not a positive whole number.
    """
    bin_count = int(number_of_bins)
    if bin_count != number_of_bins or bin_count < 1:
        raise ValueError(
            "number_of_bins must be a positive whole number, got %r" % (number_of_bins,)
        )

    # k / bin_count is a single correctly-rounded division per edge, so there
    # is no drift to clean up and the last edge (k == bin_count) is exactly 1.0.
    list_of_bins = [k / bin_count for k in range(bin_count + 1)]

    print("Final list of bins", list_of_bins)
    return list_of_bins


# Testing code

# x = np.array([0.2, 6.4, 3.0, 1.6])
# inds = np.digitize(x, list_of_bins)

# print(inds)

def bin_creator(obtained_graph_from_file, list_of_bins):
    """Map every node to the histogram-bin indices of its incident edge weights.

    Args:
        obtained_graph_from_file: Graph object exposing ``nodes()`` and
            ``edges(data=True)``; every edge's data dict must contain a
            ``'weight'`` entry.
        list_of_bins: Monotonic sequence of bin edges passed to ``np.digitize``.

    Returns:
        A dict mapping each node to a numpy integer array holding one bin
        index per incident edge (an empty array for a node with no edges).
        Indices follow ``np.digitize``: for increasing edges, index ``i``
        means ``list_of_bins[i-1] <= weight < list_of_bins[i]``, so a weight
        equal to or above the last edge yields ``len(list_of_bins)``.

    Raises:
        KeyError: If an edge has no ``'weight'`` attribute, or references a
            node that ``nodes()`` did not report.
    """
    node_list = list(obtained_graph_from_file.nodes())
    edge_weight_dict = {node: [] for node in node_list}

    # Each edge contributes its weight to both endpoints (a self-loop
    # therefore contributes twice to the same node).
    for source, target, attributes in obtained_graph_from_file.edges(data=True):
        weight = attributes['weight']
        edge_weight_dict[source].append(weight)
        edge_weight_dict[target].append(weight)

    # Debug output for the first node; guarded so an empty graph does not
    # raise IndexError on node_list[0].
    if node_list:
        first_node_weights = edge_weight_dict[node_list[0]]
        print(first_node_weights)
        print(len(first_node_weights))

    return {
        node: np.digitize(weights, list_of_bins)
        for node, weights in edge_weight_dict.items()
    }

# Test code

# print(node_bin_dict[node_list[6]])
# print(len(node_bin_dict[node_list[6]]))
# print(np.unique(node_bin_dict[node_list[6]], return_counts=True))
# u, c = np.unique(node_bin_dict[node_list[6]], return_counts=True)

# y = u[c == c.max()]
# print(y)

def role_creator(list_of_bins, node_bin_dict):
    """Assign each node the bin index that occurs most often among its edges.

    Args:
        list_of_bins: Sequence of histogram bin edges; only its length is
            used, to pre-seed one counter per index ``0 .. len - 1``.
        node_bin_dict: Dict mapping each node to an array (or list) of bin
            indices, as produced by ``np.digitize``.

    Returns:
        A tuple ``(node_role_dict, role_count)``:
          * ``node_role_dict`` maps each node to its role, i.e. its most
            frequent bin index as a plain ``int``. Ties go to the lowest
            index. Nodes with no bin indices (no edges) are left out.
          * ``role_count`` maps each role to the number of nodes assigned
            to it, including zero counts for the pre-seeded indices.
    """
    # Pre-seed the counters so bins that attract no node still appear with 0.
    role_count = {bin_index: 0 for bin_index in range(len(list_of_bins))}
    node_role_dict = {}

    for node, bins in node_bin_dict.items():
        indices, frequencies = np.unique(bins, return_counts=True)
        if indices.size == 0:
            # No edges means no evidence for any role; taking max() of an
            # empty array would raise ValueError.
            continue

        # np.unique returns sorted values and argmax returns the first
        # maximum, so ties resolve to the smallest bin index.
        role = int(indices[frequencies.argmax()])

        # np.digitize can return len(list_of_bins) for a value on or above
        # the last edge (e.g. a similarity of exactly 1.0), which is outside
        # the pre-seeded keys, so count with .get instead of direct indexing.
        role_count[role] = role_count.get(role, 0) + 1
        node_role_dict[node] = role

    # Count of items per role
    print(role_count)
    # Role per node
    print(node_role_dict)

    return node_role_dict, role_count

def streamline(obtained_graph_from_file):
    """Run the whole role-detection pipeline on a similarity graph.

    Builds the default histogram bin edges, bins every node's edge weights,
    and then picks a role per node.

    Args:
        obtained_graph_from_file: Graph handed on to ``bin_creator``.

    Returns:
        Whatever ``role_creator`` returns: ``(node_role_dict, role_count)``.
    """
    bin_edges = histrogram_creator()
    bins_per_node = bin_creator(obtained_graph_from_file, bin_edges)
    return role_creator(bin_edges, bins_per_node)

# Run the role-detection pipeline on the loaded similarity graph. Without this
# call node_role_dict is never defined at module level and the np.save below
# raises NameError.
node_role_dict, role_count = streamline(obtained_graph_from_file)

# np.save stores the dict as a pickled object array.
np.save("roles_obtained_from_RoleSim.npy",node_role_dict)

# for i, v in role_count.items():
#   print(i, v)

# allow_pickle=True unpickles arbitrary objects, so only load files from a
# trusted source.
index_to_movie_dict = np.load('movie_nodes_index_id_dict.npy', allow_pickle=True)
# np.ndenumerate yields (index_tuple, value) pairs, so the resulting dict is
# keyed by index tuples.
# NOTE(review): if the .npy file holds a pickled dict (0-d object array), this
# produces {(): original_dict}; `.item()` may be what was intended -- confirm
# against the code that wrote the file.
index_to_movie_dict = dict(np.ndenumerate(index_to_movie_dict))
print(index_to_movie_dict)
print(type(index_to_movie_dict))