sna
In [1]:
import networkx as nx
import matplotlib
import numpy as np
import matplotlib.pyplot as plt
from networkx.readwrite import json_graph
import json
%matplotlib inline
In [2]:
# Load the pickled graph and print networkx's summary of it.
# NOTE(review): read_gpickle unpickles the file, and unpickling can execute
# arbitrary code -- only load "graph.bin" when it comes from a trusted source.
h = nx.read_gpickle("graph.bin")
print(nx.info(h))
Name: ()
Type: DiGraph
Number of nodes: 4120
Number of edges: 4678
Average in degree: 1.1354
Average out degree: 1.1354
In [3]:
from operator import itemgetter
def getTopCentrality(centralityFun, h, n):
    """Print and return the n highest-scoring nodes for one centrality measure.

    Parameters
    ----------
    centralityFun : callable
        Called as ``centralityFun(h)``; must return a mapping whose
        ``.items()`` yields ``(node, score)`` pairs.
    h : graph
        Passed to ``centralityFun`` unchanged.
    n : int
        Maximum number of entries to keep.

    Returns
    -------
    list of (node, score) tuples
        Sorted by score, highest first. The sort is stable, so nodes with
        equal scores keep the order in which the mapping yielded them.
        Fewer than ``n`` entries are returned when the mapping is smaller.

    Side effects
    ------------
    Prints a Markdown table separator row followed by one row per entry
    (rank, node, score). The header row is printed by the caller.
    """
    ranked = sorted(centralityFun(h).items(), key=itemgetter(1), reverse=True)
    top = ranked[:n]
    # Separator row of a Markdown table; it must be built from plain hyphens.
    print("| ------ | ----------------- | -------------------- |")
    for rank, (node, score) in enumerate(top, start=1):
        print("| %2i | %12s | %f |" % (rank, node, score))
    return top
# Centrality measures to rank; names[i] is the column label for centraFun[i].
centraFun = [
    nx.degree_centrality,
    nx.in_degree_centrality,
    nx.out_degree_centrality,
    nx.betweenness_centrality,
    nx.closeness_centrality,
    nx.eigenvector_centrality,
    nx.pagerank,
]
names = [
    "Degree Centrality",
    "In-degree Centrality",
    "Out-degree Centrality",
    "Betweenness Centrality",
    "Closeness Centrality",
    "Eigenvector Centrality",
    "Pagerank",
]

# tops[i] is the top-10 (node, score) list for centraFun[i]; later cells
# index into it by position, so the order above matters.
tops = []
for name, fun in zip(names, centraFun):
    print("| Rank | User name | %18s|" % name)
    tops.append(getTopCentrality(fun, h, 10))
    print("\n")
| Rank | User name | Degree Centrality|
| —— | —————– | ——————– |
| 1 | angusshire | 0.267298 |
| 2 | batermj | 0.051712 |
| 3 | fly51fly | 0.043943 |
| 4 | nelsonic | 0.042000 |
| 5 | donnemartin | 0.027919 |
| 6 | daimajia | 0.025492 |
| 7 | trietptm | 0.021850 |
| 8 | galaris | 0.020636 |
| 9 | gauravssnl | 0.018208 |
| 10 | fperez | 0.017237 |
| Rank | User name | In-degree Centrality|
| —— | —————– | ——————– |
| 1 | donnemartin | 0.027919 |
| 2 | daimajia | 0.025492 |
| 3 | angusshire | 0.022578 |
| 4 | fperez | 0.017237 |
| 5 | amueller | 0.014324 |
| 6 | mrocklin | 0.013596 |
| 7 | Zulko | 0.013110 |
| 8 | ppwwyyxx | 0.011411 |
| 9 | pudo | 0.010439 |
| 10 | mahmoud | 0.009954 |
| Rank | User name | Out-degree Centrality|
| —— | —————– | ——————– |
| 1 | angusshire | 0.244720 |
| 2 | batermj | 0.050012 |
| 3 | fly51fly | 0.040787 |
| 4 | nelsonic | 0.035931 |
| 5 | trietptm | 0.018208 |
| 6 | galaris | 0.017723 |
| 7 | gauravssnl | 0.017480 |
| 8 | radovankavicky | 0.013838 |
| 9 | indrajithbandara | 0.008497 |
| 10 | vishalbelsare | 0.008012 |
| Rank | User name | Betweenness Centrality|
| —— | —————– | ——————– |
| 1 | angusshire | 0.025285 |
| 2 | nelsonic | 0.008361 |
| 3 | paulhendricks | 0.005906 |
| 4 | pranitbauva1997 | 0.005058 |
| 5 | baya | 0.004657 |
| 6 | hooopo | 0.004492 |
| 7 | tonyseek | 0.003691 |
| 8 | pirate | 0.002931 |
| 9 | batermj | 0.002700 |
| 10 | OrkoHunter | 0.002674 |
| Rank | User name | Closeness Centrality|
| —— | —————– | ——————– |
| 1 | angusshire | 0.259906 |
| 2 | batermj | 0.183126 |
| 3 | indrajithbandara | 0.168880 |
| 4 | paulhendricks | 0.167636 |
| 5 | radovankavicky | 0.167528 |
| 6 | fly51fly | 0.166674 |
| 7 | galaris | 0.161429 |
| 8 | trietptm | 0.158976 |
| 9 | cprogrammer1994 | 0.158735 |
| 10 | mcanthony | 0.158064 |
| Rank | User name | Eigenvector Centrality|
| —— | —————– | ——————– |
| 1 | donnemartin | 0.219502 |
| 2 | angusshire | 0.213775 |
| 3 | daimajia | 0.190127 |
| 4 | fperez | 0.133724 |
| 5 | pudo | 0.110548 |
| 6 | byt3bl33d3r | 0.108596 |
| 7 | Zulko | 0.104271 |
| 8 | mrocklin | 0.101942 |
| 9 | Miserlou | 0.101771 |
| 10 | amueller | 0.099386 |
| Rank | User name | Pagerank|
| —— | —————– | ——————– |
| 1 | daimajia | 0.010603 |
| 2 | mrocklin | 0.009026 |
| 3 | donnemartin | 0.008744 |
| 4 | fperez | 0.008304 |
| 5 | moskytw | 0.007320 |
| 6 | angusshire | 0.007087 |
| 7 | spitfire-sidra | 0.006225 |
| 8 | Zulko | 0.005374 |
| 9 | avikj | 0.004606 |
| 10 | amueller | 0.004512 |
In [4]:
# Union of every node that appears in at least one of the top-10 lists.
commons = set()
for top_list in tops:
    commons.update(entry[0] for entry in top_list)
print(len(commons))
print(commons)

# Subgraph induced by those nodes; built once and reused for every step below.
important = h.subgraph(commons)
print(nx.eigenvector_centrality(important))
print("| Rank | User name | In-degree Centrality|")
# getTopCentrality prints the table rows itself; printing its return value
# additionally shows the raw (node, score) list.
print(getTopCentrality(nx.in_degree_centrality, important, 10))

# Save to JSON which can be visualized by d3.js. The with-block closes the
# file so the data is flushed to disk and the handle is not leaked.
with open('importantnodes.json', 'w') as fp:
    json.dump(json_graph.node_link_data(important), fp)
33
set([u’byt3bl33d3r’, u’fperez’, u’OrkoHunter’, u’donnemartin’, u’moskytw’, u’daimajia’, u’batermj’, u’cprogrammer1994′, u’fly51fly’, u’mahmoud’, u’radovankavicky’, u’indrajithbandara’, u’paulhendricks’, u’ppwwyyxx’, u’Zulko’, u’hooopo’, u’baya’, u’Miserlou’, u’angusshire’, u’tonyseek’, u’pranitbauva1997′, u’pirate’, u’vishalbelsare’, u’mrocklin’, u’avikj’, u’amueller’, u’spitfire-sidra’, u’trietptm’, u’mcanthony’, u’nelsonic’, u’pudo’, u’galaris’, u’gauravssnl’])
{u’byt3bl33d3r’: 0.19719231271273835, u’hooopo’: 0.09450000023689209, u’donnemartin’: 0.3539009811713699, u’daimajia’: 0.16267415047168873, u’galaris’: 0.16910787668452737, u’cprogrammer1994′: 0.12915992525257552, u’fly51fly’: 0.17757265456849733, u’mahmoud’: 0.08382607808344876, u’radovankavicky’: 0.2234214325447363, u’indrajithbandara’: 0.0929665995746708, u’paulhendricks’: 0.2903715258243272, u’tonyseek’: 0.07663183566240735, u’mrocklin’: 0.11912972170797892, u’Zulko’: 0.14594240881909407, u’fperez’: 0.2841149892734136, u’baya’: 0.11662124560695658, u’Miserlou’: 0.1739575998910423, u’angusshire’: 0.32901399486558913, u’ppwwyyxx’: 0.12230052966009973, u’pranitbauva1997′: 0.16834587516223534, u’trietptm’: 0.17757971840802272, u’pirate’: 0.1657162657604965, u’vishalbelsare’: 0.13208976866920571, u’moskytw’: 0.07268429855953133, u’batermj’: 0.16932098147709398, u’amueller’: 0.10857192315214592, u’spitfire-sidra’: 0.07268429855953133, u’avikj’: 0.0, u’mcanthony’: 0.22312593285868107, u’nelsonic’: 0.22100600407913354, u’pudo’: 0.13059632639502589, u’OrkoHunter’: 0.08999353921630769, u’gauravssnl’: 0.07082891508865617}
| Rank | User name | In-degree Centrality|
| —— | —————– | ——————– |
| 1 | donnemartin | 0.312500 |
| 2 | angusshire | 0.312500 |
| 3 | paulhendricks | 0.250000 |
| 4 | fperez | 0.250000 |
| 5 | radovankavicky | 0.187500 |
| 6 | nelsonic | 0.187500 |
| 7 | byt3bl33d3r | 0.187500 |
| 8 | galaris | 0.156250 |
| 9 | mrocklin | 0.156250 |
| 10 | mcanthony | 0.156250 |
[(u’donnemartin’, 0.3125), (u’angusshire’, 0.3125), (u’paulhendricks’, 0.25), (u’fperez’, 0.25), (u’radovankavicky’, 0.1875), (u’nelsonic’, 0.1875), (u’byt3bl33d3r’, 0.1875), (u’galaris’, 0.15625), (u’mrocklin’, 0.15625), (u’mcanthony’, 0.15625)]
In [5]:
# Nodes present in both the Eigenvector (tops[5]) and Pagerank (tops[6])
# top-10 lists. Every name in tops comes from a centrality computed on h, so
# seeding the intersection with all nodes of h is unnecessary.
commons = set(x[0] for x in tops[5]) & set(x[0] for x in tops[6])
commons
Out[5]:
{u’Zulko’,
u’amueller’,
u’angusshire’,
u’daimajia’,
u’donnemartin’,
u’fperez’,
u’mrocklin’}
In [6]:
# Nodes present in both the Degree (tops[0]) and Out-degree (tops[2]) top-10
# lists. Every name in tops comes from a centrality computed on h, so seeding
# the intersection with all nodes of h is unnecessary.
commons = set(x[0] for x in tops[0]) & set(x[0] for x in tops[2])
commons
Out[6]:
{u’angusshire’,
u’batermj’,
u’fly51fly’,
u’galaris’,
u’gauravssnl’,
u’nelsonic’,
u’trietptm’}
In [7]:
# In-degree, then out-degree, of the node 'donnemartin'.
print(h.in_degree(u'donnemartin'))
print(h.out_degree(u'donnemartin'))
115
0
In [8]:
# In-degree, then out-degree, of the node 'daimajia'.
print(h.in_degree(u'daimajia'))
print(h.out_degree(u'daimajia'))
105
0
In [9]:
# In-degree, then out-degree, of the node 'angusshire'.
print(h.in_degree(u'angusshire'))
print(h.out_degree(u'angusshire'))
93
1008
In [10]:
# In-degree, then out-degree, of the node 'batermj'.
print(h.in_degree(u'batermj'))
print(h.out_degree(u'batermj'))
7
206
In [11]:
# Number of weakly connected components, counted by walking the component iterator.
sum(1 for _component in nx.weakly_connected_components(h))
Out[11]:
2127
In [12]:
# Number of strongly connected components, counted by walking the component iterator.
sum(1 for _component in nx.strongly_connected_components(h))
Out[12]:
3764
In [13]:
# Weakly connected components of h, ordered from largest to smallest.
wc = list(nx.weakly_connected_components(h))
wc.sort(key=len, reverse=True)
In [14]:
# Summary of the subgraph induced by wc[0], the largest weakly connected component.
largest_weak = h.subgraph(wc[0])
print(nx.info(largest_weak))
Name: ()
Type: DiGraph
Number of nodes: 1954
Number of edges: 4627
Average in degree: 2.3680
Average out degree: 2.3680
In [15]:
# Strongly connected components of h, ordered from largest to smallest
# (note: despite the leading "w", wcs holds the *strong* components).
wcs = list(nx.strongly_connected_components(h))
wcs.sort(key=len, reverse=True)

# Summary of the subgraph induced by the largest strong component.
largest_strong = h.subgraph(wcs[0])
print(nx.info(largest_strong))
Name: ()
Type: DiGraph
Number of nodes: 263
Number of edges: 919
Average in degree: 3.4943
Average out degree: 3.4943
In [16]:
# Weak components: how many there are, how many consist of a single node,
# and the sizes of the ten largest.
lens = list(map(len, wc))
print(len(wc))
print(np.count_nonzero(np.asarray(lens) == 1))
print(lens[:10])
2127
2092
[1954, 4, 4, 3, 3, 2, 2, 2, 2, 2]
In [17]:
# Strong components: how many there are, how many consist of a single node,
# and the sizes of the ten largest.
lens = list(map(len, wcs))
print(len(wcs))
print(np.count_nonzero(np.asarray(lens) == 1))
print(lens[:10])
3764
3702
[263, 7, 6, 6, 5, 5, 5, 4, 4, 3]
In [18]:
# Save the largest strongly connected component (wcs[0]) to JSON which can be
# visualized by d3.js. The with-block closes the file so the data is flushed
# to disk and the handle is not leaked.
with open('largestStrongComponent.json', 'w') as fp:
    json.dump(json_graph.node_link_data(h.subgraph(wcs[0])), fp)
In [19]:
# Total degree of every node, largest first. dict() accepts both a plain
# {node: degree} dict and a degree view of (node, degree) pairs, so this does
# not depend on which of the two nx.degree returns.
degrees = sorted(dict(nx.degree(h)).values(), reverse=True)
In [20]:
# Degree distribution drawn as two histograms on the same axes: one with
# width-10 bins covering 10..100 and one with unit bins covering 0..10.
# matplotlib.pyplot is already imported as plt in the first cell.
# NOTE(review): degrees above 100 fall outside both bin ranges and are not
# drawn, and the last bin of each histogram includes its right edge, so
# degree-10 nodes are counted in both histograms -- confirm this is intended.
degree_values = list(dict(nx.degree(h)).values())
plt.hist(degree_values, bins=list(range(10, 101, 10)))
plt.hist(degree_values, bins=list(range(11)))
plt.title("Degree distribution")
plt.xlabel("Degree")
plt.ylabel("Number of nodes");
Out[20]:
In [21]:
# Markdown table rows: number of nodes having each degree from 0 to 10,
# followed by the count of nodes whose degree exceeds 10.
# numpy is already imported as np in the first cell.
degrees = np.array(list(dict(nx.degree(h)).values()))
for i in range(11):
    num = np.sum(degrees == i)
    print("| %d | %d |" % (i, num))
print(np.sum(degrees > 10))
| 0 | 2092 |
| 1 | 847 |
| 2 | 422 |
| 3 | 219 |
| 4 | 135 |
| 5 | 95 |
| 6 | 63 |
| 7 | 37 |
| 8 | 34 |
| 9 | 32 |
| 10 | 21 |
123
In [22]:
# Find reciprocal pairs: (a, b) such that both a -> b and b -> a are edges.
# A single pass over the edge list replaces testing every pair of nodes,
# which needed on the order of len(nodes)**2 has_edge calls.
nodes = list(h.nodes())
position = dict((node, idx) for idx, node in enumerate(nodes))

# Each reciprocal pair shows up as two edges; keep only the one whose source
# comes first in `nodes`, so the pair is reported once as (earlier, later).
# This also skips self-loops.
pairs = [
    (u, v)
    for u, v in h.edges()
    if position[u] < position[v] and h.has_edge(v, u)
]
# Order by position of the first node, then of the second -- the same order
# nested index loops over `nodes` would produce.
pairs.sort(key=lambda pair: (position[pair[0]], position[pair[1]]))

print(len(pairs))
print(pairs[:10])
335
[(u’fffaraz’, u’1995parham’), (u’fffaraz’, u’Tabrizian’), (u’geekplux’, u’gaocegege’), (u’pdelong42′, u’nielssorensen’), (u’boliza’, u’geometrybase’), (u’sorra’, u’AndriyLin’), (u’ZeroCrystal’, u’riomus’), (u’ashubly25′, u’nelsonic’), (u’Marlysson’, u’pirate’), (u’Marlysson’, u’alephmelo’)]
In [23]:
# Histogram of in-degrees in width-10 bins from 10 to 100. dict() accepts both
# a plain {node: degree} dict and a degree view of (node, degree) pairs.
# NOTE(review): in-degrees below 10 or above 100 are outside the bins and are not counted.
plt.hist(list(dict(h.in_degree()).values()), bins=list(range(10, 101, 10)))
Out[23]:
(array([ 44., 13., 1., 5., 3., 0., 1., 0., 1.]),
array([ 10, 20, 30, 40, 50, 60, 70, 80, 90, 100]),
)
In [24]:
# Histogram of out-degrees in width-10 bins from 10 to 100. dict() accepts both
# a plain {node: degree} dict and a degree view of (node, degree) pairs.
# NOTE(review): out-degrees below 10 or above 100 are outside the bins and are not counted.
plt.hist(list(dict(h.out_degree()).values()), bins=list(range(10, 101, 10)))
Out[24]:
(array([ 31., 3., 5., 0., 1., 0., 3., 0., 0.]),
array([ 10, 20, 30, 40, 50, 60, 70, 80, 90, 100]),
)
In [25]:
# Box plot of the PageRank scores of all nodes. The scores are materialized
# as a list so matplotlib receives a real sequence rather than a dict view.
plt.boxplot(list(nx.pagerank(h).values()))
Out[25]:
{‘boxes’: [
‘caps’: [
‘fliers’: [
‘means’: [],
‘medians’: [
‘whiskers’: [
In [ ]: