The study of social networks has gained importance in recent years within social and behavioral research on HIV and AIDS. Social network research offers a means to map routes of potential viral transfer and to analyze the influence of peer norms and practices on the risk behaviors of individuals. This example analyzes the results of a study of high-risk drug use for HIV prevention in Hartford, Connecticut. The social network collected from these drug users has 194 nodes and 273 edges.
In [1]:
import swat
import time
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.colors as colors
import matplotlib.cm as cmx
# Also import networkx used for rendering a network
import networkx as nx
%matplotlib inline
In [2]:
# Open the CAS session that every later cell uses through `s`.
# The host below is a placeholder; point it at your own server.
s = swat.CAS('http://viya.mycompany.com:8777') # REST API
In [3]:
# Load the 'hypergroup' action set on the session; the hypergroup calls further down rely on it.
s.loadactionset('hypergroup')
Out[3]:
In [4]:
# Read the network data from a CSV file located relative to the notebook's working directory.
# The next cell expects it to contain FROM and TO columns.
drug_network = pd.read_csv('drug_network.csv')
Hypergroup doesn't support numeric source and target columns - so make sure to cast them as varchars.
In [5]:
# Add string-typed copies of the FROM/TO columns; these are the columns
# later handed to hypergroup as SOURCE and TARGET.
drug_network = drug_network.assign(
    SOURCE=drug_network['FROM'].astype(str),
    TARGET=drug_network['TO'].astype(str),
)
drug_network.head()
Out[5]:
In [6]:
# Remove an existing 'drug_network' table first (presumably so the promoted
# upload below does not clash with a copy left by an earlier run).
existing = s.tableexists('drug_network')
if existing.exists:
    s.CASTable('drug_network').droptable()

# One import type per uploaded column, in column order: two doubles followed
# by two varchars (assumes the frame is FROM, TO, SOURCE, TARGET - verify
# against the CSV).
column_types = ['double', 'double', 'varchar', 'varchar']
dataset = s.upload_frame(
    drug_network,
    importoptions={'vars': [{'type': column_type} for column_type in column_types]},
    casout={'name': 'drug_network', 'promote': True},
)
In [7]:
# Call columninfo() on the uploaded table; its result is shown as the cell output.
dataset.columninfo()
Out[7]:
In [8]:
# Preview the first rows of the uploaded table.
dataset.head()
Out[8]:
In [9]:
# Call summary() on the uploaded table; its result is shown as the cell output.
dataset.summary()
Out[9]:
In [10]:
def renderNetworkGraph(filterCommunity=-1, size=18, sizeVar='_HypGrp_',
                       colorVar='', sizeMultipler=500, nodes_table='nodes',
                       edges_table='edges',
                       title='Hartford Drug User Social Network'):
    '''Draw the node/edge tables held in the CAS session ``s`` as a network diagram.

    The node table is read for the columns ``_Value_`` (node id),
    ``_AllXCoord_`` / ``_AllYCoord_`` (position), ``sizeVar`` and, when given,
    ``colorVar``. The edge table is read for ``_Source_`` and ``_Target_``.
    Only nodes that appear in at least one fetched edge are drawn.

    Parameters
    ----------
    filterCommunity : number, default -1
        When >= 0, only node rows with this ``_Community_`` value and edge rows
        whose ``_SCommunity_`` and ``_TCommunity_`` both equal it are fetched.
    size : number, default 18
        Width and height of the (square) figure.
    sizeVar : str, default '_HypGrp_'
        Node column that drives the marker size; values are floored at 0.1
        and multiplied by ``sizeMultipler``.
    colorVar : str, default ''
        Node column used to colour nodes and build the legend. An empty string
        disables colouring and the legend.
    sizeMultipler : number, default 500
        Scale factor applied to the ``sizeVar`` values.
    nodes_table, edges_table : str
        Names of the CAS tables to read.
    title : str
        Figure title.

    Returns
    -------
    None. The figure is displayed with ``plt.show()``.
    '''
    # --- Node attributes ---------------------------------------------------
    nodeFrame = s.CASTable(nodes_table)
    if filterCommunity >= 0:
        nodeFrame = nodeFrame.query('_Community_ EQ %F' % filterCommunity)
    nodeFrame = nodeFrame.to_frame()

    # Pair the columns by row position rather than by index label, so this does
    # not depend on the fetched frame carrying a 0..n-1 index.
    nodeIds = list(nodeFrame['_Value_'])
    nodePos = dict(zip(nodeIds,
                       zip(nodeFrame['_AllXCoord_'], nodeFrame['_AllYCoord_'])))
    nodeSize = {nodeId: max(value, 0.1) * sizeMultipler
                for nodeId, value in zip(nodeIds, nodeFrame[sizeVar])}
    nodeColor = {}
    communities = []
    if colorVar:
        nodeColor = dict(zip(nodeIds, nodeFrame[colorVar]))
        communities = sorted(set(nodeColor.values()))

    # --- Edge list as (source, target) tuples ------------------------------
    edgeFrame = s.CASTable(edges_table)
    if filterCommunity >= 0:
        edgeFrame = edgeFrame.query('_SCommunity_ EQ %F AND _TCommunity_ EQ %F' %
                                    (filterCommunity, filterCommunity))
    edgeFrame = edgeFrame.to_frame()
    edgeTuples = list(zip(edgeFrame['_Source_'], edgeFrame['_Target_']))

    # --- Build the graph ---------------------------------------------------
    # An explicit Figure/Axes pair is used for every drawing call, so nothing
    # depends on which figure number pyplot currently considers active.
    fig, ax = plt.subplots(figsize=(size, size))
    graph = nx.DiGraph()
    graph.add_edges_from(edgeTuples)

    nodeStyle = dict(node_size=[nodeSize[v] for v in graph])

    # --- Colour mapping and legend ------------------------------------------
    if communities:
        jet = plt.get_cmap('jet')
        lowest, highest = communities[0], communities[-1]
        scalarMap = cmx.ScalarMappable(
            norm=colors.Normalize(vmin=lowest, vmax=highest), cmap=jet)
        # networkx does not produce a labelled legend, so add one empty proxy
        # line per community. Empty data keeps the proxies out of the axis limits.
        for community in communities:
            ax.plot([], [], color=scalarMap.to_rgba(community),
                    label='Community %s' % '{:2.0f}'.format(community),
                    linewidth=10)
        # Pass the same vmin/vmax to the node drawing so node colours and
        # legend colours share one scale, even when some communities have no
        # drawn nodes.
        nodeStyle.update(node_color=[nodeColor[v] for v in graph],
                         cmap=jet, vmin=lowest, vmax=highest)

    # --- Render --------------------------------------------------------------
    nx.draw_networkx_nodes(graph, nodePos, ax=ax, **nodeStyle)
    nx.draw_networkx_edges(graph, nodePos, width=1, alpha=0.5, ax=ax)
    nx.draw_networkx_labels(graph, nodePos, font_size=11,
                            font_family='sans-serif', ax=ax)
    if communities:
        ax.legend(loc='upper left', prop={'size': 11})
    ax.set_title(title, fontsize=30)
    ax.axis('off')
    plt.show()
In [11]:
# Create output table objects
edges = s.CASTable('edges', replace=True)
nodes = s.CASTable('nodes', replace=True)
dataset[['SOURCE', 'TARGET']].hypergroup(
createOut = 'never',
allGraphs = True,
edges = edges,
vertices = nodes
)
Out[11]:
In [12]:
# Draw the network with the function's defaults: no community filter and no community colouring.
renderNetworkGraph()
In [13]:
# Re-run hypergroup with community=True, writing to the same `edges` and
# `nodes` output tables as before.
community_options = dict(
    createOut='never',
    allGraphs=True,
    community=True,
    edges=edges,
    vertices=nodes,
)
dataset[['SOURCE', 'TARGET']].hypergroup(**community_options)
Out[13]:
How many hypergroups and communities do we have?
In [14]:
# Call distinct() on the node table; used here to read off how many hypergroups and communities there are.
nodes.distinct()
Out[14]:
In [15]:
# Call summary() on the node table; its result is shown as the cell output.
nodes.summary()
Out[15]:
What are the biggest communities (the top 4 by node count)?
In [16]:
# Keep the CAS output table under its own name so that `topKOut` only ever
# refers to the fetched result frame used by the chart cell.
topKTable = s.CASTable('topKOut', replace=True)
nodes[['_Community_']].topk(aggregator='N', topK=4, casOut=topKTable)

# Fetch at most 10 rows, ordered by the _Rank_ column.
topKOut = topKTable.sort_values('_Rank_').head(10)
topKOut.columns
Out[16]:
In [17]:
# Bar chart with one bar per row of `topKOut`: height from _Score_, tick
# label from _Fmtvar_.
nCommunities = len(topKOut)
ind = np.arange(nCommunities)  # the x locations for the groups

fig, ax = plt.subplots(figsize=(8, 4))
p1 = ax.bar(ind + 0.2, topKOut._Score_, 0.5, color='orange', alpha=0.75)
ax.set_ylabel('Vertices', fontsize=12)
ax.set_xlabel('Community', fontsize=12)
ax.set_title('Number of Nodes for the Top %s Communities' % nCommunities)
ax.set_xticks(ind + 0.2)
ax.set_xticklabels(topKOut._Fmtvar_)
plt.show()
Note: This shows that the biggest communities have up to 63 vertices.
What nodes belong to community 4?
In [18]:
# Show the first node rows whose _Community_ value equals 4.
nodes.query('_Community_ EQ 4').head()
Out[18]:
What edges do we have?
In [19]:
# Preview the first rows of the edge output table.
edges.head()
Out[19]:
In [20]:
# Redraw the network, colouring nodes (and building the legend) from the _Community_ column.
renderNetworkGraph(colorVar='_Community_')
In [21]:
# Same community run as before, now with nCommunities set to 5. Output goes
# to the existing `edges` and `nodes` tables.
five_community_options = dict(
    createOut='never',
    community=True,
    nCommunities=5,
    allGraphs=True,
    edges=edges,
    vertices=nodes,
)
dataset[['SOURCE', 'TARGET']].hypergroup(**five_community_options)
Out[21]:
In [22]:
# Redraw with community colouring; the 'nodes' and 'edges' tables were just rewritten by the nCommunities = 5 run.
renderNetworkGraph(colorVar='_Community_')
How important is a user in the network?
In [23]:
# Community run with centrality output switched on. Output goes to the
# existing `edges` and `nodes` tables.
centrality_options = dict(
    createOut='never',
    community=True,
    nCommunities=5,
    centrality=True,
    mergeCommSmallest=True,
    allGraphs=True,
    graphPartition=True,
    # returns centrality values closer to 1 in the center
    scaleCentralities='central1',
    edges=edges,
    vertices=nodes,
)
dataset[['SOURCE', 'TARGET']].hypergroup(**centrality_options)
Out[23]:
In [24]:
# Preview the first rows of the node table after the centrality run.
nodes.head()
Out[24]:
Betweenness centrality quantifies the number of times a node acts as a bridge along the shortest path(s) between two other nodes. As such it describes the importance of a node in a network.
In [25]:
# Colour nodes by _Community_ and scale marker size from the _Betweenness_ column.
renderNetworkGraph(colorVar='_Community_', sizeVar='_Betweenness_')
In [26]:
# Draw only community 2 (first positional argument is filterCommunity) on a 10x10 figure,
# sizing nodes from _CentroidAngle_ with a multiplier of 5 instead of the default 500.
renderNetworkGraph(2, size=10, sizeVar='_CentroidAngle_', sizeMultipler=5)
In [27]:
# Close the CAS session opened at the top of the notebook.
s.close()
Falko Schulz ▪ Principal Software Developer ▪ Business Intelligence Visualization R&D ▪ SAS® Institute ▪ falko.schulz@sas.com ▪ http://www.sas.com
Data used by permission from Margaret R. Weeks at the Institute of Community Research (http://www.incommunityresearch.org) https://www.researchgate.net/publication/227085871_Social_Networks_of_Drug_Users_in_High-Risk_Sites_Finding_the_Connections