In [1]:
%matplotlib inline
show examples of real tweets, retweets, and replies
these are specific examples
can we learn something from the structure of people's interactions?
we'll do that by visualizing graphs
graphs?
todo include an example of a very simple social network
... or represent networks e.g., road networks, computer networks
let's build the earlier example
In [2]:
import networkx as nx
# start from an empty undirected graph
graph = nx.Graph()

# the members of the toy friendship network
people = ['jere', 'ella', 'miika', 'anniina', 'mikko', 'olli', 'laura', 'maria']

# one (person, person) pair per friendship
connections = [
    ('jere', 'ella'), ('ella', 'anniina'), ('ella', 'miika'),
    ('mikko', 'ella'), ('anniina', 'mikko'), ('laura', 'jere'),
    ('olli', 'jere'), ('jere', 'maria'), ('miika', 'mikko'),
    ('maria', 'laura'), ('olli', 'laura'),
]

# register every person as a node, then every friendship as an edge
graph.add_nodes_from(people)
graph.add_edges_from(connections)
In [3]:
def get_pyplot_ax(rows = 1, columns = 1, figsize = (7, 7)):
    """Create a figure holding a rows x columns grid of subplots with all axes hidden.

    Returns the (fig, ax) pair exactly as plt.subplots produced it, so ax is a
    single axes object for a 1x1 grid, a flat sequence when only one of
    rows/columns is 1, and a 2-d grid indexed as ax[i][j] otherwise.
    """
    import matplotlib.pyplot as plt

    fig, ax = plt.subplots(rows, columns, figsize = figsize)
    # fig.axes lists every subplot of the new figure, whatever the shape of ax
    for subplot in fig.axes:
        subplot.axis('off')
    return fig, ax
In [4]:
# draw the friendship network with white nodes so the names stay readable
fig, ax = get_pyplot_ax()
nx.draw_networkx(graph, ax=ax, node_size=2300, node_color='white')

# keep a high-resolution copy of the figure
fig.savefig('img/example_friends.png', dpi=300)
In [5]:
# same network drawn anonymously: black nodes, no names
fig, ax = get_pyplot_ax()
# with_labels is the draw_networkx switch for node labels; the previous
# `node_label = None` keyword is not part of its API (ignored by old networkx
# releases, rejected by newer ones)
nx.draw_networkx(graph, ax = ax, node_size = 2300, node_color = 'black', with_labels = False)
fig.savefig('img/generic_graph.png', dpi = 300)
In [6]:
boy_color = 'green'
girl_color = 'orange'

# person -> colour lookup
color_by_gender = {
    'jere': boy_color, 'miika': boy_color, 'mikko': boy_color, 'olli': boy_color,
    'ella': girl_color, 'anniina': girl_color, 'laura': girl_color, 'maria': girl_color
}

# one colour per node, in the graph's own node order (the order draw_networkx uses);
# iterating the graph directly works on every networkx version, unlike nodes_iter()
colors = [color_by_gender[node] for node in graph]

fig, ax = get_pyplot_ax()
nx.draw_networkx(graph, ax = ax, node_size = 2300, node_color = colors);
In [7]:
# label every node with the upper-cased name of the person
labels = {name: name.upper() for name in people}

fig, ax = get_pyplot_ax()
nx.draw_networkx(graph, ax=ax, labels=labels, node_size=2300, node_color=colors);
In [18]:
# compute the node positions once so both panels show the same layout
pos = nx.layout.spring_layout(graph)

fig, ax = get_pyplot_ax(rows=1, columns=2, figsize=(12, 5))
left_plot, right_plot = ax[0], ax[1]

# left: default colours, right: coloured by gender
nx.draw_networkx(graph, pos=pos, ax=left_plot, node_size=2300)
nx.draw_networkx(graph, pos=pos, ax=right_plot, node_size=2300, node_color=colors)
In [9]:
# dict(...) yields a plain person -> degree mapping on every networkx version:
# degree() already returns a dict in 1.x, and in 2.x it returns a view that
# iterates as (node, degree) pairs, which the previous loop could not index
number_of_friends = dict(graph.degree())
for person, friend_count in number_of_friends.items():
    print("{} has {} friends".format(person, friend_count))
In [10]:
from clustering import spectral_clusters
# split the friendship graph into 2 groups using the helper from the local
# `clustering` module
# NOTE(review): spectral_clusters is not shown in this notebook — presumably it
# returns one group id per node, in node-iteration order; confirm in clustering.py
partition = spectral_clusters(graph, 2)
In [11]:
# display the raw group assignment computed above
partition
Out[11]:
In [12]:
# one colour per node, looked up by the node's position in the graph's iteration
# order: group 1 -> blue, everything else -> grey.
# The index is deliberately not called `pos`, so the layout dict of that name
# computed for the side-by-side plots is not overwritten; range(len(graph))
# replaces the networkx-1.x-only nodes_iter().
group_colors = ['blue' if partition[index] == 1 else 'grey'
                for index in range(len(graph))]
In [13]:
# draw the network coloured by the group each node was assigned to
fig, ax = get_pyplot_ax()
nx.draw_networkx(graph, ax=ax, node_size=2300, node_color=group_colors);
{u'contributors': None, u'coordinates': None, u'created_at': u'Fri Mar 25 11:52:14 +0000 2016', u'entities': {u'hashtags': [{u'indices': [0, 3], u'text': u'RT'}, {u'indices': [4, 11], u'text': u'Follow'}, {u'indices': [12, 23], u'text': u'TopStories'}], u'media': [{u'display_url': u'pic.twitter.com/IN2KVsc1fI', u'expanded_url': u'http://twitter.com/KTM_Riders/status/713332663755509761/photo/1', u'id': 713332663633883136, u'id_str': u'713332663633883136', u'indices': [114, 137], u'media_url': u'http://pbs.twimg.com/media/CeZERzaUsAAXPMP.jpg', u'media_url_https': u'https://pbs.twimg.com/media/CeZERzaUsAAXPMP.jpg', u'sizes': {u'large': {u'h': 683, u'resize': u'fit', u'w': 1024}, u'medium': {u'h': 400, u'resize': u'fit', u'w': 600}, u'small': {u'h': 227, u'resize': u'fit', u'w': 340}, u'thumb': {u'h': 150, u'resize': u'crop', u'w': 150}}, u'type': u'photo', u'url': u'https://t.co/IN2KVsc1fI'}], u'symbols': [], u'urls': [{u'display_url': u'bit.ly/1jyqSVe', u'expanded_url': u'http://bit.ly/1jyqSVe', u'indices': [90, 113], u'url': u'https://t.co/sAPChiEEcY'}], u'user_mentions': []}, u'extended_entities': {u'media': [{u'display_url': u'pic.twitter.com/IN2KVsc1fI', u'expanded_url': u'http://twitter.com/KTM_Riders/status/713332663755509761/photo/1', u'id': 713332663633883136, u'id_str': u'713332663633883136', u'indices': [114, 137], u'media_url': u'http://pbs.twimg.com/media/CeZERzaUsAAXPMP.jpg', u'media_url_https': u'https://pbs.twimg.com/media/CeZERzaUsAAXPMP.jpg', u'sizes': {u'large': {u'h': 683, u'resize': u'fit', u'w': 1024}, u'medium': {u'h': 400, u'resize': u'fit', u'w': 600}, u'small': {u'h': 227, u'resize': u'fit', u'w': 340}, u'thumb': {u'h': 150, u'resize': u'crop', u'w': 150}}, u'type': u'photo', u'url': u'https://t.co/IN2KVsc1fI'}]}, u'favorite_count': 0, u'favorited': False, u'filter_level': u'low', u'geo': None, u'id': 713332663755509761, u'id_str': u'713332663755509761', u'in_reply_to_screen_name': None, u'in_reply_to_status_id': None, 
u'in_reply_to_status_id_str': None, u'in_reply_to_user_id': None, u'in_reply_to_user_id_str': None, u'is_quote_status': False, u'lang': u'en', u'place': None, u'possibly_sensitive': False, u'retweet_count': 0, u'retweeted': False, u'source': u'dlvr.it', u'text': u"#RT #Follow #TopStories Trump Aides Plot 'Two-Phase' Strategy to Win Potential Contested\u2026 https://t.co/sAPChiEEcY https://t.co/IN2KVsc1fI", u'timestamp_ms': u'1458906734776', u'truncated': False, u'user': {u'contributors_enabled': False, u'created_at': u'Wed Jun 06 08:28:15 +0000 2012', u'default_profile': False, u'default_profile_image': False, u'description': u'The Dirt Rider Club', u'favourites_count': 0, u'follow_request_sent': None, u'followers_count': 3218, u'following': None, u'friends_count': 3782, u'geo_enabled': False, u'id': 600799231, u'id_str': u'600799231', u'is_translator': False, u'lang': u'en', u'listed_count': 110, u'location': u'North America', u'name': u'KTM Rider Club', u'notifications': None, u'profile_background_color': u'131516', u'profile_background_image_url': u'http://abs.twimg.com/images/themes/theme14/bg.gif', u'profile_background_image_url_https': u'https://abs.twimg.com/images/themes/theme14/bg.gif', u'profile_background_tile': True, u'profile_image_url': u'http://pbs.twimg.com/profile_images/2375931627/795f1dvdi168knsqo2ch_normal.jpeg', u'profile_image_url_https': u'https://pbs.twimg.com/profile_images/2375931627/795f1dvdi168knsqo2ch_normal.jpeg', u'profile_link_color': u'009999', u'profile_sidebar_border_color': u'EEEEEE', u'profile_sidebar_fill_color': u'EFEFEF', u'profile_text_color': u'333333', u'profile_use_background_image': True, u'protected': False, u'screen_name': u'KTM_Riders', u'statuses_count': 162712, u'time_zone': u'Central Time (US & Canada)', u'url': u'https://twitter.com/KTM_Riders', u'utc_offset': -18000, u'verified': False}}
retweet author terry_golfing
text RT @realDonaldTrump: REPEAL AND REPLACE OBAMACARE!
reply author tonyposnanski
text @realDonaldTrump have a better plan than the one you presented
In [14]:
# placeholders: the tweet currently being inspected and the list of tweets to scan
tweet = None
dataset = list()
def is_retweet(tweet):
    """Return True when `tweet` is a retweet of another tweet.

    `tweet` is expected to be a decoded tweet dict; a retweet carries the
    original tweet under the 'retweeted_status' key. Anything else, including
    None, counts as "not a retweet".
    """
    return isinstance(tweet, dict) and tweet.get('retweeted_status') is not None
def get_retweet_author(tweet):
    """Return the screen name of the account that posted `tweet` (the retweeter).

    Reads tweet['user']['screen_name']; returns None when the tweet or either
    key is missing.
    """
    user = (tweet or {}).get('user') or {}
    return user.get('screen_name')
def get_original_author(tweet):
    """Return the screen name of the account whose tweet was retweeted.

    Reads tweet['retweeted_status']['user']['screen_name']; returns None when
    `tweet` is not a retweet or any of the keys is missing.
    """
    original = (tweet or {}).get('retweeted_status') or {}
    user = original.get('user') or {}
    return user.get('screen_name')
def contains_topic(tweet, topic):
    """Return True when `tweet` mentions `topic`, ignoring case.

    A topic starting with '#' must match a whole hashtag: either one listed in
    tweet['entities']['hashtags'] (stored there without the '#') or a
    whitespace-separated token of the text. Any other topic is matched as a
    substring of tweet['text']. Missing tweets, topics or keys yield False.
    """
    if not tweet or not topic:
        return False
    wanted = topic.lower()
    text = (tweet.get('text') or '').lower()
    if not wanted.startswith('#'):
        return wanted in text
    hashtags = (tweet.get('entities') or {}).get('hashtags') or []
    tags = set('#' + (tag.get('text') or '').lower() for tag in hashtags)
    return wanted in tags or wanted in text.split()
In [15]:
topic = '#obamacare'

# every retweet about the topic becomes an edge: retweeter -> original author
for tweet in dataset:
    if not (contains_topic(tweet, topic) and is_retweet(tweet)):
        continue
    graph.add_edge(get_retweet_author(tweet), get_original_author(tweet))
In [16]:
# recompute the 2-way split now that the retweet edges have been added to graph
# NOTE(review): spectral_clusters comes from the local `clustering` module and is
# not shown here — presumably one group id per node, in node order; confirm
partition = spectral_clusters(graph, 2)
to find sides if they exist
In [17]:
# one colour per node, looked up by the node's position in the graph's iteration
# order: group 1 -> blue, everything else -> red.
# The index is deliberately not called `pos` (that name holds a layout dict in an
# earlier cell), and range(len(graph)) replaces the networkx-1.x-only nodes_iter().
group_colors = ['blue' if partition[index] == 1 else 'red'
                for index in range(len(graph))]

# draw on a fresh figure: the `ax` left over from an earlier cell belongs to a
# figure that was already rendered, so drawing on it would show nothing here
fig, ax = get_pyplot_ax()
nx.draw_networkx(graph, ax = ax, node_color = group_colors, node_size = 2300);
slides at https://github.com/mmathioudakis/pycon2016polarization