In [1]:
# Import generic modules
import getpass
import os

import matplotlib
import networkx as nx
import pandas as pd
import seaborn
In [2]:
# Import the platform_analysis libraries
import platform_analysis.github_analysis as github
import platform_analysis.sna as sna
In [3]:
# Render our plots inline
%matplotlib inline
In [4]:
# Clear font cache for matplotlib
# The cache file is removed with os.remove instead of the `%rm` shell alias so
# the cell also works where no `rm` command exists, and it is skipped quietly
# when there is no cache file to delete.
# NOTE(review): recent matplotlib releases appear to store the font cache under
# a different file name — confirm 'fontList.cache' is still the right target.
font_cache_path = os.path.join(matplotlib.get_cachedir(), 'fontList.cache')
if os.path.isfile(font_cache_path):
    os.remove(font_cache_path)
# Set the font
matplotlib.rcParams.update({'font.family': 'Hack'})
In [5]:
# Configuration
# Repository to analyse, addressed as <username>/<repository>
repository = "fablabs"
username = "fablabbcn"
# Credentials are read from the environment so they are never saved inside the
# notebook; when a variable is unset the value is asked for interactively
# (getpass hides the typed text and behaves the same on Python 2 and 3).
userlogin = os.environ.get("GITHUB_USERLOGIN") or getpass.getpass("GitHub login: ")
password = os.environ.get("GITHUB_PASSWORD") or getpass.getpass("GitHub password or token: ")
In [6]:
# Get the data from GitHub
# Calls the platform_analysis GitHub helper with the values from the
# configuration cell; the result is kept in `repo` and handled as a NetworkX
# graph by the cells below (layout, drawing, self-loop removal).
# NOTE(review): `path` is an absolute, machine-specific directory — presumably
# the helper's local working folder; confirm and point it to a folder that
# exists on the machine running the notebook.
repo = github.github_analysis(
repository=repository,
username=username,
userlogin=userlogin,
password=password,
path="/Users/xxxx/Documents")
In [7]:
# Alternatively, load a previously saved graph instead of fetching it from GitHub again (uncomment to use)
# repo = nx.read_graphml("fablabs.graphml")
In [8]:
# Save data without self-loops
# Writes the graph to "fablabs.graphml" through the sna helper.
# NOTE(review): self_loops=False presumably makes the helper leave self-loop
# edges out of the saved file — confirm in platform_analysis.sna.
sna.save_graph(repo, "fablabs.graphml", self_loops=False)
In [9]:
# Remove self-loops (a user interacting with herself/himself)
# nx.selfloop_edges is the module-level function; the Graph.selfloop_edges
# method is no longer available in current NetworkX releases.
# The edges are materialised into a list first, because the graph must not be
# modified while its self-loop edges are still being iterated lazily.
self_loops_edges = list(nx.selfloop_edges(repo, keys=True, data=True))
repo.remove_edges_from(self_loops_edges)
In [10]:
# Transform the graph data into pandas time series
# `data` is the input of every sna.time_analysis call further down.
data = sna.graph_to_pandas_time_series(repo)
In [11]:
# Preview the first rows of the converted table
data.head()
Out[11]:
In [12]:
# Do a first time series analysis
# Global focus with the "separated" structure; the result is resampled and
# plotted per interaction type in the cells below.
# NOTE(review): the meaning of interaction="0" is defined by
# platform_analysis.sna.time_analysis — confirm there.
time_data = sna.time_analysis(data=data, focus="global", interaction="0", structure="separated")
In [13]:
# Preview the first rows of the global time series
time_data.head()
Out[13]:
In [14]:
# Draw the complete interaction network (all users), without node labels
# The spring layout is computed first and then handed to the drawing routine.
pos = nx.spring_layout(repo, iterations=100)
nx.draw_networkx(repo, pos=pos, node_size=50, with_labels=False)
In [15]:
# Group interactions by type
# Input is the global time series; the result feeds the bar chart below.
global_stats = sna.type_stats(data=time_data, focus="global")
In [16]:
# Bar chart of the per-type interaction statistics, saved as results-01.pdf
plot = global_stats.plot(
    kind="bar",
    figsize=(20, 6),
    title="Interactions over time in " + username + " / " + repository,
)
plot.set(xlabel="Interactions", ylabel="Amount")
# Keep the category names horizontal
plot.set_xticklabels(global_stats.index, rotation=0)
matplotlib.pyplot.savefig("results-01.pdf")
In [17]:
# Plot interactions over time, by type (monthly resample)
plot = time_data.resample('M').sum().plot(
    kind="area",
    figsize=(20, 6),
    title="Interactions over time in " + username + " / " + repository)
# Axes
plot.set_xlabel("Time")
plot.set_ylabel("Interactions")
# Time format for x axis: one major tick every four months
xticks = pd.date_range(start=time_data.index.min(), end=time_data.index.max(), freq='4M')
# Tick positions are fixed before the labels are assigned, as matplotlib
# requires for set_xticklabels; '%b' is the portable abbreviated-month
# directive ('%h' is a platform-specific alias).
plot.set_xticks(xticks)
plot.set_xticklabels([x.strftime('%b\n%Y') for x in xticks])
plot.grid(True, which='major', axis='x')
matplotlib.pyplot.savefig("results-02.pdf")
In [18]:
# Plot interactions over time, by type (quarterly resample)
plot = time_data.resample('Q').sum().plot(
    kind="area",
    figsize=(20, 6),
    title="Interactions over time in " + username + " / " + repository)
# Axes
plot.set_xlabel("Time")
plot.set_ylabel("Interactions")
# Time format for x axis: one major tick every four months
xticks = pd.date_range(start=time_data.index.min(), end=time_data.index.max(), freq='4M')
# Tick positions are fixed before the labels are assigned, as matplotlib
# requires for set_xticklabels; '%b' is the portable abbreviated-month
# directive ('%h' is a platform-specific alias).
plot.set_xticks(xticks)
plot.set_xticklabels([x.strftime('%b\n%Y') for x in xticks])
plot.grid(True, which='major', axis='x')
matplotlib.pyplot.savefig("results-03.pdf")
In [19]:
# Plot interactions over time, by type (yearly resample)
plot = time_data.resample('A').sum().plot(
    kind="area",
    figsize=(20, 6),
    title="Interactions over time in " + username + " / " + repository)
# Axes
plot.set_xlabel("Time")
plot.set_ylabel("Interactions")
# Time format for x axis: one major tick every twelve months, labelled by year
xticks = pd.date_range(start=time_data.index.min(), end=time_data.index.max(), freq='12M')
# Tick positions are fixed before the labels are assigned, as matplotlib
# requires for set_xticklabels.
plot.set_xticks(xticks)
plot.set_xticklabels([x.strftime('%Y') for x in xticks])
plot.grid(True, which='major', axis='x')
matplotlib.pyplot.savefig("results-04.pdf")
In [20]:
# Plot interactions over time, split by type (monthly resample)
# One area subplot per column, all sharing the x and y axes.
# NOTE(review): the `label` argument does not seem to influence the legend
# (legend entries come from the column names) — confirm and drop it if unused.
plot = time_data.resample('M').sum().plot(
    label=("X", "Y"),
    subplots=True,
    legend=True,
    kind="area",
    sharex=True,
    sharey=True,
    figsize=(40, 20))
# Flat view of the axes array, used for the loop and for the first subplot
ax = plot.flatten()
# Time format for x axis: one major tick every four months
xticks = pd.date_range(start=time_data.index.min(), end=time_data.index.max(), freq='4M')
for subplot in ax:
    subplot.set_ylabel("Interactions")
    subplot.set_xlabel("Time")
    # The only y range in effect; the former ylim=(0, 150) plot argument was
    # overridden by this call and has been removed.
    subplot.set_ylim(0, 45)
# Tick setup does not depend on the loop variable, so it is done once.
# Tick positions are fixed before the labels are assigned, as matplotlib
# requires for set_xticklabels; '%b' is the portable abbreviated-month
# directive ('%h' is a platform-specific alias).
ax[0].set_xticks(xticks)
ax[0].set_xticklabels([x.strftime('%b\n%Y') for x in xticks])
# As before, vertical grid lines are enabled on the first subplot only
ax[0].grid(True, which='major', axis='x')
matplotlib.pyplot.suptitle("Activity over time in " + username + " / " + repository, size=26, y=0.925)
matplotlib.pyplot.savefig("results-05.pdf")
In [21]:
# Combine all the interactions together
# Same call as for `time_data`, but with structure="combined"; the result is
# used for the combined monthly, quarterly and yearly plots below.
time_data_combined = sna.time_analysis(data=data, focus="global", interaction="0", structure="combined")
In [22]:
# Plot interactions over time (monthly resample)
plot = time_data_combined.resample('M').sum().plot(
    kind="area",
    figsize=(20, 6),
    title="Interactions over time in " + username + " / " + repository)
# Axes
plot.set_xlabel("Time")
plot.set_ylabel("Interactions")
# Time format for x axis: one major tick every four months, spanning the
# time range of `time_data`
xticks = pd.date_range(start=time_data.index.min(), end=time_data.index.max(), freq='4M')
# Tick positions are fixed before the labels are assigned, as matplotlib
# requires for set_xticklabels; '%b' is the portable abbreviated-month
# directive ('%h' is a platform-specific alias).
plot.set_xticks(xticks)
plot.set_xticklabels([x.strftime('%b\n%Y') for x in xticks])
plot.grid(True, which='major', axis='x')
matplotlib.pyplot.savefig("results-06.pdf")
In [23]:
# Plot interactions over time (quarterly resample)
plot = time_data_combined.resample('Q').sum().plot(
    kind="area",
    figsize=(20, 6),
    title="Interactions over time in " + username + " / " + repository)
# Axes
plot.set_xlabel("Time")
plot.set_ylabel("Interactions")
# Time format for x axis: one major tick every four months, spanning the
# time range of `time_data`
xticks = pd.date_range(start=time_data.index.min(), end=time_data.index.max(), freq='4M')
# Tick positions are fixed before the labels are assigned, as matplotlib
# requires for set_xticklabels; '%b' is the portable abbreviated-month
# directive ('%h' is a platform-specific alias).
plot.set_xticks(xticks)
plot.set_xticklabels([x.strftime('%b\n%Y') for x in xticks])
plot.grid(True, which='major', axis='x')
matplotlib.pyplot.savefig("results-07.pdf")
In [24]:
# Plot interactions over time (yearly resample)
plot = time_data_combined.resample('A').sum().plot(
    kind="area",
    figsize=(20, 6),
    title="Interactions over time in " + username + " / " + repository)
# Axes
plot.set_xlabel("Time")
plot.set_ylabel("Interactions")
# Time format for x axis: one major tick every twelve months, labelled by
# year — the same tick setup as the per-type yearly plot (results-04.pdf),
# replacing the four-month ticks copied from the monthly cells.
xticks = pd.date_range(start=time_data.index.min(), end=time_data.index.max(), freq='12M')
# Tick positions are fixed before the labels are assigned, as matplotlib
# requires for set_xticklabels.
plot.set_xticks(xticks)
plot.set_xticklabels([x.strftime('%Y') for x in xticks])
plot.grid(True, which='major', axis='x')
matplotlib.pyplot.savefig("results-08.pdf")
In [25]:
# Get data of single users
# Same analysis as `time_data`, but with focus="user"; the cells below select
# one user at a time from the result by login name.
user_data = sna.time_analysis(data=data, focus="user", interaction="0", structure="separated")
In [26]:
# Preview the first rows of the per-user time series
user_data.head()
Out[26]:
In [27]:
# Plot interactions of @ceritium user over time, by type (monthly resample)
# The user's rows are selected by label with .loc (.ix is no longer available
# in current pandas releases).
plot = user_data.loc["ceritium"].resample('M').sum().plot(
    kind="area",
    figsize=(20, 6),
    ylim=(0, 40),
    title="Interactions over time of @ceritium in " + username + " / " + repository)
# Axes
plot.set_xlabel("Time")
plot.set_ylabel("Interactions")
# Time format for x axis: one major tick every four months, spanning the
# time range of the global `time_data`
xticks = pd.date_range(start=time_data.index.min(), end=time_data.index.max(), freq='4M')
# Tick positions are fixed before the labels are assigned, as matplotlib
# requires for set_xticklabels; '%b' is the portable abbreviated-month
# directive ('%h' is a platform-specific alias).
plot.set_xticks(xticks)
plot.set_xticklabels([x.strftime('%b\n%Y') for x in xticks])
plot.grid(True, which='major', axis='x')
matplotlib.pyplot.savefig("results-09.pdf")
In [28]:
# Plot interactions of @hiromipaw user over time, by type (monthly resample)
# The user's rows are selected by label with .loc (.ix is no longer available
# in current pandas releases).
plot = user_data.loc["hiromipaw"].resample('M').sum().plot(
    kind="area",
    ylim=(0, 40),
    figsize=(20, 6),
    title="Interactions over time of @hiromipaw in " + username + " / " + repository)
# Axes
plot.set_xlabel("Time")
plot.set_ylabel("Interactions")
# Time format for x axis: one major tick every four months, spanning the
# time range of the global `time_data`
xticks = pd.date_range(start=time_data.index.min(), end=time_data.index.max(), freq='4M')
# Tick positions are fixed before the labels are assigned, as matplotlib
# requires for set_xticklabels; '%b' is the portable abbreviated-month
# directive ('%h' is a platform-specific alias).
plot.set_xticks(xticks)
plot.set_xticklabels([x.strftime('%b\n%Y') for x in xticks])
plot.grid(True, which='major', axis='x')
matplotlib.pyplot.savefig("results-10.pdf")
In [29]:
# Plot interactions of @openp2pdesign user over time, by type (monthly resample)
# The user's rows are selected by label with .loc (.ix is no longer available
# in current pandas releases).
plot = user_data.loc["openp2pdesign"].resample('M').sum().plot(
    kind="area",
    ylim=(0, 40),
    figsize=(20, 6),
    title="Interactions over time of @openp2pdesign in " + username + " / " + repository)
# Axes
plot.set_xlabel("Time")
plot.set_ylabel("Interactions")
# Time format for x axis: one major tick every four months, spanning the
# time range of the global `time_data`
xticks = pd.date_range(start=time_data.index.min(), end=time_data.index.max(), freq='4M')
# Tick positions are fixed before the labels are assigned, as matplotlib
# requires for set_xticklabels; '%b' is the portable abbreviated-month
# directive ('%h' is a platform-specific alias).
plot.set_xticks(xticks)
plot.set_xticklabels([x.strftime('%b\n%Y') for x in xticks])
plot.grid(True, which='major', axis='x')
matplotlib.pyplot.savefig("results-11.pdf")
In [30]:
# Check @openp2pdesign data table (only first rows)
# Label-based selection with .loc (.ix is no longer available in current
# pandas releases).
user_data.loc["openp2pdesign"].head()
Out[30]:
In [31]:
# Check @openp2pdesign stats data table
# Label-based selection with .loc (.ix is no longer available in current
# pandas releases).
user_data.loc["openp2pdesign"].describe()
Out[31]:
In [32]:
# All users stats
# Summary statistics computed over the rows of every user together
user_data.describe()
Out[32]:
In [33]:
# All users data
# Displays the whole per-user table as the cell output.
# NOTE(review): this can be a very long output and may expose user names when
# the notebook is shared — consider .head() or .sample() instead.
user_data
Out[33]: