In [229]:
import py2neo
import pandas as pd
# Connect to Neo4j using py2neo's default connection settings.
graph = py2neo.Graph()

# For every method annotated with a type named "Test", find the methods it
# invokes whose declaring type has an fqn starting with "at.dropover" and a
# name not ending in "Test", and count the distinct INVOKES relationships.
query = """
MATCH (testClass:Class)-[:DECLARES]->(testMethod:Method)-[:ANNOTATED_BY]->()-[:OF_TYPE]->(:Type {name:"Test"}),
(testMethod)-[i:INVOKES]->(method:Method)<-[:DECLARES]-(type:Type)
WHERE type.fqn STARTS WITH "at.dropover" AND NOT type.name ENDS WITH "Test"
RETURN
testClass.name as test_class,
testMethod.signature as test_method,
type.name as type,
method.signature as method,
COUNT(DISTINCT i) as invokes
"""

# Graph.data() was removed in py2neo v4; Graph.run(...).data() yields the same
# list of dicts on old and new releases. Naming the columns explicitly keeps
# the frame usable by the following cells even when the query returns no rows.
invokes = pd.DataFrame(
    graph.run(query).data(),
    columns=['test_class', 'test_method', 'type', 'method', 'invokes'])
invokes.head()
Out[229]:
In [230]:
# The signatures are parsed below as "<return type> <name>(<args>)", so the
# simple name is the second whitespace-separated token up to the first "(".
method_name = invokes.method.str.split().str[1].str.split("(").str[0]

# Drop constructor invocations. Comparing the whole signature with "<init>"
# can never match a value of the shape assumed above, so the parsed simple
# name is checked as well (the literal comparison is kept for bare names).
is_constructor = (invokes.method == "<init>") | (method_name == "<init>")

# .copy() makes the filtered result an independent frame, so the column
# assignments below do not raise SettingWithCopyWarning.
invokes = invokes[~is_constructor].copy()

# Readable "Type#method" identifiers for the production and the test side.
invokes['prod'] = invokes.type + "#" + invokes.method.str.split().str[1].str.split("(").str[0]
invokes['test'] = invokes.test_class + "#" + invokes.test_method.str.split().str[1].str.split("(").str[0]
invokes.head()
Out[230]:
In [231]:
# Wide table: one row per (test_class, test_method), one column per
# (type, method); cells hold the invocation count, 0 where there is none.
matrix = (
    invokes
    .pivot_table(values='invokes',
                 index=['test_class', 'test_method'],
                 columns=['type', 'method'])
    .fillna(0)
)
matrix.head()
Out[231]:
In [232]:
from sklearn.decomposition import PCA
# fit() returns the estimator itself, so `model` and `data` both refer to the
# same fitted two-component PCA.
model = data = PCA(n_components=2, random_state=0).fit(matrix)
data
Out[232]:
In [233]:
# Project every row of the invocation matrix with the fitted model.
T = model.transform(matrix)
# Preview the first five projected rows.
T[:5]
Out[233]:
In [234]:
from matplotlib import cm
# Integer code per row identifying its test class. MultiIndex.labels (and the
# .values() method of its elements) no longer exist in current pandas, so the
# codes are derived with factorize, which works on old and new releases;
# sort=True numbers the classes in sorted order.
class_codes = pd.factorize(matrix.index.get_level_values('test_class'), sort=True)[0]

# Scale the codes into [0, 1). Dividing by the number of classes instead of
# the largest code avoids 0/0 when there is a single test class, and keeps the
# last class off 1.0, which the cyclic hsv map renders like 0.0.
n_classes = int(class_codes.max()) + 1 if len(class_codes) else 1
classes = class_codes / float(n_classes)

# One RGBA row per matrix row.
colors = [x for x in cm.hsv(classes)]
colors[:5]
Out[234]:
In [235]:
from matplotlib.colors import rgb2hex
# Convert every colour in `colors` to a hex string.
rgb_colors = list(map(rgb2hex, colors))
rgb_colors[:5]
Out[235]:
Welche Testmethoden testen die Fachlichkeit?
In [236]:
# For each (test_class, test_method) pair, collect the invoked method
# signatures into a list.
methods_per_test_class = (
    invokes
    .groupby(['test_class', 'test_method'])['method']
    .apply(list)
)
methods_per_test_class.head()
Out[236]:
In [237]:
# For each (test_class, test_method) pair, collect the names of the types
# declaring the invoked methods into a list.
types_per_test_class = (
    invokes
    .groupby(['test_class', 'test_method'])['type']
    .apply(list)
)
types_per_test_class.head()
Out[237]:
In [238]:
%matplotlib inline
import matplotlib.pyplot as plt
# Static overview of the two-dimensional projection, one point per row of T,
# coloured by test class. Axis labels and a title let the figure stand alone.
pca_fig, pca_ax = plt.subplots()
pca_ax.scatter(T[:, 0], T[:, 1], c=colors)
pca_ax.set_xlabel('component 1')
pca_ax.set_ylabel('component 2')
pca_ax.set_title('Test methods projected by invoked production methods');
Out[238]:
In [239]:
# One row per projected point; T has the same row order as `matrix`.
df = pd.DataFrame(T, columns=['x', 'y'])

# Take the identifying labels straight from the matrix index instead of
# calling reset_index() once per column.
df['test_class'] = matrix.index.get_level_values('test_class').values
df['test_method'] = matrix.index.get_level_values('test_method').values

# Align the per-test lists on the (test_class, test_method) key rather than
# relying on both groupby results being in the same positional order.
df['methods'] = methods_per_test_class.reindex(matrix.index).tolist()
df['types'] = types_per_test_class.reindex(matrix.index).tolist()

df['color'] = rgb_colors
df.head()
Out[239]:
In [240]:
# TODO: use Bokeh so that one can see which classes these are
In [241]:
from bokeh.plotting import figure, ColumnDataSource
from bokeh.models import HoverTool
def scatter_with_hover(df, x, y, color='blue',
                       fig=None, cols=None, name=None, marker='o',
                       fig_width=800, fig_height=800, **kwargs):
    """
    Plots an interactive scatter plot of `x` vs `y` using bokeh, with automatic
    tooltips showing columns from `df`.

    Parameters
    ----------
    df : pandas.DataFrame
        DataFrame containing the data to be plotted
    x : str
        Name of the column to use for the x-axis values
    y : str
        Name of the column to use for the y-axis values
    color : str, optional
        Colour for all points when `df` has no ``'color'`` column. If `df`
        has a ``'color'`` column, its per-row values are used instead (this
        keeps existing callers that rely on that column working).
    fig : bokeh.plotting.Figure, optional
        Figure on which to plot (if not given then a new figure will be created)
    cols : list of str
        Columns to show in the hover tooltip (default is to show all)
    name : str
        Bokeh series name to give to the scattered data (default 'main')
    marker : str
        Name of marker to use for scatter plot
    fig_width, fig_height : int
        Size of the figure that is created when `fig` is not given
    **kwargs
        Any further arguments to be passed to fig.scatter

    Returns
    -------
    bokeh.plotting.Figure
        Figure (the same as given, or the newly created figure)

    Example
    -------
    fig = scatter_with_hover(df, 'A', 'B')
    show(fig)
    fig = scatter_with_hover(df, 'A', 'B', cols=['C', 'D', 'E'], marker='x', color='red')
    show(fig)

    Author
    ------
    Robin Wilson <robin@rtwilson.com>
    with thanks to Max Albert for original code example
    """
    # If we haven't been given a Figure obj then create it with default
    # size etc.
    if fig is None:
        fig = figure(width=fig_width, height=fig_height)

    # We're getting data from the given dataframe
    source = ColumnDataSource(data=df)

    if name is None:
        name = 'main'

    # Previously the literal field name 'color' was always passed and the
    # `color` argument was ignored, so frames without that column could not
    # be plotted. Per-row colours still win when the column is present.
    point_color = 'color' if 'color' in df.columns else color

    # Actually do the scatter plot
    # (other keyword arguments will be passed to this function)
    renderer = fig.scatter(x, y, source=source, name=name, marker=marker,
                           color=point_color, **kwargs)

    # Restrict the hover tool to the series plotted above. Binding it to the
    # renderer replaces HoverTool(names=[...]), which newer Bokeh releases
    # no longer accept.
    hover = HoverTool(renderers=[renderer])

    # Display *all* columns unless a subset was requested. The '@{...}' form
    # also works for column names that contain spaces.
    tooltip_columns = df.columns if cols is None else cols
    tooltips = [(str(c), '@{' + str(c) + '}') for c in tooltip_columns]
    tooltips.append(('index', '$index'))
    hover.tooltips = tooltips

    # Finally add/enable the tool
    fig.add_tools(hover)
    return fig
In [242]:
from bokeh.io import push_notebook, show, output_notebook
# Keep the Bokeh figure under its own name: binding it to `plt` would shadow
# the matplotlib.pyplot module imported earlier in the notebook.
pca_hover_fig = scatter_with_hover(df, 'x', 'y')
output_notebook()
show(pca_hover_fig)
In [243]:
from sklearn.manifold import MDS
# MDS starts from a random configuration, so fix the seed (as done for the
# PCA model) to get the same embedding on every run. Two components are
# requested explicitly because the result is plotted as x/y.
model = MDS(n_components=2, random_state=0)
T = model.fit_transform(matrix)
T[:5]
Out[243]:
In [244]:
# One row per embedded point; T has the same row order as `matrix`.
df = pd.DataFrame(T, columns=['x', 'y'])

# Take the identifying labels straight from the matrix index instead of
# calling reset_index() once per column.
df['test_class'] = matrix.index.get_level_values('test_class').values
df['test_method'] = matrix.index.get_level_values('test_method').values

# Align the per-test lists on the (test_class, test_method) key rather than
# relying on both groupby results being in the same positional order.
df['methods'] = methods_per_test_class.reindex(matrix.index).tolist()
df['types'] = types_per_test_class.reindex(matrix.index).tolist()

df['color'] = rgb_colors
df.head()
Out[244]:
In [245]:
# Keep the Bokeh figure under its own name: binding it to `plt` would shadow
# the matplotlib.pyplot module imported earlier in the notebook.
mds_hover_fig = scatter_with_hover(df, 'x', 'y')
output_notebook()
show(mds_hover_fig)
In [246]:
from sklearn.manifold import Isomap
# Embed the invocation matrix with Isomap; the two output dimensions (the
# library default) are spelled out because the result is plotted as x/y.
model = Isomap(n_components=2)
T = model.fit_transform(matrix)
T[:5]
Out[246]:
In [247]:
# One row per embedded point; T has the same row order as `matrix`.
df = pd.DataFrame(T, columns=['x', 'y'])

# Take the identifying labels straight from the matrix index instead of
# calling reset_index() once per column.
df['test_class'] = matrix.index.get_level_values('test_class').values
df['test_method'] = matrix.index.get_level_values('test_method').values

# Align the per-test lists on the (test_class, test_method) key rather than
# relying on both groupby results being in the same positional order.
df['methods'] = methods_per_test_class.reindex(matrix.index).tolist()
df['types'] = types_per_test_class.reindex(matrix.index).tolist()

df['color'] = rgb_colors
df.head()
Out[247]:
In [248]:
# Keep the Bokeh figure under its own name: binding it to `plt` would shadow
# the matplotlib.pyplot module imported earlier in the notebook.
isomap_hover_fig = scatter_with_hover(df, 'x', 'y')
output_notebook()
show(isomap_hover_fig)
In [ ]: