In [20]:
import py2neo
import pandas as pd
# Connect to Neo4j using py2neo's default connection settings.
graph = py2neo.Graph()

# One result row per READS/WRITES relationship between a method and a field
# that are both declared by the same class.
query = (
    "MATCH (f:Field)<-[:DECLARES]-(c:Class)-[:DECLARES]->(m:Method), "
    "(m)-[r:READS|WRITES]->(f) "
    "RETURN m.name, f.name, r.lineNumber"
)

# Graph.data() only exists in old py2neo releases; run(...).data() yields the
# same list of dicts (one per row) and is available in old and new versions.
data = graph.run(query).data()
df = pd.DataFrame(data)

# Constant marker column: every returned row is one method->field access.
df['hit'] = 1
df.head()
Out[20]:
In [21]:
# Build the method x field access matrix (1 = method touches field, 0 = not).
# Deduplicate on the two pivot keys only: df also carries r.lineNumber, so a
# plain drop_duplicates() keeps several rows for a method that accesses the
# same field on different lines, and pivot() then fails with
# "Index contains duplicate entries".
# NOTE(review): methods/fields are identified by bare name here, so equally
# named members of different classes share a row/column - confirm intended.
matrix = (
    df.drop_duplicates(subset=["m.name", "f.name"])
    .pivot(index="m.name", columns="f.name", values="hit")
    .fillna(0)
)
matrix
Out[21]:
In [22]:
from sklearn.metrics.pairwise import cosine_distances
# Pairwise cosine distance between the rows of the access matrix
# (one row per method name, as built by the pivot above).
dissimilarity_matrix = cosine_distances(X=matrix)

# Preview only the top-left 5x5 corner instead of the full square matrix.
dissimilarity_matrix[0:5, 0:5]
Out[22]:
In [23]:
from sklearn.manifold import MDS
# The distances are already computed, so MDS is told to take them as-is
# ('precomputed'); random_state is pinned so every run gives the same layout.
model = MDS(
    random_state=0,
    dissimilarity='precomputed',
)
dissimilarity_2d = model.fit_transform(dissimilarity_matrix)

# Show the first five embedded points.
dissimilarity_2d[0:5]
Out[23]:
In [24]:
%matplotlib inline
import matplotlib.pyplot as plt
# 8x8 inch figure for the scatter plot of the embedded points.
plt.figure(figsize=(8, 8))

# First embedding column is plotted horizontally, second vertically.
x, y = dissimilarity_2d[:, 0], dissimilarity_2d[:, 1]

# Trailing semicolon keeps the scatter object's repr out of the cell output.
plt.scatter(x, y);