In [1]:
import MySQLdb as mdb
import pandas as pd
In [ ]:
# Construct your query of choice
query = ("""
SELECT eventID, name, value, label
FROM my_table
""")
try:
# Connect to server
db = Mdb.connect(read_default_file='../../.my.cnf',
read_default_group='guest',
host='mysql.server.host.name',
db='my_schema_name')
# Read out result of query into a DataFrame
data_df = pd.read_sql(query, db)
data_df.index.name="id"
# Close the connection
db.close()
# Catch database exceptions
except Mdb.Error, e:
print "Error %d: %s" % (e.args[0], e.args[1])
In [63]:
# Write data to csv for offline usage
data_df.to_csv('violin-plot-data.csv', index_label='id')
In [2]:
# Load offline data into DataFrame
data_df = pd.read_csv('violin-plot-data.csv', index_col='id')
In [3]:
# Here we have all the data in *Long* format
data_df.head()
Out[3]:
In [4]:
# Use the pivot function to get it into *wide* format
wide_df = data_df.pivot(index='eventID',
columns='key',
values='value')
wide_df.head()
Out[4]:
In [5]:
# This pivoted DataFrame doesn't have the *class* field
# Use the SQL-esque *merge* function to add it to each entry
label_df = (data_df[['eventID', 'label']].drop_duplicates()
.set_index('eventID'))
wide_df = wide_df.merge(label_df,
left_index=True,
right_index=True)
wide_df = wide_df.sort_index()
wide_df.head()
Out[5]:
In [6]:
import matplotlib.pylab as plt
import seaborn as sns
# Show plots in notebook
%matplotlib inline
# Set some styling options
sns.set_style("darkgrid")
sns.set_context("paper", font_scale=1.4)
In [7]:
# Using Pandas
feature = "AfterInhMATRIX5"
wide_df.query("label == 1")[feature].hist()
Out[7]:
In [8]:
# Using Seaborn's 'distplot' shows histogram and a kde plot
sns.distplot(wide_df.query("label == 1")[feature])
Out[8]:
In [9]:
for label in range(1,8):
sns.distplot(
wide_df.query("label == @label")[feature],
label=str(label))
plt.legend(loc='best')
Out[9]:
In [10]:
sns.set_style("whitegrid", )
sns.boxplot(x='label', y=feature,
data=wide_df.sort_values('label'),
palette='muted')
Out[10]:
In [11]:
sns.violinplot(x='label', y=feature,
data=wide_df.sort_values('label'),
palette='muted')
Out[11]:
In [ ]: