In [6]:
import matplotlib.pyplot as plt
%matplotlib inline
In [12]:
# plot() takes the x-values first, then the y-values: [x1, x2], [y1, y2].
# Here that draws a single line segment from (1, 2) to (2, 3).
plt.plot([1,2], [2,3])
Out[12]:
From http://stackoverflow.com/questions/8271564/matplotlib-comma-separated-number-format-for-axis
This is handy, for example, when plotting events on large genomes, where the x-values are in the millions. Of course, this works for the y-axis, too.
In [48]:
def x_formatter(x, pos):
    """Format a tick value with thousands separators and no decimals.

    The two-argument signature is the one expected by
    matplotlib.ticker.FuncFormatter, which this function is wrapped in.

    Parameters
    ----------
    x : number
        The tick value to format.
    pos : object
        The tick position passed in by the formatter; unused here.

    Returns
    -------
    str
        `x` rounded to zero decimals with commas as thousands separators,
        e.g. 1000000 -> '1,000,000'.
    """
    # Plot with comma-separator, and no .0
    return '{0:,.0f}'.format(x)
import matplotlib.ticker as tkr

# Wrap x_formatter so matplotlib can use it as a tick formatter.
# (For this data it might be prettier to plot in mega-basepairs instead,
# by dividing the positions by 1e6; the comma would then not be needed.)
x_format = tkr.FuncFormatter(x_formatter)

# One axes filling the figure, two black dots at positions 1e6 and 2e6
ax = plt.subplot(1, 1, 1)
ax.plot([1000000, 2000000], [2, 1], 'k.')
ax.xaxis.set_major_formatter(x_format)

# Set the limits by hand so the dots do not sit right on the frame
ax.set_xlim([900000, 2100000])
ax.set_ylim([0, 3])
ax.set_xlabel('Position on genome (bp)')
Out[48]:
Most journals want 300 dpi PNG files, and we'll also get rid of the annoying white margins around the figure. From http://stackoverflow.com/questions/4042192/reduce-left-and-right-margins-in-matplotlib-plot
# Save the current figure as a 300 dpi PNG; bbox_inches='tight' trims the
# empty margin around the plot.
plt.savefig('someName.png', dpi=300, bbox_inches='tight')
# An alternative way to reduce the margins: let matplotlib re-space the
# subplots. NOTE(review): this only affects files saved after it is called,
# so it presumably belongs before plt.savefig() -- confirm intended usage.
plt.tight_layout()
In [51]:
# Import numpy under its conventional alias. (Binding numpy.random to `np`
# made `np` mean different things in different cells of this notebook.)
import numpy as np

# Seeded generator, so the figure is the same on every Restart & Run All
# and numpy's global random state is left untouched.
rng = np.random.RandomState(0)

# get two random normal distributions (100 standard-normal draws each)
a_diff = rng.randn(100)
b_diff = rng.randn(100)

# Make histograms -
# alpha controls how opaque the color is; 1 is the maximum, and 0.5 lets
#   the two overlapping histograms show through each other
# bins controls how many bins are made
# label is the text used for the legend
plt.hist(a_diff, alpha=0.5, bins=10, label='A difference')
plt.hist(b_diff, alpha=0.5, bins=10, label='B difference')
plt.legend()
Out[51]:
In [59]:
# Toy data: one shared set of x positions and two series to plot against it
x = [0, 1, 2, 3, 4]
y = [10, 5, 17, 8, 9]
z = [0.5, 0.8, 0.1, 0.7, 0.1]

# One figure holding two axes; axarr[0] is the first, axarr[1] the second
f, axarr = plt.subplots(2)

# First panel -- drawn in black ('k') because I hate blue lines
axarr[0].plot(x, y, 'k')
axarr[0].set(
    title='Number of SNPs',
    xlabel='Position on chromosome',
    ylabel='Length in nucleotides',
)

# Second panel, same styling
axarr[1].plot(x, z, 'k')
axarr[1].set(
    title='Relative number of SNPs in blocks',
    ylabel='Number of SNPs',
    xlabel='Position on chromosome',
)

# Tweak hspace (the vertical gap between the panels) if the two plots
# end up too close to each other
f.subplots_adjust(hspace=0.8)
In [97]:
import numpy as np
import scipy.stats
# polyfit and poly1d are numpy functions; matplotlib.pylab merely re-exports
# them, so import them from where they actually live.
from numpy import polyfit, poly1d

x = [1,2,3,4,5,6]
y = [10,12,15,17,19,23]

f, axarr = plt.subplots(2) # make 2 subplots

# --- First with scipy: linregress returns slope, intercept and r directly
slope, intercept, r_value, p_value, std_err = scipy.stats.linregress(x,y)
line = [slope*i+intercept for i in x]  # fitted y-value at each x
r_2 = r_value**2
axarr[0].plot(x, y, 'k.', x, line, 'k')
# let's just put the equation into the title, it's a bit ugly
# (r^2 is printed with 3 decimals so it can be compared with the plot below)
axarr[0].set_title(r'$y={1:.2f}x + {2:.2f}$. $r^2={0:.3f}.$ '.format(r_2, slope, intercept))

# --- Now with numpy: degree-1 polynomial fit, wrapped in a callable poly1d
fit = polyfit(x,y,1)
fit_fn = poly1d(fit)
# Next part from http://stackoverflow.com/questions/893657/how-do-i-calculate-r-squared-using-python-and-numpy
yhat = fit_fn(x)
# np.mean avoids the integer division that np.sum(y)/len(y) can perform on
# integer data under Python 2
ybar = np.mean(y)
ssreg = np.sum((yhat-ybar)**2)
# y is a plain list, so convert it explicitly before the array arithmetic
sstot = np.sum((np.asarray(y) - ybar)**2)
r_squared = ssreg / sstot # I prefer scipy
axarr[1].set_title(r'{0} $r^2={1:.3f}$'.format(fit_fn, r_squared))
axarr[1].plot(x,y, 'k.', x, fit_fn(x), 'k')
# Both approaches give the same r^2 (up to floating-point error). They only
# looked different before because one title rounded to 2 decimals and the
# other to 3.
f.subplots_adjust(hspace=0.8)
In [45]:
# Import numpy under its conventional alias. (Binding numpy.random to `np`
# made `np` mean different things in different cells of this notebook.)
import numpy as np
import scipy.stats as st

# Seeded generator, so the printed statistics are reproducible and numpy's
# global random state is left untouched.
rng = np.random.RandomState(0)
a_diff = rng.randn(100)
b_diff = rng.randn(100)

# There are several t-tests in scipy: one for related (paired) samples, which
# also means both samples have to be of the same length, and one for
# unrelated (independent) samples, where equal lengths are not needed.
t, p_value = st.ttest_ind(a_diff, b_diff)
print('Independent t-test: t is {0:.2f}, p-value is {1:.2f}'.format(float(t), p_value))
t, p_value = st.ttest_rel(a_diff, b_diff)
print('Related t-test: t is {0:.2f}, p-value is {1:.2f}'.format(float(t), p_value))
# For two independent normal samples like these, both tests give reasonably
# similar values most of the time; change the seed above to try other draws.
In [ ]:
In [ ]: