In [1]:
# _ _ _ _ ____ ____ ____ ____ ____ ____ ____ ____ ____ ____ _ ____ _ _
# | | |\ | |___ |__| |__/ |__/ |___ | __ |__/ |___ [__ [__ | | | |\ |
# |___ | | \| |___ | | | \ | \ |___ |__] | \ |___ ___] ___] | |__| | \|
# basic linear regression from http://scipy-central.org/item/16/2/basic-linear-regression
%matplotlib inline
import numpy as np
import pylab
from scipy import stats
# Fit the model: ordinary least-squares line through six (x, y) observations.
x = np.array([1, 2, 5, 7, 10, 15])
y = np.array([2, 6, 7, 9, 14, 19])
(slope,
 intercept,
 r_value,
 p_value,
 slope_std_error) = stats.linregress(x, y)

# Derived quantities: fitted values, residuals and the residual standard error.
predict_y = slope * x + intercept
pred_error = y - predict_y
degrees_of_freedom = x.size - 2  # two fitted parameters: slope and intercept
residual_std_error = np.sqrt((pred_error ** 2).sum() / degrees_of_freedom)
# Plotting: observations as round markers, fitted line in solid black.
pylab.plot(x, y, 'o', x, predict_y, 'k-')
pylab.show()
In [ ]:
# Section banner kept as a string (it is overwritten by the data below).
# Raw string: the art is full of backslashes, and in a normal string the one
# ending a row acts as a line continuation that glues two rows together.
x = r"""
____ _ _
/ ___| ___ ___ ___ _ __ __| | __ _ _ __ _ _ __ ___ _(_)___
\___ \ / _ \/ __/ _ \| '_ \ / _` |/ _` | '__| | | | / _` \ \/ / / __|
___) | __/ (_| (_) | | | | (_| | (_| | | | |_| | | (_| |> <| \__ \
|____/ \___|\___\___/|_| |_|\__,_|\__,_|_| \__, | \__,_/_/\_\_|___/
|___/ """
import numpy as np
import matplotlib.pyplot as plt
# Mirror-image parabolas drawn against one x axis with two independent y axes.
x = np.arange(0, 10, 0.1)
y1 = 0.05 * x ** 2
y2 = -1 * y1

fig, ax1 = plt.subplots()
ax2 = ax1.twinx()  # second y axis that shares ax1's x axis

# Left-hand axis: y1 in green.
ax1.plot(x, y1, 'g-')
ax1.set_xlabel('X data')
ax1.set_ylabel('Y1 data', color='g')

# Right-hand axis: y2 in blue.
ax2.plot(x, y2, 'b-')
ax2.set_ylabel('Y2 data', color='b')

plt.show()
In [7]:
# ____ _ _ ___ ___ _ ____ ___ ____ ____ _ ___
# [__ | | |__] |__] | | | | | __ |__/ | | \
# ___] |__| |__] | |___ |__| | |__] | \ | |__/
#matplotlib #subplots
import matplotlib.pyplot as plt
%matplotlib inline
# Draw the same curve num_charts times on a grid that is num_cols wide.
xs = list(range(0, 11))
ys = [x**1.3 for x in xs]
num_charts = 10
num_cols = 3
# Ceiling division kept in integers: "/" yields a float on Python 3, and
# plt.subplots does not accept a fractional row count.
num_rows = (num_charts + num_cols - 1) // num_cols
# squeeze=False keeps axarr two-dimensional even for a single row or column,
# so axarr[row, col] indexing is always valid.
f, axarr = plt.subplots(num_rows, num_cols, figsize=(12, 12), squeeze=False)
for i in range(num_rows * num_cols):
    c_row, c_col = divmod(i, num_cols)  # row-major position of panel i
    panel = axarr[c_row, c_col]
    if i >= num_charts:
        # Grid cells beyond the requested number of charts are left blank
        # instead of showing an empty set of axes.
        panel.axis('off')
        continue
    panel.plot(xs, ys)
    panel.set_title('title')
    panel.set_ylim(0, max(ys))
    panel.get_xaxis().set_visible(False)
In [1]:
# _ _ _ _
# / \ _ __ (_)_ __ ___ __ _| |_(_) ___ _ __
# / _ \ | '_ \| | '_ ` _ \ / _` | __| |/ _ \| '_ \
# / ___ \| | | | | | | | | | (_| | |_| | (_) | | | |
# /_/ \_\_| |_|_|_| |_| |_|\__,_|\__|_|\___/|_| |_|
# animation
import matplotlib.pyplot as plt
import numpy as np
from moviepy.video.io.bindings import mplfig_to_npimage
import moviepy.editor as mpy
# DRAW A FIGURE WITH MATPLOTLIB
duration = 2
fig_mpl, ax = plt.subplots(1, figsize=(5, 3), facecolor='white')
xx = np.linspace(-2, 2, 200)  # the x vector


def zz(d):
    """Return the (changing) z vector for phase offset ``d``."""
    return np.sinc(xx**2) + np.sin(xx + d)


ax.set_title("Elevation in y=0")
ax.set_ylim(-1.5, 2.5)
line, = ax.plot(xx, zz(0), lw=3)


# ANIMATE WITH MOVIEPY (UPDATE THE CURVE FOR EACH t). MAKE A GIF.
def make_frame_mpl(t):
    """Redraw the curve for time ``t`` and return the figure as an RGB image."""
    phase = 2 * np.pi * t / duration  # one full cycle over the clip duration
    line.set_ydata(zz(phase))  # <= Update the curve
    return mplfig_to_npimage(fig_mpl)  # RGB image of the figure


animation = mpy.VideoClip(make_frame_mpl, duration=duration)
animation.write_gif("sinc_mpl.gif", fps=20)
In [9]:
# ____ ____ _ ____ ____ ___ ____ _ ____ ___ ___ ____ ____
# | | | | | | |__/ |__] |__| | |___ | | |___ [__
# |___ |__| |___ |__| | \ | | | |___ |___ | | |___ ___]
#color #palette #editor
def drawpalette(palette):
"""Uses matplotlib to draw bars of colors corresponding to a list of hex values"""
import matplotlib.pyplot as plt
from matplotlib.path import Path
import matplotlib.patches as patches
%matplotlib inline
verts = [(0., 0.), (0., 1.), (1., 1.),
(1., 0.),(0., 0.),]
codes = [Path.MOVETO, Path.LINETO, Path.LINETO,
Path.LINETO, Path.CLOSEPOLY,]
path = Path(verts, codes)
fig = plt.figure(figsize=(15,2))
for i in range(len(palette)):
ax = fig.add_subplot(1, len(palette), i + 1)
patch = patches.PathPatch(path, facecolor=palette[i], lw=1)
ax.add_patch(patch)
ax.set_xlim(0,1)
ax.set_ylim(0,1)
plt.axis('off')
plt.show()
drawpalette(['#4b3735', '#62483c', '#7a5a45', '#906b4f', '#a57c57', '#b79377', '#c7ac95', '#d6c5b5', '#e8ded3'])
In [ ]:
# _ _ _ _ _ _ _ ____ _ ____ ___ ____ _ _ ____ ____ ___ ____ _ _ _ _ ____ ___ _ ____ _ _
# |\/| \_/ | | | |___ | |__/ | \ | |__| |__| |__/ | |___ | | |\ | | | | | | |\ |
# | | | |_|_| |___ | | \ |__/ |___ | | | | | \ | | |__| | \| |___ | | |__| | \|
#matplotlib #chart
def determine_y_limit(x):
    """Round a y-axis limit up at the second-most-significant digit.

    The value is truncated to its two leading digits and the second digit is
    then bumped by one, so the result is always strictly greater than ``x``
    (e.g. 1234 -> 1300, 87 -> 88, 0.0234 -> 0.024).

    Parameters
    ----------
    x : number
        Largest value to be displayed; must be greater than zero.

    Returns
    -------
    number
        The rounded-up axis limit.

    Raises
    ------
    ValueError
        If ``x`` is not a positive number (its logarithm is undefined).
    """
    # Imported here because nothing in the visible notebook brings bare
    # ``floor`` / ``log10`` into scope.
    import math

    if not x > 0:  # also rejects NaN
        raise ValueError("determine_y_limit needs a positive value, got %r" % (x,))
    significance = int(math.floor(math.log10(x)))  # exponent of the leading digit
    unit = 10 ** (significance - 1)  # place value of the second digit
    return (math.floor(x / unit) + 1) * unit
def make_chart(df=df, words=["redwood", "rollo", "ivan", "eisenhower", "epa"], form='line', title='',
               colors=[], smoothing=0, baseline='sym', png_name=''):
    """Chart the yearly 'pct' value of selected words from a long-format frame.

    Parameters
    ----------
    df : DataFrame
        Must provide 'word', 'year' and 'pct' columns. The default is the
        global ``df`` as it exists when this function is defined.
    words : list of str
        Words to keep; each becomes one series.
    form : str
        'line' (one axes, one line per word), 'subplots_auto' (one panel per
        word, individual y limits, fill opacity scaled by the word's peak),
        'subplots_same' (one panel per word, shared y limit) or 'stream'
        (stacked plot). Any other value draws nothing.
    title : str
        Figure title; only the 'stream' form uses it.
    colors : list
        Matplotlib colors; an empty value selects a built-in 12-color list.
        Colors are reused cyclically in the 'line' form.
    smoothing : int
        Half-width of a centered moving average (up to ``2*smoothing + 1``
        rows, truncated at both ends). 0 disables smoothing.
    baseline : str
        Passed to ``stackplot`` in the 'stream' form.
    png_name : str
        When non-empty the figure is saved as ``save_path/png_name.png``.
        NOTE(review): ``save_path`` is a global that is not defined in the
        visible notebook - confirm it exists before using this option.

    The default list arguments are never mutated, so sharing them between
    calls is safe.
    """
    # Long format -> one column per word, one row per year.
    selected = df[df['word'].isin(words)]
    dataframe = pd.pivot_table(selected, values='pct', index='year', columns=['word'])
    # DataFrame.sort() no longer exists in pandas; sort_index() orders the
    # rows by their 'year' index.
    dataframe = dataframe.sort_index(ascending=True)
    startyear = min(list(dataframe.index))
    endyear = max(list(dataframe.index))
    legend_size = 0.01

    max_y = 0
    n_rows = len(dataframe)
    for word in dataframe.columns:
        # As before, the overall maximum is taken from the unsmoothed values.
        max_y = max(max_y, dataframe[word].max())
        if smoothing > 0:
            raw = dataframe[word]
            smoothed = []
            for row in range(n_rows):
                start = max(0, row - smoothing)
                end = min(n_rows - 1, row + smoothing)
                # iloc excludes its stop index, so "end + 1" is needed for
                # the window to actually reach row + smoothing.
                smoothed.append(raw.iloc[start:end + 1].mean())
            # Replace the whole column at once instead of chained
            # per-element assignment, which may write to a temporary copy.
            dataframe[word] = smoothed

    y_text = "% of words in corpus"
    num_series = len(dataframe.columns)
    if colors is None or len(colors) == 0:
        colors = ["#1f78b4", "#ae4ec9", "#33a02c", "#fb9a99", "#e31a1c", "#a6cee3",
                  "#fdbf6f", "#ff7f00", "#cab2d6", "#6a3d9a", "#ffff99", "#b15928"]
    num_colors = len(colors)
    if num_series > num_colors:
        print("Warning: colors will be repeated.")
    x_values = list(dataframe.index)

    if form == 'line':
        fig, ax = plt.subplots(num=None, figsize=(16, 9), dpi=300, facecolor='w', edgecolor='w')
        # The pivoted frame has one column per word (it has no ".words").
        for counter, word in enumerate(dataframe.columns):
            ax.plot(x_values, dataframe[word], label=word,
                    color=colors[counter % num_colors], linewidth=3)
        ax.set_ylim(0, determine_y_limit(max_y))
        ax.set_xlim(startyear, endyear)
        ax.set_ylabel(y_text, size=13)
        # Shrink the axes slightly to leave room for the legend underneath.
        box = ax.get_position()
        ax.set_position([box.x0, box.y0 + box.height * legend_size,
                         box.width, box.height * (1 - legend_size)])
        legend_cols = min(5, num_series)
        ax.legend(loc='upper center', bbox_to_anchor=(0.5, -0.05), fancybox=True,
                  shadow=True, ncol=legend_cols)

    elif form == 'subplots_auto':
        # squeeze=False keeps an array of axes even for a single series.
        fig, axes = plt.subplots(num_series, 1, figsize=(12, 3.5*num_series), squeeze=False)
        axes = axes[:, 0]
        overall_limit = determine_y_limit(max_y)
        print('Maximum alpha: %d percent' % (overall_limit))
        for counter, word in enumerate(dataframe.columns):
            current_ymax = dataframe[word].max()
            # Fill opacity reflects this word's peak relative to the overall limit.
            tint = 1.0 * current_ymax / overall_limit
            panel = axes[counter]
            panel.plot(x_values, dataframe[word], color='k')
            panel.set_ylim(0, determine_y_limit(current_ymax))
            panel.set_xlim(startyear, endyear)
            panel.fill_between(x_values, dataframe[word], color=colors[0], alpha=tint, interpolate=True)
            panel.set_ylabel(word, size=11)
        plt.subplots_adjust(hspace=0.1)

    elif form == 'subplots_same':
        fig, axes = plt.subplots(num_series, 1, figsize=(12, 3.5*num_series), squeeze=False)
        axes = axes[:, 0]
        shared_limit = determine_y_limit(max_y)
        print('Maximum y axis: %d percent' % (shared_limit))
        for counter, word in enumerate(dataframe.columns):
            panel = axes[counter]
            panel.plot(x_values, dataframe[word], color='k')
            panel.set_ylim(0, shared_limit)
            panel.set_xlim(startyear, endyear)
            panel.fill_between(x_values, dataframe[word], color=colors[1], alpha=1, interpolate=True)
            panel.set_ylabel(word, size=11)
        plt.subplots_adjust(hspace=0.1)

    elif form == 'stream':
        # Use the plt alias like the rest of the function; bare "figure" and
        # "pyplot" are not imported anywhere in the visible notebook.
        plt.figure(num=None, figsize=(20, 10), dpi=150, facecolor='w', edgecolor='k')
        plt.title(title, size=17)
        plt.xlim(startyear, endyear)
        # The label previously ended in an unmatched ")" glued to the text;
        # NOTE(review): the "(scale: ...)" wording is a reconstruction.
        yaxtext = 'Percent of words in corpus (scale: ' + str(determine_y_limit(max_y)) + ')'
        plt.ylabel(yaxtext, size=13)
        polys = plt.stackplot(x_values, *[dataframe[word] for word in dataframe.columns],
                              colors=colors, baseline=baseline)
        # Stacked polygons get proxy rectangles of the same color for the legend.
        legendProxies = [plt.Rectangle((0, 0), 1, 1, fc=poly.get_facecolor()[0])
                         for poly in polys]
        wordlist = list(dataframe.columns)
        plt.legend(legendProxies, wordlist, loc=3, ncol=2)
        # Booleans instead of the legacy 'off' strings.
        plt.tick_params(
            axis='y',
            which='both',  # major and minor ticks
            left=False,
            right=False,
            labelleft=False)

    # Save before showing: once show() has rendered the figure, savefig() can
    # write an empty image.
    if png_name != '':
        fileword = save_path + "/" + png_name + ".png"
        plt.savefig(fileword)
    plt.show()
    plt.close()
In [16]:
# ____ _ _ _ ___ _ ____ ___ _ _ ___ _ ____ ___
# [__ | |\/| |__] | |___ |__] \_/ |__] | | | |
# ___] | | | | |___ |___ | | | |___ |__| |
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
#data
x = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
y = [3, 2, 5, 7, 4, 2, 4, 6, 7, 8]
ysize = [10, 20, 4, 15, 9, 9, 14, 8, 4, 9]
# One marker per point so that each can carry its own size.
for x_val, y_val, size in zip(x, y, ysize):
    plt.plot(x_val, y_val, linestyle="None", marker="o", markersize=size, color="red")
# Dotted line joining the points in order.
plt.plot(x, y, linestyle="dotted", color="red")
plt.xlim(np.min(x)-1.3, np.max(x)+1.3) #optional
plt.ylim(np.min(y)-1.3, np.max(y)+1.3) #optional
# The labels were swapped: the horizontal axis shows x, the vertical axis y.
plt.xlabel("x")
plt.ylabel("y")
plt.show()
In [7]:
# simple pylab
import matplotlib.pyplot as plt
%matplotlib inline
# Scatter of ten points, a short red diagonal and a rotated text annotation.
x = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
y = [3, 2, 5, 7, 4, 2, 4, 6, 7, 8]

fig, ax = plt.subplots()
ax.scatter(x, y, s=50, c='b', marker='o')
# Diagonal from the origin to (5, 5), drawn on the same axes.
ax.plot([0, 5], [0, 5], linestyle='-', marker='None', color='r', markersize=0)
ax.set(xlim=(-1, 11), ylim=(-1, 11))
# Label rotated by 45 degrees to run along the diagonal.
t = ax.text(
    2, 4, "Direction",
    ha="center", va="center",
    rotation=45, size=15,
)
plt.show()
In [17]:
# ____ ___ _ _ _ _ ____ ____ _ _ ____ ____ ___ _ _ _ _ _ ____
# [__ |__] | | |\ | |___ [__ |\/| | | | | | |__| | |\ | | __
# ___] | |___ | | \| |___ ___] | | |__| |__| | | | | | \| |__]
#spline #smoothing
import matplotlib.pyplot as plt
import numpy as np
T = np.array([6, 7, 8, 9, 10, 11, 12])
power = np.array([1.53E+03, 5.92E+02, 2.04E+02, 7.24E+01, 2.72E+01, 1.10E+01, 4.70E+00])
# Raw data: straight segments between the seven measurements.
plt.plot(T, power)
plt.show()
# scipy.interpolate.spline has been removed from SciPy; make_interp_spline
# builds an interpolating B-spline (cubic for k=3) through the same points.
from scipy.interpolate import make_interp_spline
xnew = np.linspace(T.min(), T.max(), 300)  # dense grid spanning the data
power_smooth = make_interp_spline(T, power, k=3)(xnew)
plt.plot(xnew, power_smooth)
plt.show()
In [2]:
import numpy as np
import matplotlib.pyplot as plt
import statsmodels.api as sm
%matplotlib inline
# Noisy sine wave smoothed with LOWESS.
# A seeded generator makes the noise - and therefore the plot and the
# `lowess` array - identical on every run.
rng = np.random.RandomState(0)
x = np.linspace(0, 2*np.pi, 100)
y = np.sin(x) + rng.random_sample(100) * 0.2
# frac=0.1: each local fit uses 10% of the points.
lowess = sm.nonparametric.lowess(y, x, frac=0.1)
plt.plot(x, y, '+')
# Column 0 holds the x values, column 1 the smoothed y values.
plt.plot(lowess[:, 0], lowess[:, 1])
plt.show()
In [6]:
from scipy.interpolate import interp1d
import numpy as np
# Sample cos(-x^2 / 8) at ten evenly spaced points on [0, 10].
x = np.linspace(0, 10, num=10)
y = np.cos(-(x ** 2) / 8.0)
# Two interpolants over the same samples: piecewise-linear and cubic.
f = interp1d(x, y, kind='linear')
f2 = interp1d(x, y, kind='cubic')
# Denser grid on which both interpolants are evaluated.
xnew = np.linspace(0, 10, num=40)
import matplotlib.pyplot as plt
%matplotlib inline
# Samples as markers; linear interpolant solid, cubic interpolant dashed.
plt.plot(x, y, 'o', label='data')
plt.plot(xnew, f(xnew), '-', label='linear')
plt.plot(xnew, f2(xnew), '--', label='cubic')
plt.legend(loc='best')
plt.show()
In [ ]:
def moving_average(a, n=3):
    """Return the simple moving average of ``a`` over windows of ``n`` values.

    Parameters
    ----------
    a : array_like
        Input values; they are accumulated as floats (multi-dimensional
        input is flattened by ``np.cumsum``).
    n : int
        Window length, at least 1.

    Returns
    -------
    numpy.ndarray
        One mean per complete window, i.e. ``len(a) - n + 1`` values; empty
        when ``a`` holds fewer than ``n`` values.

    Raises
    ------
    ValueError
        If ``n`` is smaller than 1. Without this check a negative ``n``
        silently produced meaningless numbers.
    """
    if n < 1:
        raise ValueError("n must be a positive integer, got %r" % (n,))
    # Running totals: the sum of any window is the difference of two entries.
    ret = np.cumsum(a, dtype=float)
    ret[n:] = ret[n:] - ret[:-n]
    # Entries before index n - 1 belong to incomplete windows.
    return ret[n - 1:] / n
In [5]:
# Preview the first rows only (x, smoothed y) instead of dumping the whole array.
lowess[:5]
Out[5]:
In [1]:
# ____ ____ ____ ___ ___ ____ ____ ___ _ ____ ___ _ _ ____ ___ ____ _ _ _
# [__ | |__| | | |___ |__/ |__] | | | | |\/| |__| | |__/ | \/
# ___] |___ | | | | |___ | \ | |___ |__| | | | | | | | \ | _/\_
# Pairwise scatter plots of the columns 'r' through 'sb', KDE on the diagonal.
# scatter_matrix now lives in pd.plotting (the top-level alias was removed).
# The trailing ";" hides the returned axes array without assigning to "_",
# which IPython itself uses for the last cell output.
pd.plotting.scatter_matrix(baseball.loc[:, 'r':'sb'], figsize=(12, 8), diagonal='kde');
# # with kde
# import numpy as np
# import matplotlib.pyplot as plt
# import pandas
# iris = pandas.read_csv("snippet_files/iris.csv")
# df = pandas.DataFrame(iris, columns=['slength', 'swidth', 'plength', 'pwidth'])
# pandas.plotting.scatter_matrix(df, alpha=0.2, diagonal='kde') #hist
# plt.show()
In [7]:
# heat map
import matplotlib.pyplot as plt
%matplotlib inline
# 2x2 grid of values rendered as a heat map on a fixed 0..1 colour scale.
data = [[0, 0.25], [0.5, 0.75]]

fig, ax = plt.subplots()
im = ax.imshow(
    data,
    cmap='hot',
    interpolation='nearest',  # no blending between neighbouring cells
    vmin=0,
    vmax=1,
)
fig.colorbar(im)
plt.show()
In [ ]: