In [1]:
# _    _ _  _ ____ ____ ____    ____ ____ ____ ____ ____ ____ ____ _ ____ _  _ 
# |    | |\ | |___ |__| |__/    |__/ |___ | __ |__/ |___ [__  [__  | |  | |\ | 
# |___ | | \| |___ |  | |  \    |  \ |___ |__] |  \ |___ ___] ___] | |__| | \|

# basic linear regression from http://scipy-central.org/item/16/2/basic-linear-regression

%matplotlib inline

import numpy as np
import pylab
from scipy import stats

# Fit the model
x = np.array([1, 2, 5, 7, 10, 15])
y = np.array([2, 6, 7, 9, 14, 19])
slope, intercept, r_value, p_value, slope_std_error = stats.linregress(x, y)

# Calculate some additional outputs
predict_y = intercept + slope * x
pred_error = y - predict_y
degrees_of_freedom = len(x) - 2
residual_std_error = np.sqrt(np.sum(pred_error**2) / degrees_of_freedom)

# Plotting
pylab.plot(x, y, 'o')
pylab.plot(x, predict_y, 'k-')
pylab.show()



In [ ]:
x = """
  ____                           _                               _     
 / ___|  ___  ___ ___  _ __   __| | __ _ _ __ _   _    __ ___  _(_)___ 
 \___ \ / _ \/ __/ _ \| '_ \ / _` |/ _` | '__| | | |  / _` \ \/ / / __|
  ___) |  __/ (_| (_) | | | | (_| | (_| | |  | |_| | | (_| |>  <| \__ \
 |____/ \___|\___\___/|_| |_|\__,_|\__,_|_|   \__, |  \__,_/_/\_\_|___/
                                              |___/                      """      

import numpy as np
import matplotlib.pyplot as plt
x = np.arange(0, 10, 0.1)
y1 = 0.05 * x**2
y2 = -1 *y1

fig, ax1 = plt.subplots()

ax2 = ax1.twinx()
ax1.plot(x, y1, 'g-')
ax2.plot(x, y2, 'b-')

ax1.set_xlabel('X data')
ax1.set_ylabel('Y1 data', color='g')
ax2.set_ylabel('Y2 data', color='b')

plt.show()

In [7]:
# ____ _  _ ___  ___  _    ____ ___    ____ ____ _ ___  
# [__  |  | |__] |__] |    |  |  |     | __ |__/ | |  \ 
# ___] |__| |__] |    |___ |__|  |     |__] |  \ | |__/

#matplotlib #subplots
import matplotlib.pyplot as plt
%matplotlib inline

xs = range(0,11)
ys = [x**1.3 for x in xs]

num_charts = 10
num_cols = 3
num_rows = num_charts / num_cols
if num_charts % num_cols != 0:
    num_rows += 1
    
f, axarr = plt.subplots(num_rows, num_cols, figsize=(12, 12))
#f.axes.get_xaxis().set_visible(False)
c_row = 0 
c_col = 0
for i in range(num_charts):
    axarr[c_row, c_col].plot(xs, ys)
    axarr[c_row, c_col].set_title('title')
    axarr[c_row, c_col].set_ylim(0, max(ys))
    axarr[c_row, c_col].get_xaxis().set_visible(False)
    c_col = (c_col + 1) % num_cols
    if c_col == 0:
        c_row += 1



In [1]:
#      _          _                 _   _             
#     / \   _ __ (_)_ __ ___   __ _| |_(_) ___  _ __  
#    / _ \ | '_ \| | '_ ` _ \ / _` | __| |/ _ \| '_ \ 
#   / ___ \| | | | | | | | | | (_| | |_| | (_) | | | |
#  /_/   \_\_| |_|_|_| |_| |_|\__,_|\__|_|\___/|_| |_|

# animation

import matplotlib.pyplot as plt
import numpy as np
from moviepy.video.io.bindings import mplfig_to_npimage
import moviepy.editor as mpy

# DRAW A FIGURE WITH MATPLOTLIB

duration = 2

fig_mpl, ax = plt.subplots(1,figsize=(5,3), facecolor='white')
xx = np.linspace(-2,2,200) # the x vector
zz = lambda d: np.sinc(xx**2)+np.sin(xx+d) # the (changing) z vector
ax.set_title("Elevation in y=0")
ax.set_ylim(-1.5,2.5)
line, = ax.plot(xx, zz(0), lw=3)

# ANIMATE WITH MOVIEPY (UPDATE THE CURVE FOR EACH t). MAKE A GIF.

def make_frame_mpl(t):
    line.set_ydata( zz(2*np.pi*t/duration))  # <= Update the curve
    return mplfig_to_npimage(fig_mpl) # RGB image of the figure

animation =mpy.VideoClip(make_frame_mpl, duration=duration)
animation.write_gif("sinc_mpl.gif", fps=20)


---------------------------------------------------------------------------
ImportError                               Traceback (most recent call last)
<ipython-input-1-fafcc567c4e5> in <module>()
      9 import matplotlib.pyplot as plt
     10 import numpy as np
---> 11 from moviepy.video.io.bindings import mplfig_to_npimage
     12 import moviepy.editor as mpy
     13 

ImportError: No module named 'moviepy'

In [9]:
# ____ ____ _    ____ ____    ___  ____ _    ____ ___ ___ ____ ____ 
# |    |  | |    |  | |__/    |__] |__| |    |___  |   |  |___ [__  
# |___ |__| |___ |__| |  \    |    |  | |___ |___  |   |  |___ ___]


#color #palette #editor

def drawpalette(palette):
    """Uses matplotlib to draw bars of colors corresponding to a list of hex values"""
    import matplotlib.pyplot as plt
    from matplotlib.path import Path
    import matplotlib.patches as patches
    %matplotlib inline
    verts = [(0., 0.), (0., 1.), (1., 1.),
             (1., 0.),(0., 0.),]
    codes = [Path.MOVETO, Path.LINETO, Path.LINETO,
             Path.LINETO, Path.CLOSEPOLY,]
    path = Path(verts, codes)
    fig = plt.figure(figsize=(15,2))
    for i in range(len(palette)):
        ax = fig.add_subplot(1, len(palette), i + 1)
        patch = patches.PathPatch(path, facecolor=palette[i], lw=1)
        ax.add_patch(patch)
        ax.set_xlim(0,1)
        ax.set_ylim(0,1)
        plt.axis('off')
    plt.show()
    
drawpalette(['#4b3735', '#62483c', '#7a5a45', '#906b4f', '#a57c57', '#b79377', '#c7ac95', '#d6c5b5', '#e8ded3'])



In [ ]:
# _  _ _   _    _ _ _ ____ _ ____ ___     ____ _  _ ____ ____ ___    ____ _  _ _  _ ____ ___ _ ____ _  _ 
# |\/|  \_/     | | | |___ | |__/ |  \    |    |__| |__| |__/  |     |___ |  | |\ | |     |  | |  | |\ | 
# |  |   |      |_|_| |___ | |  \ |__/    |___ |  | |  | |  \  |     |    |__| | \| |___  |  | |__| | \|

#matplotlib #chart

# used to round limit of y axis up to second-most-significant digit
def determine_y_limit(x):                      
    significance = int(floor((log10(x))))
    val = floor(x / (10 ** (significance - 1))) + 1
    val = val * (10 ** (significance - 1))
    return val

def make_chart(df=df, words = ["redwood", "rollo", "ivan", "eisenhower", "epa"], form = 'line', title='', colors= [], smoothing=0, \
               baseline='sym', png_name=''):
    
    dataframe = df[df['word'].isin(words)]   
    dataframe = pd.DataFrame(pd.pivot_table(dataframe, values='pct', index = 'year', columns=['word']))
    dataframe.sort(inplace=True, ascending=True)
    
    startyear = min(list(dataframe.index))
    endyear = max(list(dataframe.index))
    yearstr = '%d-%d' % (startyear, endyear)
    
    legend_size = 0.01
    
    max_y = 0
    for word in dataframe.columns:
        max_y = max(max_y, dataframe[word].max())
        final_word = word
        if smoothing > 0:
            newvalues = []
            for row in range(len(dataframe)):
                start = max(0, row - smoothing)
                end = min(len(dataframe) - 1, row + smoothing)
                newvalues.append(dataframe[word].iloc[start:end].mean())
            for row in range(len(dataframe)):
                dataframe[word].iloc[row] = newvalues[row]
    
    y_text = "% of words in corpus"
    
    num_series = len(dataframe.columns)
    
    if colors == []:
        colors = ["#1f78b4","#ae4ec9","#33a02c","#fb9a99","#e31a1c","#a6cee3",
                  "#fdbf6f","#ff7f00","#cab2d6","#6a3d9a","#ffff99","#b15928"]
    num_colors = len(colors)
    
    if num_series > num_colors:
        print "Warning: colors will be repeated."
    
    x_values = list(dataframe.index)
    y_zeroes = [0] * len(x_values)
    
    if form == 'line':
        fig, ax = plt.subplots(num=None, figsize=(16, 9), dpi=300, facecolor='w', edgecolor='w')
        counter = 0
        for word in dataframe.words:
            color = colors[counter % num_colors]
            counter += 1
            label = word
            ax.plot(x_values, dataframe[word], label=label, color=color, linewidth = 3)
        ax.set_ylim(0,determine_y_limit(max_y)) 
        ax.set_xlim(startyear, endyear)
        ax.set_ylabel(y_text, size = 13)
        box = ax.get_position()
        ax.set_position([box.x0, box.y0 + box.height * legend_size,
                 box.width, box.height * (1 - legend_size)])
        legend_cols = min(5, num_series)
        ax.legend(loc='upper center', bbox_to_anchor=(0.5, -0.05), fancybox=True, shadow=True, ncol=legend_cols)

    if form == 'subplots_auto':
        counter = 0
        fig, axes = plt.subplots(num_series, 1, figsize=(12, 3.5*num_series))
        print 'Maximum alpha: %d percent' % (determine_y_limit(max_y))
        for word in dataframe.columns:
            label = word
            current_ymax = dataframe[word].max()
            tint = 1.0 * current_ymax / determine_y_limit(max_y)
            axes[counter].plot(x_values, dataframe[word], color='k')
            axes[counter].set_ylim(0,determine_y_limit(current_ymax))
            axes[counter].set_xlim(startyear, endyear)
            axes[counter].fill_between(x_values, dataframe[word], color=colors[0], alpha=tint, interpolate=True)

            axes[counter].set_ylabel(label, size=11)
            plt.subplots_adjust(hspace=0.1)
            counter += 1
            
    if form == 'subplots_same':
        counter = 0
        fig, axes = plt.subplots(num_series, 1, figsize=(12, 3.5*num_series))
        print 'Maximum y axis: %d percent' % (determine_y_limit(max_y))
        for word in dataframe.columns:
            label = word
            axes[counter].plot(x_values, dataframe[word], color='k')
            axes[counter].set_ylim(0,determine_y_limit(max_y))
            axes[counter].set_xlim(startyear, endyear)
            axes[counter].fill_between(x_values, dataframe[word], color=colors[1], alpha=1, interpolate=True)
            axes[counter].set_ylabel(label, size=11)
            plt.subplots_adjust(hspace=0.1)
            counter += 1
        
    if form == 'stream':
        figure(num=None, figsize=(20,10), dpi=150, facecolor='w', edgecolor='k')
        plt.title(title, size=17)        
        plt.xlim(startyear, endyear)
        
        yaxtext = 'Percent of words in corpus'
        
        scale = str(determine_y_limit(max_y)) + ')'
        yaxtext += scale
        plt.ylabel(yaxtext, size=13)
        polys = pyplot.stackplot(x_values, *[dataframe[word] for word in dataframe.columns], 
                                 colors=colors, baseline=baseline)
        legendProxies = []
        for poly in polys:
            legendProxies.append(pyplot.Rectangle((0, 0), 1, 1, fc=poly.get_facecolor()[0]))
        wordlist = []
        for word in dataframe.columns:
            wordlist.append(word)
        plt.legend(legendProxies, wordlist, loc=3, ncol=2)
        
        plt.tick_params(\
            axis='y',          
            which='both',      #  major and minor ticks 
            left='off',      
            right='off',       
            labelleft='off')
        
    plt.show()   
    if png_name != '':
        fileword = save_path + "/" + png_name + ".png"
        plt.savefig(fileword)
    plt.close()

In [16]:
# ____ _ _  _ ___  _    ____    ___  _   _ ___  _    ____ ___ 
# [__  | |\/| |__] |    |___    |__]  \_/  |__] |    |  |  |  
# ___] | |  | |    |___ |___    |      |   |    |___ |__|  | 

import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

#data
x = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
y = [3, 2, 5, 7, 4, 2, 4, 6, 7, 8]
ysize = [10, 20, 4, 15, 9, 9, 14, 8, 4, 9]
    

for i in range (0, len(x)):
    plt.plot(x[i], y[i], linestyle="None", marker="o", markersize=ysize[i], color="red")
    
plt.plot(x, y, linestyle="dotted", color="red")


plt.xlim(np.min(x)-1.3, np.max(x)+1.3) #optional 
plt.ylim(np.min(y)-1.3, np.max(y)+1.3) #optional 

plt.xlabel("y")
plt.ylabel("x")

plt.show()



In [7]:
# simple pylab
import matplotlib.pyplot as plt
%matplotlib inline

x = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
y = [3, 2, 5, 7, 4, 2, 4, 6, 7, 8]

fig, ax = plt.subplots()
ax.scatter(x,y,s=50,c='b',marker='o')
plt.plot([0,5], [0,5], linestyle='-', marker='None', color='r', markersize=0)
ax.set_xlim(-1, 11)
ax.set_ylim(-1, 11)
t = ax.text(2, 4, "Direction", ha="center", va="center", rotation=45,
            size=15)
plt.show()



In [17]:
# ____ ___  _    _ _  _ ____    ____ _  _ ____ ____ ___ _  _ _ _  _ ____ 
# [__  |__] |    | |\ | |___    [__  |\/| |  | |  |  |  |__| | |\ | | __ 
# ___] |    |___ | | \| |___    ___] |  | |__| |__|  |  |  | | | \| |__] 

#spline #smoothing

import matplotlib.pyplot as plt
import numpy as np

T = np.array([6, 7, 8, 9, 10, 11, 12])
power = np.array([1.53E+03, 5.92E+02, 2.04E+02, 7.24E+01, 2.72E+01, 1.10E+01, 4.70E+00])

plt.plot(T,power)
plt.show()

from scipy.interpolate import spline

xnew = np.linspace(T.min(),T.max(),300)

power_smooth = spline(T,power,xnew)

plt.plot(xnew,power_smooth)
plt.show()



In [2]:
import numpy as np
import matplotlib.pyplot as plt
import statsmodels.api as sm
%matplotlib inline

x = np.linspace(0,2*np.pi,100)
y = np.sin(x) + np.random.random(100) * 0.2
lowess = sm.nonparametric.lowess(y, x, frac=0.1)

plt.plot(x, y, '+')
plt.plot(lowess[:, 0], lowess[:, 1])
plt.show()



In [6]:
from scipy.interpolate import interp1d
import numpy as np

x = np.linspace(0, 10, 10)
y = np.cos(-x**2/8.0)
f = interp1d(x, y)
f2 = interp1d(x, y, kind='cubic')

xnew = np.linspace(0, 10, 40)
import matplotlib.pyplot as plt
%matplotlib inline
plt.plot(x,y,'o',xnew,f(xnew),'-', xnew, f2(xnew),'--')
plt.legend(['data', 'linear', 'cubic'], loc='best')
plt.show()



In [ ]:
def moving_average(a, n=3) :
    ret = np.cumsum(a, dtype=float)
    ret[n:] = ret[n:] - ret[:-n]
    return ret[n - 1:] / n

In [5]:
lowess


Out[5]:
array([[ 0.        ,  0.11136982],
       [ 0.06346652,  0.15674923],
       [ 0.12693304,  0.20276234],
       [ 0.19039955,  0.24980073],
       [ 0.25386607,  0.29790807],
       [ 0.31733259,  0.35632601],
       [ 0.38079911,  0.41661145],
       [ 0.44426563,  0.47836361],
       [ 0.50773215,  0.53882443],
       [ 0.57119866,  0.60097874],
       [ 0.63466518,  0.66580877],
       [ 0.6981317 ,  0.73045537],
       [ 0.76159822,  0.79248522],
       [ 0.82506474,  0.85079303],
       [ 0.88853126,  0.89896225],
       [ 0.95199777,  0.93336762],
       [ 1.01546429,  0.95729262],
       [ 1.07893081,  0.97752432],
       [ 1.14239733,  0.99622279],
       [ 1.20586385,  1.01375403],
       [ 1.26933037,  1.03065077],
       [ 1.33279688,  1.05092343],
       [ 1.3962634 ,  1.07142036],
       [ 1.45972992,  1.08355079],
       [ 1.52319644,  1.08481524],
       [ 1.58666296,  1.08262689],
       [ 1.65012947,  1.07995384],
       [ 1.71359599,  1.07232683],
       [ 1.77706251,  1.05427129],
       [ 1.84052903,  1.02958343],
       [ 1.90399555,  1.01046488],
       [ 1.96746207,  0.9975224 ],
       [ 2.03092858,  0.97948783],
       [ 2.0943951 ,  0.95216827],
       [ 2.15786162,  0.92412807],
       [ 2.22132814,  0.89407259],
       [ 2.28479466,  0.85696766],
       [ 2.34826118,  0.81169775],
       [ 2.41172769,  0.76834335],
       [ 2.47519421,  0.73142865],
       [ 2.53866073,  0.69630743],
       [ 2.60212725,  0.65696294],
       [ 2.66559377,  0.61167825],
       [ 2.72906028,  0.55606138],
       [ 2.7925268 ,  0.49217923],
       [ 2.85599332,  0.42251955],
       [ 2.91945984,  0.35355123],
       [ 2.98292636,  0.28477854],
       [ 3.04639288,  0.22105578],
       [ 3.10985939,  0.16180097],
       [ 3.17332591,  0.10411779],
       [ 3.23679243,  0.04409547],
       [ 3.30025895, -0.01828767],
       [ 3.36372547, -0.08353111],
       [ 3.42719199, -0.14672258],
       [ 3.4906585 , -0.21053372],
       [ 3.55412502, -0.27516486],
       [ 3.61759154, -0.34289721],
       [ 3.68105806, -0.40986852],
       [ 3.74452458, -0.46761184],
       [ 3.8079911 , -0.51707645],
       [ 3.87145761, -0.5623037 ],
       [ 3.93492413, -0.60706059],
       [ 3.99839065, -0.64997933],
       [ 4.06185717, -0.68736053],
       [ 4.12532369, -0.72573679],
       [ 4.1887902 , -0.76119122],
       [ 4.25225672, -0.78865043],
       [ 4.31572324, -0.80657068],
       [ 4.37918976, -0.81761061],
       [ 4.44265628, -0.83149749],
       [ 4.5061228 , -0.85156996],
       [ 4.56958931, -0.87342365],
       [ 4.63305583, -0.89390294],
       [ 4.69652235, -0.90362921],
       [ 4.75998887, -0.89971118],
       [ 4.82345539, -0.8852009 ],
       [ 4.88692191, -0.86776554],
       [ 4.95038842, -0.85296967],
       [ 5.01385494, -0.83815383],
       [ 5.07732146, -0.82294787],
       [ 5.14078798, -0.80393639],
       [ 5.2042545 , -0.77575389],
       [ 5.26772102, -0.73502516],
       [ 5.33118753, -0.68877407],
       [ 5.39465405, -0.64526952],
       [ 5.45812057, -0.60113028],
       [ 5.52158709, -0.55764202],
       [ 5.58505361, -0.52295476],
       [ 5.64852012, -0.48872227],
       [ 5.71198664, -0.44433545],
       [ 5.77545316, -0.39294133],
       [ 5.83891968, -0.34148402],
       [ 5.9023862 , -0.28957094],
       [ 5.96585272, -0.2237171 ],
       [ 6.02931923, -0.14700994],
       [ 6.09278575, -0.06843922],
       [ 6.15625227,  0.00953667],
       [ 6.21971879,  0.08913602],
       [ 6.28318531,  0.16996986]])

In [1]:
# ____ ____ ____ ___ ___ ____ ____ ___  _    ____ ___    _  _ ____ ___ ____ _ _  _ 
# [__  |    |__|  |   |  |___ |__/ |__] |    |  |  |     |\/| |__|  |  |__/ |  \/  
# ___] |___ |  |  |   |  |___ |  \ |    |___ |__|  |     |  | |  |  |  |  \ | _/\_

_ = pd.scatter_matrix(baseball.loc[:,'r':'sb'], figsize=(12,8), diagonal='kde')

# # with kde
# import numpy as np
# import matplotlib.pyplot as plt
# import pandas
# iris = pandas.read_csv("snippet_files/iris.csv")
# df = pandas.DataFrame(iris, columns=['slength', 'swidth', 'plength', 'pwidth'])
# pandas.tools.plotting.scatter_matrix(df, alpha=0.2, diagonal='kde') #hist
# plt.show()


---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-1-8f24101294a7> in <module>()
      3 # ___] |___ |  |  |   |  |___ |  \ |    |___ |__|  |     |  | |  |  |  |  \ | _/\_
      4 
----> 5 _ = pd.scatter_matrix(baseball.loc[:,'r':'sb'], figsize=(12,8), diagonal='kde')
      6 
      7 # # with kde

NameError: name 'pd' is not defined

In [7]:
# heat map

import matplotlib.pyplot as plt
%matplotlib inline
data = [[0, 0.25], [0.5, 0.75]]

fig, ax = plt.subplots()
im = ax.imshow(data, cmap=plt.get_cmap('hot'), interpolation='nearest',
               vmin=0, vmax=1)
fig.colorbar(im)
plt.show()



In [ ]: