In [1]:
import matplotlib.pyplot as plt
import numpy as np

%matplotlib inline

In [2]:
def f(x):
    return np.pi * np.sin(x) + np.pi * np.cos(np.pi * x) + np.sin(x + np.pi)

x = np.linspace(0, 10, 100)  # generate 100 points between 0 and 10
y = f(x)

plt.plot(x, y)


Out[2]:
[<matplotlib.lines.Line2D at 0x10f3792b0>]

In [3]:
%config InlineBackend.figure_format = 'retina'
plt.plot(x, y)


Out[3]:
[<matplotlib.lines.Line2D at 0x10f3c58d0>]

In [4]:
_ = plt.plot(x, y)



In [5]:
plt.plot(x, y)
plt.show()



In [6]:
plt.plot(x, y);
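
The three cells above suppress the `Line2D` repr in three equivalent ways: assigning the result to `_`, calling `plt.show()`, or ending the line with a semicolon. When the figure should go to a file instead of (or as well as) the inline display, `plt.savefig` is the usual call; a minimal sketch with a hypothetical output path:

In [ ]:
plt.plot(x, y)
plt.savefig('f_of_x.png', dpi=150, bbox_inches='tight')  # hypothetical filename
plt.close()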


More advanced


In [7]:
z = y / 4.0

plt.figure(figsize=(12, 7))
plt.plot(x, y, label="high")
plt.plot(x, z, label="low")
plt.legend()

plt.ylim(-6, 7)

plt.annotate(r'$\pi \sin(x) + \pi \cos(\pi x) + \sin(x + \pi)$',
             xy=(2, 5), xycoords='data',
             xytext=(3, 5.5), textcoords='data', fontsize=15,
             arrowprops=dict(arrowstyle="->", connectionstyle="arc3,rad=.2"),
             )

plt.plot([0, 10], [0, 0], color='grey', linestyle='-.')
plt.plot([2, 2], [0, 10], color='blue', linewidth=1, linestyle="--");



In [8]:
plt.figure(figsize=(12, 7))
plt.hist(np.random.randn(1000));
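
`plt.hist` defaults to 10 bins and raw counts; both are easy to override. A small sketch, with the bin count chosen arbitrarily:

In [ ]:
plt.figure(figsize=(12, 7))
plt.hist(np.random.randn(1000), bins=30, density=True, alpha=0.7);  # 30 bins, area normalised to 1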



In [9]:
plt.figure(figsize=(12, 7))
plt.boxplot(np.random.randn(50));



In [10]:
import sklearn.datasets as d
iris = d.load_iris()

print(iris.DESCR)


Iris Plants Database
====================

Notes
-----
Data Set Characteristics:
    :Number of Instances: 150 (50 in each of three classes)
    :Number of Attributes: 4 numeric, predictive attributes and the class
    :Attribute Information:
        - sepal length in cm
        - sepal width in cm
        - petal length in cm
        - petal width in cm
        - class:
                - Iris-Setosa
                - Iris-Versicolour
                - Iris-Virginica
    :Summary Statistics:

    ============== ==== ==== ======= ===== ====================
                    Min  Max   Mean    SD   Class Correlation
    ============== ==== ==== ======= ===== ====================
    sepal length:   4.3  7.9   5.84   0.83    0.7826
    sepal width:    2.0  4.4   3.05   0.43   -0.4194
    petal length:   1.0  6.9   3.76   1.76    0.9490  (high!)
    petal width:    0.1  2.5   1.20  0.76     0.9565  (high!)
    ============== ==== ==== ======= ===== ====================

    :Missing Attribute Values: None
    :Class Distribution: 33.3% for each of 3 classes.
    :Creator: R.A. Fisher
    :Donor: Michael Marshall (MARSHALL%PLU@io.arc.nasa.gov)
    :Date: July, 1988

This is a copy of UCI ML iris datasets.
http://archive.ics.uci.edu/ml/datasets/Iris

The famous Iris database, first used by Sir R.A Fisher

This is perhaps the best known database to be found in the
pattern recognition literature.  Fisher's paper is a classic in the field and
is referenced frequently to this day.  (See Duda & Hart, for example.)  The
data set contains 3 classes of 50 instances each, where each class refers to a
type of iris plant.  One class is linearly separable from the other 2; the
latter are NOT linearly separable from each other.

References
----------
   - Fisher,R.A. "The use of multiple measurements in taxonomic problems"
     Annual Eugenics, 7, Part II, 179-188 (1936); also in "Contributions to
     Mathematical Statistics" (John Wiley, NY, 1950).
   - Duda,R.O., & Hart,P.E. (1973) Pattern Classification and Scene Analysis.
     (Q327.D83) John Wiley & Sons.  ISBN 0-471-22361-1.  See page 218.
   - Dasarathy, B.V. (1980) "Nosing Around the Neighborhood: A New System
     Structure and Classification Rule for Recognition in Partially Exposed
     Environments".  IEEE Transactions on Pattern Analysis and Machine
     Intelligence, Vol. PAMI-2, No. 1, 67-71.
   - Gates, G.W. (1972) "The Reduced Nearest Neighbor Rule".  IEEE Transactions
     on Information Theory, May 1972, 431-433.
   - See also: 1988 MLC Proceedings, 54-64.  Cheeseman et al"s AUTOCLASS II
     conceptual clustering system finds 3 classes in the data.
   - Many, many more ...


In [11]:
plt.figure(figsize=(12, 7))
plt.boxplot(iris.data)

plt.xticks(range(1, len(iris.data[0]) + 1),
           ['sepal length', 'sepal width', 'petal length', 'petal width'])
plt.ylim(-1, 9)


Out[11]:
(-1, 9)
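
The tick labels above are typed out by hand; the same strings (with units) are already on the Bunch object as `iris.feature_names`, so an equivalent sketch:

In [ ]:
plt.figure(figsize=(12, 7))
plt.boxplot(iris.data)
plt.xticks(range(1, iris.data.shape[1] + 1), iris.feature_names)
plt.ylim(-1, 9);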

In [12]:
import seaborn as sns

In [13]:
sns.distplot(np.random.randn(1000));
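
`distplot` is deprecated in recent seaborn releases (and removed in the newest ones); `histplot` (axes-level) or `displot` (figure-level) cover the same use case. A rough equivalent, assuming seaborn >= 0.11:

In [ ]:
sns.histplot(np.random.randn(1000), kde=True);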



In [14]:
crashes = sns.load_dataset('car_crashes')
crashes.head()


Out[14]:
   total  speeding  alcohol  not_distracted  no_previous  ins_premium  ins_losses abbrev
0   18.8     7.332    5.640          18.048       15.040       784.55      145.08     AL
1   18.1     7.421    4.525          16.290       17.014      1053.48      133.93     AK
2   18.6     6.510    5.208          15.624       17.856       899.47      110.35     AZ
3   22.4     4.032    5.824          21.056       21.280       827.34      142.39     AR
4   12.0     4.200    3.360          10.920       10.680       878.41      165.63     CA

In [15]:
sns.jointplot(x='alcohol', y='total', kind="reg", data=crashes);
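
`kind="reg"` overlays a linear fit on the scatter; the same call accepts other kinds such as "hex" or "kde". A quick sketch of the hexbin variant:

In [ ]:
sns.jointplot(x='alcohol', y='total', kind='hex', data=crashes);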



In [16]:
iris = sns.load_dataset("iris")
iris.head()


Out[16]:
   sepal_length  sepal_width  petal_length  petal_width species
0           5.1          3.5           1.4          0.2  setosa
1           4.9          3.0           1.4          0.2  setosa
2           4.7          3.2           1.3          0.2  setosa
3           4.6          3.1           1.5          0.2  setosa
4           5.0          3.6           1.4          0.2  setosa

In [17]:
# `size` was renamed to `height` in newer seaborn releases
sns.FacetGrid(iris, hue="species", height=5) \
  .map(plt.scatter, "sepal_length", "sepal_width") \
  .add_legend();
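
FacetGrid plus plt.scatter handles one pair of axes; sns.pairplot builds the full grid of pairwise scatter plots (with per-variable distributions on the diagonal) in one call. A minimal sketch on the same DataFrame:

In [ ]:
sns.pairplot(iris, hue="species");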



In [18]:
sns.swarmplot(x="species", y="petal_length", data=iris);



In [19]:
import pandas as pd
melt_iris = pd.melt(iris, "species", var_name="measurement")
fig, ax = plt.subplots()
fig.set_size_inches(14, 7)
sns.boxplot(x="measurement", y="value", hue="species", data=melt_iris);
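
pd.melt stacks the four measurement columns into long form, one row per (species, measurement, value) triple, which is what lets `x`, `y`, and `hue` above each map onto a single column. A quick look at the reshaped frame:

In [ ]:
melt_iris.head()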



In [20]:
fig, ax = plt.subplots()
fig.set_size_inches(14, 10)
sns.swarmplot(x="measurement", y="value", hue="species", data=melt_iris);


