In [1]:

    
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from pandas import Series,DataFrame
import seaborn as sns



In [2]:

    
# Wine reviews dataset, downloaded from:
# https://www.kaggle.com/residentmario/bivariate-plotting-with-pandas/data
reviews = pd.read_csv(
    "winemag-data_first150k.csv",
    index_col=0,  # the first CSV column is used as the row index
)
reviews.head()









    Out[2]:







  
    
      
      country
      description
      designation
      points
      price
      province
      region_1
      region_2
      variety
      winery
    
  
  
    
      0
      US
      This tremendous 100% varietal wine hails from ...
      Martha's Vineyard
      96
      235.0
      California
      Napa Valley
      Napa
      Cabernet Sauvignon
      Heitz
    
    
      1
      Spain
      Ripe aromas of fig, blackberry and cassis are ...
      Carodorum Selección Especial Reserva
      96
      110.0
      Northern Spain
      Toro
      NaN
      Tinta de Toro
      Bodega Carmen Rodríguez
    
    
      2
      US
      Mac Watson honors the memory of a wine once ma...
      Special Selected Late Harvest
      96
      90.0
      California
      Knights Valley
      Sonoma
      Sauvignon Blanc
      Macauley
    
    
      3
      US
      This spent 20 months in 30% new French oak, an...
      Reserve
      96
      65.0
      Oregon
      Willamette Valley
      Willamette Valley
      Pinot Noir
      Ponzi
    
    
      4
      France
      This is the top wine from La Bégude, named aft...
      La Brûlade
      95
      66.0
      Provence
      Bandol
      NaN
      Provence red blend
      Domaine de la Bégude

Bivariate plotting with pandas

df.plot.scatter() df.plot.hexbin() df.plot.bar(stacked=True) df.plot.line()



In [3]:

    
# Relationship between price and points, drawn from a random sample of 100
# reviews priced below 100.
# random_state pins the sample so the figure is the same on every
# Restart & Run All; without it each run plots a different set of rows.
affordable_reviews = reviews[reviews['price'] < 100]
affordable_reviews.sample(100, random_state=0).plot.scatter(x='price', y='points')
plt.show()



In [4]:

    
# Scatter plot of points against price for every review priced below 100
# (no sampling, so all matching rows are drawn).
is_under_100 = reviews['price'] < 100
reviews_under_100 = reviews[is_under_100]
reviews_under_100.plot.scatter(x='price', y='points')
plt.show()



In [5]:

    
# Hexagonal-bin plot of points against price for reviews priced below 100,
# using a grid size of 15.
price_mask = reviews['price'] < 100
reviews[price_mask].plot.hexbin(
    x='price',
    y='points',
    gridsize=15,
)
plt.show()



In [39]:

    
# Mean of the numeric columns for each points score, drawn as a bar chart.
# (Despite its name, count_data holds per-group means, not counts.)
# numeric_only=True is needed because `reviews` also contains text columns
# (country, description, ...): pandas >= 2.0 raises TypeError when asked to
# average them instead of silently dropping them as older versions did.
count_data = reviews.groupby('points').mean(numeric_only=True)
count_data.plot.bar()
plt.show()



In [17]:

    
# Show the column labels of the reviews DataFrame.
reviews.columns









    Out[17]:





Index(['country', 'description', 'designation', 'points', 'price', 'province',
       'region_1', 'region_2', 'variety', 'winery'],
      dtype='object')



In [36]:

    
# Review counts for the ten wineries with the most reviews, as a line plot.
# value_counts() orders wineries by descending frequency, so head(10) keeps
# the ten most frequent ones.
winery_counts = reviews['winery'].value_counts()
cate = winery_counts.head(10)
cate.plot.line()
plt.show()



In [27]:

    
# Mean of the numeric columns for each points score, displayed as a table.
# numeric_only=True keeps the text columns of `reviews` out of the
# aggregation; pandas >= 2.0 raises TypeError on them otherwise.
count_data = reviews.groupby('points').mean(numeric_only=True)
count_data



In [42]:

    
# Load the Pokemon table; the first CSV column is used as the row index.
pokemon = pd.read_csv(
    "Pokemon.csv",
    index_col=0,
)
pokemon.head()









    Out[42]:







  
    
      
      Name
      Type 1
      Type 2
      Total
      HP
      Attack
      Defense
      Sp. Atk
      Sp. Def
      Speed
      Generation
      Legendary
    
    
      #
      
      
      
      
      
      
      
      
      
      
      
      
    
  
  
    
      1
      Bulbasaur
      Grass
      Poison
      318
      45
      49
      49
      65
      65
      45
      1
      False
    
    
      2
      Ivysaur
      Grass
      Poison
      405
      60
      62
      63
      80
      80
      60
      1
      False
    
    
      3
      Venusaur
      Grass
      Poison
      525
      80
      82
      83
      100
      100
      80
      1
      False
    
    
      3
      VenusaurMega Venusaur
      Grass
      Poison
      625
      80
      100
      123
      122
      120
      80
      1
      False
    
    
      4
      Charmander
      Fire
      NaN
      309
      39
      52
      43
      60
      50
      65
      1
      False



In [43]:

    
# Mean Attack and Defense for each (Legendary, Generation) group, drawn as
# stacked bars.
# The two stat columns are selected *before* calling mean(): averaging the
# whole frame would also hit the text columns (Name, Type 1, Type 2), which
# raises TypeError on pandas >= 2.0 and is wasted work on older versions.
pokemon_stats_legendary = (
    pokemon
    .groupby(['Legendary', 'Generation'])[['Attack', 'Defense']]
    .mean()
)
pokemon_stats_legendary.plot.bar(stacked=True)
plt.show()



In [44]:

    
# Mean of each base stat per Generation, drawn as one line per stat.
# The stat columns are selected *before* calling mean(): averaging the whole
# frame would also hit the text columns (Name, Type 1, Type 2), which raises
# TypeError on pandas >= 2.0.
stat_columns = ['HP', 'Attack', 'Defense', 'Sp. Atk', 'Sp. Def', 'Speed']
pokemon_stats_by_generation = pokemon.groupby('Generation')[stat_columns].mean()
pokemon_stats_by_generation.plot.line()
plt.show()



In [ ]:

	price
points
80	17.224236
81	17.665078
82	19.171879
83	18.013604
84	19.367885
85	20.055067
86	21.816528
87	24.588279
88	27.831296
89	32.538375
90	37.357817
91	43.755835
92	52.303609
93	66.425438
94	81.937812
95	108.927012
96	132.350970
97	178.000000
98	232.131579
99	289.525000
100	401.583333

	country	description	designation	points	price	province	region_1	region_2	variety	winery
0	US	This tremendous 100% varietal wine hails from ...	Martha's Vineyard	96	235.0	California	Napa Valley	Napa	Cabernet Sauvignon	Heitz
1	Spain	Ripe aromas of fig, blackberry and cassis are ...	Carodorum Selección Especial Reserva	96	110.0	Northern Spain	Toro	NaN	Tinta de Toro	Bodega Carmen Rodríguez
2	US	Mac Watson honors the memory of a wine once ma...	Special Selected Late Harvest	96	90.0	California	Knights Valley	Sonoma	Sauvignon Blanc	Macauley
3	US	This spent 20 months in 30% new French oak, an...	Reserve	96	65.0	Oregon	Willamette Valley	Willamette Valley	Pinot Noir	Ponzi
4	France	This is the top wine from La Bégude, named aft...	La Brûlade	95	66.0	Provence	Bandol	NaN	Provence red blend	Domaine de la Bégude

	Name	Type 1	Type 2	Total	HP	Attack	Defense	Sp. Atk	Sp. Def	Speed	Generation	Legendary
#
1	Bulbasaur	Grass	Poison	318	45	49	49	65	65	45	1	False
2	Ivysaur	Grass	Poison	405	60	62	63	80	80	60	1	False
3	Venusaur	Grass	Poison	525	80	82	83	100	100	80	1	False
3	VenusaurMega Venusaur	Grass	Poison	625	80	100	123	122	120	80	1	False
4	Charmander	Fire	NaN	309	39	52	43	60	50	65	1	False