notebook.community

Edit and run



In [ ]:

    
This notebook is based on "Intro to pandas data structures" by Greg Reda (http://www.gregreda.com/2013/10/26/intro-to-pandas-data-structures/).



In [6]:

    
# import and configure the required modules.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Show up to 50 columns when displaying wide DataFrames. The fully qualified
# key is used because the bare 'max_columns' pattern is ambiguous (OptionError)
# on pandas versions that also define 'styler.render.max_columns'.
pd.set_option('display.max_columns', 50)
%matplotlib inline



In [9]:

    
# Series: a one-dimensional labelled array. Mixing ints, floats and strings
# makes pandas fall back to dtype=object, with a default 0..n-1 integer index.
s = pd.Series(
    [
        8,
        'This is a string',
        3.14,
        -1.423423423423,
        "Another string!",
    ]
)
s









    Out[9]:





0                   8
1    This is a string
2                3.14
3            -1.42342
4     Another string!
dtype: object



In [10]:

    
# Building a Series from a dict: the keys become the index labels and the
# values become the data.
d = {
    'New Delhi': 11,
    'Bombay': 22,
    'Kolkata': 33,
    'Chennai': 44,
    'Bangalore': 80,
}
cities = pd.Series(d)
cities









    Out[10]:





Bangalore    80
Bombay       22
Chennai      44
Kolkata      33
New Delhi    11
dtype: int64



In [11]:

    
# Look up a single value by its index label.
cities['Bangalore']









    Out[11]:





80



In [13]:

    
# Passing a list of labels returns a sub-Series, in the order requested.
cities[['Bangalore', 'Kolkata', 'Bombay']]









    Out[13]:





Bangalore    80
Kolkata      33
Bombay       22
dtype: int64



In [14]:

    
# Comparing a Series with a scalar is element-wise and yields a boolean Series.
cities < 20









    Out[14]:





Bangalore    False
Bombay       False
Chennai      False
Kolkata      False
New Delhi     True
dtype: bool



In [15]:

    
# Element-wise comparison again; this boolean mask is what the filtering
# example in the following cell passes to the indexer.
cities > 20









    Out[15]:





Bangalore     True
Bombay        True
Chennai       True
Kolkata       True
New Delhi    False
dtype: bool



In [16]:

    
# Boolean indexing: keep only the entries where the mask is True.
cities[cities > 20]









    Out[16]:





Bangalore    80
Bombay       22
Chennai      44
Kolkata      33
dtype: int64



In [18]:

    
# Assigning through an index label changes the Series in place.
# print() is called with a single pre-formatted string so the cell runs, and
# prints the same text, on both Python 2 and Python 3.
print('old value: {}'.format(cities['Bangalore']))
cities['Bangalore'] = 90
print('New value: {}'.format(cities['Bangalore']))









    



old value: 80
New value: 90



In [19]:

    
# Display the Series to confirm the assignment above changed it in place.
cities









    Out[19]:





Bangalore    90
Bombay       22
Chennai      44
Kolkata      33
New Delhi    11
dtype: int64



In [20]:

    
# Show the entries below 90, then overwrite all of them at once by assigning
# through the boolean mask. NOTE: this mutates `cities` in place, so later
# cells see the new values.
print(cities[cities < 90])
print('\n')
cities[cities < 90] = 750

# Nothing is below 90 any more, so this prints an empty Series.
# print() with a single argument behaves the same on Python 2 and Python 3.
print(cities[cities < 90])









    



Bombay       22
Chennai      44
Kolkata      33
New Delhi    11
dtype: int64


Series([], dtype: int64)



In [21]:

    
# Every value that was below 90 has been overwritten with 750.
cities









    Out[21]:





Bangalore     90
Bombay       750
Chennai      750
Kolkata      750
New Delhi    750
dtype: int64



In [22]:

    
# `in` on a Series checks the index labels.
# print() with a single argument behaves the same on Python 2 and Python 3.
print('Bangalore' in cities)
print('Mysore' in cities)









    



True
False



In [23]:

    
# Arithmetic with a scalar is applied element-wise; the result here is float64.
cities / 3









    Out[23]:





Bangalore     30.0
Bombay       250.0
Chennai      250.0
Kolkata      250.0
New Delhi    250.0
dtype: float64



In [24]:

    
# NumPy functions can be applied directly to a Series; the index labels are kept.
np.square(cities)









    Out[24]:





Bangalore      8100
Bombay       562500
Chennai      562500
Kolkata      562500
New Delhi    562500
dtype: int64



In [26]:

    
# Ask for labels that may be absent: reindex() returns NaN for the missing
# 'Mangalore' (and therefore a float64 result). Plain list indexing,
# cities[[...]], raises KeyError for missing labels on pandas >= 1.0.
cities.reindex(['New Delhi', 'Bombay', 'Mangalore'])









    Out[26]:





New Delhi    750.0
Bombay       750.0
Mangalore      NaN
dtype: float64



In [28]:

    
# Adding two Series aligns them on their index labels; a label that is missing
# (or NaN) on either side yields NaN. The two selections share no labels, so
# every entry of the result is NaN.
# reindex() is used because list indexing with missing labels ('Mangalore',
# 'Mysore') raises KeyError on pandas >= 1.0.
cities.reindex(['New Delhi', 'Bombay', 'Mangalore']) + cities.reindex(['Bangalore', 'Mysore'])









    Out[28]:





Bangalore   NaN
Bombay      NaN
Mangalore   NaN
Mysore      NaN
New Delhi   NaN
dtype: float64



In [29]:

    
# notnull() flags the entries that are not missing; `cities` has no missing
# values, so every entry is True.
cities.notnull()









    Out[29]:





Bangalore    True
Bombay       True
Chennai      True
Kolkata      True
New Delhi    True
dtype: bool



In [30]:

    
# isnull() is the inverse check: True only where a value is missing.
cities.isnull()









    Out[30]:





Bangalore    False
Bombay       False
Chennai      False
Kolkata      False
New Delhi    False
dtype: bool



In [32]:

    
# Season records stored column-wise: one list of eight entries per column.
data = {
    'year': [2010, 2011, 2012, 2011, 2012, 2010, 2011, 2012],
    'team': [
        'Bears', 'Bears', 'Bears',
        'Packers', 'Packers',
        'Lions', 'Lions', 'Lions',
    ],
    'wins': [11, 8, 10, 15, 11, 6, 10, 4],
    'losses': [5, 8, 6, 1, 5, 10, 6, 12],
}



In [33]:

    
# Build a DataFrame from the dict of lists; `columns` fixes the column order
# explicitly.
football = pd.DataFrame(
    data,
    columns=['year', 'team', 'wins', 'losses'],
)
football



In [38]:

    
# Peek at the first five raw lines of the file with the shell `head` command
# before parsing it.
!head -n 5 data.csv









    



Year,Age,Tm,Lg,W,L,W-L%,ERA,G,GS,GF,CG,SHO,SV,IP,H,R,ER,HR,BB,IBB,SO,HBP,BK,WP,BF,ERA+,WHIP,H/9,HR/9,BB/9,SO/9,SO/BB,Awards
1995,25,NYY,AL,5,3,.625,5.51,19,10,2,0,0,0,67.0,71,43,41,11,30,0,51,2,1,0,301,84,1.507,9.5,1.5,4.0,6.9,1.70,
1996,26,NYY,AL,8,3,.727,2.09,61,0,14,0,0,5,107.2,73,25,25,1,34,3,130,2,0,1,425,240,0.994,6.1,0.1,2.8,10.9,3.82,CYA-3MVP-12
1997,27,NYY,AL,6,4,.600,1.88,66,0,56,0,0,43,71.2,65,17,15,5,20,6,68,0,0,2,301,239,1.186,8.2,0.6,2.5,8.5,3.40,ASMVP-25
1998,28,NYY,AL,3,0,1.000,1.91,54,0,49,0,0,36,61.1,48,13,13,3,17,1,36,1,0,0,246,233,1.060,7.0,0.4,2.5,5.3,2.12,



In [41]:

    
# Parse the CSV into a DataFrame; the first line of the file supplies the
# column names. head() shows only the first five rows.
from_csv = pd.read_csv("data.csv")
from_csv.head()









    Out[41]:






  
    
      
      Year
      Age
      Tm
      Lg
      W
      L
      W-L%
      ERA
      G
      GS
      GF
      CG
      SHO
      SV
      IP
      H
      R
      ER
      HR
      BB
      IBB
      SO
      HBP
      BK
      WP
      BF
      ERA+
      WHIP
      H/9
      HR/9
      BB/9
      SO/9
      SO/BB
      Awards
    
  
  
    
      0
      1995
      25
      NYY
      AL
      5
      3
      0.625
      5.51
      19
      10
      2
      0
      0
      0
      67.0
      71
      43
      41
      11
      30
      0
      51
      2
      1
      0
      301
      84
      1.507
      9.5
      1.5
      4.0
      6.9
      1.70
      NaN
    
    
      1
      1996
      26
      NYY
      AL
      8
      3
      0.727
      2.09
      61
      0
      14
      0
      0
      5
      107.2
      73
      25
      25
      1
      34
      3
      130
      2
      0
      1
      425
      240
      0.994
      6.1
      0.1
      2.8
      10.9
      3.82
      CYA-3MVP-12
    
    
      2
      1997
      27
      NYY
      AL
      6
      4
      0.600
      1.88
      66
      0
      56
      0
      0
      43
      71.2
      65
      17
      15
      5
      20
      6
      68
      0
      0
      2
      301
      239
      1.186
      8.2
      0.6
      2.5
      8.5
      3.40
      ASMVP-25
    
    
      3
      1998
      28
      NYY
      AL
      3
      0
      1.000
      1.91
      54
      0
      49
      0
      0
      36
      61.1
      48
      13
      13
      3
      17
      1
      36
      1
      0
      0
      246
      233
      1.060
      7.0
      0.4
      2.5
      5.3
      2.12
      NaN
    
    
      4
      1999
      29
      NYY
      AL
      4
      3
      0.571
      1.83
      66
      0
      63
      0
      0
      45
      69.0
      43
      15
      14
      2
      18
      3
      52
      3
      1
      2
      268
      257
      0.884
      5.6
      0.3
      2.3
      6.8
      2.89
      ASCYA-3MVP-14



In [42]:

    
# This file is read with header=None, so the column names are supplied
# explicitly through `names`.
cols = [
    'num', 'game', 'date', 'team',
    'home_away', 'opponent', 'result', 'quarter',
    'distance', 'receiver', 'score_before', 'score_after',
]
no_headers = pd.read_csv(
    'peyton-passing-TDs-2012.csv',
    header=None,  # treat the first line as data, not as column names
    names=cols,
)
no_headers.head()









    Out[42]:






  
    
      
      num
      game
      date
      team
      home_away
      opponent
      result
      quarter
      distance
      receiver
      score_before
      score_after
    
  
  
    
      0
      1
      1
      2012-09-09
      DEN
      NaN
      PIT
      W 31-19
      3
      71
      Demaryius Thomas
      Trail 7-13
      Lead 14-13*
    
    
      1
      2
      1
      2012-09-09
      DEN
      NaN
      PIT
      W 31-19
      4
      1
      Jacob Tamme
      Trail 14-19
      Lead 22-19*
    
    
      2
      3
      2
      2012-09-17
      DEN
      @
      ATL
      L 21-27
      2
      17
      Demaryius Thomas
      Trail 0-20
      Trail 7-20
    
    
      3
      4
      3
      2012-09-23
      DEN
      NaN
      HOU
      L 25-31
      4
      38
      Brandon Stokley
      Trail 11-31
      Trail 18-31
    
    
      4
      5
      3
      2012-09-23
      DEN
      NaN
      HOU
      L 25-31
      4
      6
      Joel Dreessen
      Trail 18-31
      Trail 25-31



In [44]:

    
# Write the frame back to disk as CSV. With the default arguments the row
# index is written too, as an unnamed first column.
no_headers.to_csv('out.csv')



In [ ]:

	year	team	wins	losses
0	2010	Bears	11	5
1	2011	Bears	8	8
2	2012	Bears	10	6
3	2011	Packers	15	1
4	2012	Packers	11	5
5	2010	Lions	6	10
6	2011	Lions	10	6
7	2012	Lions	4	12

	Year	Age	Tm	Lg	W	L	W-L%	ERA	G	GS	GF	SV	IP	H	R	ER	HR	BB	IBB	SO	HBP	BK	WP	BF	ERA+	WHIP	H/9	HR/9	BB/9	SO/9	SO/BB	Awards
0	1995	25	NYY	AL	5	3	0.625	5.51	19	10	2	0	67.0	71	43	41	11	30	0	51	2	1	0	301	84	1.507	9.5	1.5	4.0	6.9	1.70	NaN
1	1996	26	NYY	AL	8	3	0.727	2.09	61	0	14	5	107.2	73	25	25	1	34	3	130	2	0	1	425	240	0.994	6.1	0.1	2.8	10.9	3.82	CYA-3MVP-12
2	1997	27	NYY	AL	6	4	0.600	1.88	66	0	56	43	71.2	65	17	15	5	20	6	68	0	0	2	301	239	1.186	8.2	0.6	2.5	8.5	3.40	ASMVP-25
3	1998	28	NYY	AL	3	0	1.000	1.91	54	0	49	36	61.1	48	13	13	3	17	1	36	1	0	0	246	233	1.060	7.0	0.4	2.5	5.3	2.12	NaN
4	1999	29	NYY	AL	4	3	0.571	1.83	66	0	63	45	69.0	43	15	14	2	18	3	52	3	1	2	268	257	0.884	5.6	0.3	2.3	6.8	2.89	ASCYA-3MVP-14

	num	game	date	team	home_away	opponent	result	quarter	distance	receiver	score_before	score_after
0	1	1	2012-09-09	DEN	NaN	PIT	W 31-19	3	71	Demaryius Thomas	Trail 7-13	Lead 14-13*
1	2	1	2012-09-09	DEN	NaN	PIT	W 31-19	4	1	Jacob Tamme	Trail 14-19	Lead 22-19*
2	3	2	2012-09-17	DEN	@	ATL	L 21-27	2	17	Demaryius Thomas	Trail 0-20	Trail 7-20
3	4	3	2012-09-23	DEN	NaN	HOU	L 25-31	4	38	Brandon Stokley	Trail 11-31	Trail 18-31
4	5	3	2012-09-23	DEN	NaN	HOU	L 25-31	4	6	Joel Dreessen	Trail 18-31	Trail 25-31