Week 12: Exploring the Titanic Passenger Data

This notebook contains our code for Week 12. It works through three stages:

  • Input
  • Explore
  • Analyze

In [2]:
# Record the interpreter / IPython environment this notebook was executed under.
import IPython

environment_report = IPython.sys_info()
print(environment_report)


{'commit_hash': '5c9c918',
 'commit_source': 'installation',
 'default_encoding': 'cp1252',
 'ipython_path': 'C:\\Users\\Dell\\Anaconda3\\lib\\site-packages\\IPython',
 'ipython_version': '5.1.0',
 'os_name': 'nt',
 'platform': 'Windows-7-6.1.7600-SP0',
 'sys_executable': 'C:\\Users\\Dell\\Anaconda3\\python.exe',
 'sys_platform': 'win32',
 'sys_version': '3.5.2 |Anaconda custom (64-bit)| (default, Jul  5 2016, '
                '11:41:13) [MSC v.1900 64 bit (AMD64)]'}

In [3]:
import pandas as pd

In [11]:
# Load the Titanic passenger table straight from the public Rdatasets mirror.
TITANIC_CSV_URL = "https://vincentarelbundock.github.io/Rdatasets/csv/datasets/Titanic.csv"
titanic = pd.read_csv(TITANIC_CSV_URL)

In [12]:
# Print a structural summary of the frame: index range, per-column non-null
# counts and dtypes, and memory usage.
titanic.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1313 entries, 0 to 1312
Data columns (total 7 columns):
Unnamed: 0    1313 non-null int64
Name          1313 non-null object
PClass        1313 non-null object
Age           756 non-null float64
Sex           1313 non-null object
Survived      1313 non-null int64
SexCode       1313 non-null int64
dtypes: float64(1), int64(3), object(3)
memory usage: 71.9+ KB

In [13]:
# Preview the first five rows of the freshly loaded table.
titanic.head(n=5)


Out[13]:
Unnamed: 0 Name PClass Age Sex Survived SexCode
0 1 Allen, Miss Elisabeth Walton 1st 29.00 female 1 1
1 2 Allison, Miss Helen Loraine 1st 2.00 female 0 1
2 3 Allison, Mr Hudson Joshua Creighton 1st 30.00 male 0 0
3 4 Allison, Mrs Hudson JC (Bessie Waldo Daniels) 1st 25.00 female 0 1
4 5 Allison, Master Hudson Trevor 1st 0.92 male 1 0

In [14]:
# Remove the leftover row-number column that came in with the CSV.
# `axis` is passed by keyword: the positional form (`drop(label, 1)`) is
# deprecated and was removed in pandas 2.0, while `axis=1` works on every version.
titanic = titanic.drop('Unnamed: 0', axis=1)

In [15]:
# Confirm the row-number column is gone by looking at the first five rows.
titanic.head(n=5)


Out[15]:
Name PClass Age Sex Survived SexCode
0 Allen, Miss Elisabeth Walton 1st 29.00 female 1 1
1 Allison, Miss Helen Loraine 1st 2.00 female 0 1
2 Allison, Mr Hudson Joshua Creighton 1st 30.00 male 0 0
3 Allison, Mrs Hudson JC (Bessie Waldo Daniels) 1st 25.00 female 0 1
4 Allison, Master Hudson Trevor 1st 0.92 male 1 0

In [16]:
# Keep an independent (deep) copy of the table as it stands at this point.
titanic2 = titanic.copy(deep=True)

In [17]:
# Frequency of each passenger class, most common first.
# Uses the Series method: the top-level pd.value_counts() helper is deprecated.
titanic['PClass'].value_counts()


Out[17]:
3rd    711
1st    322
2nd    279
*        1
Name: PClass, dtype: int64

In [18]:
# Frequency of each sex, most common first.
# Uses the Series method: the top-level pd.value_counts() helper is deprecated.
titanic['Sex'].value_counts()


Out[18]:
male      851
female    462
Name: Sex, dtype: int64

In [19]:
# Frequency of each Survived value (0 / 1), most common first.
# Uses the Series method: the top-level pd.value_counts() helper is deprecated.
titanic['Survived'].value_counts()


Out[19]:
0    863
1    450
Name: Survived, dtype: int64

In [20]:
# Positional slice: rows at positions 1 and 2 (end is exclusive), all columns.
titanic.iloc[1:3]


Out[20]:
Name PClass Age Sex Survived SexCode
1 Allison, Miss Helen Loraine 1st 2.0 female 0 1
2 Allison, Mr Hudson Joshua Creighton 1st 30.0 male 0 0

In [22]:
# Show the first seven rows.
titanic.head(n=7)


Out[22]:
Name PClass Age Sex Survived SexCode
0 Allen, Miss Elisabeth Walton 1st 29.00 female 1 1
1 Allison, Miss Helen Loraine 1st 2.00 female 0 1
2 Allison, Mr Hudson Joshua Creighton 1st 30.00 male 0 0
3 Allison, Mrs Hudson JC (Bessie Waldo Daniels) 1st 25.00 female 0 1
4 Allison, Master Hudson Trevor 1st 0.92 male 1 0
5 Anderson, Mr Harry 1st 47.00 male 1 0
6 Andrews, Miss Kornelia Theodosia 1st 63.00 female 1 1

In [28]:
# Select a subset of columns by name and preview it.
columns_of_interest = ['PClass', 'Age', 'SexCode']
titanic[columns_of_interest].head()


Out[28]:
PClass Age SexCode
0 1st 29.00 1
1 1st 2.00 1
2 1st 30.00 0
3 1st 25.00 1
4 1st 0.92 0

In [30]:
# Pull a single column out as a Series and preview it.
titanic['Age'].head()


Out[30]:
0    29.00
1     2.00
2    30.00
3    25.00
4     0.92
Name: Age, dtype: float64

In [31]:
# Extract the frame's data as a NumPy array; because the columns mix strings
# and numbers, the result is an object-dtype array.
tpy=titanic.values

In [32]:
# Display the array (the repr elides the middle rows).
tpy


Out[32]:
array([['Allen, Miss Elisabeth Walton', '1st', 29.0, 'female', 1, 1],
       ['Allison, Miss Helen Loraine', '1st', 2.0, 'female', 0, 1],
       ['Allison, Mr Hudson Joshua Creighton', '1st', 30.0, 'male', 0, 0],
       ..., 
       ['Zenni, Mr Philip', '3rd', 22.0, 'male', 0, 0],
       ['Lievens, Mr Rene', '3rd', 24.0, 'male', 0, 0],
       ['Zimmerman, Leo', '3rd', 29.0, 'male', 0, 0]], dtype=object)

In [33]:
import os as os

In [34]:
# Show the kernel's current working directory.
os.getcwd()


Out[34]:
'C:\\Users\\Dell'

In [35]:
# Switch the working directory to the current user's Desktop.
# The path is built from the home directory instead of a hard-coded user
# profile, so the cell is not tied to one particular machine or account.
os.chdir(os.path.join(os.path.expanduser('~'), 'Desktop'))

In [36]:
# List the entries of the current working directory before exporting.
os.listdir('.')


Out[36]:
['.Rhistory',
 'BigDiamonds.csv',
 'Class-3-Public-Primary-Certification-Authority.pem.txt',
 'Data Analysis (1)',
 'DataWrangling.pdf',
 'desktop.ini',
 'Diamond (7).csv',
 'dump',
 'GoToWebinar.lnk',
 'kushal.jpg',
 'Pythonajay.docx',
 'SUINV.png',
 '~$thonajay.docx']

In [37]:
# Export the table to titanic2.csv on the current user's Desktop, omitting the
# row index. The destination is derived from the home directory rather than a
# hard-coded user profile, so the export works for whoever runs the notebook.
titanic.to_csv(os.path.join(os.path.expanduser('~'), 'Desktop', 'titanic2.csv'), index=False)

In [38]:
# List the working directory again to confirm the exported file is present.
os.listdir('.')


Out[38]:
['.Rhistory',
 'BigDiamonds.csv',
 'Class-3-Public-Primary-Certification-Authority.pem.txt',
 'Data Analysis (1)',
 'DataWrangling.pdf',
 'desktop.ini',
 'Diamond (7).csv',
 'dump',
 'GoToWebinar.lnk',
 'kushal.jpg',
 'Pythonajay.docx',
 'SUINV.png',
 'titanic2.csv',
 '~$thonajay.docx']

In [45]:
# Quick reminder of the table layout before filtering it.
titanic.head(n=5)


Out[45]:
Name PClass Age Sex Survived SexCode
0 Allen, Miss Elisabeth Walton 1st 29.00 female 1 1
1 Allison, Miss Helen Loraine 1st 2.00 female 0 1
2 Allison, Mr Hudson Joshua Creighton 1st 30.00 male 0 0
3 Allison, Mrs Hudson JC (Bessie Waldo Daniels) 1st 25.00 female 0 1
4 Allison, Master Hudson Trevor 1st 0.92 male 1 0

In [49]:
# First-class passengers who survived, selected with a boolean mask.
titanic[(titanic['PClass'] == '1st') & (titanic['Survived'] == 1)]


Out[49]:
Name PClass Age Sex Survived SexCode
0 Allen, Miss Elisabeth Walton 1st 29.00 female 1 1
4 Allison, Master Hudson Trevor 1st 0.92 male 1 0
5 Anderson, Mr Harry 1st 47.00 male 1 0
6 Andrews, Miss Kornelia Theodosia 1st 63.00 female 1 1
8 Appleton, Mrs Edward Dale (Charlotte Lamson) 1st 58.00 female 1 1
11 Astor, Mrs John Jacob (Madeleine Talmadge Force) 1st 19.00 female 1 1
12 Aubert, Mrs Leontine Pauline 1st NaN female 1 1
13 Barkworth, Mr Algernon H 1st NaN male 1 0
15 Baxter, Mrs James (Helene DeLaudeniere Chaput) 1st 50.00 female 1 1
18 Beckwith, Mr Richard Leonard 1st 37.00 male 1 0
19 Beckwith, Mrs Richard Leonard (Sallie Monypeny) 1st 47.00 female 1 1
20 Behr, Mr Karl Howell 1st 26.00 male 1 0
22 Bishop, Mr Dickinson H 1st 25.00 male 1 0
23 Bishop, Mrs Dickinson H (Helen Walton) 1st 19.00 female 1 1
24 Bjornstrm-Steffansson, Mr Mauritz Hakan 1st 28.00 male 1 0
26 Blank, Mr Henry 1st 39.00 male 1 0
27 Bonnell, Miss Caroline 1st 30.00 female 1 1
28 Bonnell, Miss Elizabeth 1st 58.00 female 1 1
30 Bowen, Miss Grace Scott 1st 45.00 female 1 1
31 Bowerman, Miss Elsie Edith 1st 22.00 female 1 1
32 Bradley, Mr George 1st NaN male 1 0
36 Brown, Mrs James Joseph (Margaret Molly" Tobin)" 1st 44.00 female 1 1
37 Brown, Mrs John Murray (Caroline Lane Lamson) 1st 59.00 female 1 1
38 Bucknell, Mrs William Robert (Emma Eliza Ward) 1st 60.00 female 1 1
40 Calderhead, Mr Edward P 1st NaN male 1 0
41 Candee, Mrs Edward (Helen Churchill Hungerford) 1st 53.00 female 1 1
42 Cardeza, Mrs James Warburton Martinez (Charlot... 1st 58.00 female 1 1
43 Cardeza, Mr Thomas Drake Martinez 1st 36.00 male 1 0
47 Carter, Mr William Ernest 1st 36.00 male 1 0
48 Carter, Mrs William Ernest (Lucile Polk) 1st 36.00 female 1 1
... ... ... ... ... ... ...
265 Thorne, Mrs Gertrude Maybelle 1st NaN female 1 1
266 Tucker, Mr Gilbert Milligan, jr 1st 31.00 male 1 0
271 Warren, Mrs Frank Manley (Anna S Atkinson) 1st 60.00 female 1 1
273 White, Mrs J Stuart (Ella Holmes) 1st 55.00 female 1 1
277 Wick, Mrs George Dennick (Martha Hitchcock) 1st 45.00 female 1 1
278 Wick, Miss Mary Natalie 1st 31.00 female 1 1
280 Widener, Mrs George Dunton (Eleanor Elkins) 1st 50.00 female 1 1
282 Willard, Miss Constance 1st 20.00 female 1 1
285 Williams, Mr Richard Norris II 1st 21.00 male 1 0
286 Woolner, Mr Hugh 1st NaN male 1 0
288 Young, Miss Marie Grice 1st 36.00 female 1 1
289 Barber, Ms 1st NaN female 1 1
290 Bazzani, Ms Albina 1st NaN female 1 1
291 Bidois, Miss Rosalie 1st NaN female 1 1
292 Bird, Ms Ellen 1st NaN female 1 1
293 Bissetti, Ms Amelia 1st NaN female 1 1
294 Burns, Ms Elizabeth Margaret 1st NaN female 1 1
295 Chaudanson, Ms Victorine 1st NaN female 1 1
296 Cleaver, Ms Alice 1st NaN female 1 1
297 Daniels, Ms Sarah 1st NaN female 1 1
298 Endres, Miss Caroline Louise 1st NaN female 1 1
301 Francatelli, Ms Laura Mabel 1st NaN female 1 1
303 Geiger, Miss Emily 1st NaN female 1 1
308 Icabad (Icabod), Ms 1st NaN female 1 1
310 Kenchen, Ms Amelia 1st NaN female 1 1
311 LeRoy, Miss Berthe 1st NaN female 1 1
313 Maloney, Ms 1st NaN female 1 1
315 Pericault, Ms 1st NaN female 1 1
318 Segesser, Mlle Emma 1st NaN female 1 1
321 Wilson, Ms Helen 1st NaN female 1 1

193 rows × 6 columns


In [50]:
# Survival rate among first-class passengers.
# Computed from the frame rather than from hand-copied counts, so the figure
# stays correct if the underlying data changes.
first_class = titanic.query("PClass=='1st'")
len(first_class.query("Survived==1")) / len(first_class)


Out[50]:
0.5993788819875776

In [52]:
# Per-column non-null counts for third-class passengers who survived.
third_class_survived = (titanic['PClass'] == '3rd') & (titanic['Survived'] == 1)
titanic[third_class_survived].count()


Out[52]:
Name        138
PClass      138
Age          78
Sex         138
Survived    138
SexCode     138
dtype: int64

In [53]:
# Survival rate among third-class passengers.
# Computed from the frame rather than from hand-copied counts, so the figure
# stays correct if the underlying data changes.
third_class = titanic.query("PClass=='3rd'")
len(third_class.query("Survived==1")) / len(third_class)


Out[53]:
0.1940928270042194

In [58]:
# Contingency table: passenger class (rows) against survival (columns).
pd.crosstab(index=titanic['PClass'], columns=titanic['Survived'])


Out[58]:
Survived 0 1
PClass
* 1 0
1st 129 193
2nd 160 119
3rd 573 138

In [69]:
# Same class-by-survival table, with row and column totals ("All") added.
pd.crosstab(index=titanic['PClass'], columns=titanic['Survived'], margins=True)


Out[69]:
Survived 0 1 All
PClass
* 1 0 1
1st 129 193 322
2nd 160 119 279
3rd 573 138 711
All 863 450 1313

In [71]:
# Class-by-survival table as row proportions: each class's row sums to 1.
pd.crosstab(index=titanic['PClass'], columns=titanic['Survived'], normalize='index')


Out[71]:
Survived 0 1
PClass
* 1.000000 0.000000
1st 0.400621 0.599379
2nd 0.573477 0.426523
3rd 0.805907 0.194093

In [61]:
# Per-column non-null counts for first-class women.
is_first_class_woman = (titanic['PClass'] == '1st') & (titanic['Sex'] == 'female')
titanic[is_first_class_woman].count()


Out[61]:
Name        143
PClass      143
Age         101
Sex         143
Survived    143
SexCode     143
dtype: int64

In [62]:
# Per-column non-null counts for first-class women who survived.
first_class_women_survived = (
    (titanic['PClass'] == '1st')
    & (titanic['Sex'] == 'female')
    & (titanic['Survived'] == 1)
)
titanic[first_class_women_survived].count()


Out[62]:
Name        134
PClass      134
Age          96
Sex         134
Survived    134
SexCode     134
dtype: int64

In [63]:
# Survival rate among first-class women.
# Computed from the frame rather than from hand-copied counts, so the figure
# stays correct if the underlying data changes.
first_class_women = titanic.query("PClass=='1st' and Sex=='female'")
len(first_class_women.query("Survived==1")) / len(first_class_women)


Out[63]:
0.9370629370629371

In [64]:
# Per-column non-null counts for third-class men who survived.
third_class_men_survived = (
    (titanic['PClass'] == '3rd')
    & (titanic['Sex'] == 'male')
    & (titanic['Survived'] == 1)
)
titanic[third_class_men_survived].count()


Out[64]:
Name        58
PClass      58
Age         32
Sex         58
Survived    58
SexCode     58
dtype: int64

In [65]:
# Per-column non-null counts for all third-class men.
is_third_class_man = (titanic['PClass'] == '3rd') & (titanic['Sex'] == 'male')
titanic[is_third_class_man].count()


Out[65]:
Name        499
PClass      499
Age         216
Sex         499
Survived    499
SexCode     499
dtype: int64

In [67]:
# Survival rate among third-class men.
# Computed from the frame rather than from hand-copied counts, so the figure
# stays correct if the underlying data changes.
third_class_men = titanic.query("PClass=='3rd' and Sex=='male'")
len(third_class_men.query("Survived==1")) / len(third_class_men)


Out[67]:
0.11623246492985972

In [72]:
# Survival counts broken down by class and sex together, with totals.
pd.crosstab(
    index=[titanic['PClass'], titanic['Sex']],
    columns=titanic['Survived'],
    margins=True,
)


Out[72]:
Survived 0 1 All
PClass Sex
* male 1 0 1
1st female 9 134 143
male 120 59 179
2nd female 13 94 107
male 147 25 172
3rd female 132 80 212
male 441 58 499
All 863 450 1313

In [79]:
# Preview the first five rows of the saved copy.
titanic2.head(n=5)


Out[79]:
Name PClass Age Sex Survived SexCode
0 Allen, Miss Elisabeth Walton 1st 29.00 female 1 1
1 Allison, Miss Helen Loraine 1st 2.00 female 0 1
2 Allison, Mr Hudson Joshua Creighton 1st 30.00 male 0 0
3 Allison, Mrs Hudson JC (Bessie Waldo Daniels) 1st 25.00 female 0 1
4 Allison, Master Hudson Trevor 1st 0.92 male 1 0

In [74]:
# Write the text label 'Alive' into Survived2 for survivors; other rows are not assigned here.
survived_mask = titanic['Survived'].eq(1)
titanic.loc[survived_mask, 'Survived2'] = 'Alive'

In [77]:
# Write the text label 'Dead' into Survived2 for every row whose Survived flag is not 1.
not_survived_mask = titanic['Survived'].ne(1)
titanic.loc[not_survived_mask, 'Survived2'] = 'Dead'

In [78]:
# Check the new Survived2 label column on the first five rows.
titanic.head(n=5)


Out[78]:
Name PClass Age Sex Survived SexCode Survived2
0 Allen, Miss Elisabeth Walton 1st 29.00 female 1 1 Alive
1 Allison, Miss Helen Loraine 1st 2.00 female 0 1 Dead
2 Allison, Mr Hudson Joshua Creighton 1st 30.00 male 0 0 Dead
3 Allison, Mrs Hudson JC (Bessie Waldo Daniels) 1st 25.00 female 0 1 Dead
4 Allison, Master Hudson Trevor 1st 0.92 male 1 0 Alive

In [82]:
import numpy as np

In [86]:
# Add a column `e` holding one standard-normal draw per row.
# A dedicated, seeded generator makes the column reproducible on a fresh
# Restart & Run All. The draws are handed to assign() directly, because wrapping
# them in a Series only to unwrap them again with .values was a no-op.
noise_rng = np.random.RandomState(12)
titanic = titanic.assign(e=noise_rng.randn(len(titanic)))

In [87]:
# Check the new noise column `e` on the first five rows.
titanic.head(n=5)


Out[87]:
Name PClass Age Sex Survived SexCode Survived2 e
0 Allen, Miss Elisabeth Walton 1st 29.00 female 1 1 Alive 0.960077
1 Allison, Miss Helen Loraine 1st 2.00 female 0 1 Dead -2.777595
2 Allison, Mr Hudson Joshua Creighton 1st 30.00 male 0 0 Dead 0.294452
3 Allison, Mrs Hudson JC (Bessie Waldo Daniels) 1st 25.00 female 0 1 Dead -0.249450
4 Allison, Master Hudson Trevor 1st 0.92 male 1 0 Alive 0.950757

In [90]:
# A single column selected from the frame is a pandas Series.
type(titanic['Name'])


Out[90]:
pandas.core.series.Series

In [ ]: