In [2]:
import IPython
# Print IPython's system-information report so the environment the notebook
# ran under is recorded alongside the results.
env_summary = IPython.sys_info()
print(env_summary)
In [3]:
import pandas as pd
In [11]:
titanic=pd.read_csv("https://vincentarelbundock.github.io/Rdatasets/csv/datasets/Titanic.csv")
In [12]:
titanic.info()
In [13]:
titanic.head()
Out[13]:
In [14]:
# Remove the 'Unnamed: 0' column.  The column is named via the
# `columns=` keyword: passing the axis positionally (`drop(label, 1)`) was
# deprecated in pandas 1.3 and raises TypeError from pandas 2.0 onwards.
titanic = titanic.drop(columns='Unnamed: 0')
In [15]:
titanic.head()
Out[15]:
In [16]:
titanic2=titanic.copy()
In [17]:
# Number of rows per passenger class.  Uses the Series method because the
# top-level `pd.value_counts` function is deprecated (pandas 2.1+).
titanic.PClass.value_counts()
Out[17]:
In [18]:
# Number of rows per sex.  Uses the Series method because the top-level
# `pd.value_counts` function is deprecated (pandas 2.1+).
titanic.Sex.value_counts()
Out[18]:
In [19]:
# Number of rows per value of the Survived flag.  Uses the Series method
# because the top-level `pd.value_counts` function is deprecated (pandas 2.1+).
titanic.Survived.value_counts()
Out[19]:
In [20]:
titanic.iloc[1:3,:]
Out[20]:
In [22]:
titanic.head(7)
Out[22]:
In [28]:
titanic[['PClass','Age','SexCode']].head()
Out[28]:
In [30]:
titanic.Age.head()
Out[30]:
In [31]:
tpy=titanic.values
In [32]:
tpy
Out[32]:
In [33]:
import os as os
In [34]:
os.getcwd()
Out[34]:
In [35]:
os.chdir('C:\\Users\\Dell\\Desktop')
In [36]:
os.listdir()
Out[36]:
In [37]:
titanic.to_csv('C:\\Users\\Dell\\Desktop\\titanic2.csv', index=False)
In [38]:
os.listdir()
Out[38]:
In [45]:
titanic.head()
Out[45]:
In [49]:
titanic.query("PClass=='1st' and Survived ==1")
Out[49]:
In [50]:
# Share of first-class passengers with Survived == 1.  Computed from the
# frame rather than from hand-copied counts (previously the literal 193/322),
# so it cannot drift out of sync with the data.
float(titanic.query("PClass=='1st'").Survived.eq(1).mean())
Out[50]:
In [52]:
# Non-null count per column among third-class rows with Survived == 1.
third_class_survivors = titanic.query("PClass=='3rd' and Survived==1")
third_class_survivors.count()
Out[52]:
In [53]:
# Share of third-class passengers with Survived == 1.  Computed from the
# frame rather than from hand-copied counts (previously the literal 138/711),
# so it cannot drift out of sync with the data.
float(titanic.query("PClass=='3rd'").Survived.eq(1).mean())
Out[53]:
In [58]:
pd.crosstab(titanic.PClass,titanic.Survived)
Out[58]:
In [69]:
pd.crosstab(titanic.PClass,titanic.Survived,margins=True)
Out[69]:
In [71]:
pd.crosstab(titanic.PClass,titanic.Survived,normalize='index')
Out[71]:
In [61]:
titanic.query("PClass=='1st' and Sex=='female'").count()
Out[61]:
In [62]:
titanic.query("PClass=='1st' and Sex=='female' and Survived==1").count()
Out[62]:
In [63]:
# Share of first-class female passengers with Survived == 1.  Computed from
# the frame rather than from hand-copied counts (previously the literal
# 134/143), so it cannot drift out of sync with the data.
float(titanic.query("PClass=='1st' and Sex=='female'").Survived.eq(1).mean())
Out[63]:
In [64]:
titanic.query("PClass=='3rd' and Sex=='male' and Survived==1").count()
Out[64]:
In [65]:
titanic.query("PClass=='3rd' and Sex=='male' ").count()
Out[65]:
In [67]:
# Share of third-class male passengers with Survived == 1.  Computed from the
# frame rather than from hand-copied counts (previously the literal 58/499),
# so it cannot drift out of sync with the data.
float(titanic.query("PClass=='3rd' and Sex=='male'").Survived.eq(1).mean())
Out[67]:
In [72]:
pd.crosstab([titanic.PClass, titanic.Sex], titanic.Survived, margins=True)
Out[72]:
In [79]:
titanic2.head()
Out[79]:
In [74]:
titanic.loc[titanic.Survived==1,'Survived2']='Alive'
In [77]:
titanic.loc[titanic.Survived!=1,'Survived2']='Dead'
In [78]:
titanic.head()
Out[78]:
In [82]:
import numpy as np
In [86]:
# Add a column `e` holding one standard-normal draw per row.
# A dedicated RandomState with a fixed seed (42 is arbitrary) makes the column
# identical on every re-run and leaves NumPy's global random state untouched.
# The array is passed straight to assign(); wrapping it in a Series only to
# take `.values` again was a no-op.
titanic = titanic.assign(e=np.random.RandomState(42).randn(len(titanic)))
In [87]:
titanic.head()
Out[87]:
In [90]:
type(titanic.Name)
Out[90]:
In [ ]: