In [1]:
import pandas as pd
In [2]:
# Load the sample dataset that the following cells operate on.
sample_csv_path = 'data/src/sample_pandas_normal.csv'
df = pd.read_csv(sample_csv_path)
In [3]:
# Show the loaded frame so the counts computed below can be checked against it.
print(df)
In [4]:
# Element-wise comparison: True wherever a cell equals the string 'CA'.
df_bool = df.eq('CA')
print(df_bool)
In [5]:
# True counts as 1, so summing down the rows yields the number of matches per column.
print(df_bool.sum(axis='index'))
In [6]:
# Summing across the columns yields the number of matches per row.
print(df_bool.sum(axis='columns'))
In [7]:
# Underlying NumPy array of the boolean frame.
# `to_numpy()` is the accessor the pandas documentation recommends over `.values`.
print(df_bool.to_numpy())
In [8]:
# Confirm the type of the extracted array (`to_numpy()` is preferred over `.values`).
print(type(df_bool.to_numpy()))
In [9]:
# ndarray.sum() with no axis adds up every element, giving the total number of
# True cells in the whole frame. `to_numpy()` is preferred over `.values`.
print(df_bool.to_numpy().sum())
In [10]:
# Boolean Series: True for rows whose 'age' is strictly below 25.
s_bool = df['age'].lt(25)
print(s_bool)
In [11]:
# Summing a boolean Series counts its True entries.
s_bool_true_total = s_bool.sum()
print(s_bool_true_total)
In [12]:
# Element-wise OR of two conditions: the cell equals 'CA' or the cell equals 70.
df_bool_multi = df.eq('CA') | df.eq(70)
print(df_bool_multi)
In [13]:
# Per-column count of cells satisfying either condition.
print(df_bool_multi.sum(axis='index'))
In [14]:
# Per-row count of cells satisfying either condition.
print(df_bool_multi.sum(axis='columns'))
In [15]:
# Total number of True cells across the whole frame.
# `to_numpy()` is the accessor the pandas documentation recommends over `.values`.
print(df_bool_multi.to_numpy().sum())
In [16]:
# Row-wise AND: 'state' equals 'CA' and 'age' is strictly below 30.
df_bool_multi_and = df['state'].eq('CA') & df['age'].lt(30)
print(df_bool_multi_and)
In [17]:
# Number of rows for which the combined AND condition is True.
and_match_total = df_bool_multi_and.sum()
print(and_match_total)
In [18]:
# Row-wise OR: 'state' equals 'CA' or 'age' is strictly below 30.
df_bool_multi_or = df['state'].eq('CA') | df['age'].lt(30)
print(df_bool_multi_or)
In [19]:
# Number of rows for which the combined OR condition is True.
or_match_total = df_bool_multi_or.sum()
print(or_match_total)
In [20]:
# Negated mask: True wherever a cell is NOT equal to 'CA'.
df_bool_not = ~df.eq('CA')
print(df_bool_not)
In [21]:
# Per-column count of cells that do not match.
print(df_bool_not.sum(axis='index'))
In [22]:
# Per-row count of cells that do not match.
print(df_bool_not.sum(axis='columns'))
In [23]:
# Total number of non-matching cells across the whole frame.
# `to_numpy()` is the accessor the pandas documentation recommends over `.values`.
print(df_bool_not.to_numpy().sum())
In [24]:
# Restrict the frame to the 'age' and 'point' columns for the numeric comparisons below.
num_cols = ['age', 'point']
df_num = df[num_cols]
print(df_num)
In [25]:
# Per-column count of values less than or equal to 70.
print(df_num.le(70).sum())
In [26]:
# Count rows whose 'age' lies strictly between 20 and 40 (both bounds excluded).
print((df['age'].gt(20) & df['age'].lt(40)).sum())
In [27]:
# Per-column count of values whose remainder modulo 2 equals 1 (odd integers).
print(df_num.mod(2).eq(1).sum())
In [28]:
# Restrict the frame to the 'name' and 'state' columns for the string comparisons below.
str_cols = ['name', 'state']
df_str = df[str_cols]
print(df_str)
In [29]:
# Per-column count of cells exactly equal to 'NY'.
print(df_str.eq('NY').sum())
In [30]:
# The .str accessor applies the string method element-wise, producing a boolean Series.
name_ends_with_e = df_str['name'].str.endswith('e')
print(name_ends_with_e)
In [31]:
# Count the 'name' values that end with the letter 'e'.
ends_with_e_total = df_str['name'].str.endswith('e').sum()
print(ends_with_e_total)
In [32]:
# Rebind `df` to a second dataset; every cell after this one operates on it,
# and the sample frame loaded earlier is no longer reachable through `df`.
titanic_csv_path = 'data/src/titanic_train.csv'
df = pd.read_csv(titanic_csv_path)
In [33]:
# Preview only the first five rows rather than printing the whole frame.
print(df.head(5))
In [34]:
# Print the frame summary. info() writes its report itself (to stdout by default)
# and returns None, so it is not wrapped in print().
df.info()
In [35]:
# Boolean mask of missing values (isna is the same method as isnull); preview the first rows.
print(df.isna().head())
In [36]:
# Number of missing values in each column.
print(df.isna().sum())
In [37]:
# Number of missing values in each row; preview the first rows only.
print(df.isna().sum(axis='columns').head())
In [38]:
# Total number of missing values in the whole frame.
# `to_numpy()` is the accessor the pandas documentation recommends over `.values`.
print(df.isnull().to_numpy().sum())
In [39]:
# count() tallies the non-missing values; along the index this gives one total per column.
print(df.count(axis='index'))
In [40]:
# Non-missing values per row; preview the first rows only.
print(df.count(axis='columns').head())
In [41]:
# Adding up the per-column counts gives the total number of non-missing cells in the frame.
non_null_per_column = df.count()
print(non_null_per_column.sum())