In [1]:
import pandas as pd
import numpy as np
In [2]:
# Read the sample CSV, then overwrite the row at position 1 with NaN so the
# frame contains missing values.
csv_path = 'data/src/sample_pandas_normal.csv'
df = pd.read_csv(csv_path)
df.iloc[1] = np.nan
print(df)
In [3]:
# Distinct values found in the 'state' column, followed by the container type
# that Series.unique() hands back.
u = df['state'].unique()
print(u, type(u), sep='\n')
In [4]:
# Number of occurrences of each 'state' value (default value_counts options),
# followed by the type of the result.
state_col = df['state']
vc = state_col.value_counts()
print(vc, type(vc), sep='\n')
In [5]:
# Same counts, ordered from the least to the most frequent value.
counts_ascending = df['state'].value_counts(ascending=True)
print(counts_ascending)
In [6]:
# Counts with sorting by frequency switched off.
counts_unsorted = df['state'].value_counts(sort=False)
print(counts_unsorted)
In [7]:
# Counts that also report NaN as its own entry instead of dropping it.
counts_with_nan = df['state'].value_counts(dropna=False)
print(counts_with_nan)
In [8]:
# Relative frequencies (normalize=True) rather than raw counts, NaN included.
state_fractions = df['state'].value_counts(dropna=False, normalize=True)
print(state_fractions)
In [9]:
# How many distinct values 'state' holds (NaN excluded by default), followed by
# the type of that result.
nu = df['state'].nunique()
print(nu, type(nu), sep='\n')
In [10]:
# Distinct-value count for 'state' with NaN counted as a value too.
n_states_with_nan = df['state'].nunique(dropna=False)
print(n_states_with_nan)
In [11]:
# Distinct-value count for every column of the frame, followed by the type of
# the returned object.
nu_col = df.nunique()
print(nu_col, type(nu_col), sep='\n')
In [12]:
# Per-column distinct-value counts, this time counting NaN as a value.
n_unique_with_nan = df.nunique(dropna=False)
print(n_unique_with_nan)
In [13]:
# axis='columns' counts distinct values across each row instead of down each
# column, so the result has one entry per row; NaN is counted as a value.
n_unique_per_row = df.nunique(dropna=False, axis='columns')
print(n_unique_per_row)
In [14]:
# Number of distinct non-NaN values in 'state'.
n_states = df['state'].nunique()
print(n_states)
In [15]:
# Number of distinct non-NaN values in each column.
n_unique_per_column = df.nunique()
print(n_unique_per_column)
In [16]:
# Distinct 'state' values converted to a plain Python list, followed by the
# type of that list.
unique_states = df['state'].unique().tolist()
print(unique_states, type(unique_states), sep='\n')
In [17]:
# The labels of the value counts (its index) as a plain Python list, followed
# by the type of that list.
counted_labels = df['state'].value_counts().index.tolist()
print(counted_labels, type(counted_labels), sep='\n')
In [18]:
# Labels of the value counts via Index.values; dropna=False keeps NaN among the
# labels. The array is computed once so that the type printed on the second
# line belongs to exactly the object printed on the first line (the original
# took the type from a separate call that omitted dropna=False).
labels_with_nan = df['state'].value_counts(dropna=False).index.values
print(labels_with_nan)
print(type(labels_with_nan))
In [19]:
# Look up the count for a single label with bracket indexing.
state_counts = df['state'].value_counts()
print(state_counts['NY'])
In [20]:
# Same lookup using attribute access on the counts Series.
state_counts = df['state'].value_counts()
print(state_counts.NY)
In [21]:
# Iterate over (label, count) pairs of the value counts.
# Series.iteritems() was deprecated in pandas 1.5 and removed in pandas 2.0;
# Series.items() yields the same pairs and works on old and new versions.
for index, value in df['state'].value_counts().items():
    print(index, ': ', value)
In [22]:
# Convert the value counts into a plain dict (label -> count), then show the
# dict and its type.
state_counts = df['state'].value_counts()
d = state_counts.to_dict()
print(d, type(d), sep='\n')
In [23]:
# Ordinary dict lookup of one label's count.
ny_count = d['NY']
print(ny_count)
In [24]:
# Walk the dict of counts and print each label with its count.
# The loop body must be indented; without it the cell is a syntax error.
for key, value in d.items():
    print(key, ': ', value)
In [25]:
# Value counts of 'state' again, as the starting point for the lookups of the
# most frequent value.
state_counts = df['state'].value_counts()
print(state_counts)
In [26]:
# value_counts() sorts by descending count by default, so the first index
# label is the most frequent value.
state_counts = df['state'].value_counts()
print(state_counts.index[0])
In [27]:
# The first element of the (descending) counts is the count of the most
# frequent value; .iat[0] reads it by position.
state_counts = df['state'].value_counts()
print(state_counts.iat[0])
In [28]:
def _top_label(column):
    """Return the first index label of the column's value counts."""
    return column.value_counts().index[0]


# Apply the helper to every column of the frame.
print(df.apply(_top_label))
In [29]:
def _top_count(column):
    """Return the first (positionally) entry of the column's value counts."""
    return column.value_counts().iat[0]


# Apply the helper to every column of the frame.
print(df.apply(_top_count))
In [30]:
# Series.mode() returns the most frequent value(s) of 'state' as a Series.
state_modes = df['state'].mode()
print(state_modes)
In [31]:
# The mode(s) of 'state' as a plain Python list.
state_mode_list = df['state'].mode().tolist()
print(state_mode_list)
In [32]:
# The mode(s) of 'age' as a plain Python list; ties produce several entries.
age_mode_list = df['age'].mode().tolist()
print(age_mode_list)
In [33]:
def _mode_list(column):
    """Return the column's mode(s) as a plain Python list."""
    return column.mode().tolist()


# Collect the list of modes for every column of the frame.
s_mode = df.apply(_mode_list)
print(s_mode)
In [34]:
# Show which container type df.apply produced for the per-column mode lists.
s_mode_type = type(s_mode)
print(s_mode_type)
In [35]:
# Pull out the entry stored under the 'name' label.
name_modes = s_mode['name']
print(name_modes)
In [36]:
# Show the type of the entry stored under the 'name' label.
name_modes = s_mode['name']
print(type(name_modes))
In [37]:
# DataFrame.mode() computes the mode(s) of every column at once.
column_modes = df.mode()
print(column_modes)
In [38]:
# count() tallies the non-missing entries per column of the mode table, which
# is how many modes each column has.
column_modes = df.mode()
print(column_modes.count())
In [39]:
# Cast every column to str so describe() reports the categorical summary
# (count / unique / top / freq) for all columns.
df_as_text = df.astype('str')
print(df_as_text.describe())
In [40]:
# Keep only the 'top' row of the string summary: the most common value per column.
text_summary = df.astype('str').describe()
print(text_summary.loc['top'])