In [6]:
import pandas as pd
In [7]:
# Load the NBA roster CSV into a DataFrame (relative path, resolved against
# the notebook's working directory).
nba = pd.read_csv('data/nba.csv')
In [8]:
# First rows of the frame (head() defaults to 5).
nba.head()
Out[8]:
In [8]:
# Last rows of the frame (tail() defaults to 5).
nba.tail()
Out[8]:
In [9]:
# Row labels. No index_col was passed to read_csv, so pandas generates a
# default integer index.
nba.index
Out[9]:
In [10]:
# The underlying data as a NumPy array, one inner array per row.
nba.values
Out[10]:
In [12]:
# Tuple of (number of rows, number of columns).
nba.shape
Out[12]:
In [14]:
# Series mapping each column name to its dtype.
nba.dtypes
Out[14]:
In [16]:
# Column labels as an Index object.
nba.columns
Out[16]:
In [18]:
# List of both axes: [row index, column index].
nba.axes
Out[18]:
In [20]:
# Summary of the frame: index range, per-column non-null count and dtype,
# and memory usage. Useful for spotting missing values at a glance.
nba.info()
In [21]:
# Count how many columns share each dtype.
# DataFrame.get_dtype_counts() was deprecated in pandas 0.25 and removed in
# 1.0; counting the values of .dtypes is the supported replacement.
nba.dtypes.value_counts()
Out[21]:
In [24]:
# Load the revenue CSV and use its 'Date' column as the row index.
revenue = pd.read_csv('data/revenue.csv', index_col='Date')
In [25]:
revenue
Out[25]:
In [27]:
# The index now holds the values read from the 'Date' column.
revenue.index
Out[27]:
In [32]:
# Default axis (axis=0 / 'index'): adds down the rows, giving one total per column.
revenue.sum()
Out[32]:
In [34]:
# axis='columns' adds across the columns, giving one total per row (per Date).
revenue.sum(axis='columns')
# Equivalent spelling: revenue.sum(axis=1)
Out[34]:
In [36]:
nba.head(3)
Out[36]:
In [46]:
# One column: a single label inside [] returns a Series.
nba['Name'].head(3)
# Avoid attribute access (nba.Name): it does not work for column names that
# contain spaces or that collide with DataFrame attributes/methods, and it
# cannot be used to create a new column.
Out[46]:
In [45]:
# Multiple columns: a list of labels inside [] returns a DataFrame whose
# columns follow the order of the list.
nba[['Name', 'Team', 'Number']].head(3)
Out[45]:
In [57]:
# The list of labels can be built first and reused; the column order of the
# result is the order given here, not the order in the file.
select = ["Salary", "Team", "Name"]
nba[select].head(3)
Out[57]:
In [75]:
# Reload a fresh copy, then add a column by assignment. The scalar is
# broadcast to every row and the new column is appended as the last column.
nba = pd.read_csv('data/nba.csv')
nba['TEST'] = 'Hello Test'
In [76]:
nba.head(3)
Out[76]:
In [77]:
# Reload again and use insert() to control where the new column lands.
# insert() modifies the frame in place; the first argument is the 0-based
# column position. The second call's position 5 is counted after 'TEST'
# has already been inserted at position 0.
nba = pd.read_csv('data/nba.csv')
nba.insert(0, column='TEST', value='Bla bla bla')
nba.insert(5, column='ASD', value='Bla bla bla')
In [79]:
nba.head(3)
Out[79]:
In [80]:
# Reload a fresh copy so the demo columns added earlier are gone.
nba = pd.read_csv('data/nba.csv')
In [87]:
# Element-wise addition. Returns a new Series; the DataFrame is not modified.
nba['Number'].add(10)
# Operator equivalent: nba['Number'] + 10
Out[87]:
In [86]:
# Confirm that 'Number' in the original frame is unchanged.
nba.head(3)
Out[86]:
In [88]:
# Element-wise subtraction.
nba['Number'].sub(10)
# Operator equivalent: nba['Number'] - 10
Out[88]:
In [92]:
# Element-wise multiplication.
nba['Number'].mul(10)
# Operator equivalent: nba['Number'] * 10
Out[92]:
In [94]:
# Element-wise (true) division.
nba['Number'].div(10)
# Operator equivalent: nba['Number'] / 10
Out[94]:
In [95]:
# Still unchanged: none of the calls above were assigned back.
nba.head(3)
Out[95]:
In [97]:
# Number of occurrences of each distinct value, most frequent first.
# Missing values are excluded by default.
nba['Team'].value_counts()
Out[97]:
In [98]:
nba.tail(3)
Out[98]:
In [106]:
# Default dropna() (how='any'): drops every row that has at least one
# missing value. Returns a new frame; nba itself is not modified here.
nba.dropna().tail(3)
Out[106]:
In [110]:
# how='all': drop a row only when every value in it is missing.
# inplace=True mutates nba, so later cells see the reduced frame.
nba.dropna(how='all', inplace=True)
nba.tail(3)
Out[110]:
In [113]:
# subset=[...]: only the listed columns are checked; a row is dropped when
# any of them is missing. Returns a new frame.
nba.dropna(subset = ["Salary", "College"]).head(3)
Out[113]:
In [114]:
nba.dropna(subset = ["Salary", "College"]).tail(3)
Out[114]:
In [115]:
# Reload a fresh copy so the rows dropped earlier are back.
nba = pd.read_csv('data/nba.csv')
In [116]:
# Replace every missing value in every column with 0. Returns a new frame
# (nba is not modified). Note this puts the number 0 into text columns too.
nba.fillna(0)
Out[116]:
In [118]:
nba = pd.read_csv('data/nba.csv')
In [120]:
nba['College'].fillna("No College", inplace=True)
In [121]:
nba['Salary'].fillna(-1, inplace=True)
In [122]:
nba.tail()
Out[122]:
In [123]:
nba.head()
Out[123]:
In [142]:
# Start over from the raw file and check non-null counts per column.
nba = pd.read_csv('data/nba.csv')
nba.info()
In [143]:
# Drop only the rows in which every value is missing (how='all'); rows with
# some missing values are kept. inplace=True mutates nba.
nba.dropna(how='all', inplace=True)
nba.tail()
Out[143]:
In [144]:
# Fill the remaining missing values manually, one column at a time.
# The result is assigned back to the column: column.fillna(..., inplace=True)
# is chained assignment on an intermediate Series, which recent pandas
# versions warn about and which does not update the DataFrame when
# Copy-on-Write is enabled.
nba['Salary'] = nba['Salary'].fillna(0)
nba['College'] = nba['College'].fillna('No College')
In [145]:
nba.head(5)
Out[145]:
In [147]:
# Re-check non-null counts and dtypes before converting column types.
nba.info()
In [148]:
nba.dtypes
Out[148]:
In [150]:
# Convert Salary to integer. astype('int') raises if the column still holds
# NaN, which is why the missing salaries were filled beforehand.
nba['Salary'] = nba['Salary'].astype('int')
In [152]:
nba.info()
In [153]:
# Same conversion for Number and Age. No fillna was applied to these two
# columns, so this relies on them having no missing values at this point
# (presumably true once the all-NaN rows were dropped -- verify with info()).
nba['Number'] = nba['Number'].astype('int')
nba['Age'] = nba['Age'].astype('int')
In [154]:
nba.info()
In [156]:
# Number of distinct (non-missing) values. A small count relative to the
# number of rows makes the column a good candidate for the category dtype.
nba['Position'].nunique()
Out[156]:
In [157]:
# Store Position as a categorical: each distinct label is kept once and rows
# hold integer codes, which typically lowers memory usage.
nba['Position'] = nba['Position'].astype('category')
In [158]:
# Compare the reported memory usage with the earlier info() output.
nba.info()
In [159]:
nba['Team'].nunique()
Out[159]:
In [160]:
# Same conversion for Team.
nba['Team'] = nba['Team'].astype('category')
In [161]:
nba.info()
In [162]:
nba.head()
Out[162]:
In [180]:
# Start over from the raw file for the sorting examples.
nba = pd.read_csv('data/nba.csv')
nba.info()
In [166]:
# Sort by a single column, smallest first. Returns a new sorted frame; nba
# keeps its original order because the result is not assigned.
nba.sort_values('Age', ascending=True).head(3)
Out[166]:
In [167]:
nba.sort_values('Salary', ascending=False, inplace=True)
In [170]:
nba.head(3)
Out[170]:
In [173]:
nba.tail()
Out[173]:
In [171]:
nba.sort_values(['Salary', 'Age'], ascending=[False, True], inplace=True)
In [172]:
nba.head()
Out[172]:
In [187]:
nba.sort_values(['Team', 'Salary'], ascending=[True, False], inplace=True)
In [188]:
nba.head()
Out[188]:
In [196]:
nba = pd.read_csv('data/nba.csv')
nba.info()
In [197]:
nba.sort_index(ascending=False, inplace=True)
In [198]:
nba.head()
Out[198]:
In [203]:
nba = pd.read_csv('data/nba.csv')
nba['Salary'] = nba['Salary'].fillna(0).astype('int')
nba.info()
In [208]:
nba['Salary Rank'] = nba['Salary'].rank(ascending=False).astype('int')
In [209]:
nba.head()
Out[209]:
In [212]:
nba.sort_values('Salary', ascending=False).head()
# nba.sort_values('Salary Rank', ascending=True).head()
Out[212]:
In [ ]: