In [6]:
import pandas as pd
import numpy as np
import numpy as np
"""10 Minutes to Pandas: http://pandas.pydata.org/pandas-docs/stable/10min.html#missing-data"""
In [38]:
# Object Creation
import pandas as pd
import numpy as np
# A Series built from a plain list; the np.nan entry stands in for a missing value.
obj1 = pd.Series([1, 3, 5, np.nan, 6, 8])
print('\n+=== Series Data: \n', obj1)
# A DatetimeIndex holding 10 periods starting at 2013-02-25.
obj2 = pd.date_range("20130225", periods=10)
print('\n+=== Date Data: \n', obj2)
# A 10x4 DataFrame of standard-normal samples, indexed by the dates above and
# with columns A-D. A private fixed-seed RandomState is used so the printed
# numbers are identical on every run, without reseeding NumPy's global generator.
obj3 = pd.DataFrame(np.random.RandomState(0).randn(10, 4), index=obj2, columns=list('ABCD'))
print('\n+=== Format Data: \n', obj3)
# A DataFrame built from a dict of heterogeneous, series-like values: each key
# becomes a column, and the scalar entries ('A', 'F') are repeated to match the
# four-element columns.
obj4 = pd.DataFrame({
    'A': 1,
    'B': pd.date_range("20130225", periods=4),
    'C': pd.Series(1, index=list(range(4)), dtype='float32'),
    'D': np.array([3] * 4, dtype='int32'),
    'E': pd.Categorical(['Test', 'Train', 'Test2', 'Train2']),
    'F': 'Foo'
})
# Print the frame together with its per-column dtypes.
print('\n+=== Series-like format Data: \n', obj4, '\n', obj4.dtypes)
In [18]:
# Viewing Data
import pandas as pd
import numpy as np
import numpy as np
# Sample frame for this section: 5 dates starting 2013-02-25, columns A-D,
# filled with standard-normal samples. A private fixed-seed RandomState keeps
# the printed values identical on every run without touching NumPy's global
# generator.
dates = pd.date_range("20130225", periods=5)
df = pd.DataFrame(np.random.RandomState(0).randn(5, 4), index=dates, columns=list('ABCD'))
# First rows (head() default) and the last three rows.
print('\n+=== DataFrame Head\n', df.head())
print('\n+=== DataFrame Tail\n', df.tail(3))
# Row labels, column labels, and the underlying values.
print('\n+=== DataFrame Index\n', df.index)
print('\n+=== DataFrame Columns\n', df.columns)
print('\n+=== DataFrame Values\n', df.values)
# Summary statistics per column.
print('\n+=== DataFrame Describe\n', df.describe())
# Transpose: rows become columns and vice versa.
print('\n+=== DataFrame Transposing your data\n', df.T)
# Sort the column labels (axis=1) in descending order.
print('\n+=== DataFrame Sorting by an axis\n', df.sort_index(axis=1, ascending=False))
# Sort the rows by the values in column B.
print('\n+=== DataFrame Sorting by values\n', df.sort_values(by='B'))
In [21]:
# Getting
import pandas as pd
import numpy as np
import numpy as np
# Sample frame for this section: 5 dates starting 2013-02-25, columns A-D,
# filled with standard-normal samples. A private fixed-seed RandomState keeps
# the printed values identical on every run without touching NumPy's global
# generator.
dates = pd.date_range("20130225", periods=5)
df = pd.DataFrame(np.random.RandomState(0).randn(5, 4), index=dates, columns=list('ABCD'))
# Each label names the exact expression being shown, so the three outputs can
# be told apart.
# A single column label returns that column as a Series.
print('\n+=== DataFrame df["A"]\n', df['A'])
# An integer slice inside [] selects rows by position.
print('\n+=== DataFrame df[0:3]\n', df[0:3])
# A slice of date strings inside [] selects rows by index label; both endpoints
# are included.
print('\n+=== DataFrame df["2013-02-26":"2013-02-28"]\n', df['2013-02-26':'2013-02-28'])
In [25]:
# Selection by Label
import pandas as pd
import numpy as np
import numpy as np
# Sample frame for this section: 5 dates starting 2013-02-25, columns A-D,
# filled with standard-normal samples. A private fixed-seed RandomState keeps
# the printed values identical on every run without touching NumPy's global
# generator.
dates = pd.date_range("20130225", periods=5)
df = pd.DataFrame(np.random.RandomState(0).randn(5, 4), index=dates, columns=list('ABCD'))
# One row, selected by its index label.
print('\n+=== DataFrame df.loc[dates[0]]\n', df.loc[dates[0]])
# All rows, restricted to columns A and B.
print('\n+=== DataFrame df.loc[:,["A","B"]]\n', df.loc[:, ['A', 'B']])
# A label slice on rows (both endpoints included) combined with a column list.
print('\n+=== DataFrame df.loc["2013-02-26":"2013-02-28",["A","B"]]\n', df.loc['2013-02-26':'2013-02-28', ['A', 'B']])
# A single row label with a column list.
print('\n+=== DataFrame df.loc["2013-02-26",["A","B"]]\n', df.loc['2013-02-26', ['A', 'B']])
# A single scalar value: .loc with row and column labels, then the .at accessor
# for the same cell.
print('\n+=== DataFrame df.loc[dates[0],"A"]\n', df.loc[dates[0], 'A'])
print('\n+=== DataFrame df.at[dates[0],"A"]\n', df.at[dates[0], 'A'])
In [31]:
# Selection by Position
import pandas as pd
import numpy as np
import numpy as np
# Sample frame for this section: 5 dates starting 2013-02-25, columns A-D,
# filled with standard-normal samples. A private fixed-seed RandomState keeps
# the printed values identical on every run without touching NumPy's global
# generator.
dates = pd.date_range("20130225", periods=5)
df = pd.DataFrame(np.random.RandomState(0).randn(5, 4), index=dates, columns=list('ABCD'))
# The row at integer position 3.
print('\n+=== DataFrame df.iloc[3]\n', df.iloc[3])
# Integer slices on both axes (end positions excluded).
print('\n+=== DataFrame df.iloc[3:5,0:2]\n', df.iloc[3:5, 0:2])
# Explicit lists of row and column positions.
print('\n+=== DataFrame df.iloc[[1,2,4],[0,2]]\n', df.iloc[[1, 2, 4], [0, 2]])
# Slice rows only, then columns only.
print('\n+=== DataFrame df.iloc[1:3,:]\n', df.iloc[1:3, :])
print('\n+=== DataFrame df.iloc[:,1:3]\n', df.iloc[:, 1:3])
# A single scalar value: .iloc with two positions, then the .iat accessor for
# the same cell.
print('\n+=== DataFrame df.iloc[1,1]\n', df.iloc[1, 1])
print('\n+=== DataFrame df.iat[1,1]\n', df.iat[1, 1])
In [37]:
# Boolean Indexing
import pandas as pd
import numpy as np
import numpy as np
# Sample frame for this section: 5 dates starting 2013-02-25, columns A-D,
# filled with standard-normal samples. A private fixed-seed RandomState keeps
# the values, and therefore which rows pass the filters below, identical on
# every run without touching NumPy's global generator.
dates = pd.date_range("20130225", periods=5)
df = pd.DataFrame(np.random.RandomState(0).randn(5, 4), index=dates, columns=list('ABCD'))
# Keep only the rows where column A is positive.
print('\n+=== DataFrame df[df.A > 0]\n', df[df.A > 0])
# Element-wise mask: the shape is kept and entries failing the test become NaN.
print('\n+=== DataFrame df[df > 0]\n', df[df > 0])
# Work on a copy so adding column E leaves df unchanged.
df2 = df.copy()
df2['E'] = ['one', 'one', 'two', 'three', 'four']
print('\n+=== DataFrame copy and insert\n', df2)
# isin() builds a boolean mask from membership in the given list of values.
print('\n+=== DataFrame using isin()\n', df2[df2['E'].isin(['two', 'four'])])
In [ ]: