Pandas Tutorial


In [1]:
# pandas supplies the DataFrame used in every cell of this tutorial.
import pandas as pd
# NOTE(review): datetime is not referenced in the cells visible here - confirm it is still needed.
import datetime
# Disabled import, kept for reference:
#from pandas_datareader import data, wb
from matplotlib.pyplot import style 
# Select matplotlib's 'ggplot' style sheet.
style.use('ggplot')
# numpy is used later to turn DataFrame columns into a 2-D array (a list of rows).
import numpy as np 


# Sample web-traffic data: each key becomes a column, each list holds one value per day.
web_stats = {'Day':[1,2,3,4,5,6],
             'Visitors':[43,34,65,56,29,76],
             'Bounce_Rate':[65,67,78,65,45,52]}

# Make 'Day' the index permanently.
# set_index returns a new DataFrame and leaves the frame it was called on untouched,
# so the result has to be kept by assigning it to df. Calling set_index without
# keeping the result would not change the index of the original frame.
# (Passing inplace=True also works, but assigning the returned frame is the
# preferred pandas style and keeps the cell safe to re-run.)
df = pd.DataFrame(web_stats).set_index('Day')

In [ ]:
# Selecting a single column

# Bracket lookup works for any column label, including labels that contain spaces.
visitors_by_key = df['Visitors']
print(visitors_by_key)

# Attribute (dot) lookup is only possible when the label has no spaces,
# which is the case for 'Visitors'.
visitors_by_attr = df.Visitors
print(visitors_by_attr)

In [ ]:
# Selecting several columns at once

# Indexing with a list of labels returns those columns in the order listed.
wanted_columns = ['Bounce_Rate', 'Visitors']
print(df[wanted_columns])

In [19]:
# Turning one column into a plain Python list
visitor_counts = df['Visitors'].tolist()
print(visitor_counts)


[43, 34, 65, 56, 29, 76]

In [22]:
# Turning two columns into a 2-D NumPy array (one array row per DataFrame row)
column_pair = df[['Bounce_Rate','Visitors']]
print(np.array(column_pair))


[[65 43]
 [67 34]
 [78 65]
 [65 56]
 [45 29]
 [52 76]]

In [3]:
# Rebuild a DataFrame from the NumPy array of the two columns.
# The array carries no labels, so the new frame gets default integer
# row and column labels (0, 1, ...).
bounce_and_visits = np.array(df[['Bounce_Rate','Visitors']])
df2 = pd.DataFrame(bounce_and_visits)
print(df2)


    0   1
0  65  43
1  67  34
2  78  65
3  65  56
4  45  29
5  52  76

In [4]:
# Preview the first five rows of the rebuilt frame (five is head()'s default).
print(df2.head(5))


    0   1
0  65  43
1  67  34
2  78  65
3  65  56
4  45  29

In [ ]: