Title: pandas Data Structures
Slug: pandas_data_structures
Summary: pandas Data Structures
Date: 2016-05-01 12:00
Category: Python
Tags: Data Wrangling
Authors: Chris Albon
In [1]:
import pandas as pd
A Series is a one-dimensional labeled array (similar to a vector in R).
In [2]:
# Build a Series straight from a list of counts; no index is supplied,
# so pandas labels the rows itself.
floodingReports = pd.Series(data=[5, 6, 2, 9, 12])
floodingReports
Out[2]:
Note that the first column of numbers (0 to 4) is the index.
In [3]:
# Same counts as before, but each value now gets a county name as its index label
floodingReports = pd.Series(
    data=[5, 6, 2, 9, 12],
    index=[
        'Cochise County',
        'Pima County',
        'Santa Cruz County',
        'Maricopa County',
        'Yuma County',
    ],
)
floodingReports
Out[3]:
In [4]:
# Look up a single value by its index label
floodingReports.loc['Cochise County']
Out[4]:
In [5]:
# Boolean mask: keep only the entries whose value is greater than 6
floodingReports.loc[floodingReports > 6]
Out[5]:
In [6]:
# Map each county name to its number of fire reports
fireReports_dict = {
    'Cochise County': 12,
    'Pima County': 342,
    'Santa Cruz County': 13,
    'Maricopa County': 42,
    'Yuma County': 52,
}

# Turn the mapping into a Series (keys become the index labels), then display it
fireReports = pd.Series(fireReports_dict)
fireReports
Out[6]:
In [7]:
# Shorten the index labels by dropping the " County" suffix.
# The rename is done by label (old name -> new name) instead of assigning a
# positional list, so every value stays attached to the right county whatever
# order the Series happens to be in. Labels not listed are left untouched,
# which also makes this cell safe to re-run.
fireReports = fireReports.rename(index={
    'Cochise County': 'Cochise',
    'Pima County': 'Pima',
    'Santa Cruz County': 'Santa Cruz',
    'Maricopa County': 'Maricopa',
    'Yuma County': 'Yuma',
})
fireReports
Out[7]:
In [8]:
# Column-oriented data: each key becomes a DataFrame column and each list
# holds that column's values, one entry per row.
data = {
    'county': ['Cochise', 'Pima', 'Santa Cruz', 'Maricopa', 'Yuma'],
    'year': [2012, 2012, 2013, 2014, 2014],
    'reports': [4, 24, 31, 2, 3],
}

# Build the DataFrame from the dictionary and display it
df = pd.DataFrame(data)
df
Out[8]:
In [9]:
# Build a frame from the same dictionary, this time stating the column order explicitly
dfColumnOrdered = pd.DataFrame(
    data=data,
    columns=['county', 'year', 'reports'],
)
dfColumnOrdered
Out[9]:
In [10]:
# Attach a new column. Because the values arrive as a Series, pandas matches
# them to rows by index label rather than by position (this assumes
# dfColumnOrdered still carries its default 0-4 row labels).
dfColumnOrdered['newsCoverage'] = pd.Series(
    data=[42.3, 92.1, 12.2, 39.3, 30.2],
)
dfColumnOrdered
Out[10]:
In [11]:
# Remove the column in place. The membership check makes the cell safe to
# re-run: a bare `del` raises KeyError once the column is already gone.
if 'newsCoverage' in dfColumnOrdered.columns:
    del dfColumnOrdered['newsCoverage']
dfColumnOrdered
Out[11]:
In [12]:
# Swap rows and columns (the long-hand form of the .T shorthand)
dfColumnOrdered.transpose()
Out[12]: