In [2]:
#### Introduction to Data Wrangling with Pandas ####
## Page 1 ##
In [3]:
#### Why do we need pandas? Why not just use Excel? ####
In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
# %matplotlib inline makes plots render inside the notebook, directly below the cell
In [2]:
#### Fundamental Data Types in Pandas ####
In [3]:
# 1. Series
# 2. Data Frame
In [4]:
# A Series is a 1-D labeled array: every value has a corresponding index label
In [5]:
# Build a Series from the odd numbers 1, 3, 5, 7, 9; with no explicit index,
# pandas assigns the default positional labels 0..4.
s1 = pd.Series(data=range(1, 10, 2))
In [6]:
# A bare expression on the last line of a cell is shown as the cell's output.
s1
Out[6]:
In [7]:
# Build a Series from a dict: the keys (1, 2) become the index labels and
# the values (2, 3) become the data.
s3 = pd.Series(
    data={1: 2, 2: 3},
)
In [8]:
# A bare expression on the last line of a cell is shown as the cell's output.
s3
Out[8]:
In [10]:
# Build a Series with explicit string labels and a name; the name is what
# labels the column if this Series is later turned into a DataFrame.
s2 = pd.Series(
    data=[1, 2, 3, 4],
    index=['p', 'q', 'r', 's'],
    name='one',
)
In [11]:
# A bare expression on the last line of a cell is shown as the cell's output.
s2
Out[11]:
In [8]:
# A DataFrame is a 2-D labeled table, indexed on both its rows and its columns
In [9]:
# Axis indices serve many purposes like:
# Identify data
# Perform automatic data alignment
# Perform faster subsetting
In [12]:
# Wrap the named Series s2 in a DataFrame, then show the result as the
# cell's output.
df1 = pd.DataFrame(data=s2)
df1
Out[12]:
In [16]:
# You can also use other data types, such as lists and dictionaries, to create a DataFrame
In [17]:
# The real power: importing data from many different file formats
In [18]:
# http://pandas.pydata.org/pandas-docs/version/0.20/io.html
In [19]:
# text, binary format, csv, json, excel
In [13]:
# General pattern: <dataframe_name> = pd.read_<format>(<filename>)
# Here: load wine.csv (path relative to the notebook's working directory).
df2 = pd.read_csv(filepath_or_buffer="wine.csv")
In [15]:
# Preview only the first two rows instead of dumping the whole frame.
df2.head(n=2)
Out[15]:
In [13]:
# Re-read the same file, this time using the 'Unnamed: 0' column as the row
# index rather than as a data column. NOTE(review): presumably this is the
# CSV's unlabeled first column -- confirm against wine.csv.
# This rebinds df2, replacing the frame loaded by the earlier read.
df2 = pd.read_csv(
    filepath_or_buffer="wine.csv",
    index_col='Unnamed: 0',
)
# head() with no argument returns the first 5 rows.
df2.head()
Out[13]:
In [23]:
# Writing a DataFrame back out to a file follows the mirror-image pattern:
# <dataframe's name>.to_<file_format>(<file_name>)
In [24]:
# Serialize df2 to a JSON file named wine.json in the working directory.
df2.to_json(path_or_buf='wine.json')