In [2]:
#### Introduction to Data Wrangling with Pandas ####
## Page 1 ##

In [3]:
#### Why do we need pandas? Why not just use Excel? ####

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline 
#so that we can view the graphs inside the notebook

In [2]:
#### Fundamental Data Types in Pandas ####

In [3]:
# 1. Series
# 2. Data Frame

In [4]:
# A Series is a 1-D array with a corresponding index

In [5]:
# Series of the odd numbers 1, 3, 5, 7, 9; with no index given, pandas labels the rows 0..4.
s1 = pd.Series(data=range(1, 10, 2))

In [6]:
# A bare expression on the last line of a cell makes the notebook render the Series.
s1


Out[6]:
0    1
1    3
2    5
3    7
4    9
dtype: int64

In [7]:
# Building a Series from a dict: the dict keys become the index labels, the values become the data.
s3 = pd.Series(data={1: 2, 2: 3})

In [8]:
# Display the dict-based Series; its index shows the dict keys (1, 2) rather than 0-based positions.
s3


Out[8]:
1    2
2    3
dtype: int64

In [10]:
# Explicit string labels replace the default integer index; `name` labels the Series itself.
s2 = pd.Series(
    data=[1, 2, 3, 4],
    index=['p', 'q', 'r', 's'],
    name='one',
)

In [11]:
# Display the labelled Series; the output footer also reports its name ('one').
s2


Out[11]:
p    1
q    2
r    3
s    4
Name: one, dtype: int64

In [8]:
# A DataFrame is a 2-D labelled table with an index on both its rows and its columns

In [9]:
# Axis indices serve many purposes like:
# Identify data
# Perform automatic data alignment
# Perform faster subsetting

In [12]:
# Wrap the named Series `s2` in a DataFrame: the Series name ('one') becomes the
# single column label and the Series index (p..s) becomes the row index.
df1 = pd.DataFrame(data=s2)
# End the cell on the frame itself so the notebook displays it.
df1


Out[12]:
one
p 1
q 2
r 3
s 4

In [16]:
# You can also use other data types, such as lists and dictionaries, to create a DataFrame

In [17]:
# The real power: importing data from many different file formats

In [18]:
# http://pandas.pydata.org/pandas-docs/version/0.20/io.html

In [19]:
# text, binary format,  csv, json, excel

In [13]:
# General pattern: dataframe_name = pd.read_<format>(filename)
# Here: load the CSV file "wine.csv" into a DataFrame with default settings.
df2 = pd.read_csv(filepath_or_buffer="wine.csv")

In [15]:
# Preview only the first two rows instead of printing the whole frame.
df2.head(n=2)


Out[15]:
Unnamed: 0 country alcohol deaths heart liver
0 1 Australia 2.5 785 211 15.300000
1 2 Austria 3.9 863 167 45.599998

In [13]:
# Re-read the file, this time using the 'Unnamed: 0' column as the row index
# instead of keeping it as a regular data column.
df2 = pd.read_csv(
    filepath_or_buffer="wine.csv",
    index_col='Unnamed: 0',
)
# Preview the first five rows of the re-indexed frame.
df2.head(n=5)


Out[13]:
country alcohol deaths heart liver
1 Australia 2.5 785 211 15.300000
2 Austria 3.9 863 167 45.599998
3 Belg/Lux 2.9 883 131 20.700001
4 Canada 2.4 793 191 16.400000
5 Denmark 2.9 971 220 23.900000

In [23]:
# write
# <dataframe's name>.to_<file_format>(<file_name>)

In [24]:
# Write the DataFrame out as JSON, following the <dataframe>.to_<file_format>(<file_name>) pattern.
df2.to_json(path_or_buf='wine.json')