notebook.community

Edit and run



In [2]:

    
#### Introduction to Data Wrangling with Pandas ####
## Page 1 ##



In [3]:

    
#### Why do we need pandas? Why not just use Excel? ####



In [1]:

    
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline 
#so that we can view the graphs inside the notebook



In [2]:

    
#### Fundamental Data Types in Pandas ####



In [3]:

    
# 1. Series
# 2. Data Frame



In [4]:

    
# A Series is a 1-D array of values paired with a corresponding index



In [5]:

    
# Series built from an iterable: the odd numbers 1, 3, ..., 9.
# No index is passed, so pandas assigns the default integer index 0..4.
s1 = pd.Series(data=range(1, 10, 2))



In [6]:

    
s1









    Out[6]:





0    1
1    3
2    5
3    7
4    9
dtype: int64



In [7]:

    
# Series built from a dict: the keys become the index labels and the
# values become the data.
s3 = pd.Series(data={1: 2, 2: 3})



In [8]:

    
s3









    Out[8]:





1    2
2    3
dtype: int64



In [10]:

    
# Series with explicit string labels and a name; the name is shown in the
# repr of the Series ("Name: one").
s2 = pd.Series(
    data=[1, 2, 3, 4],
    index=["p", "q", "r", "s"],
    name="one",
)



In [11]:

    
s2









    Out[11]:





p    1
q    2
r    3
s    4
Name: one, dtype: int64



In [8]:

    
# A DataFrame is a 2-D table with indexing on both rows and columns



In [9]:

    
# Axis indices serve many purposes like:
# Identify data
# Perform automatic data alignment
# Perform faster subsetting



In [12]:

    
# Wrap the existing Series `s2` in a DataFrame, then end the cell with the
# bare name so the notebook renders the resulting table.
df1 = pd.DataFrame(data=s2)
df1



In [16]:

    
# You can also use other data types, such as lists and dictionaries, to create a DataFrame



In [17]:

    
# The real power: importing data from many different file formats



In [18]:

    
# http://pandas.pydata.org/pandas-docs/version/0.20/io.html



In [19]:

    
# Supported formats include text, binary, CSV, JSON and Excel



In [13]:

    
# General pattern: dataframe_name = pd.read_<format>(filename)
# Load the wine data from a CSV file in the working directory. No index
# column is specified here, so the file's leading unlabeled column is
# loaded as an ordinary column named 'Unnamed: 0'.
df2 = pd.read_csv(filepath_or_buffer="wine.csv")



In [15]:

    
# Preview only the first two rows rather than displaying the whole frame.
df2.head(n=2)









    Out[15]:






  
    
      
      Unnamed: 0
      country
      alcohol
      deaths
      heart
      liver
    
  
  
    
      0
      1
      Australia
      2.5
      785
      211
      15.300000
    
    
      1
      2
      Austria
      3.9
      863
      167
      45.599998



In [13]:

    
# Re-read the file, this time using the 'Unnamed: 0' column as the row index
# instead of keeping it as a data column; then preview the first five rows.
df2 = pd.read_csv(filepath_or_buffer="wine.csv", index_col="Unnamed: 0")
df2.head(n=5)



In [23]:

    
# Writing a DataFrame back out to a file follows the mirror-image pattern:
# <dataframe's name>.to_<file_format>(<file_name>)



In [24]:

    
# Serialize the DataFrame to a JSON file in the working directory.
df2.to_json(path_or_buf="wine.json")

	country	alcohol	deaths	heart	liver
1	Australia	2.5	785	211	15.300000
2	Austria	3.9	863	167	45.599998
3	Belg/Lux	2.9	883	131	20.700001
4	Canada	2.4	793	191	16.400000
5	Denmark	2.9	971	220	23.900000