In [ ]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Show up to 50 columns when a DataFrame is displayed. The fully qualified
# option name is used because the bare 'max_columns' pattern can match more
# than one registered option, which makes set_option raise OptionError.
pd.set_option('display.max_columns', 50)
In [ ]:
# Build a Series from a list of mixed-type values; no index is given, so
# pandas supplies the default integer index.
s = pd.Series(
    data=[7, 'Pi', 3.14, -3233432, 'Happy Learning!'],
)
s
In [ ]:
# Same values as before, this time with explicit string labels as the index.
s = pd.Series(
    data=[7, 'Pi', 3.14, -3233432, 'Happy Learning!'],
    index=['A', 'Z', 'C', 'Y', 'E'],
)
s
In [ ]:
# A dict converts directly to a Series: keys become the index labels and
# values become the data.
d = {
    'Pagri': 4600,
    'Tanggulashan': 4587,
    'Ukdungle': 4659,
    'Colquechaca': 4692,
    'Hunza Khunjerab Pass': 4693,
    'El Aguilar': 4895,
    'Wenquan': 5019,
    'La Rinconada': 5099,
}
cities = pd.Series(data=d)
cities
In [ ]:
# Look up a single value by its index label.
cities.loc['Pagri']
In [ ]:
# Passing a list of labels returns a sub-Series in the requested order.
cities.loc[['Pagri', 'Colquechaca', 'Wenquan']]
In [ ]:
# Boolean indexing: keep only the entries whose value exceeds 4900.
cities.loc[cities.gt(4900)]
In [ ]:
# The comparison yields a boolean Series (one True/False per label); that
# mask can then be used to select the matching entries.
greater_than_4900 = cities > 4900
print(greater_than_4900)
print('\n')
print(cities[greater_than_4900])
In [ ]:
# Change a single value by assigning through its index label.
# Single-argument print() with str.format gives the same output on
# Python 2 and Python 3.
print('Old value: {}'.format(cities['Pagri']))
cities['Pagri'] = 4990
print('New value: {}'.format(cities['Pagri']))
In [ ]:
# Change values using boolean logic: every entry below 4900 is overwritten
# with 4890. The selection is printed before and after the assignment.
print(cities[cities < 4900])
print('\n')
cities[cities < 4900] = 4890
print(cities[cities < 4900])
In [ ]:
# `in` on a Series tests membership among the index labels, not the values.
print('Wenquan' in cities)
print('Pune' in cities)
In [ ]:
# Element-wise arithmetic with a scalar: divide every city value by 3.
cities.div(3)
In [ ]:
# NumPy element-wise functions accept a Series directly: square every value.
np.power(cities, 2)
In [ ]:
# Adding two Series aligns them on index labels: a label present in both
# operands ('Pagri') is summed, while labels found in only one operand
# come out as NaN.
print(cities[['Pagri', 'Colquechaca', 'Wenquan']])
print('\n')
print(cities[['Pagri', 'Ukdungle', 'Tanggulashan']])
print('\n')
print(cities[['Pagri', 'Colquechaca', 'Wenquan']] + cities[['Pagri', 'Ukdungle', 'Tanggulashan']])
In [ ]:
# NaN is a float, so an integer Series cannot store it. Cast to float64
# explicitly before assigning: recent pandas versions deprecate the silent
# int -> float upcast that item assignment used to perform.
cities = cities.astype('float64')
cities['Pagri'] = np.nan
In [ ]:
# Display the Series again; 'Pagri' was set to NaN in the previous cell.
cities
In [ ]:
# Boolean Series: True where a value is present, False where it is missing.
pd.notnull(cities)
In [ ]:
# Boolean Series: True where a value is missing (NaN), False otherwise.
pd.isnull(cities)
In [ ]:
# Use the null mask to select only the entries with missing values.
print(cities[cities.isnull()])
In [ ]:
# Build a DataFrame from a dict of equal-length lists: each key becomes a
# column. `columns=` fixes the column order explicitly.
data = {
    'year': [2010, 2011, 2012, 2011, 2012, 2010, 2011, 2012],
    'team': ['Bears', 'Bears', 'Bears', 'Packers', 'Packers', 'Lions', 'Lions', 'Lions'],
    'wins': [11, 8, 10, 15, 11, 6, 10, 4],
    'losses': [5, 8, 6, 1, 5, 10, 6, 12],
}
football = pd.DataFrame(data, columns=['year', 'team', 'wins', 'losses'])
print(football)
In [ ]:
! head -n 5 /Users/aditya/Desktop/MIT/data/body.csv
In [ ]:
# Load the CSV with pandas' default settings and show the first five rows.
frm_csv = pd.read_csv('data/body.csv')
print(frm_csv.head(n=5))
In [ ]:
# Re-read the CSV with explicit column names and parsing options, then show
# the first ten rows.
colnames = ['Date', 'Weight', 'BMI', 'Fat', 'BP', 'RHR', 'DS']
frm_csv = pd.read_csv(
    'data/body.csv',
    na_values=[0, '0/0'],  # treat 0 and '0/0' as missing values
    sep=',',
    parse_dates=[0],       # parse the first column as dates
    header=0,              # row 0 is the file's own header; `names` replaces it
    names=colnames,
)
print(frm_csv.head(n=10))
In [ ]:
# Summary statistics for the DataFrame's columns.
print(frm_csv.describe())
In [ ]:
# The dtype pandas assigned to each column.
print(frm_csv.dtypes)
In [ ]:
# The last rows of the DataFrame (tail() defaults to five).
print(frm_csv.tail())
In [ ]:
# Slicing a DataFrame with [start:stop] selects rows; here rows 10 through 14.
print(frm_csv[10:15])
In [ ]:
# Select one column (returned as a Series) and show its first five entries.
frm_csv.loc[:, 'Weight'].head(n=5)
In [ ]:
# Select several columns with a list of names; the result is a DataFrame.
frm_csv.loc[:, ['Date', 'Weight']].head(n=5)
In [ ]:
# Keep only the rows whose Weight is above 65, then show the first five.
frm_csv.loc[frm_csv['Weight'] > 65].head(n=5)
In [ ]:
# The comparison itself is a boolean Series with one entry per row;
# print its first five entries.
condition = frm_csv.Weight > 65
print(condition[:5])
In [ ]:
# Dates of the rows whose Weight exceeds 65: row mask and column label
# combined in a single .loc lookup.
frm_csv.loc[frm_csv['Weight'] > 65, 'Date'].head(n=5)