In [3]:
import pandas as pd
import numpy as np
In [3]:
# IPython help syntax: a trailing `?` shows the docstring of pd.Series.
pd.Series?
In [4]:
# Build a Series from a plain list of strings.
animals = ['lion', 'tiger', 'cat']
pd.Series(data=animals)
Out[4]:
In [5]:
# Build a Series from a plain list of integers.
numbers = [1, 2, 3]
pd.Series(data=numbers)
Out[5]:
In [7]:
# Handling missing data: a None entry inside a list of strings.
animals = ['lion', 'tiger', None, 'man']
pd.Series(data=animals)
Out[7]:
In [20]:
# With strings the dtype above was object; for int/float data it is different:
# the missing entry is stored as NaN, and NaN is not the same thing as None.
numbers = [1, 2, None, 4, 8]
a = pd.Series(data=numbers)
In [21]:
# np.isnan is the explicit test for NaN; applied to np.nan itself it evaluates to True.
np.isnan(np.nan)
Out[21]:
In [24]:
# Inspect the index of the Series `a`.
a.index
Out[24]:
In [26]:
# Dict with integer keys (presumably roll number -> name, going by the variable
# name); it is turned into a Series in the next cell.
namesRollNUmbers = {
    1: "a",
    2: "b",
    3: "",
}
In [28]:
# Build a Series from the dict and inspect the resulting index.
b = pd.Series(data=namesRollNUmbers)
b.index
Out[28]:
In [37]:
# Series of countries labelled by their capital city.
c = pd.Series(
    data=['India', 'Pakistan', 'Bhutan'],
    index=['Delhi', 'Islamabad', 'Thimpu'],
)
#c=pd.Series(['India','Pakistan','Bhutan'],index=['Delhi','Islamabad','Thimpu','Mombasa'])

# Build a new Series from `c` using a different list of labels
# ('Mumbai' is not a label of `c`).
a = pd.Series(data=c, index=['Delhi', 'Thimpu', 'Mumbai'])
a.index  # evaluated but not shown: only the last expression of a cell is echoed
a
Out[37]:
In [40]:
# Series keyed by country/region name, holding an animal for each key.
animals = pd.Series({
    'India': 'Tiger',
    'USA': 'Bald Eagle',
    'UK': 'Swan',
    'Africa': 'Lion',
})
In [47]:
# Retrieve a value by integer position with .iloc (position 0 is the first entry).
animals.iloc[0]
Out[47]:
In [50]:
# Retrieve a value by its index label (key) with .loc.
animals.loc['USA']
Out[50]:
In [53]:
# See how the lines below behave: element access on a Series is wide open,
# which is very different from Java, where encapsulation is important.
# Plain `[]` with a label works as expected. Plain `[]` with an integer on a
# string-labelled Series only worked through a positional fallback that pandas
# has deprecated (FutureWarning since 2.1), so the positional lookup is spelled
# out explicitly with .iloc.
print(animals.iloc[0])
print(animals['USA'])
In [ ]:
# If both the keys (labels) and the positions are integers, then what?
primenumbers = pd.Series({1: 2, 2: 3, 3: 5, 4: 7})
print(primenumbers[2])   # [] on an integer index looks up the LABEL 2
primenumbers.iloc[2]     # .iloc looks up POSITION 2 (the third entry)
In [68]:
# Small integer Series used by the summing examples that follow.
numbers = pd.Series([100, 102, 34, 45])
In [70]:
# Sum the values of `numbers` with an explicit Python loop.
s = 0
for i in numbers:
    s += i  # loop body must be indented under the `for`
s
Out[70]:
In [71]:
# Sum the same Series with NumPy instead of a Python-level loop.
np.sum(numbers)
Out[71]:
In [ ]:
# Which of the two sums above is faster? Build a larger Series to time them on:
# 1000 random integers drawn from [0, 100).
s = pd.Series(np.random.randint(low=0, high=100, size=1000))
s.head()
In [ ]:
%%timeit -n 100
p=0
for i in s:
p+=i
p
In [ ]:
%%timeit -n 100
# Time the NumPy sum of `s`; -n 100 runs the statement 100 times per measurement.
p=np.sum(s)
In [ ]:
# Say you want to add 2 to every number in your Series.
# Instead of looping through the values you can do it directly on the whole
# Series, and it is faster.
# NOTE: `s+=2` updates `s` itself, so re-running this cell adds 2 again.
print(s.head())
s+=2
s.head()
In [ ]:
# Element-by-element way of adding 2 to every value of `s`.
# Series.iteritems() and Series.set_value() no longer exist in current pandas
# (removed in 2.0 and 1.0 respectively); .items() and the .at accessor are
# their replacements.
for label, value in s.items():
    s.at[label] = value + 2
s.head()
In [ ]:
%%timeit -n 100
s = pd.Series(np.random.randint(0,1000,10000))
for label, value in s.iteritems():
s.loc[label]= value+2
In [ ]:
%%timeit -n 100
s = pd.Series(np.random.randint(0,1000,10000))
for label, value in s.iteritems():
s.loc[label]= value+2
In [ ]:
In [ ]: