The Series Data Structure


In [3]:
import pandas as pd
import numpy as np

In [3]:
# IPython-only syntax: a trailing `?` displays the docstring of pd.Series.
pd.Series?

In [4]:
# A list of strings becomes a Series with a default integer index;
# the recorded output shows dtype object.
animals = ["lion", "tiger", "cat"]
pd.Series(data=animals)


Out[4]:
0     lion
1    tiger
2      cat
dtype: object

In [5]:
# A list of whole numbers is stored with an integer dtype
# (int64 in the recorded output).
numbers = [1, 2, 3]
pd.Series(data=numbers)


Out[5]:
0    1
1    2
2    3
dtype: int64

In [7]:
## Handling missing data: in a list of strings the None entry is kept as None
## and the Series dtype stays object (see the recorded output).
animals = ["lion", "tiger", None, "man"]
pd.Series(data=animals)


Out[7]:
0     lion
1    tiger
2     None
3      man
dtype: object

In [20]:
## In the previous cell the dtype was object, so None was kept as-is.
## For numeric (int/float) data the missing entry is stored as NaN instead,
## and NaN is not the same thing as None.
numbers = [1, 2, None, 4, 8]
a = pd.Series(data=numbers)

In [21]:
# np.isnan() tests whether a value is NaN; the recorded output is True.
np.isnan(np.nan)


Out[21]:
True

In [24]:
# `a` was created without an explicit index; the recorded output shows the
# default RangeIndex that pandas assigned.
a.index


Out[24]:
RangeIndex(start=0, stop=5, step=1)

In [26]:
# Roll number -> name mapping (the name for roll number 3 is an empty string).
namesRollNUmbers = dict([(1, "a"), (2, "b"), (3, "")])

In [28]:
# Building a Series from a dict: the dict keys become the index labels,
# as the recorded index [1, 2, 3] shows.
b = pd.Series(data=namesRollNUmbers)
b.index


Out[28]:
Int64Index([1, 2, 3], dtype='int64')

In [37]:
# Country values labelled by their capital city.
c = pd.Series(['India', 'Pakistan', 'Bhutan'], index=['Delhi', 'Islamabad', 'Thimpu'])
# Variant with four index labels for only three values (kept for experimentation):
#c=pd.Series(['India','Pakistan','Bhutan'],index=['Delhi','Islamabad','Thimpu','Mombasa'])

# Passing an existing Series together with an index selects by label:
# 'Delhi' and 'Thimpu' are carried over, while 'Mumbai' has no match in `c`
# and therefore becomes NaN (see the recorded output).
a = pd.Series(c, index=['Delhi', 'Thimpu', 'Mumbai'])
# Only the last expression of a cell is displayed, so the bare `a.index`
# statement that used to sit here had no effect and was removed.
a


Out[37]:
Delhi      India
Thimpu    Bhutan
Mumbai       NaN
dtype: object

Querying Series


In [40]:
# National animal per region; the dict keys become the Series index labels.
national_animals = {
    'India': 'Tiger',
    'USA': 'Bald Eagle',
    'UK': 'Swan',
    'Africa': 'Lion',
}
animals = pd.Series(national_animals)

In [47]:
##To retrieve data by integer position, use .iloc
# NOTE(review): the recorded output 'Lion' belongs to 'Africa', which is first
# only if the index was sorted alphabetically; newer pandas keeps the dict's
# insertion order, so a re-run may return 'Tiger' instead -- confirm.
animals.iloc[0]


Out[47]:
'Lion'

In [50]:
##Retrieve data by index label (key) with .loc
animals.loc['USA']


Out[50]:
'Bald Eagle'

In [53]:
## See how the lines below behave: unlike Java, where encapsulation is
## important, a Series exposes its values through direct indexing.
## A label can be passed straight to []. An integer position should go
## through .iloc, because the positional fallback of [] on a label-indexed
## Series is deprecated in recent pandas releases.
print(animals.iloc[0])
print(animals['USA'])


Lion
Bald Eagle

In [ ]:
# What happens when both the index labels and the positions are integers?
# Plain [] looks the value up by label, whereas .iloc uses the position.

prime_by_label = {1: 2, 2: 3, 3: 5, 4: 7}
primenumbers = pd.Series(prime_by_label)
print(primenumbers[2])
primenumbers.iloc[2]

In [68]:
# Small numeric Series used by the summation examples that follow.
numbers = pd.Series([100, 102, 34, 45])

In [70]:
# Sum the Series by visiting its values one at a time in a Python loop.
s = 0
for i in numbers:
    s = s + i
s


Out[70]:
281

In [71]:
# Same total as the explicit loop, computed with a single NumPy call.
np.sum(numbers)


Out[71]:
281

In [ ]:
## Which of the two summation approaches above is faster?
## Build a larger Series of 1000 random integers in [0, 100) to time them.

s = pd.Series(np.random.randint(low=0, high=100, size=1000))
s.head()

In [ ]:
%%timeit -n 100
# Timed candidate 1: accumulate the total of `s` with an explicit Python loop.
p=0
for i in s:
    p+=i
p

In [ ]:
%%timeit -n 100
# Timed candidate 2: the same total of `s` computed with a single np.sum call.
p=np.sum(s)

In [ ]:
##Say you want to add 2 to all numbers in your series
##Instead of looping through the elements you can apply the operation to the
##whole Series directly, and it is faster
##NOTE: `s+=2` updates `s` in place, so re-running this cell adds 2 again

print(s.head())
s+=2
s.head()

In [ ]:
# Element-by-element alternative to `s += 2`: visit every (label, value) pair
# and write the incremented value back at that label.
# Series.iteritems() and Series.set_value() are gone from current pandas;
# Series.items() and the .at accessor are their replacements.
for label, value in s.items():
    s.at[label] = value + 2
s.head()

In [ ]:
%%timeit -n 100
# Slow path: add 2 to each element one at a time via label-based assignment.
# Series.items() replaces Series.iteritems(), which current pandas no longer has.
s = pd.Series(np.random.randint(0, 1000, 10000))
for label, value in s.items():
    s.loc[label] = value + 2

In [ ]:
%%timeit -n 100
# Fast path: the same "+2" applied to the whole Series at once, so this timing
# can be compared against the element-by-element loop.
s = pd.Series(np.random.randint(0, 1000, 10000))
s += 2

In [ ]:


In [ ]: