Series DS


In [1]:
import pandas as pd
import numpy as np

In [2]:
# IPython help syntax: a trailing `?` shows the documentation for pd.Series.
pd.Series?

In [3]:
# Build a Series from a plain list of strings (the recorded output reports dtype object).
animals = ['lion', 'tiger', 'cat']
pd.Series(data=animals)


Out[3]:
0     lion
1    tiger
2      cat
dtype: object

In [4]:
# A list of Python ints produces an integer-typed Series.
numbers = [1, 2, 3]
pd.Series(data=numbers)


Out[4]:
0    1
1    2
2    3
dtype: int64

In [5]:
# Handling missing data: a None inside a list of strings.
# The recorded output keeps the None entry and reports dtype object.
animals = ['lion', 'tiger', None, 'man']
pd.Series(data=animals)


Out[5]:
0     lion
1    tiger
2     None
3      man
dtype: object

In [6]:
# In the previous example the dtype was object. With int/float data the
# missing entry is represented differently: it becomes NaN, and NaN is not
# the same thing as None.
numbers = [1, 2, None, 4, 8]
a = pd.Series(data=numbers)

In [7]:
# Test for NaN with np.isnan; the recorded result is True.
np.isnan(np.nan)


Out[7]:
True

In [8]:
# Inspect the index of `a`; the recorded output is a RangeIndex from 0 to 5 (exclusive).
a.index


Out[8]:
RangeIndex(start=0, stop=5, step=1)

In [9]:
# Roll number -> name mapping with integer keys.
namesRollNUmbers = {
    1: "a",
    2: "b",
    3: "",
}

In [10]:
# Building a Series from a dict: the dict keys end up as the index.
b = pd.Series(data=namesRollNUmbers)
b.index


Out[10]:
Int64Index([1, 2, 3], dtype='int64')

In [11]:
# Values paired with an explicit index of capital-city labels.
# The index must be as long as the data: adding a fourth label such as
# 'Mombasa' while supplying only three values raises a ValueError.
c = pd.Series(['India', 'Pakistan', 'Bhutan'], index=['Delhi', 'Islamabad', 'Thimpu'])

# Building a Series from an existing Series plus a new index aligns on labels:
# a label missing from `c` ('Mumbai') gets NaN, and labels that are not
# requested ('Islamabad') are left out.
a = pd.Series(c, index=['Delhi', 'Thimpu', 'Mumbai'])
a


Out[11]:
Delhi      India
Thimpu    Bhutan
Mumbai       NaN
dtype: object

Querying Series


In [12]:
# National animals keyed by country/region; the dict keys become the index labels.
animals = pd.Series({
    'India': 'Tiger',
    'USA': 'Bald Eagle',
    'UK': 'Swan',
    'Africa': 'Lion',
})

In [13]:
## Retrieve a value by its integer position with .iloc
animals.iloc[0]


Out[13]:
'Lion'

In [14]:
## Retrieve a value by its index label (key) with .loc
animals.loc['USA']


Out[14]:
'Bald Eagle'

In [15]:
## See how the lines below behave: Series indexing is very permissive compared
## with Java, where encapsulation is important.
## Plain [] accepts index labels. Older pandas also let an integer key fall back
## to a position on a non-integer index (animals[0]); that fallback is
## deprecated, so the positional lookup is written with .iloc.
print(animals.iloc[0])
print(animals['USA'])


Lion
Bald Eagle

In [16]:
# If both the index labels and the positions are integers, then what?
# Plain [] treats the key as a label, while .iloc is always positional.
primenumbers = pd.Series({1: 2, 2: 3, 3: 5, 4: 7})
print(primenumbers[2])  # label 2
primenumbers.iloc[2]    # position 2, i.e. the third element


3
Out[16]:
5

In [17]:
# Small integer Series used by the summation cells that follow.
numbers = pd.Series([100, 102, 34, 45])

In [18]:
# Total the Series with an explicit Python loop; iterating a Series yields its values.
s=0
for i in numbers:
    s+=i
s


Out[18]:
281

In [19]:
# Same total computed with np.sum instead of a loop.
np.sum(numbers)


Out[19]:
281

In [20]:
# Which of the two summation approaches is faster?
# Build a larger Series of 1000 random integers in [0, 100) to time them on.
s = pd.Series(np.random.randint(low=0, high=100, size=1000))
s.head()


Out[20]:
0    10
1    66
2    67
3    37
4     7
dtype: int64

In [21]:
%%timeit -n 100
# Timed body: accumulate the total with a Python-level loop over the Series.
p=0
for i in s:
    p+=i
p


100 loops, best of 3: 177 µs per loop

In [22]:
%%timeit -n 100
# Timed body: the same total computed with np.sum.
p=np.sum(s)


100 loops, best of 3: 46.6 µs per loop

In [23]:
## Say you want to add 2 to all numbers in your series.
## Instead of looping through the values, you can apply the operation to the
## whole Series directly, and it's faster.

print(s.head())
s+=2
s.head()


0    10
1    66
2    67
3    37
4     7
dtype: int64
Out[23]:
0    12
1    68
2    69
3    39
4     9
dtype: int64

In [24]:
# Element-by-element version of the "+2" update, for comparison with `s += 2`.
# Series.iteritems() and Series.set_value() have been removed from pandas;
# .items() and the scalar accessor .at[] are their replacements.
for label, value in s.items():
    s.at[label] = value + 2
s.head()


Out[24]:
0    14
1    70
2    71
3    41
4    11
dtype: int64

In [25]:
# Scratch cell: plain Python arithmetic, not related to the Series examples.
2+2


Out[25]:
4

In [26]:
##Compareing the two methods of adding a numbers to all the value

%%timeit -n 10
s = pd.Series(np.random.randint(0,1000,10000))
for label, value in s.iteritems():
    s.loc[label]= value+2


10 loops, best of 3: 1.28 s per loop

In [27]:
%%timeit -n 10
# Timed body: the same "+2" update applied to the whole Series in one step.
s = pd.Series(np.random.randint(low=0, high=1000, size=10000))
s += 2


10 loops, best of 3: 416 µs per loop

In [30]:
# Assigning to a label that is not yet in the index appends a new entry.
# Adding a string to integer data turns the Series into dtype object.
s = pd.Series([1, 2, 4])
s.loc[3] = 'mangesh'

In [31]:
# Display the Series after the string entry was added.
s


Out[31]:
0          1
1          2
2          4
3    mangesh
dtype: object

In [32]:
# Sport -> country, with the sport names as index labels.
sports = pd.Series({
    "Archery": "USA",
    "Football": "Spain",
    "Hockey": "Canada",
})

In [33]:
# Four countries that all share one index label: index values need not be unique.
new_sports = pd.Series(
    ["Australia", "England", "India", "SriLanka"],
    index=["Cricket"] * 4,
)

In [34]:
# Display the sports Series.
sports


Out[34]:
Archery        USA
Football     Spain
Hockey      Canada
dtype: object

In [35]:
# Display the Series whose four entries share the label 'Cricket'.
new_sports


Out[35]:
Cricket    Australia
Cricket      England
Cricket        India
Cricket     SriLanka
dtype: object

In [36]:
# Combine the two Series into a new one, keeping every entry and its label.
# Series.append() was removed in pandas 2.0; pd.concat is the replacement and,
# like append did, returns a new object instead of modifying its inputs.
all_sports = pd.concat([sports, new_sports])

In [37]:
# Display the combined Series.
all_sports


Out[37]:
Archery           USA
Football        Spain
Hockey         Canada
Cricket     Australia
Cricket       England
Cricket         India
Cricket      SriLanka
dtype: object

In [38]:
# Selecting a label that occurs several times returns a Series with every matching entry.
all_sports['Cricket']


Out[38]:
Cricket    Australia
Cricket      England
Cricket        India
Cricket     SriLanka
dtype: object

In [ ]: