In [1]:
import pandas as pd
import numpy as np
In [2]:
# IPython help lookup for pd.Series (interactive aid only).
pd.Series?
In [3]:
# Build a Series from a plain list of strings.
animals = ['lion', 'tiger', 'cat']
pd.Series(data=animals)
Out[3]:
In [4]:
# Build a Series from a plain list of integers.
numbers = [1, 2, 3]
pd.Series(data=numbers)
Out[4]:
In [5]:
## Handle missing data: a None entry inside a list of strings.
animals = ['lion', 'tiger', None, 'man']
pd.Series(data=animals)
Out[5]:
In [6]:
## In the previous example the dtype was object; with int/float data it is
## different: the missing entry becomes NaN, and NaN is not the same as None.
numbers = [1, 2, None, 4, 8]
a = pd.Series(data=numbers)
In [7]:
# Test for NaN explicitly with np.isnan, applied here to np.nan itself.
np.isnan(np.nan)
Out[7]:
In [8]:
# Show the index of the Series a.
a.index
Out[8]:
In [9]:
# Roll number -> name mapping; roll number 3 maps to an empty string.
namesRollNUmbers = {
    1: "a",
    2: "b",
    3: "",
}
In [10]:
# Build a Series from the roll-number dict and inspect the resulting index.
b = pd.Series(data=namesRollNUmbers)
b.index
Out[10]:
In [11]:
# Build `c` keyed by capital city, then build `a` from `c` with a different
# set of labels.
c = pd.Series(['India', 'Pakistan', 'Bhutan'], index=['Delhi', 'Islamabad', 'Thimpu'])
# Alternative kept for reference: four index labels against three values.
#c=pd.Series(['India','Pakistan','Bhutan'],index=['Delhi','Islamabad','Thimpu','Mombasa'])
# 'Mumbai' is not a label of `c`, so its entry in `a` is missing (NaN).
a = pd.Series(c, index=['Delhi', 'Thimpu', 'Mumbai'])
# Only the last expression of a cell is displayed, so a bare `a.index` here
# would be discarded; print it explicitly.
print(a.index)
a
Out[11]:
In [12]:
# National animals keyed by country, wrapped directly in a Series.
animals = pd.Series(
    {
        'India': 'Tiger',
        'USA': 'Bald Eagle',
        'UK': 'Swan',
        'Africa': 'Lion',
    }
)
In [13]:
##Retrieve data by integer position with .iloc
animals.iloc[0]
Out[13]:
In [14]:
##Retrieve data by index label (key) with .loc
animals.loc['USA']
Out[14]:
In [15]:
## See how the lines below behave: the same Series can be read by position or
## by label, which is very different from Java where encapsulation is important.
## Integer [] on a string-labelled Series is only a deprecated positional
## fallback, so the positional read goes through .iloc explicitly.
print(animals.iloc[0])
print(animals['USA'])
In [16]:
# What if both the labels and the positions are integers?
# The label lookup (2 -> second prime) and the positional lookup
# (position 2 -> third prime) give different answers.
primenumbers = pd.Series({1: 2, 2: 3, 3: 5, 4: 7})
print(primenumbers.loc[2])
primenumbers.iloc[2]
Out[16]:
In [17]:
# Small integer Series used for the summation examples.
numbers = pd.Series([100, 102, 34, 45])
In [18]:
# Sum the Series with an explicit Python loop; the running total is shown as
# the cell's last expression.
s = 0
for i in numbers:
    s += i
s
Out[18]:
In [19]:
# Same total, computed with np.sum instead of a Python loop.
np.sum(numbers)
Out[19]:
In [20]:
##Which of the above sums is faster? Build a Series of 1000 random integers
##from np.random.randint(0, 100) to time both approaches on.
s=pd.Series(np.random.randint(0,100,1000))
s.head()
Out[20]:
In [21]:
%%timeit -n 100
p=0
for i in s:
p+=i
p
In [22]:
%%timeit -n 100
# Time np.sum over the same Series s.
p=np.sum(s)
In [23]:
##Say you want to add 2 to every number in your series.
##Instead of looping through it, you can do this directly, and it is faster.
print(s.head())
s+=2
s.head()
Out[23]:
In [24]:
# Item-by-item alternative: add 2 to every value, one label at a time.
# Series.iteritems() and Series.set_value() have been removed from pandas;
# items() and label assignment through .loc replace them.
for label, value in s.items():
    s.loc[label] = value + 2
s.head()
Out[24]:
In [25]:
# Scratch arithmetic cell.
2+2
Out[25]:
In [26]:
##Compareing the two methods of adding a numbers to all the value
%%timeit -n 10
s = pd.Series(np.random.randint(0,1000,10000))
for label, value in s.iteritems():
s.loc[label]= value+2
In [27]:
%%timeit -n 10
# Vectorised version: add 2 to the whole Series in one operation.
s = pd.Series(np.random.randint(0,1000,10000))
s+=2
In [30]:
# Assign a string to label 3, which the three-element Series does not have yet.
s = pd.Series([1, 2, 4])
s.loc[3] = 'mangesh'
In [31]:
# Display s after the string assignment at label 3.
s
Out[31]:
In [32]:
# Sport -> country Series with unique labels.
sports = pd.Series(
    {
        "Archery": "USA",
        "Football": "Spain",
        "Hockey": "Canada",
    }
)
In [33]:
# Four countries that all share the same index label, "Cricket".
new_sports = pd.Series(
    ["Australia", "England", "India", "SriLanka"],
    index=["Cricket"] * 4,
)
In [34]:
# Display the sports Series.
sports
Out[34]:
In [35]:
# Display the new_sports Series.
new_sports
Out[35]:
In [36]:
# Combine both Series into a new one, sports first and new_sports after it.
# Series.append was removed in pandas 2.0; pd.concat is the replacement and,
# like append, leaves the two inputs unchanged.
all_sports = pd.concat([sports, new_sports])
In [37]:
# Display the combined Series.
all_sports
Out[37]:
In [38]:
# Look up a label that occurs more than once: "Cricket" labels four entries.
all_sports['Cricket']
Out[38]:
In [ ]: