Series

Introduction to Pandas Series


In [3]:
# Import Pandas library
import pandas as pd

Create Series From Python List


In [4]:
simple_list = ["one", "two", "three"]

In [5]:
# Create a Pandas Series from a simple Python list of strings
pd.Series(data=simple_list)


Out[5]:
0      one
1      two
2    three
dtype: object

Result template:

Index0 Value0
Index1 Value1
dtype: DataType of the values

In [6]:
# Create a Pandas Series from a simple Python list of integers
simple_list_of_integers = [1, 2, 3]

In [7]:
pd.Series(simple_list_of_integers)


Out[7]:
0    1
1    2
2    3
dtype: int64

In [8]:
simple_list_of_mix_values = [1, True, "Hello", None, [0, 9, 8], {"name": "Pandas"}]

In [9]:
pd.Series(simple_list_of_mix_values)


Out[9]:
0                       1
1                    True
2                   Hello
3                    None
4               [0, 9, 8]
5    {u'name': u'Pandas'}
dtype: object

Create Series From Python Dictionary


In [10]:
simple_dict = {"first_name": "Steve", 
               "last_name": "Mcurry",
               "age": 17}

In [11]:
# Create a Pandas Series from a simple Python dictionary
pd.Series(simple_dict)


Out[11]:
age               17
first_name     Steve
last_name     Mcurry
dtype: object

Each KEY of the dictionary will be used as INDEX of the Series.

Attributes


In [12]:
s = pd.Series(simple_list)

In [13]:
s


Out[13]:
0      one
1      two
2    three
dtype: object

In [14]:
# Values
s.values


Out[14]:
array(['one', 'two', 'three'], dtype=object)

In [15]:
# Index
s.index


Out[15]:
RangeIndex(start=0, stop=3, step=1)

In [16]:
# dtype
s.dtype


Out[16]:
dtype('O')

Most common Data Types:

dtype('O') --> Object
dtype('int64') --> Integer
dtype('float64') --> Float
dtype('bool') --> Bool

Methods


In [17]:
s = pd.Series([1.21, 1.90, 1.55])
s


Out[17]:
0    1.21
1    1.90
2    1.55
dtype: float64

In [18]:
# Sum Method, sum all the values of the Series
s.sum()


Out[18]:
4.6600000000000001

In [19]:
# Product Method
s.product()


Out[19]:
3.56345

In [20]:
# Mean Method
s.mean()


Out[20]:
1.5533333333333335

Parameter & Arguments

Create new Series giving a custom INDEX list.


In [21]:
colors_list = ["red", "blue", "green", "yellow"]
numbers = ["one", "two", "three", "four"]

# pd.Series(colors_list, numbers) 
# pd.Series(colors_list, index=numbers) 
pd.Series(data=colors_list, index=numbers)


Out[21]:
one         red
two        blue
three     green
four     yellow
dtype: object

We can duplicate INDEX labels.


In [22]:
colors_list = ["red", "blue", "green", "yellow"]
numbers = ["one", "one", "one", "one"]

# pd.Series(colors_list, numbers) 
# pd.Series(colors_list, index=numbers) 
pd.Series(data=colors_list, index=numbers)


Out[22]:
one       red
one      blue
one     green
one    yellow
dtype: object

Other Attributes


In [23]:
s = pd.Series(colors_list)
s


Out[23]:
0       red
1      blue
2     green
3    yellow
dtype: object

In [24]:
# Only UNIQUE values?
s.is_unique


Out[24]:
True

In [25]:
# N Dimension
s.ndim


Out[25]:
1

In [26]:
# Shape: a tuple with the length of each dimension; a Series has only one, so (number of rows,)
s.shape


Out[26]:
(4,)

In [27]:
# Size
s.size


Out[27]:
4

In [28]:
# Name of the Series
# print(...) with a single argument behaves the same on Python 2 and Python 3,
# whereas the bare `print x` statement is a SyntaxError on Python 3.
print(s.name)  # the Series built without `name=` reports None
s = pd.Series(colors_list, name="Hello")
print(s.name)
print(s)  # the name is also shown in the footer of the Series repr


None
Hello
0       red
1      blue
2     green
3    yellow
Name: Hello, dtype: object

sort_values method


In [35]:
s.sort_values().head(1)


Out[35]:
1    blue
Name: Hello, dtype: object

In [37]:
s.sort_values(ascending=False)


Out[37]:
3    yellow
0       red
2     green
1      blue
Name: Hello, dtype: object

inplace Parameter

Used to perform the operation in place (the Series itself is modified); it is an elegant alternative to reassigning the result:

s = s.sort_values()

In [40]:
s.sort_values(ascending=False, inplace=True)

In [41]:
s


Out[41]:
3    yellow
0       red
2     green
1      blue
Name: Hello, dtype: object

sort_index method


In [42]:
s.sort_index()


Out[42]:
0       red
1      blue
2     green
3    yellow
Name: Hello, dtype: object

In [43]:
s.sort_index(ascending=False)


Out[43]:
3    yellow
2     green
1      blue
0       red
Name: Hello, dtype: object

In [44]:
s.sort_index(inplace=True)

In [45]:
s


Out[45]:
0       red
1      blue
2     green
3    yellow
Name: Hello, dtype: object

IN Keyword


In [50]:
# Using VALUES
'red' in s.values


Out[50]:
True

In [52]:
# Using INDEX
# Only the last expression of a cell is displayed, so the single Out value below
# belongs to `0 in s.index`; the result of `0 in s` is computed but not shown.
0 in s
0 in s.index


Out[52]:
True

Get Values By Index


In [53]:
s[0]


Out[53]:
'red'

In [54]:
s.get(0)


Out[54]:
'red'

In [56]:
# Scalar lookup by index label. `.at` replaces Series.get_value(), which was
# deprecated in pandas 0.21 and removed in pandas 1.0.
s.at[0]


Out[56]:
'red'

In [58]:
# Get Multiple values by multiple indexes
s[[0,1,2]]


Out[58]:
0      red
1     blue
2    green
Name: Hello, dtype: object

In [60]:
s[0:2]


Out[60]:
0     red
1    blue
Name: Hello, dtype: object

In [64]:
s.get([0,1])


Out[64]:
0     red
1    blue
Name: Hello, dtype: object

In [66]:
s.get(199) #return None

In [68]:
s.get(199, default="suca") # we can set a default value


Out[68]:
'suca'

In [69]:
# NOTE(review): in the recorded output below the missing label 100 comes back as NaN,
# not as the default string, so the default was not applied for a list of labels here.
# This may differ between pandas versions; confirm before relying on it.
s.get([1,100], default="You can't see me")


Out[69]:
1      blue
100     NaN
Name: Hello, dtype: object

Math Methods


In [135]:
s = pd.Series([1.21, 1.90, 1.55, 1.98, 4.4, 8.54, 1.21])
s


Out[135]:
0    1.21
1    1.90
2    1.55
3    1.98
4    4.40
5    8.54
6    1.21
dtype: float64

In [94]:
s.count() # counting only values different to NaN


Out[94]:
7

In [95]:
len(s) # counting everything


Out[95]:
7

In [96]:
s.sum()


Out[96]:
20.789999999999999

In [97]:
s.mean()


Out[97]:
2.9699999999999998

In [98]:
s.product()


Out[98]:
320.79809245176

In [99]:
s.std()


Out[99]:
2.6896839963088599

In [100]:
s.min()
# min(s)


Out[100]:
1.21

In [101]:
s.max()
# max(s)


Out[101]:
8.5399999999999991

In [102]:
s.median()


Out[102]:
1.9

In [103]:
s.mode()


Out[103]:
0    1.21
dtype: float64

In [104]:
s.describe()


Out[104]:
count    7.000000
mean     2.970000
std      2.689684
min      1.210000
25%      1.380000
50%      1.900000
75%      3.190000
max      8.540000
dtype: float64

idxmax & idxmin


In [114]:
# idxmax() gives the index label of the largest value; look the value up by that label.
index = s.idxmax()
# A format string prints "label value" identically on Python 2 and Python 3
# (the `print a, b` statement is a SyntaxError on Python 3).
print("{} {}".format(index, s.get(index)))


5 8.54

In [113]:
# idxmin() gives the index label of the smallest value; look the value up by that label.
index = s.idxmin()
# A format string prints "label value" identically on Python 2 and Python 3
# (the `print a, b` statement is a SyntaxError on Python 3).
print("{} {}".format(index, s.get(index)))


0 1.21

value_counts


In [116]:
s = pd.Series(data=["Alex", "Pippo", "Vale", "Alex", "Hello"])

In [119]:
# Get occurrences for each value
s.value_counts()


Out[119]:
Alex     2
Vale     1
Hello    1
Pippo    1
dtype: int64

In [121]:
s.value_counts().sum()


Out[121]:
5

In [122]:
s.count() == s.value_counts().sum()


Out[122]:
True

In [133]:
s.value_counts(ascending=True)


Out[133]:
Pippo    1
Hello    1
Vale     1
Alex     2
dtype: int64

apply


In [136]:
s = pd.Series([1.21, 1.90, 1.55, 1.98, 4.4, 8.54, 1.21])
s


Out[136]:
0    1.21
1    1.90
2    1.55
3    1.98
4    4.40
5    8.54
6    1.21
dtype: float64

In [148]:
def apply_very_hard_logic(number):
    """Classify a number into one of three labels.

    Returns "YEAH" when ``number`` is below 3, "SUPER YEAH" when it is
    above 7, and "BETWEEN" for everything else (including exactly 3 and 7).
    """
    label = "BETWEEN"  # fallback when neither threshold test matches
    if number < 3:
        label = "YEAH"
    elif number > 7:
        label = "SUPER YEAH"
    return label

In [149]:
s.apply(apply_very_hard_logic)


Out[149]:
0          YEAH
1          YEAH
2          YEAH
3          YEAH
4       BETWEEN
5    SUPER YEAH
6          YEAH
dtype: object

In [151]:
# For easy stuff use Anonymous functions --> LAMBDA!
# I want to add a $ to the values
s.apply(lambda x: "{} $".format(x))


Out[151]:
0    1.21 $
1     1.9 $
2    1.55 $
3    1.98 $
4     4.4 $
5    8.54 $
6    1.21 $
dtype: object

map


In [154]:
# Input Series for the map() examples. "Alex" appears twice on purpose: the
# recorded outputs of this cell and of the map() cells show five rows, with the
# repeated "Alex" at index 3.
s = pd.Series(data=["Alex", "Pippo", "Vale", "Alex", "Hello"])
s


Out[154]:
0     Alex
1    Pippo
2     Vale
3     Alex
4    Hello
dtype: object

In [156]:
s2 = pd.Series(data=["Comu", "Pippini", "Gela", "World"], 
               index=["Alex", "Pippo", "Vale", "Hello"])
s2


Out[156]:
Alex        Comu
Pippo    Pippini
Vale        Gela
Hello      World
dtype: object

In [157]:
s.map(s2)


Out[157]:
0       Comu
1    Pippini
2       Gela
3       Comu
4      World
dtype: object

In [159]:
s2_dict = s2.to_dict()

In [160]:
s.map(s2_dict)


Out[160]:
0       Comu
1    Pippini
2       Gela
3       Comu
4      World
dtype: object

In [ ]: