In [3]:
# Import Pandas library
import pandas as pd
In [4]:
simple_list = ["one", "two", "three"]
In [5]:
# Create a pandas Series from a plain Python list of strings
pd.Series(data=simple_list)
Out[5]:
Result template:
Index0 Value0
Index1 Value1
dtype: DataType of the values
In [6]:
# A plain Python list of integers, used below to build a pandas Series
simple_list_of_integers = [1, 2, 3]
In [7]:
pd.Series(simple_list_of_integers)
Out[7]:
In [8]:
simple_list_of_mix_values = [1, True, "Hello", None, [0, 9, 8], {"name": "Pandas"}]
In [9]:
pd.Series(simple_list_of_mix_values)
Out[9]:
In [10]:
simple_dict = {"first_name": "Steve",
"last_name": "Mcurry",
"age": 17}
In [11]:
# Create a pandas Series from a plain Python dictionary
pd.Series(simple_dict)
Out[11]:
Each KEY of the dictionary will be used as INDEX of the Series.
In [12]:
s = pd.Series(simple_list)
In [13]:
s
Out[13]:
In [14]:
# Values
s.values
Out[14]:
In [15]:
# Index
s.index
Out[15]:
In [16]:
# dtype
s.dtype
Out[16]:
Most common Data Types:
dtype('O') --> Object
dtype('int64') --> Integer
dtype('float64') --> Float
dtype('bool') --> Bool
In [17]:
s = pd.Series([1.21, 1.90, 1.55])
s
Out[17]:
In [18]:
# Sum Method, sum all the values of the Series
s.sum()
Out[18]:
In [19]:
# Product Method
s.product()
Out[19]:
In [20]:
# Mean Method
s.mean()
Out[20]:
In [21]:
colors_list = ["red", "blue", "green", "yellow"]
numbers = ["one", "two", "three", "four"]
# Custom index labels: the second positional argument of pd.Series is `index`,
# so the two commented-out calls are equivalent to the explicit keyword form below.
# pd.Series(colors_list, numbers)
# pd.Series(colors_list, index=numbers)
pd.Series(data=colors_list, index=numbers)
Out[21]:
Index labels do not have to be unique: the same label can be used more than once.
In [22]:
colors_list = ["red", "blue", "green", "yellow"]
numbers = ["one", "one", "one", "one"]
# pd.Series(colors_list, numbers)
# pd.Series(colors_list, index=numbers)
pd.Series(data=colors_list, index=numbers)
Out[22]:
In [23]:
s = pd.Series(colors_list)
s
Out[23]:
In [24]:
# Only UNIQUE values?
s.is_unique
Out[24]:
In [25]:
# Number of dimensions of the underlying data (a Series is one-dimensional)
s.ndim
Out[25]:
In [26]:
# Shape of the Series: a 1-tuple (number_of_elements,) -- a Series has no columns
s.shape
Out[26]:
In [27]:
# Size
s.size
Out[27]:
In [28]:
# Name of the Series.
# A Series built without `name=` has no name, so the first print shows None.
# print(...) with a single argument behaves the same on Python 2 and Python 3.
print(s.name)
# Rebuild the Series from the same data, this time giving it a name.
s = pd.Series(colors_list, name="Hello")
print(s.name)
# The name is also shown in the footer of the printed Series.
print(s)
In [35]:
s.sort_values().head(1)
Out[35]:
In [37]:
s.sort_values(ascending=False)
Out[37]:
In [40]:
# inplace=True sorts `s` itself and returns None, so this cell shows no output
s.sort_values(ascending=False, inplace=True)
In [41]:
s
Out[41]:
In [42]:
s.sort_index()
Out[42]:
In [43]:
s.sort_index(ascending=False)
Out[43]:
In [44]:
# inplace=True reorders `s` itself by index label and returns None (no output)
s.sort_index(inplace=True)
In [45]:
s
Out[45]:
In [50]:
# Using VALUES
'red' in s.values
Out[50]:
In [52]:
# Using INDEX: `in` applied to a Series tests the index labels, not the values,
# so the two checks below are equivalent. Only the last expression is displayed.
0 in s
0 in s.index
Out[52]:
In [53]:
s[0]
Out[53]:
In [54]:
s.get(0)
Out[54]:
In [56]:
# Fast label-based access to a single value.
# Series.get_value() was deprecated in pandas 0.21 and removed in 1.0;
# the .at accessor is its replacement and also works on older versions.
s.at[0]
Out[56]:
In [58]:
# Get Multiple values by multiple indexes
s[[0,1,2]]
Out[58]:
In [60]:
s[0:2]
Out[60]:
In [64]:
s.get([0,1])
Out[64]:
In [66]:
s.get(199) #return None
In [68]:
s.get(199, default="suca") # we can set a default value
Out[68]:
In [69]:
s.get([1,100], default="You can't see me")
Out[69]:
In [135]:
s = pd.Series([1.21, 1.90, 1.55, 1.98, 4.4, 8.54, 1.21])
s
Out[135]:
In [94]:
s.count() # counts only the non-NaN values
Out[94]:
In [95]:
len(s) # counts every element, NaN included
Out[95]:
In [96]:
s.sum()
Out[96]:
In [97]:
s.mean()
Out[97]:
In [98]:
s.product()
Out[98]:
In [99]:
s.std()
Out[99]:
In [100]:
s.min()
# min(s)
Out[100]:
In [101]:
s.max()
# max(s)
Out[101]:
In [102]:
s.median()
Out[102]:
In [103]:
s.mode()
Out[103]:
In [104]:
s.describe()
Out[104]:
In [114]:
# Find the index label of the largest value, then fetch the value stored there.
index = s.idxmax()
# Pre-formatting the string keeps the "label value" output identical on
# Python 2 and Python 3 (the bare `print a, b` statement is Python 2 only).
print("{} {}".format(index, s.get(index)))
In [113]:
# Find the index label of the smallest value, then fetch the value stored there.
index = s.idxmin()
# Pre-formatting the string keeps the "label value" output identical on
# Python 2 and Python 3 (the bare `print a, b` statement is Python 2 only).
print("{} {}".format(index, s.get(index)))
In [116]:
s = pd.Series(data=["Alex", "Pippo", "Vale", "Alex", "Hello"])
In [119]:
# Get occurrences for each value
s.value_counts()
Out[119]:
In [121]:
s.value_counts().sum()
Out[121]:
In [122]:
s.count() == s.value_counts().sum()
Out[122]:
In [133]:
s.value_counts(ascending=True)
Out[133]:
In [136]:
s = pd.Series([1.21, 1.90, 1.55, 1.98, 4.4, 8.54, 1.21])
s
Out[136]:
In [148]:
def apply_very_hard_logic(number, low=3, high=7):
    """Bucket a number into one of three text labels.

    Intended to be passed to ``Series.apply`` so that every element of a
    numeric Series is replaced by its label.

    Parameters
    ----------
    number
        The value to classify; must support ``<`` and ``>`` comparisons.
    low
        Values strictly below this threshold are labelled "YEAH". Defaults to 3.
    high
        Values strictly above this threshold are labelled "SUPER YEAH".
        Defaults to 7.

    Returns
    -------
    str
        "YEAH", "SUPER YEAH", or "BETWEEN" for everything else. The thresholds
        themselves are "BETWEEN", and so is NaN, because both comparisons
        are False for it.
    """
    if number < low:
        return "YEAH"
    if number > high:
        return "SUPER YEAH"
    # Neither comparison matched: low <= number <= high, or not comparable (NaN).
    return "BETWEEN"
In [149]:
s.apply(apply_very_hard_logic)
Out[149]:
In [151]:
# For easy stuff use Anonymous functions --> LAMBDA!
# I want to add a $ to the values
s.apply(lambda x: "{} $".format(x))
Out[151]:
In [154]:
s = pd.Series(data=["Alex", "Pippo", "Vale", "Hello"])
s
Out[154]:
In [156]:
s2 = pd.Series(data=["Comu", "Pippini", "Gela", "World"],
index=["Alex", "Pippo", "Vale", "Hello"])
s2
Out[156]:
In [157]:
s.map(s2)
Out[157]:
In [159]:
s2_dict = s2.to_dict()
In [160]:
s.map(s2_dict)
Out[160]:
In [ ]: