>>> print "how "+"are you?"
how are you?
$ python
Python 2.7.12 (default, Jun 29 2016, 14:05:02)
[GCC 4.2.1 Compatible Apple LLVM 7.3.0 (clang-703.0.31)] on darwin
Type "help", "copyright", "credits" or "license" for more information.
>>>
$ python myprogram.py
$ jupyter notebook
In [1]:
# Assignment binds the string "Pepe" to the variable `name`.
name = "Pepe"
In [2]:
def myfunction(x):
    """Classify a number relative to 10.

    Args:
        x: The number to classify.

    Returns:
        "bigger" when ``x`` is strictly greater than 10, otherwise
        "smaller" (so 10 itself counts as "smaller").
    """
    # Indentation is what delimits the function body and each branch.
    if x > 10:
        return "bigger"
    else:
        return "smaller"


# Single-argument print(...) behaves the same on Python 2.7 and Python 3.
print("This number is: " + myfunction(5))
In [3]:
"""This is a comment that spans
more than one line"""
# The bare triple-quoted string above is evaluated and discarded, which is
# why it can stand in for a multi-line comment.
# This is a one line comment
print("This line is executed")
In [4]:
import pandas as pd
from time import clock
In [5]:
# A list is an ordered sequence; its elements may have mixed types.
months = ["Jan", "Feb", 3, 4, "May", "Jun"]
print(months[0])  # indexing is zero-based, so this prints Jan
In [6]:
print(months[1:3])  # slice operator ":" -- start included, stop excluded: ['Feb', 3]
In [7]:
print(months[-2:])  # a negative start counts from the end: ['May', 'Jun']
Tuples are similar to lists: a sequence of elements, except that a tuple is an immutable object.
In [8]:
# A tuple is written with parentheses and indexed exactly like a list.
tup = ('physics', 'chemistry', 1997, 2000)
print(tup[0])  # first element: physics
In [9]:
print(tup[1:3])  # slicing a tuple yields a tuple: ('chemistry', 1997)
In [10]:
def hola_mundo():
    """Print the greeting "Hola Mundo!".

    Functions are pieces of code that you can call/execute; they are
    defined with the ``def`` keyword.
    """
    print("Hola Mundo!")
In [11]:
# Methods are attributes of an object that you can call on that object
# using the "." operator.
s = "How are you"
print(s.split(" "))  # ['How', 'are', 'you']
In [12]:
# range(1, 5) yields 1, 2, 3, 4 -- the stop value is excluded.
for numbers in range(1, 5):
    print(numbers)
In [13]:
united_kingdom = ["England", "Scotland", "Wales", "N Ireland"]
one = "France"
# Branches are tested top to bottom; only the first one that matches runs.
if one in united_kingdom:
    print("UK")
elif one == "France":
    print("Not UK. Bon jour!")
else:
    print("Not UK")
"house".len()?
len("house")?
In [14]:
# Show the built-in documentation for len.
help(len)
In [15]:
# len is a built-in function: the object is passed as its argument.
len("house")
Out[15]:
In [16]:
# Show the built-in documentation for the list type.
help(list)
pandas is an open-source library providing high-performance data structures and data analysis tools for the Python programming language.
In [17]:
import pandas as pd
In [18]:
# A Series is a one-dimensional labelled array: values 1..3 under labels a..c.
ss = pd.Series(data=[1, 2, 3], index=list("abc"))
ss
Out[18]:
Selection
In [19]:
ss = pd.Series([1, 2, 3],
               index=['a', 'b', 'c'])
# The original cell used ss[0] and the .ix indexer. .ix was deprecated in
# pandas 0.20 and removed in 1.0, and the positional fallback of ss[0] on a
# label-indexed Series is deprecated in recent pandas, so each access below
# states explicitly whether it goes by position or by label.
print(ss.tolist()[0])  # as a plain Python list
print(ss.iloc[0])      # by position, integer
print(ss.loc['a'])     # by label of the index
print(ss.loc['a'])     # was ss.ix['a']: .ix tried the label first
print(ss.iloc[0])      # was ss.ix[0]: .ix fell back to position (no label 0)
In [20]:
# Be careful with the slice operator: positions and labels behave differently.
print(ss.iloc[0:2])     # positions 0, 1 (stop excluded)
print(ss.loc['a':'c'])  # labels 'a', 'b', 'c' (stop included)
Built-in methods
In [21]:
# Mean of the values 1, 2, 3.
pd.Series([1, 2, 3]).mean()
Out[21]:
In [22]:
# Sum of the values 1, 2, 3.
pd.Series([1, 2, 3]).sum()
Out[22]:
In [23]:
# Standard deviation of the values 1, 2, 3.
pd.Series([1, 2, 3]).std()
Out[23]:
In [24]:
# 3x3 table of the integers 1..9, given column by column, with explicit
# row labels; `columns` pins the column order.
df = pd.DataFrame(
    {'col1': [1, 4, 7], 'col2': [2, 5, 8], 'col3': [3, 6, 9]},
    index=['row1', 'row2', 'row3'],
    columns=['col1', 'col2', 'col3'])
df
Out[24]:
Selection
Select columns
In [25]:
df['col1'] # a single column label selects one column => Series
Out[25]:
In [26]:
df[['col1']] # a list of column labels (even just one) => DataFrame
Out[26]:
Select rows
In [27]:
df.loc['row1'] # select a row by its index label
Out[27]:
In [28]:
df.iloc[0] # select a row by its integer position (0 = first row)
Out[28]:
In [29]:
# .ix was deprecated in pandas 0.20 and removed in 1.0; use the explicit
# indexers instead of letting .ix guess between label and position.
df.loc['row1']     # by row, using label (was df.ix['row1'])
print(df.iloc[0])  # by row, using position (was df.ix[0])
Combined selection
In [30]:
print(df.loc['row1', ['col1', 'col3']])  # one row label, list of column labels
print(df.loc[['row1', 'row3'], 'col1':'col3'])  # label slices include the stop
In [31]:
df.iloc[0:2,[0,2]] # row positions 0,1 and column positions 0,2
Out[31]:
In [32]:
# .ix (removed in pandas 1.0) allowed mixing a row position with column
# labels. With .loc, translate the position to its label first.
print(df.loc[df.index[0], ['col2', 'col3']])  # position & label (was df.ix[0, [...]])
print(df.loc['row1':'row3', :])               # label slice over rows, all columns
Should I always use .ix()?
.ix() selector gotcha!
In [33]:
df2 = pd.DataFrame(
    data=[[1, 2, 3], [4, 5, 6], [7, 8, 9]],
    index=[1, 2, 3],
    columns=['col1', 'col2', 'col3'])
# The gotcha: with an integer index, .ix treated integers as LABELS, so
# df2.ix[0] was an error. That ambiguity is why .ix was deprecated
# (pandas 0.20) and removed (1.0); .loc / .iloc make the choice explicit.
print(df2.loc[1])  # label 1 -> the first row
# df2.loc[0] raises KeyError (there is no label 0); df2.iloc[0] is the first row.
In [34]:
df2 = pd.DataFrame(
    data=[[1, 2, 3], [4, 5, 6], [7, 8, 9]],
    index=[1, 2, 3],
    columns=['col1', 'col2', 'col3'])
# Was df2.ix[1:3], which sliced by LABEL here because the index is integer.
# .ix was removed in pandas 1.0; .loc states the label semantics explicitly.
print(df2.loc[1:3])  # LABELS!! (1,2,3), stop included; df2.iloc[1:3] is positions 1,2
In [35]:
# These two DataFrames are the same: when index/columns are omitted, pandas
# labels rows and columns with the integers 0, 1, 2, ... (zero-based, so the
# explicit labels must start at 0 for the frames to match).
df2 = pd.DataFrame(
    data=[[1, 2, 3], [4, 5, 6], [7, 8, 9]],
    index=[0, 1, 2],
    columns=[0, 1, 2])
df3 = pd.DataFrame(
    data=[[1, 2, 3], [4, 5, 6], [7, 8, 9]])
df3
Out[35]:
DataFrame Selection Summary
In [36]:
# .ix is gone (deprecated in pandas 0.20, removed in 1.0): pick .loc for
# labels and .iloc for positions.
df['col1']      # by column
df.loc['row1']  # by row, using label
df.iloc[0]      # by row, using position
df.loc['row2']  # by row, using label (was df.ix['row2'])
df.iloc[1]      # by row, using position (was df.ix[1])
Out[36]:
Built-in method
In [37]:
df.mean() # default axis=0: aggregates along the rows, giving one mean per column
Out[37]:
Pandas Axis
axis | axis | along | each |
---|---|---|---|
axis=1 | axis="columns" | along the columns | for each row |
axis=0 | axis="index" | along the rows | for each column |
In [38]:
# Two-column frame given column by column; rows get the default labels.
df2 = pd.DataFrame(
    {"A": [1, 4, 7], "B": [2, 5, 8]},
    columns=["A", "B"])
df2
Out[38]:
In [39]:
df2.mean(axis=1) # axis=1: aggregates along the columns, giving one mean per row
Out[39]:
In [40]:
# Rebuild the two-column frame, column by column, before dropping from it.
df2 = pd.DataFrame(
    {"A": [1, 4, 7], "B": [2, 5, 8]},
    columns=["A", "B"])
df2
Out[40]:
In [41]:
df2.drop("A", axis=1) # axis=1 means "A" is looked up among the column labels, so column A is dropped
Out[41]: