In [ ]:
import pandas as pd
from pandas import dataframe

In [8]:
f = lambda x: x
double_x = lambda x: 2*x

In [3]:
def identity(x):
    return x

In [4]:
def double (x):
    return 2*x

What is this?


In [10]:
identity(5)


Out[10]:
5

In [ ]:
double_x(5)

In [9]:
from pandas import DataFrame, Series, Index
d = {
     'a':10,
     'b':20,
     'c':30
     }
s = Series(d, dtype='object')
s


Out[9]:
a    10
b    20
c    30
dtype: object

In [8]:
def test(series):
    """Normalized Shannon Index"""
    # a series in which all the entries are equal should result in normalized entropy of 1.0
    
    # eliminate 0s
    series1 = series[series!=0]
    
    if len(series) > 1:
        # calculate the maximum possible entropy for given length of input series
        max_s = -np.log(1.0/len(series))
    
        total = float(sum(series1))
        p = series1.astype('float')/float(total)

#         p_other = series1.astype('float')/float(total)
#         E_other = -p*np.log(p))/max_s
        
        return sum(-p*np.log(p))/max_s
    else:
        return 0.0
    
    
import numpy as np
s['c']


Out[8]:
30L

In [1]:
#Prabha: calculate
couties_df.P0010001.apply(lambda n: n)

#to apply to a column
def double(x):
    return 2*x
counties_df.P0010001.apply(double)


---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-1-ec60bdbfc6fa> in <module>()
      1 #Prabha: calculate
----> 2 couties_df.P0010001.apply(lambda n: n)
      3 
      4 #to apply to a column
      5 def double(x):

NameError: name 'couties_df' is not defined

In [ ]:
# islice('ABCDEFG', 2) --> A B
    # islice('ABCDEFG', 2, 4) --> C D
    # islice('ABCDEFGm', 2, None) --> C D E F G
    # islice('ABCDEFG', 0, None, 2) --> A C E G

In [ ]:
output of sum(df[df[1].str(startswith('C')][2]]) ??