In [2]:
from pandas import *

In [3]:
df = read_csv("pandas_different_indices.txt",sep=r'\s+',names=['DATE','GDP','USRECQ'],index_col=0,parse_dates=True)

In [4]:
df


Out[4]:
GDP USRECQ
DATE
1947-01-01 NaN 0
1947-04-01 NaN 0
1947-07-01 NaN 0
1947-10-01 NaN 0
1948-01-01 0.095023 0
1948-04-01 0.107998 0
1948-07-01 0.117553 0
1948-10-01 0.078371 0
1949-01-01 0.034560 1
1949-04-01 -0.004397 1

I would like to count the number of observation for which USRECQ[DATE+1]==1 and GDP[DATE]>a if GDP[DATE]!='NAN'.


In [30]:
df[np.logical_and(df.shift(-1)['USRECQ'] == 1,df.GDP > 0)]


Out[30]:
GDP USRECQ
DATE
1948-10-01 0.078371 0
1949-01-01 0.034560 1

In [33]:
df[np.logical_and(df.shift(-1)['USRECQ'] == 1,df.GDP > 0.07)]


Out[33]:
GDP USRECQ
DATE
1948-10-01 0.078371 0

In [35]:
df[np.logical_and(df.shift(-1)['USRECQ'] == 1,df.GDP < 0.04)]


Out[35]:
GDP USRECQ
DATE
1949-01-01 0.03456 1

So we can take the len of any of these expressions to get a count


In [36]:
len(df[np.logical_and(df.shift(-1)['USRECQ'] == 1,df.GDP < 0.04)])


Out[36]:
1

In [37]:
len(df[np.logical_and(df.shift(-1)['USRECQ'] == 1,df.GDP > 0)])


Out[37]:
2

In [39]:
def count_by_gdp(df,gdp):
    ''' Count entries where the next entry has USRECQ == 1 and the entry's GDP
        is > gdp
    '''
    return len(df[np.logical_and(df.shift(-1)['USRECQ'] == 1,df.GDP > gdp)])

In [40]:
count_by_gdp(df,0.03)


Out[40]:
2

In [41]:
count_by_gdp(df,0.07)


Out[41]:
1

In [43]:
count_by_gdp(df,-0.01)


Out[43]:
2

In [ ]: