In [2]:
from pandas import *
In [3]:
df = read_csv("pandas_different_indices.txt",sep=r'\s+',names=['DATE','GDP','USRECQ'],index_col=0,parse_dates=True)
In [4]:
df
Out[4]:
I would like to count the number of observation for which USRECQ[DATE+1]==1 and GDP[DATE]>a if GDP[DATE]!='NAN'.
In [30]:
df[np.logical_and(df.shift(-1)['USRECQ'] == 1,df.GDP > 0)]
Out[30]:
In [33]:
df[np.logical_and(df.shift(-1)['USRECQ'] == 1,df.GDP > 0.07)]
Out[33]:
In [35]:
df[np.logical_and(df.shift(-1)['USRECQ'] == 1,df.GDP < 0.04)]
Out[35]:
So we can take the len
of any of these expressions to get a count
In [36]:
len(df[np.logical_and(df.shift(-1)['USRECQ'] == 1,df.GDP < 0.04)])
Out[36]:
In [37]:
len(df[np.logical_and(df.shift(-1)['USRECQ'] == 1,df.GDP > 0)])
Out[37]:
In [39]:
def count_by_gdp(df,gdp):
''' Count entries where the next entry has USRECQ == 1 and the entry's GDP
is > gdp
'''
return len(df[np.logical_and(df.shift(-1)['USRECQ'] == 1,df.GDP > gdp)])
In [40]:
count_by_gdp(df,0.03)
Out[40]:
In [41]:
count_by_gdp(df,0.07)
Out[41]:
In [43]:
count_by_gdp(df,-0.01)
Out[43]:
In [ ]: