In [1]:
import pandas as pd
In [2]:
# Load the claims data; later cells read its 'file_name' and 'MEDREIMB_IP' columns
df = pd.read_csv('files/all_claims_files.csv')
In [3]:
# Characters 6-9 of each file name are taken as the year.
# The vectorised .str slice avoids a Python-level lambda per row and yields NaN
# (instead of raising TypeError) when a file name is missing.
df['year'] = df['file_name'].str[6:10]
In [4]:
# Sum MEDREIMB_IP within each 'year' group; the result is a Series indexed by year
df_sum = (
    df
    .groupby('year')['MEDREIMB_IP']
    .sum()
)
In [5]:
df_sum # display the total MEDREIMB_IP per year
Out[5]:
In [6]:
# Use the pd.to_datetime function to convert to time series.
# The index holds four-character year strings sliced from the file names, so the
# format is given explicitly: parsing no longer depends on format inference and
# a malformed value raises instead of being guessed at.
df_sum.index = pd.to_datetime(df_sum.index, format='%Y')
In [7]:
df_sum.shift(periods=1)  # Shift inpatient costs one year forward: each year shows the prior year's total
Out[7]:
In [8]:
df_sum.shift(periods=-1)  # Shift inpatient costs one year back (a lead): each year shows the following year's total
Out[8]:
In [9]:
df_sum.cumsum() # Calculate the running (cumulative) total of the yearly inpatient costs
Out[9]:
In [10]:
# rolling() slices a time series into sliding windows, much as groupby splits rows into groups
# the window parameter sets how many consecutive observations make up each window
# the rolling two-year sum is calculated below
df_sum.rolling(window=2).sum()
Out[10]:
In [11]:
df_sum.rolling(window=2).mean() # calculate the rolling two-year average
Out[11]:
In [12]:
def yoy_ratio(x):
    """Return the second value of a window divided by the first.

    Parameters
    ----------
    x : sequence, numpy.ndarray or pandas.Series
        A window holding at least two values, ordered oldest first.

    Returns
    -------
    The ratio ``x[1] / x[0]`` taken by position.
    """
    # rolling(...).apply may hand over a Series that keeps the original
    # (datetime) index; plain integer keys on such a Series are label lookups,
    # so go through .iloc when it exists to guarantee positional access.
    values = x.iloc if hasattr(x, "iloc") else x
    return values[1] / values[0]
In [13]:
# raw=True passes each window as a NumPy array, so the positional x[0] / x[1]
# lookups inside yoy_ratio are unambiguous (and the apply is faster).
df_sum.rolling(2).apply(yoy_ratio, raw=True) # calculate the year-over-year increase in inpatient costs
Out[13]:
In [14]:
def yoy_trend(x):
    """Return the fractional change from the first to the second value of a window.

    Parameters
    ----------
    x : sequence, numpy.ndarray or pandas.Series
        A window holding at least two values, ordered oldest first.

    Returns
    -------
    ``x[1] / x[0] - 1`` taken by position (e.g. 0.5 means a 50% increase).
    """
    # rolling(...).apply may hand over a Series that keeps the original
    # (datetime) index; plain integer keys on such a Series are label lookups,
    # so go through .iloc when it exists to guarantee positional access.
    values = x.iloc if hasattr(x, "iloc") else x
    return (values[1] / values[0]) - 1
In [15]:
# raw=True passes each window as a NumPy array, so the positional x[0] / x[1]
# lookups inside yoy_trend are unambiguous (and the apply is faster).
df_sum.rolling(2).apply(yoy_trend, raw=True) # calculate the period-to-period trend
Out[15]:
In [17]:
def rolling_GM(x):
    """Return the geometric mean of the first two values of a window.

    Parameters
    ----------
    x : sequence, numpy.ndarray or pandas.Series
        A window holding at least two values.

    Returns
    -------
    ``(x[0] * x[1]) ** 0.5`` taken by position.
    """
    # rolling(...).apply may hand over a Series that keeps the original
    # (datetime) index; plain integer keys on such a Series are label lookups,
    # so go through .iloc when it exists to guarantee positional access.
    values = x.iloc if hasattr(x, "iloc") else x
    return (values[0] * values[1]) ** .5
In [18]:
# raw=True passes each window as a NumPy array, so the positional x[0] / x[1]
# lookups inside rolling_GM are unambiguous (and the apply is faster).
df_sum.rolling(2).apply(rolling_GM, raw=True) # calculate a custom window function i.e. rolling geometric mean
Out[18]:
In [20]:
# Plot the yearly totals with a title and axis labels so the figure stands on its own;
# the trailing semicolon suppresses the text repr of the returned value.
df_sum.plot(title='Total inpatient reimbursement by year').set(xlabel='Year', ylabel='Total MEDREIMB_IP');
In [ ]: