In [1]:
import pandas as pd
import numpy as np
In [2]:
# Sample records; each row is [Name, Age, Size, Cumul].
data = [
    ['Alex', 10, 'little', 0],
    ['Bob', 12, 'little', 1],
    ['Clarke', 13, 'little', 1],
    ['Tim', 13, 'big', 3],
    ['John', 13, 'big', 5],
    ['John', 9, 'big', 6],
    ['Rob', 10, 'big', 8],
    ['Brit', 8, 'little', 10],
    ['Stella', 11, 'big', 9],
]
# A frame-wide dtype=float cannot be applied to the text columns (recent
# pandas raises instead of silently skipping them), so build the frame first
# and cast only the numeric columns to float.
df = pd.DataFrame(data, columns=['Name', 'Age', 'Size', 'Cumul']).astype(
    {'Age': float, 'Cumul': float}
)
print(df)
In [3]:
def calculate_change_in_field_given_value_in_other_field(df, field_for_computing_change, indicator_field, indicator_value, new_field):
    """Store row-to-row increments of a cumulative column for one group of rows.

    Rows where ``df[indicator_field] == indicator_value`` are selected and
    processed in their existing row order:

    1. The selected values of ``field_for_computing_change`` are clamped to
       their running maximum, so a value lower than an earlier one is
       replaced by that earlier value.
    2. Each clamped value has its predecessor subtracted. The first selected
       row is compared against 0 and therefore keeps its own value.

    The result is written to ``df[new_field]`` on the selected rows only.
    ``df`` is modified in place and nothing is returned.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame that is read from and written to.
    field_for_computing_change : column label
        Column holding the cumulative values.
    indicator_field : column label
        Column used to select rows.
    indicator_value : scalar
        Value ``indicator_field`` must equal for a row to be selected.
    new_field : column label
        Column that receives the increments.

    Notes
    -----
    A NaN among the selected values propagates through the running maximum,
    so that row and every later selected row receive NaN.
    """
    selected_rows = df[indicator_field] == indicator_value
    # np.array copies, so the clamping below never alters the source column.
    cumulative = np.array(df.loc[selected_rows, field_for_computing_change])
    # Running maximum: forces the series to be non-decreasing.
    non_decreasing = np.maximum.accumulate(cumulative)
    # prepend=0 makes the first increment equal to the first value itself.
    increments = np.diff(non_decreasing, prepend=0)
    df.loc[selected_rows, new_field] = increments
In [4]:
# Fill "Difference" for the rows whose Size is "big".
calculate_change_in_field_given_value_in_other_field(
    df=df,
    field_for_computing_change="Cumul",
    indicator_field="Size",
    indicator_value="big",
    new_field="Difference",
)
In [5]:
# Show the frame after the call for Size == "big"; only those rows have been
# assigned a "Difference" value at this point.
print(df)
In [6]:
# Fill "Difference" for the rows whose Size is "little".
calculate_change_in_field_given_value_in_other_field(
    df=df,
    field_for_computing_change="Cumul",
    indicator_field="Size",
    indicator_value="little",
    new_field="Difference",
)
In [7]:
# Show the frame after the call for Size == "little" has also written its
# "Difference" values.
print(df)
In [8]:
# Print each distinct value of the "Name" column once, one per line.
for name in pd.unique(df["Name"]):
    print(name)
In [9]:
# Remove the "Cumul" column in place. The membership guard keeps this cell
# re-runnable: deleting a column that is already gone would raise KeyError.
if "Cumul" in df.columns:
    del df["Cumul"]
print(df)
In [ ]: