Create a table


In [1]:
import pandas as pd
import numpy as np

In [2]:
# One record per row: name, age, size category and a running (cumulative) counter.
data = [
    ['Alex', 10, 'little', 0],
    ['Bob', 12, 'little', 1],
    ['Clarke', 13, 'little', 1],
    ['Tim', 13, 'big', 3],
    ['John', 13, 'big', 5],
    ['John', 9, 'big', 6],
    ['Rob', 10, 'big', 8],
    ['Brit', 8, 'little', 10],
    ['Stella', 11, 'big', 9],
]
# Cast only the numeric columns to float. Passing dtype=float to the
# constructor applies it to every column, and recent pandas releases raise on
# the text columns ('Name', 'Size') instead of silently skipping them.
df = pd.DataFrame(data, columns=['Name', 'Age', 'Size', 'Cumul']).astype(
    {'Age': float, 'Cumul': float}
)
print(df)


     Name   Age    Size  Cumul
0    Alex  10.0  little    0.0
1     Bob  12.0  little    1.0
2  Clarke  13.0  little    1.0
3     Tim  13.0     big    3.0
4    John  13.0     big    5.0
5    John   9.0     big    6.0
6     Rob  10.0     big    8.0
7    Brit   8.0  little   10.0
8  Stella  11.0     big    9.0

Compute the change between consecutive values of a column, restricted to the rows that share a given value in another column


In [3]:
def calculate_change_in_field_given_value_in_other_field(df, field_for_computing_change, indicator_field, indicator_value, new_field):
    """Write row-to-row increments of a cumulative column for one group of rows.

    Only the rows where ``df[indicator_field] == indicator_value`` are used.
    Their values in ``field_for_computing_change`` are first made
    non-decreasing (a value lower than its predecessor is replaced by that
    predecessor), then each value minus the previous selected value is stored
    in ``new_field``. The first selected row is compared against 0.

    ``df`` is modified in place and nothing is returned. Rows that are not
    selected are not assigned by this call.
    """
    row_mask = df[indicator_field] == indicator_value
    # Work on a separate array so the source column itself is left untouched.
    cumulative = np.array(df.loc[row_mask, field_for_computing_change])

    # Flatten every dip: a value below its (already corrected) predecessor is
    # raised to that predecessor, so later differences are never negative.
    for position in range(1, len(cumulative)):
        previous = cumulative[position - 1]
        if cumulative[position] < previous:
            cumulative[position] = previous

    # Previous value for each selected row; the first row has 0 as its baseline.
    baseline = [0]
    baseline.extend(cumulative[:-1])
    df.loc[row_mask, new_field] = cumulative - baseline

In [4]:
# Fill "Difference" for the rows whose Size is "big".
calculate_change_in_field_given_value_in_other_field(
    df=df,
    field_for_computing_change="Cumul",
    indicator_field="Size",
    indicator_value="big",
    new_field="Difference",
)

In [5]:
# Inspect the result: only the "big" rows have a Difference so far, the others are NaN.
print(df)


     Name   Age    Size  Cumul  Difference
0    Alex  10.0  little    0.0         NaN
1     Bob  12.0  little    1.0         NaN
2  Clarke  13.0  little    1.0         NaN
3     Tim  13.0     big    3.0         3.0
4    John  13.0     big    5.0         2.0
5    John   9.0     big    6.0         1.0
6     Rob  10.0     big    8.0         2.0
7    Brit   8.0  little   10.0         NaN
8  Stella  11.0     big    9.0         1.0

In [6]:
# Fill "Difference" for the rows whose Size is "little".
calculate_change_in_field_given_value_in_other_field(
    df=df,
    field_for_computing_change="Cumul",
    indicator_field="Size",
    indicator_value="little",
    new_field="Difference",
)

In [7]:
# Inspect the result: after the second call every row has a Difference value.
print(df)


     Name   Age    Size  Cumul  Difference
0    Alex  10.0  little    0.0         0.0
1     Bob  12.0  little    1.0         1.0
2  Clarke  13.0  little    1.0         0.0
3     Tim  13.0     big    3.0         3.0
4    John  13.0     big    5.0         2.0
5    John   9.0     big    6.0         1.0
6     Rob  10.0     big    8.0         2.0
7    Brit   8.0  little   10.0         9.0
8  Stella  11.0     big    9.0         1.0

In [8]:
# List each distinct name once, in order of first appearance.
distinct_names = df["Name"].drop_duplicates()
for person in distinct_names:
    print(person)


Alex
Bob
Clarke
Tim
John
Rob
Brit
Stella

In [9]:
# Remove the cumulative column now that the per-row differences are stored.
# errors="ignore" keeps the cell re-runnable: a second run is a no-op instead
# of a KeyError on the already-removed column.
df = df.drop(columns=["Cumul"], errors="ignore")
print(df)


     Name   Age    Size  Difference
0    Alex  10.0  little         0.0
1     Bob  12.0  little         1.0
2  Clarke  13.0  little         0.0
3     Tim  13.0     big         3.0
4    John  13.0     big         2.0
5    John   9.0     big         1.0
6     Rob  10.0     big         2.0
7    Brit   8.0  little         9.0
8  Stella  11.0     big         1.0

In [ ]: