In [3]:
import datetime as dt
import pandas as pd
import numpy.random as rnd
import numpy as np
from functools import reduce

def manyseries_rand_date_diff_keep_year_and_interval(*sdts, rng=None):
    """Shift several date series by one shared random offset per row.

    For every row label, a single random timedelta is drawn and added to that
    row in *all* input series. Because each series receives the same offset,
    the difference between any two series is unchanged for that row. The
    offset is bounded so that every non-missing date stays between 1 January
    and 31 December of its original year (time of day of the bounds is taken
    from the date itself).

    Parameters
    ----------
    *sdts : pandas.Series of datetimes
        Series to shift. They are aligned on their index labels; rows that are
        NaT in one series do not restrict the offset allowed for the others.
    rng : object with a ``uniform(size=...)`` method, optional
        Random source, e.g. ``numpy.random.default_rng(seed)``, for
        reproducible draws. Defaults to the global ``numpy.random`` state.

    Returns
    -------
    list of pandas.Series
        The shifted series, in the same order as the inputs. An empty list is
        returned when no series are given.

    Notes
    -----
    The offset is continuous, so shifted values generally acquire a
    non-midnight time of day.
    """
    if not sdts:
        return []

    # Per series: how far each date may move backwards (to 1 Jan) and
    # forwards (to 31 Dec) without leaving its calendar year.
    earliest_shifts = [
        sdt.apply(lambda ts: ts.replace(month=1, day=1) - ts) for sdt in sdts
    ]
    latest_shifts = [
        sdt.apply(lambda ts: ts.replace(month=12, day=31) - ts) for sdt in sdts
    ]

    # Intersect the allowed ranges row by row: the tightest lower bound is the
    # largest "earliest" shift, the tightest upper bound the smallest "latest"
    # shift. Missing values are skipped by max/min.
    lower = pd.concat(earliest_shifts, axis=1).max(axis=1)
    upper = pd.concat(latest_shifts, axis=1).min(axis=1)

    # The fractions must carry the same index as the bounds; a default
    # RangeIndex would misalign with any non-default input index and turn
    # every offset into NaT.
    draw_uniform = rnd.uniform if rng is None else rng.uniform
    fraction = pd.Series(draw_uniform(size=len(upper)), index=upper.index)
    rand_change = fraction * (upper - lower) + lower

    return [sdt + rand_change for sdt in sdts]

def setup_diffs(df1, df2, key=None):
    """Add day-difference columns relative to birth date, in place.

    Writes ``diff_birth_death`` into ``df1`` and ``diff_birth_diag_start`` /
    ``diff_birth_diag_end`` into ``df2``. All three columns hold the
    difference as a float number of days (NaN where a date is missing).

    Parameters
    ----------
    df1 : pandas.DataFrame
        Must contain datetime columns ``date_birth`` and ``date_death``
        (and ``key`` when it is given).
    df2 : pandas.DataFrame
        Must contain datetime columns ``diag_start`` and ``diag_end``
        (and ``key`` when it is given).
    key : str, optional
        Name of an identifier column present in both frames. When given, each
        row of ``df2`` is compared with the birth date of the ``df1`` row that
        has the same ``key`` value; values of ``key`` in ``df1`` must be
        unique. When omitted (the default), the birth date is matched purely
        by row label: row ``i`` of ``df2`` is compared with row ``i`` of
        ``df1``, regardless of any identifier columns.

    Returns
    -------
    None
        Both frames are modified in place.
    """
    one_day = np.timedelta64(1, 'D')

    df1['diff_birth_death'] = (df1['date_death'] - df1['date_birth']) / one_day

    if key is None:
        # Pandas aligns the subtraction below on index labels.
        birth = df1['date_birth']
    else:
        # Look up each df2 row's birth date through the shared identifier;
        # the result is indexed like df2, unmatched identifiers give NaT.
        birth = df2[key].map(df1.set_index(key)['date_birth'])

    df2['diff_birth_diag_start'] = (df2['diag_start'] - birth) / one_day
    df2['diff_birth_diag_end'] = (df2['diag_end'] - birth) / one_day

# --- Example data -----------------------------------------------------------
# df1: one row per person, with birth and (possibly missing) death dates.
df1 = pd.DataFrame(
    {
        'person_id': [1, 2, 3, 4, 5],
        'date_birth': ['12/30/1961', '05/29/1967', '02/03/1957', '7/27/1959', '01/13/1971'],
        'date_death': ['07/23/2017', '05/29/2017', '02/03/2015', np.nan, np.nan],
    }
).assign(
    date_birth=lambda frame: pd.to_datetime(frame['date_birth']),
    date_death=lambda frame: pd.to_datetime(frame['date_death']),
)

# df2: one row per visit, with the start and end date of the diagnosis.
df2 = pd.DataFrame(
    {
        'person_id': [1, 1, 1, 2, 3],
        'visit_id': ['A1', 'A2', 'A3', 'B1', 'B2'],
        'diag_start': ['01/01/2012', '02/25/2017', '02/03/2015', '07/27/2016', '01/13/2011'],
        'diag_end': ['05/03/2012', '05/29/2017', '03/03/2015', '08/15/2016', '02/13/2011'],
    }
).assign(
    diag_start=lambda frame: pd.to_datetime(frame['diag_start']),
    diag_end=lambda frame: pd.to_datetime(frame['diag_end']),
)

# --- Day differences before shifting ----------------------------------------
setup_diffs(df1, df2)
display(df1, df2)

# --- Shift all four date columns by the same random per-row offset ----------
series_list = manyseries_rand_date_diff_keep_year_and_interval(
    df1['date_birth'],
    df1['date_death'],
    df2['diag_start'],
    df2['diag_end'],
)
# Write the shifted series back in the order they were passed in.
df1['date_birth'] = series_list[0]
df1['date_death'] = series_list[1]
df2['diag_start'] = series_list[2]
df2['diag_end'] = series_list[3]

# --- Day differences after shifting (shown again for comparison) ------------
setup_diffs(df1, df2)
display(df1, df2)


person_id date_birth date_death diff_birth_death
0 1 1961-12-30 2017-07-23 20294.0
1 2 1967-05-29 2017-05-29 18263.0
2 3 1957-02-03 2015-02-03 21184.0
3 4 1959-07-27 NaT NaN
4 5 1971-01-13 NaT NaN
person_id visit_id diag_start diag_end diff_birth_diag_start diff_birth_diag_end
0 1 A1 2012-01-01 2012-05-03 18264.0 18387.0
1 1 A2 2017-02-25 2017-05-29 18170.0 18263.0
2 1 A3 2015-02-03 2015-03-03 21184.0 21212.0
3 2 B1 2016-07-27 2016-08-15 20820.0 20839.0
4 3 B2 2011-01-13 2011-02-13 14610.0 14641.0
person_id date_birth date_death diff_birth_death
0 1 1961-12-30 16:38:26.295379634 2017-07-23 16:38:26.295379634 20294.0
1 2 1967-06-16 11:21:56.426229281 2017-06-16 11:21:56.426229281 18263.0
2 3 1957-10-15 18:57:33.479382884 2015-10-15 18:57:33.479382884 21184.0
3 4 1959-05-19 20:23:49.830824664 NaT NaN
4 5 1971-05-25 12:49:10.473685890 NaT NaN
person_id visit_id diag_start diag_end diff_birth_diag_start diff_birth_diag_end
0 1 A1 2012-01-01 16:38:26.295379634 2012-05-03 16:38:26.295379634 18264.0 18387.0
1 1 A2 2017-03-15 11:21:56.426229281 2017-06-16 11:21:56.426229281 18170.0 18263.0
2 1 A3 2015-10-15 18:57:33.479382884 2015-11-12 18:57:33.479382884 21184.0 21212.0
3 2 B1 2016-05-19 20:23:49.830824664 2016-06-07 20:23:49.830824664 20820.0 20839.0
4 3 B2 2011-05-25 12:49:10.473685890 2011-06-25 12:49:10.473685890 14610.0 14641.0

In [ ]:
# Show df1; its date columns were overwritten with the shifted values in the cell above.
df1

In [ ]: