In [5]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
%matplotlib inline

In [6]:
# Immigrant cohort: per-user probability table.
# The CSV has no header row, so pandas assigns integer column labels.
# Adjust the path to wherever the file lives.
PATH_I = 'all_immigrant_probs.csv'
data_I = pd.read_csv(filepath_or_buffer=PATH_I, header=None)

In [9]:
# First non-immigrant ("similars") table; no header row, so columns get
# integer labels.
PATH_N_1 = 'all_similars_probs.csv'
data_N_1 = pd.read_csv(filepath_or_buffer=PATH_N_1, header=None)

In [8]:
# Second non-immigrant ("similars") table; no header row, so columns get
# integer labels.
# NOTE(review): presumably the same data as data_N_1 with empty columns
# removed -- confirm against how the file was produced.
PATH_N_2 = 'all_similars_probs2.csv'
data_N_2 = pd.read_csv(filepath_or_buffer=PATH_N_2, header=None)

In [10]:
# Immigrants: preview the first five rows.
# Column 0 is used as the x-axis in the plots; columns 1..956 are the users.
data_I.head(n=5)


Out[10]:
0 1 2 3 4 5 6 7 8 9 ... 947 948 949 950 951 952 953 954 955 956
0 -195.0 0.080685 0.083078 0.105677 NaN 0.029739 0.084379 0.011826 0.017002 0.029653 ... 0.129844 0.019499 0.111091 0.072375 0.119258 0.317903 0.066854 0.118523 NaN 0.009324
1 -185.0 0.024921 0.024944 0.075406 NaN 0.037173 0.046541 0.022118 NaN 0.023581 ... 0.125411 0.106249 0.091641 0.076613 0.109993 0.109533 0.075414 0.079401 NaN 0.023035
2 -175.0 0.048208 0.092144 0.042366 0.043871 0.028122 0.111163 0.026134 0.022043 0.125895 ... 0.131912 NaN 0.032582 0.131267 0.083344 0.206429 0.106499 0.124147 NaN 0.019503
3 -165.0 0.062095 0.058378 0.026325 0.023334 0.029739 0.067194 0.036022 0.027748 0.035788 ... 0.117357 NaN 0.131863 0.164130 0.123599 0.068147 0.076054 0.119952 NaN 0.026576
4 -155.0 0.095869 NaN 0.050343 0.025357 0.032289 0.101354 0.037561 NaN 0.025916 ... 0.116511 0.070994 0.069701 NaN 0.117703 0.033291 0.084819 0.147340 NaN 0.028604

5 rows × 957 columns


In [29]:
# Non-immigrants (set 1): preview the first five rows.
# Column 0 is used as the x-axis in the plots; columns 1..931 are the users.
data_N_1.head(n=5)


Out[29]:
0 1 2 3 4 5 6 7 8 9 ... 922 923 924 925 926 927 928 929 930 931
0 -195.0 0.121311 NaN NaN NaN NaN 0.020433 NaN 0.042510 0.087133 ... NaN NaN 0.039678 NaN NaN NaN 0.068890 NaN NaN NaN
1 -185.0 0.073652 NaN NaN NaN 0.007970 0.018552 NaN NaN NaN ... NaN 0.029655 0.032877 NaN NaN 0.028360 0.119165 NaN NaN NaN
2 -175.0 0.061799 NaN NaN NaN NaN 0.027754 NaN NaN 0.078050 ... NaN 0.048898 0.036689 NaN NaN NaN 0.106034 NaN NaN NaN
3 -165.0 0.090105 NaN NaN NaN 0.132286 0.018465 NaN NaN NaN ... NaN 0.028506 0.060827 NaN NaN 0.128656 0.081958 NaN 0.053515 NaN
4 -155.0 0.080662 NaN NaN 0.171025 0.106955 0.023169 NaN 0.021538 0.059804 ... NaN 0.025219 0.032160 NaN 0.081611 0.108948 0.074076 NaN 0.044111 NaN

5 rows × 932 columns


In [12]:
# Non-immigrants (set 2): preview the first five rows.
# Column 0 is used as the x-axis in the plots; columns 1..794 are the users.
data_N_2.head(n=5)


Out[12]:
0 1 2 3 4 5 6 7 8 9 ... 785 786 787 788 789 790 791 792 793 794
0 -195.0 0.121311 NaN NaN 0.020433 0.042510 0.087133 NaN 0.056845 NaN ... 0.126769 NaN NaN NaN 0.039678 NaN NaN NaN 0.068890 NaN
1 -185.0 0.073652 NaN 0.007970 0.018552 NaN NaN NaN 0.119205 NaN ... 0.199948 NaN NaN 0.029655 0.032877 NaN NaN 0.028360 0.119165 NaN
2 -175.0 0.061799 NaN NaN 0.027754 NaN 0.078050 NaN 0.076231 NaN ... 0.116489 NaN NaN 0.048898 0.036689 NaN NaN NaN 0.106034 NaN
3 -165.0 0.090105 NaN 0.132286 0.018465 NaN NaN NaN 0.070508 NaN ... 0.114648 0.054111 NaN 0.028506 0.060827 NaN NaN 0.128656 0.081958 0.053515
4 -155.0 0.080662 0.171025 0.106955 0.023169 0.021538 0.059804 NaN 0.131228 NaN ... 0.087998 0.076777 NaN 0.025219 0.032160 NaN 0.081611 0.108948 0.074076 0.044111

5 rows × 795 columns


In [13]:
# Immigrants: curves of the first two users (columns 1 and 2), drawn
# against column 0 on the x-axis.
data_I.iloc[:, :3].plot(x=0)


Out[13]:
<matplotlib.axes._subplots.AxesSubplot at 0x114f28bd0>

In [14]:
# Non-immigrants (set 1): curves of the first two users (columns 1 and 2),
# drawn against column 0 on the x-axis.
data_N_1.iloc[:, :3].plot(x=0)


Out[14]:
<matplotlib.axes._subplots.AxesSubplot at 0x11614ec10>

In [15]:
# Non-immigrants (set 2): curves of the first two users (columns 1 and 2),
# drawn against column 0 on the x-axis.
data_N_2.iloc[:, :3].plot(x=0)


Out[15]:
<matplotlib.axes._subplots.AxesSubplot at 0x1023d6790>

In [18]:
# Immigrants: every user on one axes -- column 0 on the x-axis, one line per
# remaining column, legend suppressed because there are hundreds of lines.
data_I.plot(legend=False, x=0)
plt.gca().set_xlabel('time from date of immigration')
plt.gca().set_ylabel('depression probability')


Out[18]:
<matplotlib.text.Text at 0x11d28e190>

In [19]:
# Non-immigrants (set 1): every user on one axes -- column 0 on the x-axis,
# one line per remaining column, legend suppressed.
data_N_1.plot(legend=False, x=0)
plt.gca().set_xlabel('time from date of immigration')
plt.gca().set_ylabel('depression probability')


Out[19]:
<matplotlib.text.Text at 0x11f586990>

In [20]:
# Non-immigrants (set 2): every user on one axes -- column 0 on the x-axis,
# one line per remaining column, legend suppressed.
data_N_2.plot(legend=False, x=0)
plt.gca().set_xlabel('time from date of immigration')
plt.gca().set_ylabel('depression probability')


Out[20]:
<matplotlib.text.Text at 0x121fea210>

In [24]:
# Plot the overall mean (immigrants): the cohort mean at each time point.
# Column 0 holds the time axis and every other column is one user, so the
# mean is taken across columns (axis=1); NaN entries are skipped by default.
# The previous version referenced an undefined name `data`, used the row
# count to pick columns, and averaged over time instead of over users.
data_I.set_index(0).mean(axis=1).plot()


Out[24]:
<matplotlib.axes._subplots.AxesSubplot at 0x12150c3d0>

In [23]:
# Plot the overall mean (non-immigrants, set 1): the cohort mean at each
# time point.
# Column 0 holds the time axis and every other column is one user, so the
# mean is taken across columns (axis=1); NaN entries are skipped by default.
# The previous version used the row count to pick columns and averaged over
# time instead of over users.
data_N_1.set_index(0).mean(axis=1).plot()


Out[23]:
<matplotlib.axes._subplots.AxesSubplot at 0x123cc3690>

In [25]:
# Plot the overall mean (non-immigrants, set 2 -> removed empty column): the
# cohort mean at each time point.
# Column 0 holds the time axis and every other column is one user, so the
# mean is taken across columns (axis=1); NaN entries are skipped by default.
# The previous version used the row count to pick columns and averaged over
# time instead of over users.
data_N_2.set_index(0).mean(axis=1).plot()


Out[25]:
<matplotlib.axes._subplots.AxesSubplot at 0x123f1ced0>

In [26]:
# Immigrants: mean depression probability per time point, with a linear trend.
# Column 0 is the time axis and every remaining column is one user, so the
# cohort mean at each time point is taken across columns (axis=1, NaN skipped).
# The full time axis is used instead of a hard-coded 39-row slice.
x = data_I.iloc[:, 0]
y = data_I.iloc[:, 1:].mean(axis=1)
plt.plot(x, y)

plt.xlabel('time from date of immigration')
plt.ylabel('depression probability mean')

# Fit the trend line only where a mean exists: np.polyfit does not skip NaN,
# and a time point with no observations at all yields a NaN mean.
valid = y.notna()
z = np.polyfit(x[valid], y[valid], 1)
p = np.poly1d(z)
plt.plot(x, p(x), "g-")


Out[26]:
[<matplotlib.lines.Line2D at 0x1242c14d0>]

In [27]:
# Non-immigrants (set 2): mean depression probability per time point, with a
# linear trend.
# Column 0 is the time axis and every remaining column is one user, so the
# cohort mean at each time point is taken across columns (axis=1, NaN skipped).
# The full time axis is used instead of a hard-coded 39-row slice.
x = data_N_2.iloc[:, 0]
y = data_N_2.iloc[:, 1:].mean(axis=1)
plt.plot(x, y)

plt.xlabel('time from date of immigration')
plt.ylabel('depression probability mean')

# Fit the trend line only where a mean exists: np.polyfit does not skip NaN,
# and a time point with no observations at all yields a NaN mean.
valid = y.notna()
z = np.polyfit(x[valid], y[valid], 1)
p = np.poly1d(z)
plt.plot(x, p(x), "g-")


Out[27]:
[<matplotlib.lines.Line2D at 0x1241e1690>]