notebook.community

Edit and run



In [5]:

    
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
%matplotlib inline



In [6]:

    
# Immigrant cohort probabilities, read without a header row so the columns
# get integer labels. Change the path as needed.
PATH_I = "all_immigrant_probs.csv"
data_I = pd.read_csv(filepath_or_buffer=PATH_I, header=None)



In [9]:

    
# First non-immigrant ("similars") comparison set, read without a header row
# so the columns get integer labels.
PATH_N_1 = "all_similars_probs.csv"
data_N_1 = pd.read_csv(filepath_or_buffer=PATH_N_1, header=None)



In [8]:

    
# Second non-immigrant ("similars") comparison set, read without a header row
# so the columns get integer labels.
PATH_N_2 = "all_similars_probs2.csv"
data_N_2 = pd.read_csv(filepath_or_buffer=PATH_N_2, header=None)



In [10]:

    
# Immigrants: preview the first five rows (column 0 plus 956 user columns).
data_I.head(n=5)









    Out[10]:






  
    
      
      0
      1
      2
      3
      4
      5
      6
      7
      8
      9
      ...
      947
      948
      949
      950
      951
      952
      953
      954
      955
      956
    
  
  
    
      0
      -195.0
      0.080685
      0.083078
      0.105677
      NaN
      0.029739
      0.084379
      0.011826
      0.017002
      0.029653
      ...
      0.129844
      0.019499
      0.111091
      0.072375
      0.119258
      0.317903
      0.066854
      0.118523
      NaN
      0.009324
    
    
      1
      -185.0
      0.024921
      0.024944
      0.075406
      NaN
      0.037173
      0.046541
      0.022118
      NaN
      0.023581
      ...
      0.125411
      0.106249
      0.091641
      0.076613
      0.109993
      0.109533
      0.075414
      0.079401
      NaN
      0.023035
    
    
      2
      -175.0
      0.048208
      0.092144
      0.042366
      0.043871
      0.028122
      0.111163
      0.026134
      0.022043
      0.125895
      ...
      0.131912
      NaN
      0.032582
      0.131267
      0.083344
      0.206429
      0.106499
      0.124147
      NaN
      0.019503
    
    
      3
      -165.0
      0.062095
      0.058378
      0.026325
      0.023334
      0.029739
      0.067194
      0.036022
      0.027748
      0.035788
      ...
      0.117357
      NaN
      0.131863
      0.164130
      0.123599
      0.068147
      0.076054
      0.119952
      NaN
      0.026576
    
    
      4
      -155.0
      0.095869
      NaN
      0.050343
      0.025357
      0.032289
      0.101354
      0.037561
      NaN
      0.025916
      ...
      0.116511
      0.070994
      0.069701
      NaN
      0.117703
      0.033291
      0.084819
      0.147340
      NaN
      0.028604
    
  

5 rows × 957 columns



In [29]:

    
# Non-immigrants, set 1: preview the first five rows (column 0 plus 931 user columns).
data_N_1.head(n=5)









    Out[29]:






  
    
      
      0
      1
      2
      3
      4
      5
      6
      7
      8
      9
      ...
      922
      923
      924
      925
      926
      927
      928
      929
      930
      931
    
  
  
    
      0
      -195.0
      0.121311
      NaN
      NaN
      NaN
      NaN
      0.020433
      NaN
      0.042510
      0.087133
      ...
      NaN
      NaN
      0.039678
      NaN
      NaN
      NaN
      0.068890
      NaN
      NaN
      NaN
    
    
      1
      -185.0
      0.073652
      NaN
      NaN
      NaN
      0.007970
      0.018552
      NaN
      NaN
      NaN
      ...
      NaN
      0.029655
      0.032877
      NaN
      NaN
      0.028360
      0.119165
      NaN
      NaN
      NaN
    
    
      2
      -175.0
      0.061799
      NaN
      NaN
      NaN
      NaN
      0.027754
      NaN
      NaN
      0.078050
      ...
      NaN
      0.048898
      0.036689
      NaN
      NaN
      NaN
      0.106034
      NaN
      NaN
      NaN
    
    
      3
      -165.0
      0.090105
      NaN
      NaN
      NaN
      0.132286
      0.018465
      NaN
      NaN
      NaN
      ...
      NaN
      0.028506
      0.060827
      NaN
      NaN
      0.128656
      0.081958
      NaN
      0.053515
      NaN
    
    
      4
      -155.0
      0.080662
      NaN
      NaN
      0.171025
      0.106955
      0.023169
      NaN
      0.021538
      0.059804
      ...
      NaN
      0.025219
      0.032160
      NaN
      0.081611
      0.108948
      0.074076
      NaN
      0.044111
      NaN
    
  

5 rows × 932 columns



In [12]:

    
# Non-immigrants, set 2: preview the first five rows (column 0 plus 794 user columns).
data_N_2.head(n=5)









    Out[12]:






  
    
      
      0
      1
      2
      3
      4
      5
      6
      7
      8
      9
      ...
      785
      786
      787
      788
      789
      790
      791
      792
      793
      794
    
  
  
    
      0
      -195.0
      0.121311
      NaN
      NaN
      0.020433
      0.042510
      0.087133
      NaN
      0.056845
      NaN
      ...
      0.126769
      NaN
      NaN
      NaN
      0.039678
      NaN
      NaN
      NaN
      0.068890
      NaN
    
    
      1
      -185.0
      0.073652
      NaN
      0.007970
      0.018552
      NaN
      NaN
      NaN
      0.119205
      NaN
      ...
      0.199948
      NaN
      NaN
      0.029655
      0.032877
      NaN
      NaN
      0.028360
      0.119165
      NaN
    
    
      2
      -175.0
      0.061799
      NaN
      NaN
      0.027754
      NaN
      0.078050
      NaN
      0.076231
      NaN
      ...
      0.116489
      NaN
      NaN
      0.048898
      0.036689
      NaN
      NaN
      NaN
      0.106034
      NaN
    
    
      3
      -165.0
      0.090105
      NaN
      0.132286
      0.018465
      NaN
      NaN
      NaN
      0.070508
      NaN
      ...
      0.114648
      0.054111
      NaN
      0.028506
      0.060827
      NaN
      NaN
      0.128656
      0.081958
      0.053515
    
    
      4
      -155.0
      0.080662
      0.171025
      0.106955
      0.023169
      0.021538
      0.059804
      NaN
      0.131228
      NaN
      ...
      0.087998
      0.076777
      NaN
      0.025219
      0.032160
      NaN
      0.081611
      0.108948
      0.074076
      0.044111
    
  

5 rows × 795 columns



In [13]:

    
# Immigrants: curves for the first two users (columns 1 and 2), with
# column 0 on the x-axis.
data_I.iloc[:, :3].plot(x=0)









    Out[13]:





<matplotlib.axes._subplots.AxesSubplot at 0x114f28bd0>



In [14]:

    
# Non-immigrants, set 1: curves for the first two users (columns 1 and 2),
# with column 0 on the x-axis.
data_N_1.iloc[:, :3].plot(x=0)









    Out[14]:





<matplotlib.axes._subplots.AxesSubplot at 0x11614ec10>



In [15]:

    
# Non-immigrants, set 2: curves for the first two users (columns 1 and 2),
# with column 0 on the x-axis.
data_N_2.iloc[:, :3].plot(x=0)









    Out[15]:





<matplotlib.axes._subplots.AxesSubplot at 0x1023d6790>



In [18]:

    
# Immigrants: one line per user column against column 0; the legend is
# suppressed. Labels are applied to the axes the DataFrame plot just made current.
data_I.plot(x=0, legend=False)
plt.gca().set_xlabel('time from date of immigration')
plt.gca().set_ylabel('depression probability')









    Out[18]:





<matplotlib.text.Text at 0x11d28e190>



In [19]:

    
# Non-immigrants, set 1: one line per user column against column 0; the
# legend is suppressed. Labels are applied to the axes the DataFrame plot
# just made current.
data_N_1.plot(x=0, legend=False)
plt.gca().set_xlabel('time from date of immigration')
plt.gca().set_ylabel('depression probability')









    Out[19]:





<matplotlib.text.Text at 0x11f586990>



In [20]:

    
# Non-immigrants, set 2: one line per user column against column 0; the
# legend is suppressed. Labels are applied to the axes the DataFrame plot
# just made current.
data_N_2.plot(x=0, legend=False)
plt.gca().set_xlabel('time from date of immigration')
plt.gca().set_ylabel('depression probability')









    Out[20]:





<matplotlib.text.Text at 0x121fea210>



In [24]:

    
# Plot the overall mean (immigrants): average over every user column at each
# time point, with column 0 (time) as the x-axis.
# Uses data_I throughout (no other frame name is defined in this notebook) and
# averages across users (axis=1) rather than bounding the column selection by
# the row count, which only gave per-user means for the first few users.
data_I.set_index(0).mean(axis=1).plot()









    Out[24]:





<matplotlib.axes._subplots.AxesSubplot at 0x12150c3d0>



In [23]:

    
# Plot the overall mean (non-immigrants, set 1): average over every user
# column at each time point, with column 0 (time) as the x-axis.
# Averages across users (axis=1) rather than bounding the column selection by
# the row count, which only gave per-user means for the first few users.
data_N_1.set_index(0).mean(axis=1).plot()









    Out[23]:





<matplotlib.axes._subplots.AxesSubplot at 0x123cc3690>



In [25]:

    
# Plot the overall mean (non-immigrants, set 2 -> empty column removed):
# average over every user column at each time point, with column 0 (time) as
# the x-axis.
# Averages across users (axis=1) rather than bounding the column selection by
# the row count, which only gave per-user means for the first few users.
data_N_2.set_index(0).mean(axis=1).plot()









    Out[25]:





<matplotlib.axes._subplots.AxesSubplot at 0x123f1ced0>



In [26]:

    
# Immigrants: mean depression probability over time, plus a linear trendline.
# x is the full time column and y is the mean over all user columns at each
# time point, so x and y are aligned row by row. (Previously y held per-user
# means of the first few user columns and x was truncated to match its length.)
x = data_I.iloc[:, 0]
y = data_I.iloc[:, 1:].mean(axis=1)  # NaN entries are skipped by default
plt.plot(x, y)

plt.xlabel('time from date of immigration')
plt.ylabel('depression probability mean')

# calc the trendline; time points where no user has a value are excluded
# because NaN inputs would break the least-squares fit
z = np.polyfit(x[y.notna()], y[y.notna()], 1)
p = np.poly1d(z)
plt.plot(x, p(x), "g-")









    Out[26]:





[<matplotlib.lines.Line2D at 0x1242c14d0>]



In [27]:

    
# Non-immigrants (set 2): mean depression probability over time, plus a
# linear trendline.
# x is the full time column and y is the mean over all user columns at each
# time point, so x and y are aligned row by row. (Previously y held per-user
# means of the first few user columns and x was truncated to match its length.)
x = data_N_2.iloc[:, 0]
y = data_N_2.iloc[:, 1:].mean(axis=1)  # NaN entries are skipped by default
plt.plot(x, y)

plt.xlabel('time from date of immigration')
plt.ylabel('depression probability mean')

# calc the trendline; time points where no user has a value are excluded
# because NaN inputs would break the least-squares fit
z = np.polyfit(x[y.notna()], y[y.notna()], 1)
p = np.poly1d(z)
plt.plot(x, p(x), "g-")









    Out[27]:





[<matplotlib.lines.Line2D at 0x1241e1690>]

	0	1	2	3	4	5	6	7	8	9	...	947	948	949	950	951	952	953	954	955	956
0	-195.0	0.080685	0.083078	0.105677	NaN	0.029739	0.084379	0.011826	0.017002	0.029653	...	0.129844	0.019499	0.111091	0.072375	0.119258	0.317903	0.066854	0.118523	NaN	0.009324
1	-185.0	0.024921	0.024944	0.075406	NaN	0.037173	0.046541	0.022118	NaN	0.023581	...	0.125411	0.106249	0.091641	0.076613	0.109993	0.109533	0.075414	0.079401	NaN	0.023035
2	-175.0	0.048208	0.092144	0.042366	0.043871	0.028122	0.111163	0.026134	0.022043	0.125895	...	0.131912	NaN	0.032582	0.131267	0.083344	0.206429	0.106499	0.124147	NaN	0.019503
3	-165.0	0.062095	0.058378	0.026325	0.023334	0.029739	0.067194	0.036022	0.027748	0.035788	...	0.117357	NaN	0.131863	0.164130	0.123599	0.068147	0.076054	0.119952	NaN	0.026576
4	-155.0	0.095869	NaN	0.050343	0.025357	0.032289	0.101354	0.037561	NaN	0.025916	...	0.116511	0.070994	0.069701	NaN	0.117703	0.033291	0.084819	0.147340	NaN	0.028604

	0	1	2	3	4	5	6	7	8	9	...	922	923	924	925	926	927	928	929	930	931
0	-195.0	0.121311	NaN	NaN	NaN	NaN	0.020433	NaN	0.042510	0.087133	...	NaN	NaN	0.039678	NaN	NaN	NaN	0.068890	NaN	NaN	NaN
1	-185.0	0.073652	NaN	NaN	NaN	0.007970	0.018552	NaN	NaN	NaN	...	NaN	0.029655	0.032877	NaN	NaN	0.028360	0.119165	NaN	NaN	NaN
2	-175.0	0.061799	NaN	NaN	NaN	NaN	0.027754	NaN	NaN	0.078050	...	NaN	0.048898	0.036689	NaN	NaN	NaN	0.106034	NaN	NaN	NaN
3	-165.0	0.090105	NaN	NaN	NaN	0.132286	0.018465	NaN	NaN	NaN	...	NaN	0.028506	0.060827	NaN	NaN	0.128656	0.081958	NaN	0.053515	NaN
4	-155.0	0.080662	NaN	NaN	0.171025	0.106955	0.023169	NaN	0.021538	0.059804	...	NaN	0.025219	0.032160	NaN	0.081611	0.108948	0.074076	NaN	0.044111	NaN