In [5]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
%matplotlib inline
In [6]:
# Immigrant cohort: location of the probability table (edit to suit your setup).
PATH_I = 'all_immigrant_probs.csv'
# header=None: the file has no header row, so columns get integer labels.
data_I = pd.read_csv(filepath_or_buffer=PATH_I, header=None)
In [9]:
# Non-immigrant set 1: location of the probability table (edit to suit your setup).
PATH_N_1 = 'all_similars_probs.csv'
# header=None: the file has no header row, so columns get integer labels.
data_N_1 = pd.read_csv(filepath_or_buffer=PATH_N_1, header=None)
In [8]:
# Non-immigrant set 2: location of the probability table (edit to suit your setup).
PATH_N_2 = 'all_similars_probs2.csv'
# header=None: the file has no header row, so columns get integer labels.
data_N_2 = pd.read_csv(filepath_or_buffer=PATH_N_2, header=None)
In [10]:
# Preview the first five rows of the immigrant table.
# NOTE(review): the original note here read "#956" - presumably the cohort size; confirm.
data_I.head(n=5)
Out[10]:
In [29]:
# Preview the first five rows of non-immigrant set 1.
# NOTE(review): the original note here read "#931" - presumably the cohort size; confirm.
data_N_1.head(n=5)
Out[29]:
In [12]:
# Preview the first five rows of non-immigrant set 2.
# NOTE(review): the original note here read "#794" - presumably the cohort size; confirm.
data_N_2.head(n=5)
Out[12]:
In [13]:
# Immigrants: draw only the first two user columns (1 and 2) as lines,
# using column 0 for the x-axis.
data_I.iloc[:, [0, 1, 2]].plot.line(x=0)
Out[13]:
In [14]:
# Non-immigrant set 1: draw only the first two user columns (1 and 2) as lines,
# using column 0 for the x-axis.
data_N_1.iloc[:, [0, 1, 2]].plot.line(x=0)
Out[14]:
In [15]:
# Non-immigrant set 2: draw only the first two user columns (1 and 2) as lines,
# using column 0 for the x-axis.
data_N_2.iloc[:, [0, 1, 2]].plot.line(x=0)
Out[15]:
In [18]:
# Immigrants: one line per user column, column 0 on the x-axis, legend hidden.
ax = data_I.plot.line(x=0, legend=False)
ax.set_xlabel('time from date of immigration')
ax.set_ylabel('depression probability')
Out[18]:
In [19]:
# Non-immigrant set 1: one line per user column, column 0 on the x-axis, legend hidden.
ax = data_N_1.plot.line(x=0, legend=False)
ax.set_xlabel('time from date of immigration')
ax.set_ylabel('depression probability')
Out[19]:
In [20]:
# Non-immigrant set 2: one line per user column, column 0 on the x-axis, legend hidden.
ax = data_N_2.plot.line(x=0, legend=False)
ax.set_xlabel('time from date of immigration')
ax.set_ylabel('depression probability')
Out[20]:
In [24]:
# Plot the immigrant cohort's mean depression probability at each time point.
# Column 0 is the time axis and every other column is one user (the per-user
# plots in this notebook pass x=0), so the cross-user mean runs along axis=1.
# Fixes: the old cell referenced an undefined name `data` (NameError on a
# fresh kernel), used the row count as a column bound, and averaged along
# axis=0, which yields one value per user rather than one per time point.
data_I.set_index(0).mean(axis=1).plot()
Out[24]:
In [23]:
# Plot non-immigrant set 1's mean depression probability at each time point.
# Column 0 is the time axis and every other column is one user (the per-user
# plots in this notebook pass x=0), so the cross-user mean runs along axis=1.
# Fixes: the old cell used the row count as a column bound and averaged along
# axis=0, which yields one value per user rather than one per time point.
data_N_1.set_index(0).mean(axis=1).plot()
Out[23]:
In [25]:
# Plot non-immigrant set 2's mean depression probability at each time point.
# (Original note: an empty column was removed from this file.)
# Column 0 is the time axis and every other column is one user (the per-user
# plots in this notebook pass x=0), so the cross-user mean runs along axis=1.
# Fixes: the old cell used the row count as a column bound and averaged along
# axis=0, which yields one value per user rather than one per time point.
data_N_2.set_index(0).mean(axis=1).plot()
Out[25]:
In [26]:
# Immigrants: cross-user mean depression probability over time, with a
# straight-line (degree-1) least-squares trend overlaid.
# Column 0 is the time axis and every other column is one user, so the mean
# is taken across columns (axis=1), giving one value per time point. The old
# cell averaged along axis=0 over the first len(df)-1 user columns and paired
# those per-user means with the first 39 time values (hard-coded).
x = data_I.iloc[:, 0]
y = data_I.iloc[:, 1:].mean(axis=1)
# Drop time points where either value is missing; NaN would break the fit.
valid = x.notna() & y.notna()
x, y = x[valid], y[valid]
plt.plot(x, y)
plt.xlabel('time from date of immigration')
plt.ylabel('depression probability mean')
# Fit the trendline and draw it in green over the same x values.
z = np.polyfit(x, y, 1)
p = np.poly1d(z)
plt.plot(x, p(x), "g-")
Out[26]:
In [27]:
# Non-immigrant set 2: cross-user mean depression probability over time, with
# a straight-line (degree-1) least-squares trend overlaid.
# Column 0 is the time axis and every other column is one user, so the mean
# is taken across columns (axis=1), giving one value per time point. The old
# cell averaged along axis=0 over the first len(df)-1 user columns and paired
# those per-user means with the first 39 time values (hard-coded).
x = data_N_2.iloc[:, 0]
y = data_N_2.iloc[:, 1:].mean(axis=1)
# Drop time points where either value is missing; NaN would break the fit.
valid = x.notna() & y.notna()
x, y = x[valid], y[valid]
plt.plot(x, y)
plt.xlabel('time from date of immigration')
plt.ylabel('depression probability mean')
# Fit the trendline and draw it in green over the same x values.
z = np.polyfit(x, y, 1)
p = np.poly1d(z)
plt.plot(x, p(x), "g-")
Out[27]: