In [40]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from scipy.stats import norm
from sklearn.preprocessing import StandardScaler
from scipy import stats
from mpl_toolkits.mplot3d import Axes3D
from sklearn.model_selection import train_test_split
import warnings
import math


warnings.filterwarnings('ignore')
%matplotlib inline

In [41]:
df_data = pd.read_csv('./data/encirclement/AW_RPV_lassy.csv')

In [ ]:


In [ ]:


In [ ]:


In [42]:
df_data.describe()


Out[42]:
MainVertexSeq VertexID X Y Z VertexDepth MeshScalar
count 1951.000000 1951.000000 1951.000000 1951.000000 1951.000000 1951.000000 1951.000000
mean 58.143004 10090.904664 -33.095125 -10.197815 3.863532 1.225525 143.094823
std 33.707544 3463.515161 2.824607 8.407530 10.205477 0.644013 19.772935
min 0.000000 4941.000000 -40.462500 -23.121200 -12.477900 0.000000 102.000000
25% 29.000000 6760.000000 -34.900500 -18.791000 -5.839210 1.000000 126.000000
50% 58.000000 9958.000000 -32.744900 -9.663790 3.911340 1.000000 144.000000
75% 87.000000 13270.000000 -31.154650 -1.954120 13.274100 1.000000 157.000000
max 116.000000 16047.000000 -27.047900 2.138300 19.653700 3.000000 184.000000

In [43]:
fig = plt.figure()
ax = Axes3D(fig)
ax.scatter(df_data['X'], df_data['Y'], df_data['Z'])
plt.show()



In [44]:
df_data_scalars = pd.concat([df_data['MainVertexSeq'], df_data['MeshScalar']], axis=1)

In [45]:
ax = sns.boxplot(x="MainVertexSeq", y="MeshScalar", data=df_data_scalars)



In [46]:
g1 = df_data.groupby(['MainVertexSeq']).median()
g1 = g1.reset_index()

In [47]:
g1.head(2)


Out[47]:
MainVertexSeq VertexID X Y Z VertexDepth MeshScalar
0 0 5218.0 -33.62030 -10.56970 -10.93620 1.0 125.0
1 1 5216.0 -32.85095 -9.89157 -11.35725 1.0 125.0

In [48]:
ax = sns.tsplot(data=g1['MeshScalar'])



In [49]:
center_data = [df_data['X'].mean(), df_data['Y'].mean(), df_data['Z'].mean()]

In [50]:
center_data


Out[50]:
[-33.095124500256446, -10.197814784469454, 3.8635315850845826]

In [51]:
df_data_scalars = pd.concat([df_data['MainVertexSeq'], df_data['MeshScalar']], axis=1)

In [52]:
g1 = df_data.groupby(['MainVertexSeq']).median()
g1 = g1.reset_index()

In [53]:
ax = sns.tsplot(data=g1['MeshScalar'])



In [54]:
def unit_vector(vector):
    """ Returns the unit vector of the vector.  """
    return vector / np.linalg.norm(vector)

def angle_between(v1, v2):
    """ Returns the angle in radians between vectors 'v1' and 'v2'::

            >>> angle_between((1, 0, 0), (0, 1, 0))
            1.5707963267948966
            >>> angle_between((1, 0, 0), (1, 0, 0))
            0.0
            >>> angle_between((1, 0, 0), (-1, 0, 0))
            3.141592653589793
    """
    v1_u = unit_vector(v1)
    v2_u = unit_vector(v2)
    return np.arccos(np.clip(np.dot(v1_u, v2_u), -1.0, 1.0))

In [55]:
def point_theta(center, A):
    thetas = []; 
    Ref_pt = [A.iloc[0]['X'], A.iloc[0]['Y'], A.iloc[0]['Z']] 
    for index, row in A.iterrows():
        pt = [row['X'], row['Y'], row['Z']]
        theta = angle_between(Ref_pt, pt)
        thetas.append(theta)
    return thetas

In [56]:
thetas = point_theta(center_data, df_data)

In [57]:
thetas_df = pd.Series(thetas, name='theta');

In [58]:
df_data_with_theta = pd.concat([df_data, thetas_df], axis=1)

In [59]:
sns.tsplot(df_data_with_theta['theta'])


Out[59]:
<matplotlib.axes._subplots.AxesSubplot at 0x119b17610>

In [60]:
df_data_with_thetas_sorted = df_data_with_theta.sort_values(by='theta')

In [61]:
df_data_with_thetas_sorted.head(10)


Out[61]:
MainVertexSeq VertexID X Y Z VertexDepth MeshScalar theta
0 0 5215 -33.6203 -10.48890 -10.9362 0 125 0.000000
1 0 5215 -33.6203 -10.48890 -10.9362 3 125 0.000000
182 10 5215 -33.6203 -10.48890 -10.9362 1 125 0.000000
201 11 5215 -33.6203 -10.48890 -10.9362 1 125 0.000000
44 2 5215 -33.6203 -10.48890 -10.9362 1 125 0.000000
22 1 5215 -33.6203 -10.48890 -10.9362 1 125 0.000000
45 2 5218 -33.4931 -9.89040 -10.8902 1 125 0.014635
26 1 5218 -33.4931 -9.89040 -10.8902 2 125 0.014635
9 0 5218 -33.4931 -9.89040 -10.8902 2 125 0.014635
18 1 5217 -33.0372 -9.89157 -11.2163 3 125 0.017284

In [62]:
sns.tsplot(df_data_with_thetas_sorted['theta'])


Out[62]:
<matplotlib.axes._subplots.AxesSubplot at 0x11b27b250>

In [63]:
sns.tsplot(df_data_with_thetas_sorted['MeshScalar'])


Out[63]:
<matplotlib.axes._subplots.AxesSubplot at 0x11b55df10>

In [ ]:


In [64]:
df_data_with_thetas_sorted.head(15)


Out[64]:
MainVertexSeq VertexID X Y Z VertexDepth MeshScalar theta
0 0 5215 -33.6203 -10.48890 -10.9362 0 125 0.000000
1 0 5215 -33.6203 -10.48890 -10.9362 3 125 0.000000
182 10 5215 -33.6203 -10.48890 -10.9362 1 125 0.000000
201 11 5215 -33.6203 -10.48890 -10.9362 1 125 0.000000
44 2 5215 -33.6203 -10.48890 -10.9362 1 125 0.000000
22 1 5215 -33.6203 -10.48890 -10.9362 1 125 0.000000
45 2 5218 -33.4931 -9.89040 -10.8902 1 125 0.014635
26 1 5218 -33.4931 -9.89040 -10.8902 2 125 0.014635
9 0 5218 -33.4931 -9.89040 -10.8902 2 125 0.014635
18 1 5217 -33.0372 -9.89157 -11.2163 3 125 0.017284
17 1 5217 -33.0372 -9.89157 -11.2163 0 125 0.017284
42 2 5217 -33.0372 -9.89157 -11.2163 2 125 0.017284
7 0 5217 -33.0372 -9.89157 -11.2163 1 125 0.017284
12 0 5508 -34.3497 -11.04190 -10.4954 2 133 0.019808
185 11 5508 -34.3497 -11.04190 -10.4954 3 133 0.019808

In [65]:
df_data_with_thetas_sorted['MeshScalar'].std()


Out[65]:
19.772934852313647

In [66]:
df_data_with_thetas_sorted.head()


Out[66]:
MainVertexSeq VertexID X Y Z VertexDepth MeshScalar theta
0 0 5215 -33.6203 -10.4889 -10.9362 0 125 0.0
1 0 5215 -33.6203 -10.4889 -10.9362 3 125 0.0
182 10 5215 -33.6203 -10.4889 -10.9362 1 125 0.0
201 11 5215 -33.6203 -10.4889 -10.9362 1 125 0.0
44 2 5215 -33.6203 -10.4889 -10.9362 1 125 0.0

In [67]:
df_data_with_thetas_sorted_nodupes = df_data_with_thetas_sorted.drop_duplicates(subset='VertexID', keep='last');

In [68]:
sns.tsplot(pd.rolling_mean(df_data_with_thetas_sorted_nodupes['MeshScalar'],50))


Out[68]:
<matplotlib.axes._subplots.AxesSubplot at 0x120af2210>

In [69]:
df_data_with_thetas_sorted_nodupes.describe()


Out[69]:
MainVertexSeq VertexID X Y Z VertexDepth MeshScalar theta
count 577.000000 577.000000 577.000000 577.000000 577.000000 577.000000 577.000000 577.000000
mean 56.665511 9938.922010 -33.096223 -10.133744 3.403164 1.169844 142.282496 0.460977
std 33.770273 3467.783114 2.978921 8.438816 10.234730 0.525983 19.696788 0.248268
min 0.000000 4941.000000 -40.462500 -23.121200 -12.477900 0.000000 102.000000 0.000000
25% 26.000000 6654.000000 -35.093300 -18.540500 -6.257040 1.000000 126.000000 0.239194
50% 57.000000 9676.000000 -32.836800 -9.890400 3.088680 1.000000 142.000000 0.498209
75% 85.000000 12971.000000 -31.034400 -1.706820 12.859300 1.000000 157.000000 0.684318
max 116.000000 16047.000000 -27.047900 2.138300 19.653700 3.000000 184.000000 0.845595

In [70]:
''' 
This function is not complete yet, it tries to compute the distance between each point in the list to its
point on the line (vertex depth = v for all points within the same neighbourhood) 
'''
def scar_width(df, threshold):
    width = []; 
    is_scar = []; 
    #Ref_pt = [A.iloc[0]['X'], A.iloc[0]['Y'], A.iloc[0]['Z']] 
    
    for index, row in df.iterrows():
        if row['VertexDepth'] == 0:
            pt_on_line = (row['X'], row['Y'], row['Z'])
            
        pt = (row['X'], row['Y'], row['Z'])
        #distance = math.hypot(pt[0]-pt_on_line[0], pt[1]-pt_on_line[1], pt[1]-pt_on_line[1])
        distance = math.sqrt((pt[0]-pt_on_line[0])**2 + (pt[1]-pt_on_line[1])**2 + (pt[2]-pt_on_line[2])**2)
        
        
        if row['MeshScalar'] > threshold: 
            is_scar.append(1) 
            width.append(distance)
        else:
            is_scar.append(0)
            width.append(0)
    
    width_df = pd.Series(width, name='scar_width');
    is_scar_df = pd.Series(is_scar, name='scar_bin');
    
    df = pd.concat([df, width_df], axis=1)
    df = pd.concat([df, is_scar_df], axis=1)

    return df

In [ ]:


In [71]:
df_data_with_thetas_sorted = scar_width(df_data_with_thetas_sorted, 135)

In [72]:
df_data_with_thetas_sorted.head(20)


Out[72]:
MainVertexSeq VertexID X Y Z VertexDepth MeshScalar theta scar_width scar_bin
0 0 5215 -33.6203 -10.48890 -10.9362 0 125 0.000000 0.0 0
1 0 5215 -33.6203 -10.48890 -10.9362 3 125 0.000000 0.0 0
2 0 5214 -33.0064 -10.77150 -11.4352 2 125 0.021121 0.0 0
3 0 5211 -33.1530 -11.81900 -11.4638 1 125 0.040570 0.0 0
4 0 5212 -33.7237 -11.43020 -11.0110 1 125 0.023311 0.0 0
5 0 4941 -32.5331 -11.28820 -11.8478 1 118 0.042627 0.0 0
6 0 4945 -32.4010 -10.56970 -11.8393 1 118 0.035449 0.0 0
7 0 5217 -33.0372 -9.89157 -11.2163 1 125 0.017284 0.0 0
8 0 5219 -32.4449 -9.96539 -11.6660 1 125 0.030273 0.0 0
9 0 5218 -33.4931 -9.89040 -10.8902 2 125 0.014635 0.0 0
10 0 5221 -33.2501 -9.25943 -10.8596 1 125 0.029649 0.0 0
11 0 5511 -33.9944 -9.96485 -10.5272 1 128 0.020553 0.0 0
12 0 5508 -34.3497 -11.04190 -10.4954 2 133 0.019808 0.0 0
13 0 5507 -34.7533 -10.41080 -10.0778 1 133 0.032040 0.0 0
14 0 5506 -34.9666 -11.14760 -10.0660 1 126 0.033921 0.0 0
15 0 5501 -34.9005 -11.82950 -10.1817 1 126 0.039250 0.0 0
16 0 5503 -34.3340 -12.08160 -10.6189 1 125 0.038480 0.0 0
17 1 5217 -33.0372 -9.89157 -11.2163 0 125 0.017284 0.0 0
18 1 5217 -33.0372 -9.89157 -11.2163 3 125 0.017284 0.0 0
19 1 5214 -33.0064 -10.77150 -11.4352 2 125 0.021121 0.0 0

In [80]:
sns.tsplot(pd.rolling_mean(df_data_with_thetas_sorted['scar_bin'],50))


Out[80]:
<matplotlib.axes._subplots.AxesSubplot at 0x1240ebb50>

In [74]:
sns.tsplot(pd.rolling_mean(df_data_with_thetas_sorted['scar_width'],200)) # Uses mean windowing=100, so 0 widths are smoothed


Out[74]:
<matplotlib.axes._subplots.AxesSubplot at 0x11d504950>