In [1]:
import os
import re
import pickle
import time
import datetime
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
from sklearn.cluster import KMeans
from scipy.sparse import csr_matrix
%matplotlib inline
# Custom modules
import const
import func
In [2]:
# Load look-up table
lut = pd.read_csv(const.LOOK_UP_TABLE)
lut.head()
Out[2]:
In [3]:
lut.station_V2.unique()
Out[3]:
In [14]:
lut.groupby('station_V2').station_V2.first().reset_index(drop=True).values.shape
Out[14]:
In [16]:
# Load unique paths and convert them to a dataframe
paths = pd.read_csv(os.path.join(const.DATA_PATH, 'eda_product_flow_unique_paths_station.csv'), index_col=0)
# Convert the 0/1 strings to one column per character (insert a comma after every digit, then split)
paths = (paths.u_str.str.replace('0', '0,').str.replace('1', '1,').str.split(',')).apply(pd.Series)
# Drop last column (empty residue left after the final comma)
paths.drop(paths.columns[-1], axis=1, inplace=True)
# To ints
paths = paths.astype(int)
# Replace column names with the station_V2 numbers from the look-up table
paths.columns = lut.groupby('station_V2').station_V2.first().reset_index(drop=True).values
paths.head(3)
Out[16]:
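The replace-then-split trick above works, but the character split can also be done more directly by expanding each string into a list of its digits. A minimal alternative sketch (illustrative only; `raw_paths` and `alt_paths` are hypothetical names, re-reading the same CSV):

raw_paths = pd.read_csv(os.path.join(const.DATA_PATH, 'eda_product_flow_unique_paths_station.csv'), index_col=0)
# Expand every 0/1 character of u_str into its own integer column
alt_paths = raw_paths.u_str.apply(lambda s: pd.Series([int(c) for c in s]))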
In [17]:
paths.shape
Out[17]:
In [18]:
# Load unique path per sample ID
string_id_to_pid = pd.read_csv(os.path.join(const.DATA_PATH, 'eda_product_flow_sample_paths_station.csv'))
In [19]:
string_id_to_pid.head(3)
Out[19]:
In [20]:
# Load response
y = func.read_last_column('train_numeric.csv')  # [:500000]
y.head(3)
Out[20]:
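`func.read_last_column` is a helper from the custom `func` module, so its exact behaviour isn't shown here. A minimal sketch of what such a helper might do, assuming the file lives in const.DATA_PATH and has an Id column (hypothetical `read_last_column_sketch`, not the actual implementation):

def read_last_column_sketch(filename, data_path=const.DATA_PATH):
    # Peek at the header to find the last column name, then read only Id plus that column
    header = pd.read_csv(os.path.join(data_path, filename), nrows=0)
    last = header.columns[-1]
    return pd.read_csv(os.path.join(data_path, filename), usecols=['Id', last], index_col='Id')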
In [28]:
def cluster_unique_paths(unique_paths, n_clus):
    ''' Cluster the set of unique paths using KMeans '''
    km = KMeans(n_clus, n_init=100, max_iter=100, random_state=11111)
    km.fit(unique_paths)
    # Return the cluster label per unique path as a DataFrame
    clusters = pd.DataFrame(pd.Series(km.labels_),
                            index=unique_paths.index,
                            columns=['cluster'])
    return clusters


def visualize_cluster_result(unique_paths, clusters, sample_to_path, y):
    ''' Plot station usage, error rate and sample count per cluster '''
    nclus = clusters['cluster'].nunique()
    # Map every sample to its path columns and cluster label
    # ('id' is expected to be a column of sample_to_path and becomes the index)
    cluster_per_sample = sample_to_path.merge(unique_paths,
                                              left_on='u_arr_ix',
                                              right_index=True,
                                              how='left') \
                                       .merge(clusters,
                                              left_on='u_arr_ix',
                                              right_index=True,
                                              how='left') \
                                       .drop(['u_arr_ix'], axis=1) \
                                       .set_index('id')
    cluster_grouped = cluster_per_sample.groupby('cluster').mean()
    # Fix weird rounding bug in the station column labels
    cluster_grouped.columns = [round(n, 3) for n in cluster_grouped.columns]
    # Get sorting index based on the major stations
    try:
        so = (cluster_grouped > 0.85).sort_values([0.0, 12.0, 24.1, 24.2, 24.3, 25.1,
                                                   25.202, 25.212, 25.222, 26.0, 27.0])
    except Exception:
        print('Error sorting values')
        so = (cluster_grouped > 0.85)
    # Visualize major stations per cluster
    plt.figure(figsize=(16, 6))
    gs = gridspec.GridSpec(1, 3, width_ratios=[5, 1, 1])
    ax2 = plt.subplot(gs[0])
    ax1 = plt.subplot(gs[1])
    ax3 = plt.subplot(gs[2])
    # Keep only bottom ticks/labels on the bar plots and only y labels on the heatmap
    ax1.tick_params(axis='both', which='both', bottom='on', top='off',
                    labelbottom='on', labelleft='off')
    ax2.tick_params(axis='both', which='both', bottom='off', top='off',
                    labelbottom='off', labelleft='on')
    ax3.tick_params(axis='both', which='both', bottom='on', top='off',
                    labelbottom='on', labelleft='off')
    sns.heatmap(cluster_per_sample.groupby('cluster').mean().loc[so.index, :],
                cmap='Blues', ax=ax2, cbar=False)
    ax2.set_title('Number of clusters: {}'.format(nclus))
    cluster_per_sample['R'] = y
    # Summarize error rate and sample count per cluster
    cluster_summary = pd.DataFrame({'meanR': cluster_per_sample.groupby('cluster').R.mean(),
                                    'sample_count': cluster_per_sample.groupby('cluster').R.count()},
                                   index=range(nclus))
    cluster_summary.sort_values('sample_count', ascending=False, inplace=True)
    # Plot error rates (reversed index so the bars line up with the heatmap rows)
    (cluster_summary['meanR'] * 100).loc[so.index[::-1]].plot(kind='barh', ax=ax1)
    labs = ax1.get_xticks().tolist()
    ax1.set_xticklabels(labs, rotation='vertical')
    ax1.set_xlabel('Error rate (%)')
    ax1.set_xlim([0, ax1.get_xlim()[1]])
    ax1.set_ylabel('')
    # Plot sample count (log scale)
    cluster_summary['sample_count'].loc[so.index[::-1]].apply(lambda x: np.log10(x)).plot(kind='barh', ax=ax3)
    ax3.set_xlabel('log10(Sample count)')
    ax3.set_ylabel('')
    plt.tight_layout()
    return cluster_summary
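The number of clusters is chosen by eye below (8 up to 500). As a rough, hedged way to compare candidate values of k beforehand, one can look at the KMeans inertia or a silhouette score on the unique paths; a minimal sketch, assuming `paths` fits comfortably in memory:

from sklearn.metrics import silhouette_score

for k in [8, 15, 25, 50]:
    km = KMeans(k, n_init=10, max_iter=100, random_state=11111).fit(paths)
    # Lower inertia / higher silhouette suggests a better-separated clustering
    print(k, km.inertia_, silhouette_score(paths, km.labels_, sample_size=5000, random_state=0))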
In [29]:
clusters_8 = cluster_unique_paths(paths, 8)
summary_8 = visualize_cluster_result(paths, clusters_8, string_id_to_pid, y)
In [31]:
clusters_15 = cluster_unique_paths(paths, 15)
summary_15 = visualize_cluster_result(paths, clusters_15, string_id_to_pid, y)
In [30]:
clusters_25 = cluster_unique_paths(paths, 25)
summary_25 = visualize_cluster_result(paths, clusters_25, string_id_to_pid, y)
In [32]:
clusters_50 = cluster_unique_paths(paths, 50)
summary_50 = visualize_cluster_result(paths, clusters_50, string_id_to_pid, y)
In [33]:
clusters_100 = cluster_unique_paths(paths, 100)
summary_100 = visualize_cluster_result(paths, clusters_100, string_id_to_pid, y)
In [34]:
clusters_150 = cluster_unique_paths(paths, 150)
summary_150 = visualize_cluster_result(paths, clusters_150, string_id_to_pid, y)
In [35]:
clusters_500 = cluster_unique_paths(paths, 500)
summary_500 = visualize_cluster_result(paths, clusters_500, string_id_to_pid, y)
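As a rough way to compare how well the different granularities separate failure rates, one can look at the sample-weighted spread of `meanR` across clusters in each summary. A small sketch using the summaries computed above (NaN rows from empty clusters are skipped by the pandas sums):

for name, summ in [('k=8', summary_8), ('k=15', summary_15), ('k=25', summary_25),
                   ('k=50', summary_50), ('k=100', summary_100), ('k=500', summary_500)]:
    w = summ['sample_count'] / summ['sample_count'].sum()
    overall = (summ['meanR'] * w).sum()
    spread = np.sqrt((w * (summ['meanR'] - overall) ** 2).sum())
    print(name, 'overall error rate: %.4f' % overall, 'weighted std: %.4f' % spread)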
In [43]:
pid_to_cluster = string_id_to_pid.merge(clusters_8,
                                        left_on='u_arr_ix',
                                        right_index=True,
                                        how='left')
pid_to_cluster = pid_to_cluster.merge(clusters_15,
                                      left_on='u_arr_ix',
                                      right_index=True,
                                      how='left')
pid_to_cluster = pid_to_cluster.merge(clusters_25,
                                      left_on='u_arr_ix',
                                      right_index=True,
                                      how='left')
pid_to_cluster = pid_to_cluster.merge(clusters_50,
                                      left_on='u_arr_ix',
                                      right_index=True,
                                      how='left')
pid_to_cluster = pid_to_cluster.merge(clusters_150,
                                      left_on='u_arr_ix',
                                      right_index=True,
                                      how='left')
pid_to_cluster = pid_to_cluster.merge(clusters_500,
                                      left_on='u_arr_ix',
                                      right_index=True,
                                      how='left')
pid_to_cluster.set_index('id', inplace=True)
pid_to_cluster.columns = ['unique_path', 'cluster_n8', 'cluster_n15', 'cluster_n25',
                          'cluster_n50', 'cluster_n150', 'cluster_n500']
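The repeated merges above could also be written as a loop over the clusterings, which keeps the cell short if more values of k are added later. A sketch equivalent in spirit to the cell above, assuming string_id_to_pid has the 'id' and 'u_arr_ix' columns used throughout (`pid_to_cluster_alt` is a hypothetical name):

clusterings = {'cluster_n8': clusters_8, 'cluster_n15': clusters_15, 'cluster_n25': clusters_25,
               'cluster_n50': clusters_50, 'cluster_n150': clusters_150, 'cluster_n500': clusters_500}
pid_to_cluster_alt = string_id_to_pid.copy()
for name, clus in clusterings.items():
    # Each clusters_* frame has a single 'cluster' column indexed by unique-path id
    pid_to_cluster_alt = pid_to_cluster_alt.merge(clus.rename(columns={'cluster': name}),
                                                  left_on='u_arr_ix', right_index=True, how='left')
pid_to_cluster_alt = pid_to_cluster_alt.set_index('id').rename(columns={'u_arr_ix': 'unique_path'})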
In [44]:
pid_to_cluster.head(3)
Out[44]:
In [45]:
pid_to_cluster.to_csv(os.path.join(const.DATA_PATH, 'eda_sample_clusters.csv'), index_label='Id')
In [46]:
summary_500.to_csv(os.path.join(const.DATA_PATH, 'eda_summary_cluster_500.csv'), index_label='cluster')
In [480]:
paths.columns.get_loc(26.)
Out[480]:
In [490]:
# Cluster only the tail of the paths (columns from station 26.0 onward; position 103 found in the previous cell)
clusters_line_3 = cluster_unique_paths(paths.iloc[:, 103:], 100)
summary_line_3 = visualize_cluster_result(paths.iloc[:, 103:], clusters_line_3, string_id_to_pid, y)
In [150]:
pid_to_cluster = pd.read_csv(os.path.join(const.DATA_PATH, 'eda_sample_clusters.csv'), index_col=0)
pid_to_cluster['R'] = y
# Keep only samples that have a response value
pid_to_cluster = pid_to_cluster[~pid_to_cluster.R.isnull()]
pid_to_cluster.head(3)
Out[150]:
In [174]:
for col in pid_to_cluster.columns[:-1]:
cnt_agg = pid_to_cluster.groupby(col).agg({col:'count','R':'mean'})
cnt_agg[col] = np.log10(cnt_agg[col])
cnt_agg.set_index(col, drop=True, inplace=True)
cnt_agg.sort_index(inplace=True)
plt.figure(figsize=(16,4))
cnt_agg.plot(kind='bar', figsize=(16,4))
In [168]:
cnt_agg.plot(kind='bar')
Out[168]:
In [49]:
# Clusters of the 500-cluster solution with an error rate above 0.8%
clust_high = summary_500[summary_500.meanR > 0.008]
print(clust_high.shape)
clust_high.head(3)
Out[49]:
In [50]:
# Map every sample to its station path columns and its 500-cluster label
cluster_per_sample_500 = string_id_to_pid.merge(paths,
                                                left_on='u_arr_ix',
                                                right_index=True,
                                                how='left') \
                                         .merge(clusters_500,
                                                left_on='u_arr_ix',
                                                right_index=True,
                                                how='left') \
                                         .drop(['u_arr_ix', 'id'], axis=1)
In [65]:
# Example subset: samples whose path includes both station column 0 and station column 12
exa = cluster_per_sample_500[(cluster_per_sample_500.iloc[:, 12] == 1) & (cluster_per_sample_500.iloc[:, 0] == 1)]
In [69]:
# Split the example subset by whether its cluster is one of the high-error clusters
exa_high = exa[exa.cluster.isin(clust_high.index)]
exa_low = exa[~exa.cluster.isin(clust_high.index)]
In [72]:
exa_high.head(3)
Out[72]:
In [82]:
exa_high.shape
Out[82]:
In [117]:
exa_low.shape
Out[117]:
In [138]:
n_s = 0
n_e = 12
diffs = []
for j in range(150):
mean_high = float((exa_high.iloc[:,(n_s+j):(n_e+j)]>0).sum().sum())/218
mean_low = float((exa_low.iloc[:,(n_s+j):(n_e+j)]>0).sum().sum())/1317
diffs.append(np.divide(mean_high,mean_low))
#print mean_low
#print mean_high
#print np.divide(mean_high,mean_low)
#print ('')
print(max(diffs))
print(min(diffs))
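The same sliding-window comparison can be done in a vectorized way with a rolling sum over per-column visit rates. A minimal sketch (it skips the trailing `cluster` column, so the numbers can differ slightly from the loop above):

win = 12
high_rate = (exa_high.iloc[:, :-1] > 0).sum(axis=0) / float(len(exa_high))
low_rate = (exa_low.iloc[:, :-1] > 0).sum(axis=0) / float(len(exa_low))
# Ratio of window sums between the high- and low-error subsets
ratio = (high_rate.rolling(win).sum() / low_rate.rolling(win).sum()).dropna()
print(ratio.max(), ratio.min())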
In [83]:
exa_low.shape
Out[83]:
In [76]:
exa_high.mean().iloc[:-1].plot(kind='bar', figsize=(16,4))
Out[76]:
In [78]:
(exa_high.mean().iloc[:-1] / exa_low.mean().iloc[:-1]).plot(kind='bar', figsize=(16,4))
Out[78]:
In [56]:
mean_per_cluster = cluster_per_sample_500.groupby('cluster').mean()
In [57]:
mean_per_cluster.head(3)
Out[57]:
In [ ]:
mean_per_cluster
In [61]:
(mean_per_cluster>0).sum(1).hist(bins=50)
Out[61]:
In [28]:
cluster_per_sample_100.columns
Out[28]:
In [53]:
cluster_per_sample_500.iloc[:, :-1].mean().plot(kind='bar', figsize=(13,4))
Out[53]:
In [54]:
cluster_per_sample_500[cluster_per_sample_500.cluster.isin(clust_high.index)].iloc[:, :-1].mean().plot(kind='bar', figsize=(13,4))
Out[54]:
In [55]:
(cluster_per_sample_500[cluster_per_sample_500.cluster.isin(clust_high.index)].iloc[:, :-1].mean() / cluster_per_sample_500.iloc[:, :-1].mean()).plot(kind='bar', figsize=(13,4))
Out[55]:
In [108]:
# Repeat the clustering on a narrower slice of station columns only
clusters_line_3 = cluster_unique_paths(paths.iloc[:, 107:112], 10)
summary_line_3 = visualize_cluster_result(paths.iloc[:, 107:112], clusters_line_3, string_id_to_pid, y)