notebook.community

Edit and run



In [43]:

    
%matplotlib inline



In [44]:

    
import seaborn as sns
import glob
import os
import numpy as np



In [45]:

    
import csv
from collections import defaultdict

def read_features_from_csv(csv_file):
    """Read a two-column feature CSV into a ``{feature_id: value}`` dict.

    Each data row holds a numeric feature id in the first column and a
    numeric value in the second.  The id is parsed as a float and truncated
    to ``int``, so an id written as ``"3.0"`` maps to key ``3``.

    Rows whose first two fields are not both numeric (e.g. a header line)
    are skipped.  Blank lines and rows with fewer than two fields are
    skipped as well; they previously raised ``IndexError``.  If an id occurs
    more than once, the last row wins.

    Parameters
    ----------
    csv_file : str or path-like
        Path of the CSV file to read.

    Returns
    -------
    dict
        Mapping of ``int`` feature id to ``float`` value.
    """
    features = {}
    # newline='' is the open mode the csv module documents for reader input.
    with open(csv_file, 'r', newline='') as f:
        for row in csv.reader(f):
            # csv.reader yields [] for blank lines; short rows carry no value.
            if len(row) < 2:
                continue
            try:
                features[int(float(row[0]))] = float(row[1])
            except ValueError:
                # Non-numeric row such as the header: nothing to record.
                continue
    return features



In [46]:

    
def get_X_from_features(files):
    """Build a dense feature matrix from a collection of feature CSV files.

    Every file is parsed with ``read_features_from_csv``.  The columns are
    the union of the feature ids seen across all files, in order of first
    appearance; the rows follow the order of ``files``.  A feature id that
    is missing from a file is filled with 0.

    Parameters
    ----------
    files : iterable of str
        Paths of the feature CSV files; each file becomes one row.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(number of files, number of distinct feature ids)``.
    """
    per_file = [read_features_from_csv(path) for path in files]

    # Map each distinct feature id to its column index.  The index is handed
    # out on first sight, and the dict lookup replaces a linear scan of a
    # growing list for every key.
    column_of = {}
    for features in per_file:
        for key in features:
            column_of.setdefault(int(float(key)), len(column_of))

    feature_array = np.zeros((len(per_file), len(column_of)))
    for row, features in enumerate(per_file):
        for key, col in column_of.items():
            # .get() leaves the parsed dicts untouched; setdefault() would
            # insert a zero entry for every missing id as a side effect.
            feature_array[row, col] = features.get(key, 0)
    return feature_array



In [47]:

    
# Collect every CSV under ./neuron_features whose name matches *_axon*.
# glob returns matches in arbitrary order, so sort them to get a stable order.
files = np.sort(glob.glob('./neuron_features/*_axon*.csv'))



In [48]:

    
# Dense matrix with one row per entry of `files` and one column per distinct
# feature id found across those files (ids missing from a file are 0).
feature_array = get_X_from_features(files)



In [50]:

    
from pymeda import Meda

import pandas as pd



In [51]:

    
# Placeholder column labels, one per column of the feature matrix.
columns = list(map('dimension {}'.format, range(feature_array.shape[1])))



In [52]:

    
# Tabular view of the feature matrix, labelled with the generated column names.
df = pd.DataFrame(feature_array, columns=columns)



In [53]:

    
# Hand the feature DataFrame to pymeda.
# NOTE(review): the second argument is presumably a dataset title/label used
# in pymeda's output — confirm against the pymeda documentation.
meda = Meda(df, 'mouselight_neurons')



In [54]:

    
# Invoke pymeda's run_all() on the Meda object built from `df`.
# NOTE(review): presumably this runs pymeda's full set of exploratory
# analyses/plots — confirm against the pymeda documentation.
meda.run_all()



In [ ]:



In [ ]:



In [25]:

    
from sklearn.mixture import GaussianMixture
from tqdm import tqdm_notebook, tnrange



In [26]:

    
# Fit a Gaussian mixture for every candidate component count, repeated over
# several randomly-initialised trials, and record the BIC of each fit.
# bics[t][k] and gmms[t][k] hold the BIC and the fitted model of trial t for
# n_comp[k] components.
bics = []
gmms = []
n_comp = np.arange(2,30)
n_trials = 50
for trial in tnrange(n_trials):
    trial_bics = []
    trial_gmms = []
    # The inner loop has its own variable; it used to reuse the outer `i`
    # and shadow the trial index.
    for k in n_comp:
        # Seed every fit from its trial index: trials still differ from one
        # another, but re-running the cell reproduces the same BIC values.
        gmm = GaussianMixture(n_components=int(k),
                              init_params='random',
                              random_state=trial).fit(feature_array)
        trial_bics.append(gmm.bic(feature_array))
        trial_gmms.append(gmm)
    bics.append(trial_bics)
    gmms.append(trial_gmms)



In [27]:

    
# Stack the per-trial BIC lists into one array (first axis: trial) and average
# over the trials to get one mean BIC per entry of the inner lists.
b2 = np.asarray(bics)
bics_mean = b2.mean(axis=0)



In [28]:

    
import matplotlib.pyplot as plt



In [29]:

    
# Mean BIC as a function of the number of mixture components.
plt.plot(n_comp,bics_mean)
# Build the title from the values actually swept: the hard-coded text claimed
# n_comp=[2,52], which does not match the np.arange(2,30) range in n_comp.
plt.title('BIC for n_comp=[{},{}] averaged over {} trials'.format(
    n_comp[0], n_comp[-1], n_trials))
plt.xlabel('Number of clusters')
plt.ylabel('BIC value')









    Out[29]:





Text(0, 0.5, 'BIC value')



In [30]:

    
# Final model: a single Gaussian mixture with a fixed number of components.
# NOTE(review): 22 is presumably read off the BIC curve — confirm and record why.
n_comp2 = 22
# Fixed seed so the k-means initialisation, and therefore the fitted model,
# is the same on every run of the notebook.
gmm = GaussianMixture(n_components=int(n_comp2),
                      init_params='kmeans',
                      random_state=0).fit(feature_array)



In [31]:

    
# Hard cluster assignment: the predicted mixture-component index for each row
# of the feature matrix.
labels = gmm.predict(feature_array)



In [32]:

    
# Sanity check: exactly one label per row of the feature matrix.
n_labels, n_rows = len(labels), len(feature_array)
assert n_labels == n_rows



In [ ]: