In [43]:
# Render matplotlib figures inline in the notebook's cell output.
%matplotlib inline

In [44]:
import seaborn as sns
import glob
import os
import numpy as np

In [45]:
import csv
from collections import defaultdict

def read_features_from_csv(csv_file):
    """Read a two-column feature CSV into a ``{feature_id: value}`` dict.

    For every data row the first column is parsed as a float and truncated
    to ``int`` (so ``"3.0"`` becomes ``3``) and used as the key; the second
    column is parsed as ``float`` and used as the value. Any further columns
    are ignored.

    Rows that cannot be parsed as numbers (e.g. a header line) and rows with
    fewer than two columns (e.g. blank lines) are skipped instead of aborting
    the whole read.

    Parameters
    ----------
    csv_file : str or path-like
        Path of the CSV file to read.

    Returns
    -------
    dict
        Mapping of ``int`` feature id to ``float`` value. When an id occurs
        more than once, the last occurrence wins.
    """
    features = {}
    # newline='' lets the csv module do its own newline handling, as its
    # documentation recommends for file objects passed to csv.reader.
    with open(csv_file, 'r', newline='') as f:
        for row in csv.reader(f):
            # csv.reader yields [] for blank lines; indexing those would
            # raise IndexError, which the ValueError handler does not cover.
            if len(row) < 2:
                continue
            try:
                features[int(float(row[0]))] = float(row[1])
            except ValueError:
                # Non-numeric row (typically the header): not feature data.
                continue
    return features

In [46]:
def get_X_from_features(files):
    """Build a dense feature matrix from a collection of feature CSV files.

    Every path is read with ``read_features_from_csv``. The union of the
    feature ids found across all files defines the columns, ordered by first
    appearance; a feature that is absent from a file is filled with 0.

    Parameters
    ----------
    files : iterable of str
        Paths of the feature CSV files. Row ``r`` of the result corresponds
        to the ``r``-th path.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(number of files, number of distinct ids)``.
        An empty ``files`` yields an array of shape ``(0, 0)``.
    """
    per_file_features = [read_features_from_csv(path) for path in files]

    # Map each distinct feature id to its column index. Using a dict gives
    # constant-time membership tests while the explicit index keeps the
    # first-seen column order.
    column_of = {}
    for features in per_file_features:
        for raw_key in features:
            key = int(float(raw_key))
            if key not in column_of:
                column_of[key] = len(column_of)

    feature_array = np.zeros((len(per_file_features), len(column_of)))
    for row, features in enumerate(per_file_features):
        for key, col in column_of.items():
            # .get() reads without inserting, so the parsed dicts are left
            # untouched while missing features still become 0.
            feature_array[row, col] = features.get(key, 0)
    return feature_array

In [47]:
# Collect every CSV under ./neuron_features whose name contains "_axon";
# sorting makes the file order (and hence the matrix row order) deterministic.
files = np.sort(glob.glob('./neuron_features/*_axon*.csv'))

In [48]:
# Dense matrix: one row per file in `files`, one column per distinct feature
# id, with 0 where a file does not contain that feature.
feature_array = get_X_from_features(files)

In [50]:
from pymeda import Meda

import pandas as pd

In [51]:
# Generic positional column labels ("dimension 0", "dimension 1", ...), one
# per column of feature_array.
columns = list(map('dimension {}'.format, range(feature_array.shape[1])))

In [52]:
# Tabular view of the feature matrix, labelled with the generic column names.
df = pd.DataFrame(feature_array, columns=columns)

In [53]:
# Wrap the feature table in a pymeda Meda object.
# NOTE(review): the string is presumably the dataset title pymeda uses in its
# output — confirm against the pymeda documentation.
meda = Meda(df, 'mouselight_neurons')



In [54]:
# Run everything pymeda offers for this dataset in one call.
# NOTE(review): which analyses/plots this produces depends on the installed
# pymeda version — confirm before relying on specific outputs.
meda.run_all()



In [ ]:


In [ ]:


In [25]:
from sklearn.mixture import GaussianMixture
from tqdm import tqdm_notebook, tnrange

In [26]:
# Sweep the number of mixture components: for each trial, fit one randomly
# initialised Gaussian mixture per candidate component count and record its
# BIC on the training data.
bics = []   # bics[t][k]: BIC of trial t for n_comp[k] components
gmms = []   # gmms[t][k]: the fitted model behind bics[t][k]
n_comp = np.arange(2,30)
n_trials = 50
for trial in tnrange(n_trials):
    trial_bics = []
    trial_gmms = []
    # A separate name for the component count avoids shadowing the trial index.
    for k in n_comp:
        # Seeding with the trial index keeps trials distinct from one another
        # while making the whole sweep reproducible on a fresh kernel.
        gmm = GaussianMixture(
            n_components=int(k),
            init_params='random',
            random_state=trial,
        ).fit(feature_array)
        trial_bics.append(gmm.bic(feature_array))
        trial_gmms.append(gmm)
    bics.append(trial_bics)
    gmms.append(trial_gmms)




In [27]:
# Stack the per-trial BIC lists into a (trials x component counts) array and
# average over the trial axis to get one mean BIC per component count.
b2 = np.asarray(bics)
bics_mean = b2.mean(axis=0)

In [28]:
import matplotlib.pyplot as plt

In [29]:
# Mean BIC as a function of the number of mixture components.
plt.plot(n_comp, bics_mean)
# Derive the range and trial count from the plotted data so the title cannot
# drift out of sync with the sweep settings.
plt.title('BIC for n_comp=[{},{}] averaged over {} trials'.format(
    n_comp[0], n_comp[-1], b2.shape[0]))
plt.xlabel('Number of clusters')
plt.ylabel('BIC value')


Out[29]:
Text(0, 0.5, 'BIC value')

In [30]:
# Final clustering model, initialised with k-means.
# NOTE(review): 22 is hard-coded — presumably read off the mean-BIC curve;
# confirm and record the rationale.
n_comp2 = 22
# A fixed random_state makes the fitted model, and therefore the cluster
# labels derived from it, reproducible across kernel restarts.
gmm = GaussianMixture(
    n_components=int(n_comp2),
    init_params='kmeans',
    random_state=0,
).fit(feature_array)

In [31]:
# Assign each row of feature_array to a component of the fitted mixture.
labels = gmm.predict(feature_array)

In [32]:
# Sanity check: exactly one cluster label per row of the feature matrix.
assert len(labels) == len(feature_array)

In [ ]: