In [43]:
%matplotlib inline
In [44]:
import seaborn as sns
import glob
import os
import numpy as np
In [45]:
import csv
from collections import defaultdict
def read_features_from_csv(csv_file):
    """Read a two-column ``index,value`` CSV file into a dict.

    Every data row is expected to carry a numeric feature index in its first
    column and a numeric feature value in its second. Rows that cannot be
    parsed that way (a header line, a blank line, a row with fewer than two
    columns) are skipped instead of aborting the whole read.

    Parameters
    ----------
    csv_file : str or path-like
        Path of the CSV file to read.

    Returns
    -------
    dict
        Maps each ``int`` feature index to its ``float`` value. When an index
        appears on several rows, the last row wins.
    """
    features = {}
    # newline='' is what the csv module asks for, so that line breaks inside
    # quoted fields are interpreted by the reader and not by open().
    with open(csv_file, 'r', newline='') as f:
        for row in csv.reader(f):
            try:
                # Indices may be written as "3.0", hence float() before int().
                index = int(float(row[0]))
                value = float(row[1])
            except (ValueError, IndexError):
                # ValueError: non-numeric cell (e.g. the header row).
                # IndexError: blank line or row with a single column.
                continue
            features[index] = value
    return features
In [46]:
def get_X_from_features(files):
    """Assemble a dense feature matrix from a sequence of feature CSV files.

    Each file is parsed with ``read_features_from_csv``. The columns of the
    result are the distinct feature indices found across all files, in the
    order in which they are first encountered; a feature that a file does
    not contain is stored as 0.

    Parameters
    ----------
    files : iterable of str
        Paths of the feature CSV files; row ``i`` of the result corresponds
        to the ``i``-th path.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(number of files, number of distinct indices)``.
    """
    per_file = [read_features_from_csv(path) for path in files]

    # Collect distinct feature indices in first-seen order. The set gives O(1)
    # membership tests; the list preserves the column order.
    keys = []
    seen = set()
    for features in per_file:
        for raw_key in features:
            key = int(float(raw_key))
            if key not in seen:
                seen.add(key)
                keys.append(key)

    feature_array = np.zeros((len(per_file), len(keys)))
    for row, features in enumerate(per_file):
        for col, key in enumerate(keys):
            # .get() reads the default without inserting it into the dict.
            feature_array[row, col] = features.get(key, 0)
    return feature_array
In [47]:
# Gather the axon feature CSVs in lexicographic order so that the order of the
# paths (and therefore of anything built from them) does not depend on the
# order in which the file system happens to list them. sorted() keeps the
# paths as a plain list of str.
files = sorted(glob.glob('./neuron_features/*_axon*.csv'))
In [48]:
# Dense matrix with one row per path in `files` and one column per distinct
# feature index found across those files; features absent from a file are 0.
feature_array = get_X_from_features(files)
In [50]:
from pymeda import Meda
import pandas as pd
In [51]:
# One generic label ("dimension 0", "dimension 1", ...) per column of the
# feature matrix.
columns = list(map('dimension {}'.format, range(feature_array.shape[1])))
In [52]:
# Labelled tabular view of the feature matrix, using the generated column names.
df = pd.DataFrame(feature_array, columns=columns)
In [53]:
# Construct the pymeda Meda object from the feature DataFrame.
# NOTE(review): the second argument looks like a dataset title/label — confirm
# against the pymeda documentation.
meda = Meda(df, 'mouselight_neurons')
In [54]:
# Presumably runs every analysis/plot Meda provides for `df` — TODO confirm
# against the pymeda documentation.
meda.run_all()
In [ ]:
In [ ]:
In [25]:
from sklearn.mixture import GaussianMixture
from tqdm import tqdm_notebook, tnrange
In [26]:
# Model selection for the Gaussian mixture: for every candidate number of
# components fit a randomly initialised GMM on feature_array and record its
# BIC. The whole sweep is repeated n_trials times so the BIC curve can later
# be averaged over restarts.
bics = []  # bics[t][k]: BIC obtained in trial t for n_comp[k]
gmms = []  # gmms[t][k]: the fitted model behind bics[t][k]
n_comp = np.arange(2, 30)
n_trials = 50
for trial in tnrange(n_trials):
    trial_bics = []
    trial_gmms = []
    # A distinct name for the inner variable: the original reused `i` here,
    # shadowing the trial counter of the outer loop.
    for n_components in n_comp:
        gmm = GaussianMixture(
            n_components=int(n_components),
            init_params='random',
            # Seed with the trial number: restarts still differ from each
            # other, but re-running the cell reproduces the same fits.
            random_state=trial,
        ).fit(feature_array)
        bic = gmm.bic(feature_array)
        trial_bics.append(bic)
        trial_gmms.append(gmm)
    bics.append(trial_bics)
    gmms.append(trial_gmms)
In [27]:
# Stack the per-trial BIC lists into a 2-D array (one row per trial, one
# column per candidate component count) and average over the trials.
b2 = np.asarray(bics)
bics_mean = b2.mean(axis=0)
In [28]:
import matplotlib.pyplot as plt
In [29]:
# Trial-averaged BIC as a function of the number of mixture components.
plt.plot(n_comp, bics_mean)
# Build the title from the sweep itself: the previous hard-coded text claimed
# the range [2,52] although n_comp only covers 2..29, and it would also go
# stale whenever the number of trials changes.
plt.title('BIC for n_comp=[{},{}] averaged over {} trials'.format(
    n_comp[0], n_comp[-1], len(bics)))
plt.xlabel('Number of clusters')
plt.ylabel('BIC value')
Out[29]:
In [30]:
# Final clustering model.
# NOTE(review): 22 components was presumably read off the BIC curve plotted
# above — confirm and record the reasoning.
n_comp2 = 22
# A fixed random_state makes the k-means initialisation, and therefore the
# resulting cluster labels, reproducible across kernel restarts.
gmm = GaussianMixture(
    n_components=int(n_comp2),
    init_params='kmeans',
    random_state=0,
).fit(feature_array)
In [31]:
# Label predicted by the fitted mixture model for each row of feature_array.
labels = gmm.predict(feature_array)
In [32]:
# Sanity check: there must be exactly one label per row of the feature matrix.
assert len(labels) == len(feature_array)
In [ ]: