In [23]:

    
import numpy as np
import pandas as pd
from collections import defaultdict
import math
%matplotlib inline
import matplotlib.pyplot as plt

Parsing ABA experimental data



In [24]:

    
# Region ontology table, stored as a tab-separated file.
aba_onto = pd.read_csv('mouse_regions.tsv', sep='\t')

# Lookup dict: region acronym -> region id
id_by_acronym = aba_onto.set_index('acronym')['id']
aba_onto_to_aba_id = id_by_acronym.to_dict()
# Sanity check of the mapping against one known entry.
assert aba_onto_to_aba_id['TMv'] == 1

aba_onto.head()









    Out[24]:






  
    
      
      id
      myid
      atlas_id
      acronym
      name
      parent_structure_id
    
  
  
    
      0
       1
       1
       424
           TMv
            Tuberomammillary nucleus, ventral part
        557
    
    
      1
       2
       2
       990
       SSp-m6b
       Primary somatosensory area, mouth, layer 6b
        345
    
    
      2
       3
       3
       707
           sec
                                 secondary fissure
       1040
    
    
      3
       4
       4
       141
            IC
                               Inferior colliculus
        339
    
    
      4
       6
       5
       566
           int
                                  internal capsule
        784
    
  

5 rows × 6 columns



In [25]:

    
# Projection-volume table; the CSV was created with a script from Lydia@ABA.
aba_raw = pd.read_csv('normalized_projection_volume.csv')

# Distinct injection structures: these are the regions on the y-axis.
y_es = aba_raw['primary_injection_structure'].unique()
aba_raw.head()









    Out[25]:






  
    
      
      data_set_id
      primary_injection_structure
      FRP-R
      MOp-R
      MOs-R
      SSp-n-R
      SSp-bfd-R
      SSp-ll-R
      SSp-m-R
      SSp-ul-R
      SSp-tr-R
      SSp-un-R
      SSs-R
      GU-R
      VISC-R
      AUDd-R
      AUDp-R
      AUDpo-R
      AUDv-R
      VISal-R
      
    
  
  
    
      0
       180719293
       MOp
       0.068580
       2.468630
       0.815549
       0.357992
       0.454036
       0.081658
       0.979404
       0.527580
       0.017429
       1.087990
       0.684646
       0.478943
       0.183690
       0.002365
       0.001693
       0.000000e+00
       0.018263
       0.000406
      ...
    
    
      1
       180709942
       MOp
       0.057397
       2.925960
       1.021330
       0.522288
       0.515159
       0.041720
       1.248320
       0.454361
       0.001301
       1.499530
       1.304780
       0.718774
       0.325738
       0.000419
       0.001437
       2.569000e-08
       0.026881
       0.000009
      ...
    
    
      2
       166082128
       MOp
       0.112593
       4.741260
       1.894710
       0.337731
       1.394310
       0.124539
       1.330040
       1.263350
       0.008491
       1.431670
       1.415500
       0.313845
       0.213622
       0.004193
       0.006252
       0.000000e+00
       0.041305
       0.000922
      ...
    
    
      3
       180720175
       MOp
       0.002641
       0.823139
       0.725565
       0.004429
       0.136374
       0.728642
       0.006659
       0.209588
       1.106390
       0.044225
       0.088390
       0.016314
       0.010245
       0.228062
       0.241397
       4.405670e-03
       0.136536
       0.067005
      ...
    
    
      4
       120814821
       MOp
       0.291335
       9.751350
       4.992740
       0.098949
       5.297970
       0.854422
       0.685045
       2.701720
       0.282305
       0.395045
       1.864880
       0.145435
       0.090214
       0.172363
       0.058344
       1.695270e-05
       0.225992
       0.014359
      ...
    
  

5 rows × 594 columns



In [26]:

    
# Group all data_sets (experiments) that share an injection structure into a
# single row per structure, keeping the column-wise maximum.
# TODO .max() or .mean()?
# NOTE: this cell rebinds `aba_raw` (grouped, `data_set_id` removed, columns
# renamed), so it cannot be re-run on its own -- re-run the cell that loads
# the CSV first.
aba_raw = (
    aba_raw
    .groupby('primary_injection_structure')
    .max()
    .drop('data_set_id', axis=1)  # experiment id is not needed anymore
)

# Remove the hemisphere marker (trailing -R or -L). The marker is stripped only
# when it is actually present, so a column without one keeps its full name
# instead of losing its last two characters. The right and left columns of a
# region end up sharing the same label.
aba_raw.columns = [
    c[:-2] if c.endswith(('-R', '-L')) else c
    for c in aba_raw.columns
]

# regions on the x-axis
x_es = aba_raw.columns
aba_raw.head()









    Out[26]:






  
    
      
      FRP
      MOp
      MOs
      SSp-n
      SSp-bfd
      SSp-ll
      SSp-m
      SSp-ul
      SSp-tr
      SSp-un
      SSs
      GU
      VISC
      AUDd
      AUDp
      AUDpo
      AUDv
      VISal
      VISam
      VISl
      
    
    
      primary_injection_structure
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
    
  
  
    
      AAA
       0.008259
       0.039756
       0.020719
       0.002492
       0.006706
       0.043079
       0.004413
       0.021725
       0.001265
       0.004855
       0.009930
       0.043916
       0.015284
       0.002637
       0.001815
       0.000090
       0.005926
       0.000785
       0.000085
       0.001074
      ...
    
    
      ACA
       0.019118
       0.497297
       1.000740
       0.039186
       0.209422
       0.027633
       0.024815
       0.049324
       0.073476
       0.023429
       0.146186
       0.040924
       0.031911
       0.045518
       0.035344
       0.010326
       0.038093
       0.095197
       0.897499
       0.041352
      ...
    
    
      ACAd
       0.051942
       1.513790
       3.999290
       0.048974
       0.238441
       0.572160
       0.045477
       0.321856
       0.427127
       0.189453
       0.196698
       0.126714
       0.034762
       0.194629
       0.074409
       0.008546
       0.047286
       0.842452
       1.472160
       0.258359
      ...
    
    
      ACAv
       0.019947
       0.336812
       1.246400
       0.034205
       0.046766
       0.020294
       0.017483
       0.007384
       0.075899
       0.020424
       0.093040
       0.011684
       0.026757
       0.021256
       0.007016
       0.002764
       0.012144
       0.065850
       0.618113
       0.024922
      ...
    
    
      ACB
       0.010619
       0.048564
       0.153678
       0.001667
       0.002769
       0.001848
       0.001419
       0.003253
       0.001753
       0.001518
       0.004475
       0.005439
       0.002025
       0.000952
       0.000700
       0.000239
       0.002160
       0.001265
       0.001310
       0.000415
      ...
    
  

5 rows × 592 columns



In [27]:

    
# Peek at the first five region labels of each matrix axis (x first, then y).
(x_es[:5], y_es[:5])









    Out[27]:





(Index([u'FRP', u'MOp', u'MOs', u'SSp-n', u'SSp-bfd'], dtype='object'),
 array(['MOp', 'MOs', 'SSp', 'SSp-bfd', 'SSp-ll'], dtype=object))



In [28]:

    
# Flatten the matrix into tuples (region_a, region_b, projection_volume):
# the id of the column region comes first, the id of the row (injection
# structure) region second.
aba_conn_dto = []
for x in x_es:
    x_id = aba_onto_to_aba_id[x]  # does not depend on y, so look it up once
    for y in y_es:
        y_id = aba_onto_to_aba_id[y]
        # TODO again: .max() or .mean()?
        # The selection has 2 items (X-R and X-L share the label x); just take
        # the max. A single .loc[row, col] lookup avoids chained indexing.
        value = aba_raw.loc[y, x].max()
        aba_conn_dto.append((x_id, y_id, value))

# A label that occurs twice in x_es (once per hemisphere) yields every tuple
# twice; keep only the first occurrence of each.
aba_gold = pd.DataFrame(
    aba_conn_dto, columns=['a_id', 'b_id', 'gold_score']
).drop_duplicates()

# Number of tuples before and after de-duplication.
print(len(aba_conn_dto))
print(len(aba_gold))

# Ensure a <= b (for sorting later): a is the smaller id of each pair, b the
# larger one.
pair_low = aba_gold['a_id'].combine(aba_gold['b_id'], min, fill_value=0)
pair_high = aba_gold['a_id'].combine(aba_gold['b_id'], max, fill_value=0)

# Final frame keeps the de-duplicated index and the column order
# gold_score, a, b; the directional a_id / b_id columns are dropped.
aba_gold = pd.DataFrame(
    {'gold_score': aba_gold['gold_score'], 'a': pair_low, 'b': pair_high},
    columns=['gold_score', 'a', 'b']
)



In [29]:

    
# Five region pairs with the highest gold_score. `sort_values` is used because
# current pandas releases no longer provide `DataFrame.sort`.
aba_gold.sort_values('gold_score', ascending=False).head()









    Out[29]:






  
    
      
      gold_score
      a
      b
    
  
  
    
      69839
       41.7725
       773
       1009
    
    
      12089
       38.6056
       382
        463
    
    
      69751
       33.0490
       223
       1009
    
    
      17153
       28.0085
       672
        749
    
    
      17167
       26.4854
       374
        672
    
  

5 rows × 3 columns



In [30]:

    
# Summary statistics of the gold_score column.
aba_gold['gold_score'].describe()









    Out[30]:





count    69856.000000
mean         0.106288
std          0.629020
min          0.000000
25%          0.000295
50%          0.003611
75%          0.030513
max         41.772500
Name: gold_score, dtype: float64



In [31]:

    
# Dot plot of every gold_score in descending order. With use_index=False the
# x position is the rank in that ordering, not the frame's index label.
# `sort_values` is used because current pandas releases no longer provide
# `DataFrame.sort`.
fig, ax = plt.subplots(figsize=(18, 18))
ranked_scores = aba_gold.sort_values('gold_score', ascending=False)['gold_score']
ranked_scores.plot(ax=ax, style='.', use_index=False,
                   title='Distribution of projection_volume values')
ax.set_xlabel('rank (descending gold_score)')
ax.set_ylabel('gold_score')
ax









    Out[31]:





<matplotlib.axes.AxesSubplot at 0x67f6810>



In [31]:

	id	myid	atlas_id	acronym	name	parent_structure_id
0	1	1	424	TMv	Tuberomammillary nucleus, ventral part	557
1	2	2	990	SSp-m6b	Primary somatosensory area, mouth, layer 6b	345
2	3	3	707	sec	secondary fissure	1040
3	4	4	141	IC	Inferior colliculus	339
4	6	5	566	int	internal capsule	784

	data_set_id	primary_injection_structure	FRP-R	MOp-R	MOs-R	SSp-n-R	SSp-bfd-R	SSp-ll-R	SSp-m-R	SSp-ul-R	SSp-tr-R	SSp-un-R	SSs-R	GU-R	VISC-R	AUDd-R	AUDp-R	AUDpo-R	AUDv-R	VISal-R
0	180719293	MOp	0.068580	2.468630	0.815549	0.357992	0.454036	0.081658	0.979404	0.527580	0.017429	1.087990	0.684646	0.478943	0.183690	0.002365	0.001693	0.000000e+00	0.018263	0.000406	...
1	180709942	MOp	0.057397	2.925960	1.021330	0.522288	0.515159	0.041720	1.248320	0.454361	0.001301	1.499530	1.304780	0.718774	0.325738	0.000419	0.001437	2.569000e-08	0.026881	0.000009	...
2	166082128	MOp	0.112593	4.741260	1.894710	0.337731	1.394310	0.124539	1.330040	1.263350	0.008491	1.431670	1.415500	0.313845	0.213622	0.004193	0.006252	0.000000e+00	0.041305	0.000922	...
3	180720175	MOp	0.002641	0.823139	0.725565	0.004429	0.136374	0.728642	0.006659	0.209588	1.106390	0.044225	0.088390	0.016314	0.010245	0.228062	0.241397	4.405670e-03	0.136536	0.067005	...
4	120814821	MOp	0.291335	9.751350	4.992740	0.098949	5.297970	0.854422	0.685045	2.701720	0.282305	0.395045	1.864880	0.145435	0.090214	0.172363	0.058344	1.695270e-05	0.225992	0.014359	...

	FRP	MOp	MOs	SSp-n	SSp-bfd	SSp-ll	SSp-m	SSp-ul	SSp-tr	SSp-un	SSs	GU	VISC	AUDd	AUDp	AUDpo	AUDv	VISal	VISam	VISl
primary_injection_structure
AAA	0.008259	0.039756	0.020719	0.002492	0.006706	0.043079	0.004413	0.021725	0.001265	0.004855	0.009930	0.043916	0.015284	0.002637	0.001815	0.000090	0.005926	0.000785	0.000085	0.001074	...
ACA	0.019118	0.497297	1.000740	0.039186	0.209422	0.027633	0.024815	0.049324	0.073476	0.023429	0.146186	0.040924	0.031911	0.045518	0.035344	0.010326	0.038093	0.095197	0.897499	0.041352	...
ACAd	0.051942	1.513790	3.999290	0.048974	0.238441	0.572160	0.045477	0.321856	0.427127	0.189453	0.196698	0.126714	0.034762	0.194629	0.074409	0.008546	0.047286	0.842452	1.472160	0.258359	...
ACAv	0.019947	0.336812	1.246400	0.034205	0.046766	0.020294	0.017483	0.007384	0.075899	0.020424	0.093040	0.011684	0.026757	0.021256	0.007016	0.002764	0.012144	0.065850	0.618113	0.024922	...
ACB	0.010619	0.048564	0.153678	0.001667	0.002769	0.001848	0.001419	0.003253	0.001753	0.001518	0.004475	0.005439	0.002025	0.000952	0.000700	0.000239	0.002160	0.001265	0.001310	0.000415	...

	gold_score	a	b
69839	41.7725	773	1009
12089	38.6056	382	463
69751	33.0490	223	1009
17153	28.0085	672	749
17167	26.4854	374	672