Let's connect to the API.
In [1]:
import getpass
from transmart_api import TransmartApi
# Create the API client for the server at localhost:8080. The password is
# requested interactively via getpass instead of being written in the cell.
api = TransmartApi(
    host='http://localhost:8080',
    user='admin',
    password=getpass.getpass(),
)
········
The output below is not in a convenient shape for analysis.
In [3]:
# Retrieve the observations of study GSE8581 and preview the first five records.
observations = api.get_observations(study='GSE8581')
observations[:5]
Out[3]:
[{u'label': u'\\Public Studies\\GSE8581\\Biomarker Data\\GPL570\\',
u'subject': {u'age': 65,
u'birthDate': None,
u'deathDate': None,
u'id': 1000384597,
u'inTrialId': u'GSE8581GSM210006',
u'maritalStatus': None,
u'race': u'Afro American',
u'religion': None,
u'sex': u'FEMALE',
u'trial': u'GSE8581'},
u'value': None},
{u'label': u'\\Public Studies\\GSE8581\\Endpoints\\Diagnosis\\',
u'subject': {u'age': 65,
u'birthDate': None,
u'deathDate': None,
u'id': 1000384597,
u'inTrialId': u'GSE8581GSM210006',
u'maritalStatus': None,
u'race': u'Afro American',
u'religion': None,
u'sex': u'FEMALE',
u'trial': u'GSE8581'},
u'value': u'non-small cell adenocarcinoma'},
{u'label': u'\\Public Studies\\GSE8581\\Endpoints\\FEV1\\',
u'subject': {u'age': 65,
u'birthDate': None,
u'deathDate': None,
u'id': 1000384597,
u'inTrialId': u'GSE8581GSM210006',
u'maritalStatus': None,
u'race': u'Afro American',
u'religion': None,
u'sex': u'FEMALE',
u'trial': u'GSE8581'},
u'value': 1.41},
{u'label': u'\\Public Studies\\GSE8581\\Endpoints\\Forced Expiratory Volume Ratio\\',
u'subject': {u'age': 65,
u'birthDate': None,
u'deathDate': None,
u'id': 1000384597,
u'inTrialId': u'GSE8581GSM210006',
u'maritalStatus': None,
u'race': u'Afro American',
u'religion': None,
u'sex': u'FEMALE',
u'trial': u'GSE8581'},
u'value': 51.0},
{u'label': u'\\Public Studies\\GSE8581\\Subjects\\Age\\',
u'subject': {u'age': 65,
u'birthDate': None,
u'deathDate': None,
u'id': 1000384597,
u'inTrialId': u'GSE8581GSM210006',
u'maritalStatus': None,
u'race': u'Afro American',
u'religion': None,
u'sex': u'FEMALE',
u'trial': u'GSE8581'},
u'value': 65.0}]
We would like to use pandas to flatten the JSON output above.
In [4]:
import pandas

# json_normalize lives in the top-level pandas namespace since pandas 1.0 and
# the pandas.io.json import path is deprecated; fall back to the old location
# only when running against an older pandas installation.
try:
    from pandas import json_normalize
except ImportError:
    from pandas.io.json import json_normalize

# Flatten the nested observation records: keys of the nested 'subject' dict
# become 'subject.<key>' columns, giving one row per observation.
df = json_normalize(observations)
df
Out[4]:
label
subject.age
subject.birthDate
subject.deathDate
subject.id
subject.inTrialId
subject.maritalStatus
subject.race
subject.religion
subject.sex
subject.trial
value
0
\Public Studies\GSE8581\Biomarker Data\GPL570\
65
None
None
1000384597
GSE8581GSM210006
None
Afro American
None
FEMALE
GSE8581
None
1
\Public Studies\GSE8581\Endpoints\Diagnosis\
65
None
None
1000384597
GSE8581GSM210006
None
Afro American
None
FEMALE
GSE8581
non-small cell adenocarcinoma
2
\Public Studies\GSE8581\Endpoints\FEV1\
65
None
None
1000384597
GSE8581GSM210006
None
Afro American
None
FEMALE
GSE8581
1.41
3
\Public Studies\GSE8581\Endpoints\Forced Expir...
65
None
None
1000384597
GSE8581GSM210006
None
Afro American
None
FEMALE
GSE8581
51
4
\Public Studies\GSE8581\Subjects\Age\
65
None
None
1000384597
GSE8581GSM210006
None
Afro American
None
FEMALE
GSE8581
65
5
\Public Studies\GSE8581\Subjects\Height (inch)\
65
None
None
1000384597
GSE8581GSM210006
None
Afro American
None
FEMALE
GSE8581
66
6
\Public Studies\GSE8581\Subjects\Lung Disease\
65
None
None
1000384597
GSE8581GSM210006
None
Afro American
None
FEMALE
GSE8581
chronic obstructive pulmonary disease
7
\Public Studies\GSE8581\Subjects\Organism\
65
None
None
1000384597
GSE8581GSM210006
None
Afro American
None
FEMALE
GSE8581
Homo sapiens
8
\Public Studies\GSE8581\Subjects\Race\
65
None
None
1000384597
GSE8581GSM210006
None
Afro American
None
FEMALE
GSE8581
Afro American
9
\Public Studies\GSE8581\Subjects\Sex\
65
None
None
1000384597
GSE8581GSM210006
None
Afro American
None
FEMALE
GSE8581
female
10
\Public Studies\GSE8581\Biomarker Data\GPL570\
77
None
None
1000384598
GSE8581GSM212788
None
Caucasian
None
FEMALE
GSE8581
E
11
\Public Studies\GSE8581\Endpoints\Diagnosis\
77
None
None
1000384598
GSE8581GSM212788
None
Caucasian
None
FEMALE
GSE8581
non-small cell squamous cell carcinoma
12
\Public Studies\GSE8581\Endpoints\FEV1\
77
None
None
1000384598
GSE8581GSM212788
None
Caucasian
None
FEMALE
GSE8581
1.29
13
\Public Studies\GSE8581\Endpoints\Forced Expir...
77
None
None
1000384598
GSE8581GSM212788
None
Caucasian
None
FEMALE
GSE8581
53
14
\Public Studies\GSE8581\Subjects\Age\
77
None
None
1000384598
GSE8581GSM212788
None
Caucasian
None
FEMALE
GSE8581
77
15
\Public Studies\GSE8581\Subjects\Height (inch)\
77
None
None
1000384598
GSE8581GSM212788
None
Caucasian
None
FEMALE
GSE8581
67
16
\Public Studies\GSE8581\Subjects\Lung Disease\
77
None
None
1000384598
GSE8581GSM212788
None
Caucasian
None
FEMALE
GSE8581
chronic obstructive pulmonary disease
17
\Public Studies\GSE8581\Subjects\Organism\
77
None
None
1000384598
GSE8581GSM212788
None
Caucasian
None
FEMALE
GSE8581
Homo sapiens
18
\Public Studies\GSE8581\Subjects\Race\
77
None
None
1000384598
GSE8581GSM212788
None
Caucasian
None
FEMALE
GSE8581
Caucasian
19
\Public Studies\GSE8581\Subjects\Sex\
77
None
None
1000384598
GSE8581GSM212788
None
Caucasian
None
FEMALE
GSE8581
female
20
\Public Studies\GSE8581\Biomarker Data\GPL570\
55
None
None
1000384599
GSE8581GSM212067
None
Caucasian
None
MALE
GSE8581
E
21
\Public Studies\GSE8581\Endpoints\Diagnosis\
55
None
None
1000384599
GSE8581GSM212067
None
Caucasian
None
MALE
GSE8581
inflammation
22
\Public Studies\GSE8581\Endpoints\FEV1\
55
None
None
1000384599
GSE8581GSM212067
None
Caucasian
None
MALE
GSE8581
4.04
23
\Public Studies\GSE8581\Endpoints\Forced Expir...
55
None
None
1000384599
GSE8581GSM212067
None
Caucasian
None
MALE
GSE8581
79
24
\Public Studies\GSE8581\Subjects\Age\
55
None
None
1000384599
GSE8581GSM212067
None
Caucasian
None
MALE
GSE8581
55
25
\Public Studies\GSE8581\Subjects\Height (inch)\
55
None
None
1000384599
GSE8581GSM212067
None
Caucasian
None
MALE
GSE8581
69
26
\Public Studies\GSE8581\Subjects\Lung Disease\
55
None
None
1000384599
GSE8581GSM212067
None
Caucasian
None
MALE
GSE8581
control
27
\Public Studies\GSE8581\Subjects\Organism\
55
None
None
1000384599
GSE8581GSM212067
None
Caucasian
None
MALE
GSE8581
Homo sapiens
28
\Public Studies\GSE8581\Subjects\Race\
55
None
None
1000384599
GSE8581GSM212067
None
Caucasian
None
MALE
GSE8581
Caucasian
29
\Public Studies\GSE8581\Subjects\Sex\
55
None
None
1000384599
GSE8581GSM212067
None
Caucasian
None
MALE
GSE8581
male
...
...
...
...
...
...
...
...
...
...
...
...
...
550
\Public Studies\GSE8581\Biomarker Data\GPL570\
61
None
None
1000384652
GSE8581GSM212809
None
Caucasian
None
FEMALE
GSE8581
E
551
\Public Studies\GSE8581\Endpoints\Diagnosis\
61
None
None
1000384652
GSE8581GSM212809
None
Caucasian
None
FEMALE
GSE8581
non-small cell adenocarcinoma
552
\Public Studies\GSE8581\Endpoints\FEV1\
61
None
None
1000384652
GSE8581GSM212809
None
Caucasian
None
FEMALE
GSE8581
1.28
553
\Public Studies\GSE8581\Endpoints\Forced Expir...
61
None
None
1000384652
GSE8581GSM212809
None
Caucasian
None
FEMALE
GSE8581
89
554
\Public Studies\GSE8581\Subjects\Age\
61
None
None
1000384652
GSE8581GSM212809
None
Caucasian
None
FEMALE
GSE8581
61
555
\Public Studies\GSE8581\Subjects\Height (inch)\
61
None
None
1000384652
GSE8581GSM212809
None
Caucasian
None
FEMALE
GSE8581
65
556
\Public Studies\GSE8581\Subjects\Lung Disease\
61
None
None
1000384652
GSE8581GSM212809
None
Caucasian
None
FEMALE
GSE8581
not specified
557
\Public Studies\GSE8581\Subjects\Organism\
61
None
None
1000384652
GSE8581GSM212809
None
Caucasian
None
FEMALE
GSE8581
Homo sapiens
558
\Public Studies\GSE8581\Subjects\Race\
61
None
None
1000384652
GSE8581GSM212809
None
Caucasian
None
FEMALE
GSE8581
Caucasian
559
\Public Studies\GSE8581\Subjects\Sex\
61
None
None
1000384652
GSE8581GSM212809
None
Caucasian
None
FEMALE
GSE8581
female
560
\Public Studies\GSE8581\Biomarker Data\GPL570\
57
None
None
1000384653
GSE8581GSM210196
None
Caucasian
None
FEMALE
GSE8581
E
561
\Public Studies\GSE8581\Endpoints\Diagnosis\
57
None
None
1000384653
GSE8581GSM210196
None
Caucasian
None
FEMALE
GSE8581
non-small cell adenocarcinoma
562
\Public Studies\GSE8581\Endpoints\FEV1\
57
None
None
1000384653
GSE8581GSM210196
None
Caucasian
None
FEMALE
GSE8581
2.3
563
\Public Studies\GSE8581\Endpoints\Forced Expir...
57
None
None
1000384653
GSE8581GSM210196
None
Caucasian
None
FEMALE
GSE8581
74
564
\Public Studies\GSE8581\Subjects\Age\
57
None
None
1000384653
GSE8581GSM210196
None
Caucasian
None
FEMALE
GSE8581
57
565
\Public Studies\GSE8581\Subjects\Height (inch)\
57
None
None
1000384653
GSE8581GSM210196
None
Caucasian
None
FEMALE
GSE8581
65
566
\Public Studies\GSE8581\Subjects\Lung Disease\
57
None
None
1000384653
GSE8581GSM210196
None
Caucasian
None
FEMALE
GSE8581
control
567
\Public Studies\GSE8581\Subjects\Organism\
57
None
None
1000384653
GSE8581GSM210196
None
Caucasian
None
FEMALE
GSE8581
Homo sapiens
568
\Public Studies\GSE8581\Subjects\Race\
57
None
None
1000384653
GSE8581GSM210196
None
Caucasian
None
FEMALE
GSE8581
Caucasian
569
\Public Studies\GSE8581\Subjects\Sex\
57
None
None
1000384653
GSE8581GSM210196
None
Caucasian
None
FEMALE
GSE8581
female
570
\Public Studies\GSE8581\Biomarker Data\GPL570\
54
None
None
1000384654
GSE8581GSM212790
None
Caucasian
None
FEMALE
GSE8581
E
571
\Public Studies\GSE8581\Endpoints\Diagnosis\
54
None
None
1000384654
GSE8581GSM212790
None
Caucasian
None
FEMALE
GSE8581
non-small cell squamous cell carcinoma
572
\Public Studies\GSE8581\Endpoints\FEV1\
54
None
None
1000384654
GSE8581GSM212790
None
Caucasian
None
FEMALE
GSE8581
2.83
573
\Public Studies\GSE8581\Endpoints\Forced Expir...
54
None
None
1000384654
GSE8581GSM212790
None
Caucasian
None
FEMALE
GSE8581
75
574
\Public Studies\GSE8581\Subjects\Age\
54
None
None
1000384654
GSE8581GSM212790
None
Caucasian
None
FEMALE
GSE8581
54
575
\Public Studies\GSE8581\Subjects\Height (inch)\
54
None
None
1000384654
GSE8581GSM212790
None
Caucasian
None
FEMALE
GSE8581
75
576
\Public Studies\GSE8581\Subjects\Lung Disease\
54
None
None
1000384654
GSE8581GSM212790
None
Caucasian
None
FEMALE
GSE8581
control
577
\Public Studies\GSE8581\Subjects\Organism\
54
None
None
1000384654
GSE8581GSM212790
None
Caucasian
None
FEMALE
GSE8581
Homo sapiens
578
\Public Studies\GSE8581\Subjects\Race\
54
None
None
1000384654
GSE8581GSM212790
None
Caucasian
None
FEMALE
GSE8581
Caucasian
579
\Public Studies\GSE8581\Subjects\Sex\
54
None
None
1000384654
GSE8581GSM212790
None
Caucasian
None
FEMALE
GSE8581
female
580 rows × 12 columns
In [5]:
# Reshape from long to wide: one row per subject.inTrialId, one column per
# label, with the cells filled from the 'value' column.
dfp = df.pivot(
    index='subject.inTrialId',
    columns='label',
    values='value',
)
dfp
Out[5]:
label
\Public Studies\GSE8581\Biomarker Data\GPL570\
\Public Studies\GSE8581\Endpoints\Diagnosis\
\Public Studies\GSE8581\Endpoints\FEV1\
\Public Studies\GSE8581\Endpoints\Forced Expiratory Volume Ratio\
\Public Studies\GSE8581\Subjects\Age\
\Public Studies\GSE8581\Subjects\Height (inch)\
\Public Studies\GSE8581\Subjects\Lung Disease\
\Public Studies\GSE8581\Subjects\Organism\
\Public Studies\GSE8581\Subjects\Race\
\Public Studies\GSE8581\Subjects\Sex\
subject.inTrialId
GSE8581GSM210004
None
non-small cell squamous cell carcinoma
2.54
58
63
72
chronic obstructive pulmonary disease
Homo sapiens
Caucasian
male
GSE8581GSM210005
None
non-small cell adenocarcinoma
1.69
83.66
84
60
control
Homo sapiens
Afro American
female
GSE8581GSM210006
None
non-small cell adenocarcinoma
1.41
51
65
66
chronic obstructive pulmonary disease
Homo sapiens
Afro American
female
GSE8581GSM210007
E
non-small cell adenocarcinoma
2.51
80.96
46
66
not specified
Homo sapiens
Caucasian
male
GSE8581GSM210008
E
non-small cell adenocarcinoma
1.64
57
53
65
chronic obstructive pulmonary disease
Homo sapiens
Caucasian
female
GSE8581GSM210009
E
non-small cell squamous cell carcinoma
2.72
74
53
64
control
Homo sapiens
Caucasian
female
GSE8581GSM210010
E
non-small cell adenocarcinoma
1.45
73
77
63
not specified
Homo sapiens
Caucasian
female
GSE8581GSM210011
E
non-small cell squamous cell carcinoma
1.87
56
56
72
chronic obstructive pulmonary disease
Homo sapiens
Caucasian
male
GSE8581GSM210012
E
non-small cell adenocarcinoma
2.76
70.58
61
69
not specified
Homo sapiens
Caucasian
male
GSE8581GSM210014
E
non-small cell adenocarcinoma
1.98
78
71
63
control
Homo sapiens
Caucasian
female
GSE8581GSM210015
E
non-small cell adenocarcinoma
2.59
74
68
65
control
Homo sapiens
Caucasian
male
GSE8581GSM210071
E
emphysema
1.16
43
68
67
chronic obstructive pulmonary disease
Homo sapiens
Caucasian
male
GSE8581GSM210087
E
non-small cell adenocarcinoma
2.59
74
68
65
control
Homo sapiens
Caucasian
female
GSE8581GSM210090
E
hematoma
4.08
68
50
72
not specified
Homo sapiens
Caucasian
male
GSE8581GSM210188
E
metastatic non-small cell adenocarcinoma
1.2
72
65
63
not specified
Homo sapiens
Caucasian
female
GSE8581GSM210192
E
non-small cell adenocarcinoma
2.31
75
50
65
control
Homo sapiens
Caucasian
female
GSE8581GSM210193
E
Unknown
1.63
80.78
67
66
not specified
Homo sapiens
Caucasian
female
GSE8581GSM210194
E
non-small cell squamous cell carcinoma
0.52
58
56
60
chronic obstructive pulmonary disease
Homo sapiens
Caucasian
female
GSE8581GSM210196
E
non-small cell adenocarcinoma
2.3
74
57
65
control
Homo sapiens
Caucasian
female
GSE8581GSM210978
E
non-small cell adenocarcinoma
1.53
74
73
66
not specified
Homo sapiens
Caucasian
female
GSE8581GSM210979
E
non-small cell adenocarcinoma
2.17
76
55
63
control
Homo sapiens
Caucasian
female
GSE8581GSM210992
E
non-small cell squamous cell carcinoma
0.9
55.9
70
71
chronic obstructive pulmonary disease
Homo sapiens
Caucasian
male
GSE8581GSM210993
E
giant bullae
0.4
45
61
73
chronic obstructive pulmonary disease
Homo sapiens
Caucasian
male
GSE8581GSM210994
E
carcinoid
1.69
76
64
63
not specified
Homo sapiens
Caucasian
female
GSE8581GSM211007
E
non-small cell squamous cell carcinoma
1.7
59
61
66
chronic obstructive pulmonary disease
Homo sapiens
Caucasian
male
GSE8581GSM211008
E
non-small cell adenocarcinoma
3.66
72
71
71
control
Homo sapiens
Caucasian
male
GSE8581GSM211009
E
non-small cell adenocarcinoma
1.64
41
75
71
chronic obstructive pulmonary disease
Homo sapiens
Caucasian
male
GSE8581GSM211010
E
non-small cell adenocarcinoma
1.44
86
61
60
not specified
Homo sapiens
Caucasian
female
GSE8581GSM211865
E
non-small cell squamous cell carcinoma
2.13
71
69
67
not specified
Homo sapiens
Caucasian
male
GSE8581GSM211872
E
non-small cell squamous cell carcinoma
2.61
70
74
70
not specified
Homo sapiens
Caucasian
male
GSE8581GSM212067
E
inflammation
4.04
79
55
69
control
Homo sapiens
Caucasian
male
GSE8581GSM212068
E
metastatic renal cell carcinoma
1.99
85
78
63
control
Homo sapiens
Caucasian
female
GSE8581GSM212069
E
no malignancy
1.6
77
81
66
not specified
Homo sapiens
Caucasian
female
GSE8581GSM212070
E
carcinoid
3.7
81.86
40
67
control
Homo sapiens
Caucasian
male
GSE8581GSM212074
E
non-small cell squamous cell carcinoma
2.06
54
64
65
chronic obstructive pulmonary disease
Homo sapiens
Caucasian
male
GSE8581GSM212075
E
non-small cell squamous cell carcinoma
1.3
78
79
63
chronic obstructive pulmonary disease
Homo sapiens
Afro American
female
GSE8581GSM212787
E
NSC-Mixed
3.66
78
78
70
control
Homo sapiens
Caucasian
male
GSE8581GSM212788
E
non-small cell squamous cell carcinoma
1.29
53
77
67
chronic obstructive pulmonary disease
Homo sapiens
Caucasian
female
GSE8581GSM212789
E
non-small cell squamous cell carcinoma
1.94
86
62
62
control
Homo sapiens
Caucasian
female
GSE8581GSM212790
E
non-small cell squamous cell carcinoma
2.83
75
54
75
control
Homo sapiens
Caucasian
female
GSE8581GSM212809
E
non-small cell adenocarcinoma
1.28
89
61
65
not specified
Homo sapiens
Caucasian
female
GSE8581GSM212810
E
non-small cell squamous cell carcinoma
0.88
52
52
66
chronic obstructive pulmonary disease
Homo sapiens
Caucasian
female
GSE8581GSM212811
E
non-small cell squamous cell carcinoma
1.37
82
77
58
control
Homo sapiens
Caucasian
female
GSE8581GSM212848
E
non-small cell squamous cell carcinoma
0.55
44
59
58
chronic obstructive pulmonary disease
Homo sapiens
Caucasian
female
GSE8581GSM212849
E
non-small cell adenocarcinoma
2.73
80
39
73
not specified
Homo sapiens
Caucasian
male
GSE8581GSM212850
E
carcinoid
2.32
76
71
69
not specified
Homo sapiens
Caucasian
male
GSE8581GSM212852
E
Giant Cell Tumor
0.99
63
72
60
chronic obstructive pulmonary disease
Homo sapiens
Caucasian
female
GSE8581GSM212853
E
lymphoma
2.42
80
71
66
control
Homo sapiens
Caucasian
female
GSE8581GSM212854
E
non-small cell adenocarcinoma
2.26
83
64
66
not specified
Homo sapiens
Caucasian
male
GSE8581GSM212855
E
non-small cell adenocarcinoma
2.43
69
73
68
not specified
Homo sapiens
Caucasian
male
GSE8581GSM213017
E
non-small cell adenocarcinoma
2.12
73
79
69
not specified
Homo sapiens
Caucasian
male
GSE8581GSM213018
E
non-small cell adenocarcinoma
2.34
81
82
68
not specified
Homo sapiens
Caucasian
male
GSE8581GSM213019
E
non-small cell adenocarcinoma
1.7
72
71
71
not specified
Homo sapiens
Caucasian
female
GSE8581GSM213020
E
non-small cell adenocarcinoma
0.67
42
75
63
chronic obstructive pulmonary disease
Homo sapiens
Caucasian
male
GSE8581GSM213034
E
non-small cell adenocarcinoma
2.56
61
70
69
not specified
Homo sapiens
Caucasian
male
GSE8581GSM213035
E
non-small cell squamous cell carcinoma
3.11
73
67
71
control
Homo sapiens
Caucasian
male
GSE8581GSM213036
E
non-small cell squamous cell carcinoma
2.68
71
59
69
control
Homo sapiens
Caucasian
female
GSE8581GSM213037
E
non-small cell squamous cell carcinoma
2.25
72
77
66
not specified
Homo sapiens
Caucasian
male
In [6]:
# Load the rpy2 IPython extension so that R cell magics (%%R) are available.
%load_ext rpy2.ipython
In [7]:
%%R -i dfp
# The pivoted columns arrive in R as factors (see the str() output), so
# plotting them directly would use factor level codes/categories rather than
# the measured values. Convert via character to recover the real numbers.
age <- as.numeric(as.character(dfp$X.Public.Studies.GSE8581.Subjects.Age.))
height <- as.numeric(as.character(dfp$X.Public.Studies.GSE8581.Subjects.Height..inch..))
plot(age, height, xlab = "Age", ylab = "Height (inch)")
str(dfp)
'data.frame': 58 obs. of 10 variables:
$ X.Public.Studies.GSE8581.Biomarker.Data.GPL570. : Factor w/ 2 levels "E","None": 2 2 2 1 1 1 1 1 1 1 ...
$ X.Public.Studies.GSE8581.Endpoints.Diagnosis. : Factor w/ 14 levels "carcinoid","emphysema",..: 12 11 11 11 11 12 11 12 11 11 ...
$ X.Public.Studies.GSE8581.Endpoints.FEV1. : Factor w/ 53 levels "0.4","0.52","0.55",..: 40 21 14 39 20 45 16 23 47 25 ...
$ X.Public.Studies.GSE8581.Endpoints.Forced.Expiratory.Volume.Ratio.: Factor w/ 40 levels "41.0","42.0",..: 13 37 6 32 12 24 23 11 20 28 ...
$ X.Public.Studies.GSE8581.Subjects.Age. : Factor w/ 31 levels "39.0","40.0",..: 14 31 16 3 6 6 26 9 12 21 ...
$ X.Public.Studies.GSE8581.Subjects.Height..inch.. : Factor w/ 15 levels "58.0","60.0",..: 13 2 7 7 6 5 4 13 10 4 ...
$ X.Public.Studies.GSE8581.Subjects.Lung.Disease. : Factor w/ 3 levels "chronic obstructive pulmonary disease",..: 1 2 1 3 1 2 3 1 3 2 ...
$ X.Public.Studies.GSE8581.Subjects.Organism. : Factor w/ 1 level "Homo sapiens": 1 1 1 1 1 1 1 1 1 1 ...
$ X.Public.Studies.GSE8581.Subjects.Race. : Factor w/ 2 levels "Afro American",..: 2 1 1 2 2 2 2 2 2 2 ...
$ X.Public.Studies.GSE8581.Subjects.Sex. : Factor w/ 2 levels "female","male": 2 1 1 2 1 1 1 2 2 1 ...
In [2]:
# Request the header and rows that get_hd_node_data returns for the 'Lung'
# node of study GSE8581.
# NOTE(review): the recorded run of this cell ended in a MemoryError raised
# inside transmart_api._parse_protobuf; the cells that follow rely on its result.
hdHeader, hdRows = api.get_hd_node_data(study='GSE8581', node_name='Lung')
---------------------------------------------------------------------------
MemoryError Traceback (most recent call last)
<ipython-input-2-bf1385ab0859> in <module>()
----> 1 (hdHeader, hdRows) = api.get_hd_node_data(study = 'GSE8581', node_name = 'Lung')
/home/vagrant/workspace/transmart_api.py in get_hd_node_data(self, study, node_name, genes)
77 hd_node_data_url = hd_node_data_url + \
78 '&' + urllib.urlencode({'dataConstraints': {'genes': [{'names': genes}]}})
---> 79 hd_data = self._get_protobuf(hd_node_data_url, self._get_access_token())
80 return hd_data
/home/vagrant/workspace/transmart_api.py in _get_protobuf(self, url, access_token)
50 headers['Authorization'] = 'Bearer ' + access_token
51 req = urllib2.Request(url, headers = headers)
---> 52 return self._parse_protobuf(urllib2.urlopen(req).read())
53
54 def _get_access_token(self):
/home/vagrant/workspace/transmart_api.py in _parse_protobuf(self, proto)
24
25 def _parse_protobuf(self, proto):
---> 26 proto = list(proto)
27 hdHeader = HighDimHeader()
28 (size, position) = decoder._DecodeVarint(proto, 0)
MemoryError:
In [ ]:
# List each column's name together with its type code.
# Type codes: 1 = double, 2 = string.
[(spec.name, spec.type) for spec in hdHeader.columnSpec]
In [ ]:
# Map every row label to the doubleValue stored at position 1 of the row's values.
# NOTE(review): index 1 is hard-coded; presumably it selects a double-typed
# column from columnSpec - confirm against the header.
hdDataDic = dict((row.label, row.value[1].doubleValue) for row in hdRows)
In [ ]:
from pandas import DataFrame

# Add the patient id of every assay as an extra column, then build a frame
# indexed by assay id from the collected per-label values.
hdDataDic['patientId'] = [entry.patientId for entry in hdHeader.assay]
assayIds = [entry.assayId for entry in hdHeader.assay]
DataFrame(hdDataDic, index=assayIds)
Content source: thehyve/transmart-api-training
Similar notebooks: