In [1]:
# Create the Spark SQL context that the following cells use to read data.
# NOTE(review): `sc` is not defined anywhere in this notebook - presumably it is
# the SparkContext injected by a PySpark-enabled kernel/shell; confirm before
# running this notebook in a plain Python kernel.
import pyspark
sqc = pyspark.sql.SQLContext(sc)
In [2]:
# Read the tilt-series Parquet dataset from the local HDFS namenode into a
# Spark DataFrame. The location is named separately so it is easy to spot
# and change.
parquet_uri = "hdfs://localhost:9000/TiltSeries_NanoParticle_doi_10.1021-nl103400a.parq"
mdata = sqc.read.parquet(parquet_uri)
In [ ]:
# Convert the Spark DataFrame into a pandas DataFrame.
# toPandas() collects every row onto the driver, so the whole dataset has to
# fit in the local Python process's memory.
pdata = mdata.toPandas()
In [34]:
# Show the column labels of the pandas frame. Per the output below there are
# 260 of them: 'index', the string labels '0'..'255', and the metadata columns
# 'tiltangles', 'dimensions' and 'pixelsize'.
pdata.columns
Out[34]:
Index([u'index', u'0', u'1', u'2', u'3', u'4', u'5', u'6', u'7', u'8',
...
u'249', u'250', u'251', u'252', u'253', u'254', u'255', u'tiltangles',
u'dimensions', u'pixelsize'],
dtype='object', length=260)
In [35]:
# Preview the first 255 rows (positions 0-254) of the frame.
# NOTE(review): the plotting loop further down strides through the frame in
# steps of 256 rows; if this preview was meant to show one full slice, 255 is
# one row short - confirm the intended count.
pdata.head(255)
Out[35]:
index
0
1
2
3
4
5
6
7
8
...
249
250
251
252
253
254
255
tiltangles
dimensions
pixelsize
0
0
1
1
1
1
1
1
1
1
1
...
1
1
1
1
1
1
1
0
[74, 256, 256]
10
1
1
1
1
1
1
1
1
1
1
1
...
1
1
1
1
1
1
1
1
[74, 256, 256]
10
2
2
1
1
1
1
1
1
1
1
1
...
1
1
1
1
1
1
1
2
[74, 256, 256]
10
3
3
1
1
1
1
1
1
1
1
1
...
1
1
1
1
1
1
1
3
[74, 256, 256]
10
4
4
1
1
1
1
1
1
1
1
1
...
1
1
1
1
1
1
1
4
[74, 256, 256]
10
5
5
1
1
1
1
1
1
1
1
1
...
1
1
1
1
1
1
1
5
[74, 256, 256]
10
6
6
1
1
1
1
1
1
1
1
1
...
1
1
1
1
1
1
1
6
[74, 256, 256]
10
7
7
1
1
1
1
1
1
1
1
1
...
1
1
1
1
1
1
1
7
[74, 256, 256]
10
8
8
1
1
1
1
1
1
1
1
1
...
1
1
1
1
1
1
1
8
[74, 256, 256]
10
9
9
1
1
1
1
1
1
1
1
1
...
1
1
1
1
1
1
1
9
[74, 256, 256]
10
10
10
1
1
1
1
1
1
1
1
1
...
1
1
1
1
1
1
1
10
[74, 256, 256]
10
11
11
1
1
1
1
1
1
1
1
1
...
1
1
1
1
1
1
1
11
[74, 256, 256]
10
12
12
1
1
1
1
1
1
1
1
1
...
1
1
1
1
1
1
1
12
[74, 256, 256]
10
13
13
1
1
1
1
1
1
1
1
1
...
1
1
1
1
1
1
1
13
[74, 256, 256]
10
14
14
1
1
1
1
1
1
1
1
1
...
1
1
1
1
1
1
1
14
[74, 256, 256]
10
15
15
1
1
1
1
1
1
1
1
1
...
1
1
1
1
1
1
1
15
[74, 256, 256]
10
16
16
1
1
1
1
1
1
1
1
1
...
1
1
1
1
1
1
1
16
[74, 256, 256]
10
17
17
1
1
1
1
1
1
1
1
1
...
1
1
1
1
1
1
1
17
[74, 256, 256]
10
18
18
1
1
1
1
1
1
1
1
1
...
1
1
1
1
1
1
1
18
[74, 256, 256]
10
19
19
1
1
1
1
1
1
1
1
1
...
1
1
1
1
1
1
1
19
[74, 256, 256]
10
20
20
1
1
1
1
1
1
1
1
1
...
1
1
1
1
1
1
1
20
[74, 256, 256]
10
21
21
1
1
1
1
1
1
1
1
1
...
1
1
1
1
1
1
1
21
[74, 256, 256]
10
22
22
1
1
1
1
1
1
1
1
1
...
1
1
1
1
1
1
1
22
[74, 256, 256]
10
23
23
1
1
1
1
1
1
1
1
1
...
1
1
1
1
1
1
1
23
[74, 256, 256]
10
24
24
1
1
1
1
1
1
1
1
1
...
1
1
1
1
1
1
1
24
[74, 256, 256]
10
25
25
1
1
1
1
1
1
1
1
1
...
1
1
1
1
1
1
1
25
[74, 256, 256]
10
26
26
1
1
1
1
1
1
1
1
1
...
1
1
1
1
1
1
1
26
[74, 256, 256]
10
27
27
1
1
1
1
1
1
1
1
1
...
1
1
1
1
1
1
1
27
[74, 256, 256]
10
28
28
1
1
1
1
1
1
1
1
1
...
1
1
1
1
1
1
1
28
[74, 256, 256]
10
29
29
1
1
1
1
1
1
1
1
1
...
1
1
1
1
1
1
1
29
[74, 256, 256]
10
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
225
225
2
1
2
1
1
1
2
2
2
...
2
1
1
2
2
2
1
3
[74, 256, 256]
10
226
226
2
2
1
2
2
2
1
1
1
...
2
2
2
2
2
2
2
4
[74, 256, 256]
10
227
227
2
1
1
2
2
1
1
1
1
...
2
2
2
2
2
2
2
5
[74, 256, 256]
10
228
228
2
1
2
1
2
1
1
2
2
...
2
2
2
2
2
2
2
6
[74, 256, 256]
10
229
229
1
1
2
2
1
1
2
2
2
...
2
1
2
2
2
2
2
7
[74, 256, 256]
10
230
230
2
2
2
2
1
2
1
2
1
...
2
2
2
2
2
2
1
8
[74, 256, 256]
10
231
231
2
2
2
2
1
2
1
1
2
...
2
2
2
2
2
1
1
9
[74, 256, 256]
10
232
232
1
1
2
2
1
2
1
1
2
...
2
2
2
2
1
2
1
10
[74, 256, 256]
10
233
233
1
1
2
1
1
1
1
1
1
...
1
2
2
1
1
1
2
11
[74, 256, 256]
10
234
234
2
1
2
1
1
1
2
1
2
...
2
2
1
2
1
1
2
12
[74, 256, 256]
10
235
235
1
2
2
2
2
1
2
1
2
...
1
1
1
1
2
1
2
13
[74, 256, 256]
10
236
236
1
1
1
2
2
2
1
1
2
...
1
1
1
1
2
1
2
14
[74, 256, 256]
10
237
237
1
1
1
1
2
2
2
1
2
...
2
2
2
2
2
2
2
15
[74, 256, 256]
10
238
238
2
2
2
2
1
2
2
2
2
...
2
2
1
2
2
2
1
16
[74, 256, 256]
10
239
239
2
1
1
2
2
2
2
2
2
...
2
2
2
2
2
2
2
17
[74, 256, 256]
10
240
240
1
1
1
1
2
1
1
1
2
...
2
2
2
1
1
1
1
18
[74, 256, 256]
10
241
241
2
2
2
1
2
1
1
1
2
...
2
2
2
1
1
2
2
19
[74, 256, 256]
10
242
242
2
2
2
2
2
1
1
2
1
...
2
2
2
2
2
2
2
20
[74, 256, 256]
10
243
243
2
2
2
2
2
2
1
2
2
...
1
2
2
2
2
2
2
21
[74, 256, 256]
10
244
244
2
2
2
2
2
2
2
2
2
...
2
2
2
1
1
2
2
22
[74, 256, 256]
10
245
245
2
2
2
2
2
2
2
2
2
...
2
2
2
2
2
2
2
23
[74, 256, 256]
10
246
246
2
2
2
2
2
2
2
2
2
...
2
2
2
2
2
2
2
24
[74, 256, 256]
10
247
247
1
1
1
1
2
2
2
2
2
...
2
2
2
2
2
2
2
25
[74, 256, 256]
10
248
248
2
2
1
1
2
2
2
1
2
...
2
2
2
2
1
2
2
26
[74, 256, 256]
10
249
249
1
1
1
2
2
2
2
1
1
...
1
1
2
2
2
2
2
27
[74, 256, 256]
10
250
250
2
1
1
2
2
2
2
1
1
...
2
2
2
2
1
1
2
28
[74, 256, 256]
10
251
251
1
2
2
2
2
1
1
1
2
...
1
1
2
2
1
2
2
29
[74, 256, 256]
10
252
252
2
2
1
1
1
1
1
1
1
...
2
1
1
2
1
2
2
30
[74, 256, 256]
10
253
253
2
2
2
2
2
1
1
1
1
...
1
2
1
1
1
2
2
31
[74, 256, 256]
10
254
254
1
1
1
1
2
1
1
1
1
...
2
2
2
2
2
2
2
32
[74, 256, 256]
10
255 rows × 260 columns
In [36]:
# Strip the non-pixel columns so that only the numbered pixel columns remain
# for plotting.
# The result is rebound rather than dropped in place, and errors='ignore'
# skips labels that are already gone, so re-running this cell is a harmless
# no-op instead of raising on the missing columns.
pdata = pdata.drop(['index', 'tiltangles', 'pixelsize', 'dimensions'], axis=1, errors='ignore')
In [37]:
# Display the first 16 slices (i = 0..15) of the stacked frame, one image each.
# The previous comment said 15, but the loop has always run 16 times; the
# iteration count is kept as it was.
# Imports are done once, up front, rather than on every loop iteration.
import matplotlib.pyplot as plt
import matplotlib.image as mpimg  # not used in this cell; kept in case other cells rely on the name

# Consecutive slices start 256 rows apart (the stride the original start
# offset i*256 already used).
# NOTE(review): presumably this matches the 256 in `dimensions` = [74, 256, 256] - confirm.
ROWS_PER_SLICE = 256
N_SLICES = 16

for i in range(N_SLICES):
    start = i * ROWS_PER_SLICE
    # The stop bound used to be (i + 1) * 255, which made every slice one row
    # shorter than the last (255, 254, 253, ...) and misaligned with the
    # 256-row stride. Each slice now covers exactly ROWS_PER_SLICE rows.
    stop = start + ROWS_PER_SLICE
    # Named frame_slice rather than `slice` to avoid shadowing the builtin.
    frame_slice = pdata.iloc[start:stop]
    plt.imshow(frame_slice)
    plt.show()
In [ ]:
Content source: OpenDataAnalytics/etl
Similar notebooks: