This notebook demonstrates verification of data loaded into HDFS.


In [1]:
# Create Spark SQL context
# NOTE(review): `sc` is not defined in any cell shown here -- presumably the
# SparkContext injected by the PySpark kernel/shell; confirm before running
# this notebook on a plain Python kernel.
import pyspark
sqc = pyspark.sql.SQLContext(sc)

In [2]:
# Read the Parquet dataset back from HDFS as a Spark DataFrame.
parquet_uri = "hdfs://localhost:9000/TiltSeries_NanoParticle_doi_10.1021-nl103400a.parq"
mdata = sqc.read.parquet(parquet_uri)

In [ ]:
# Convert to Pandas
# toPandas() materialises the whole Spark DataFrame as a local pandas
# DataFrame, so every later cell works on `pdata` in local memory.
pdata = mdata.toPandas()

In [34]:
# Print columns
# Displays the column index of `pdata` so the numbered data columns and the
# metadata columns can be checked against what was written to HDFS.
pdata.columns


Out[34]:
Index([u'index', u'0', u'1', u'2', u'3', u'4', u'5', u'6', u'7', u'8',
       ...
       u'249', u'250', u'251', u'252', u'253', u'254', u'255', u'tiltangles',
       u'dimensions', u'pixelsize'],
      dtype='object', length=260)

In [35]:
# Show rows 0..254 of the loaded frame (the end bound of iloc is exclusive,
# so 0:255 yields 255 rows) for a visual sanity check.
pdata.iloc[0:255]


Out[35]:
index 0 1 2 3 4 5 6 7 8 ... 249 250 251 252 253 254 255 tiltangles dimensions pixelsize
0 0 1 1 1 1 1 1 1 1 1 ... 1 1 1 1 1 1 1 0 [74, 256, 256] 10
1 1 1 1 1 1 1 1 1 1 1 ... 1 1 1 1 1 1 1 1 [74, 256, 256] 10
2 2 1 1 1 1 1 1 1 1 1 ... 1 1 1 1 1 1 1 2 [74, 256, 256] 10
3 3 1 1 1 1 1 1 1 1 1 ... 1 1 1 1 1 1 1 3 [74, 256, 256] 10
4 4 1 1 1 1 1 1 1 1 1 ... 1 1 1 1 1 1 1 4 [74, 256, 256] 10
5 5 1 1 1 1 1 1 1 1 1 ... 1 1 1 1 1 1 1 5 [74, 256, 256] 10
6 6 1 1 1 1 1 1 1 1 1 ... 1 1 1 1 1 1 1 6 [74, 256, 256] 10
7 7 1 1 1 1 1 1 1 1 1 ... 1 1 1 1 1 1 1 7 [74, 256, 256] 10
8 8 1 1 1 1 1 1 1 1 1 ... 1 1 1 1 1 1 1 8 [74, 256, 256] 10
9 9 1 1 1 1 1 1 1 1 1 ... 1 1 1 1 1 1 1 9 [74, 256, 256] 10
10 10 1 1 1 1 1 1 1 1 1 ... 1 1 1 1 1 1 1 10 [74, 256, 256] 10
11 11 1 1 1 1 1 1 1 1 1 ... 1 1 1 1 1 1 1 11 [74, 256, 256] 10
12 12 1 1 1 1 1 1 1 1 1 ... 1 1 1 1 1 1 1 12 [74, 256, 256] 10
13 13 1 1 1 1 1 1 1 1 1 ... 1 1 1 1 1 1 1 13 [74, 256, 256] 10
14 14 1 1 1 1 1 1 1 1 1 ... 1 1 1 1 1 1 1 14 [74, 256, 256] 10
15 15 1 1 1 1 1 1 1 1 1 ... 1 1 1 1 1 1 1 15 [74, 256, 256] 10
16 16 1 1 1 1 1 1 1 1 1 ... 1 1 1 1 1 1 1 16 [74, 256, 256] 10
17 17 1 1 1 1 1 1 1 1 1 ... 1 1 1 1 1 1 1 17 [74, 256, 256] 10
18 18 1 1 1 1 1 1 1 1 1 ... 1 1 1 1 1 1 1 18 [74, 256, 256] 10
19 19 1 1 1 1 1 1 1 1 1 ... 1 1 1 1 1 1 1 19 [74, 256, 256] 10
20 20 1 1 1 1 1 1 1 1 1 ... 1 1 1 1 1 1 1 20 [74, 256, 256] 10
21 21 1 1 1 1 1 1 1 1 1 ... 1 1 1 1 1 1 1 21 [74, 256, 256] 10
22 22 1 1 1 1 1 1 1 1 1 ... 1 1 1 1 1 1 1 22 [74, 256, 256] 10
23 23 1 1 1 1 1 1 1 1 1 ... 1 1 1 1 1 1 1 23 [74, 256, 256] 10
24 24 1 1 1 1 1 1 1 1 1 ... 1 1 1 1 1 1 1 24 [74, 256, 256] 10
25 25 1 1 1 1 1 1 1 1 1 ... 1 1 1 1 1 1 1 25 [74, 256, 256] 10
26 26 1 1 1 1 1 1 1 1 1 ... 1 1 1 1 1 1 1 26 [74, 256, 256] 10
27 27 1 1 1 1 1 1 1 1 1 ... 1 1 1 1 1 1 1 27 [74, 256, 256] 10
28 28 1 1 1 1 1 1 1 1 1 ... 1 1 1 1 1 1 1 28 [74, 256, 256] 10
29 29 1 1 1 1 1 1 1 1 1 ... 1 1 1 1 1 1 1 29 [74, 256, 256] 10
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
225 225 2 1 2 1 1 1 2 2 2 ... 2 1 1 2 2 2 1 3 [74, 256, 256] 10
226 226 2 2 1 2 2 2 1 1 1 ... 2 2 2 2 2 2 2 4 [74, 256, 256] 10
227 227 2 1 1 2 2 1 1 1 1 ... 2 2 2 2 2 2 2 5 [74, 256, 256] 10
228 228 2 1 2 1 2 1 1 2 2 ... 2 2 2 2 2 2 2 6 [74, 256, 256] 10
229 229 1 1 2 2 1 1 2 2 2 ... 2 1 2 2 2 2 2 7 [74, 256, 256] 10
230 230 2 2 2 2 1 2 1 2 1 ... 2 2 2 2 2 2 1 8 [74, 256, 256] 10
231 231 2 2 2 2 1 2 1 1 2 ... 2 2 2 2 2 1 1 9 [74, 256, 256] 10
232 232 1 1 2 2 1 2 1 1 2 ... 2 2 2 2 1 2 1 10 [74, 256, 256] 10
233 233 1 1 2 1 1 1 1 1 1 ... 1 2 2 1 1 1 2 11 [74, 256, 256] 10
234 234 2 1 2 1 1 1 2 1 2 ... 2 2 1 2 1 1 2 12 [74, 256, 256] 10
235 235 1 2 2 2 2 1 2 1 2 ... 1 1 1 1 2 1 2 13 [74, 256, 256] 10
236 236 1 1 1 2 2 2 1 1 2 ... 1 1 1 1 2 1 2 14 [74, 256, 256] 10
237 237 1 1 1 1 2 2 2 1 2 ... 2 2 2 2 2 2 2 15 [74, 256, 256] 10
238 238 2 2 2 2 1 2 2 2 2 ... 2 2 1 2 2 2 1 16 [74, 256, 256] 10
239 239 2 1 1 2 2 2 2 2 2 ... 2 2 2 2 2 2 2 17 [74, 256, 256] 10
240 240 1 1 1 1 2 1 1 1 2 ... 2 2 2 1 1 1 1 18 [74, 256, 256] 10
241 241 2 2 2 1 2 1 1 1 2 ... 2 2 2 1 1 2 2 19 [74, 256, 256] 10
242 242 2 2 2 2 2 1 1 2 1 ... 2 2 2 2 2 2 2 20 [74, 256, 256] 10
243 243 2 2 2 2 2 2 1 2 2 ... 1 2 2 2 2 2 2 21 [74, 256, 256] 10
244 244 2 2 2 2 2 2 2 2 2 ... 2 2 2 1 1 2 2 22 [74, 256, 256] 10
245 245 2 2 2 2 2 2 2 2 2 ... 2 2 2 2 2 2 2 23 [74, 256, 256] 10
246 246 2 2 2 2 2 2 2 2 2 ... 2 2 2 2 2 2 2 24 [74, 256, 256] 10
247 247 1 1 1 1 2 2 2 2 2 ... 2 2 2 2 2 2 2 25 [74, 256, 256] 10
248 248 2 2 1 1 2 2 2 1 2 ... 2 2 2 2 1 2 2 26 [74, 256, 256] 10
249 249 1 1 1 2 2 2 2 1 1 ... 1 1 2 2 2 2 2 27 [74, 256, 256] 10
250 250 2 1 1 2 2 2 2 1 1 ... 2 2 2 2 1 1 2 28 [74, 256, 256] 10
251 251 1 2 2 2 2 1 1 1 2 ... 1 1 2 2 1 2 2 29 [74, 256, 256] 10
252 252 2 2 1 1 1 1 1 1 1 ... 2 1 1 2 1 2 2 30 [74, 256, 256] 10
253 253 2 2 2 2 2 1 1 1 1 ... 1 2 1 1 1 2 2 31 [74, 256, 256] 10
254 254 1 1 1 1 2 1 1 1 1 ... 2 2 2 2 2 2 2 32 [74, 256, 256] 10

255 rows × 260 columns


In [36]:
# Strip the non-pixel metadata columns so that only the numbered data columns
# remain in `pdata` for plotting.
# Only columns that are still present are dropped and `pdata` is rebound
# rather than mutated in place, so re-running this cell is a no-op instead of
# raising KeyError on the already-removed columns.
metadata_columns = ['index', 'tiltangles', 'pixelsize', 'dimensions']
pdata = pdata.drop([col for col in metadata_columns if col in pdata.columns], axis=1)

In [37]:
# Render the first 16 slices of `pdata` as images for a visual check.
# Each slice is taken as SLICE_ROWS consecutive rows of the frame
# (assumes one image is 256 rows, matching the 256 numbered columns and the
# [74, 256, 256] value shown in the 'dimensions' column -- TODO confirm).
import matplotlib.pyplot as plt
import matplotlib.image as mpimg  # unused in this cell; kept in case later cells rely on it

N_SLICES = 16
SLICE_ROWS = 256

for i in range(N_SLICES):
    # Rows [i*SLICE_ROWS, (i+1)*SLICE_ROWS). The previous upper bound of
    # (i+1)*255 made every slice one row shorter than the one before it and
    # left the slices misaligned with the 256-row stride used for the start.
    slice_rows = pdata.iloc[i * SLICE_ROWS:(i + 1) * SLICE_ROWS]
    # Pass the underlying ndarray so imshow gets a plain 2-D array.
    plt.imshow(slice_rows.values)
    plt.title("Slice %d" % i)
    plt.show()



In [ ]: