In [16]:
import pymongo
import os,csv,sys,glob
import openslide
import dsa_mongo_common_functions as dsa
import cdsa_loader_helper_functions as cdsa_helpers
import pprint
client = pymongo.MongoClient('localhost',27017)
dsa_load_errors_db = client['DSA_LoadErrors']

In [14]:
XML_LocalRoot = '/home/dagutman/devel/ADRCPathViewer_2.0/static/'

xml_files = glob.glob(XML_LocalRoot+'xmls/*/*.xml')
#print xml_files

xml_data_set = {}

for x in xml_files:
    xobj = {}
    xobj['localPath'] = x
    xobj['webPath'] = x.replace(XML_LocalRoot,'')
    slideName = os.path.basename(x).replace('.xml','')
    xobj['slideName'] = slideName
    xml_data_set[slideName] = xobj

In [17]:
bsd = { 'slideID': '222222', 'slideName': '1000676.svs'}


dsa_load_errors_db['BadSlides'].insert_one(bsd)


Out[17]:
<pymongo.results.InsertOneResult at 0x7f4dd005d0f0>

In [6]:
dsa_load_errors_db['BadSlides'].distinct('slideName')


Out[6]:
[u'1000676.svs']

In [18]:
## This is specific to a given fle system and or structurme
slide_root = '/GLOBAL_SCRATCH/ADRC/'  ##Base Path for Slides
dsa_slide_db = client['ADRC_DSA_Slide_DB']  ### These need to be configured for the specific project

### To generalize this, need to describe organization, most common will be  PATIENT/STAIN_TYPE as subdirectories
subj_dir_list = [x for x in os.listdir(slide_root) if os.path.isdir(os.path.join(slide_root,x))]
print len(subj_dir_list),"Potential Patient directories were identified"


104 Potential Patient directories were identified

In [3]:
dsa_slide_db.collection_names()


Out[3]:
[u'system.indexes', u'RawSlideData', u'DSA_Slide_Data']

In [8]:
def find_rawslide_lists( slide_root_path ):
        """project_name is passed along with the potentially more than one root image path for ndpi files"""
        slide_files = []

        slide_root_path  = slide_root_path.rstrip('/')
        print slide_root_path
        for dpath, dnames, fnames in os.walk( slide_root_path, followlinks=True):
                
                for file in fnames:
                    if '.ndpi' in file or '.svs' in file:
                                slide_files.append(dpath +'/'+file)
        print len(slide_files),"SVS or NDPI files were located"
        return slide_files

In [9]:
### So this creates a document WITHIN the current database to store raw information about the slides
## Now that I think about this, I really should not store the filename but the File Hash as I.. want to change the filenames

for sd in subj_dir_list:
    print sd,"is being processed",
    curr_svs_slide_list = find_rawslide_lists(  os.path.join(slide_root,sd)  )
    slides_processed = newly_processed = dup_slide = rescanned_slides =  0
    for sld in curr_svs_slide_list:

        slide_name = os.path.basename(sld)
        qry = dsa_slide_db['RawSlideData'].find_one( {'slide_name':slide_name})
        #print qry
        if not qry:
            fs = os.path.getsize(sld)
            #md5Checksum = dsa.md5sum(sld)
            (openslide_could_open, width, height, filesize, orig_resolution, slide_name,md5, sld_properties) = cdsa_helpers.openslide_test_file_mongo( sld, 'ndpi', client)
            if openslide_could_open:
                prep_type = 'Unknown'
                slide_metadata = { 'slide_w_path': sld, 'slide_name': slide_name, 'file_size':fs, 'width':width, 'height':height,
                                 'orig_resolution': orig_resolution, 'sld_properties': cdsa_helpers.clean_openslide_keys ( sld_properties), 'slide_md5': md5, 'prep_type': prep_type
                                 }
                dsa_slide_db['RawSlideData'].insert_one(slide_metadata)
                newly_processed +=1 
            else:
                print "UNABLE TO OPEN FILE??",sld
                ###Need to flag/load this in to an error database
        else:
            fs = os.path.getsize(sld)
            ### I had some weird fringe cases where the filesize was different.. but the filename was the same... so I needed to flag these
            ### This was the least expensive operation I had to find a "weird" file..
        slides_processed +=1        
        output = "Total Processed: %d  Newly Processed: %d Dup Slides or Already Loaded: %d  RESCANNED Slides %d" % (slides_processed, newly_processed, dup_slide, rescanned_slides )
        dsa.LinePrinter(output)


ADRC61-124 is being processed /GLOBAL_SCRATCH/ADRC/ADRC61-124
106 SVS or NDPI files were located
Total Processed: 106  Newly Processed: 0 Dup Slides or Already Loaded: 0  RESCANNED Slides 0ADRC50-10 is being processed /GLOBAL_SCRATCH/ADRC/ADRC50-10
58 SVS or NDPI files were located
Total Processed: 58  Newly Processed: 0 Dup Slides or Already Loaded: 0  RESCANNED Slides 0ADRC61-128 is being processed /GLOBAL_SCRATCH/ADRC/ADRC61-128
99 SVS or NDPI files were located
Total Processed: 99  Newly Processed: 0 Dup Slides or Already Loaded: 0  RESCANNED Slides 0ADRC50-40 is being processed /GLOBAL_SCRATCH/ADRC/ADRC50-40
59 SVS or NDPI files were located
Total Processed: 59  Newly Processed: 0 Dup Slides or Already Loaded: 0  RESCANNED Slides 0ADRC51-60 is being processed /GLOBAL_SCRATCH/ADRC/ADRC51-60
10 SVS or NDPI files were located
Total Processed: 10  Newly Processed: 0 Dup Slides or Already Loaded: 0  RESCANNED Slides 0@eaDir is being processed /GLOBAL_SCRATCH/ADRC/@eaDir
0 SVS or NDPI files were located
ADRC60-150 is being processed /GLOBAL_SCRATCH/ADRC/ADRC60-150
112 SVS or NDPI files were located
Total Processed: 112  Newly Processed: 0 Dup Slides or Already Loaded: 0  RESCANNED Slides 0ADRC61-120 is being processed /GLOBAL_SCRATCH/ADRC/ADRC61-120
2 SVS or NDPI files were located
Total Processed: 2  Newly Processed: 0 Dup Slides or Already Loaded: 0  RESCANNED Slides 0ADRC57-85 is being processed /GLOBAL_SCRATCH/ADRC/ADRC57-85
25 SVS or NDPI files were located
Total Processed: 25  Newly Processed: 0 Dup Slides or Already Loaded: 0  RESCANNED Slides 0ADRC55-200 is being processed /GLOBAL_SCRATCH/ADRC/ADRC55-200
14 SVS or NDPI files were located
Total Processed: 14  Newly Processed: 0 Dup Slides or Already Loaded: 0  RESCANNED Slides 0ADRC49-07 is being processed /GLOBAL_SCRATCH/ADRC/ADRC49-07
70 SVS or NDPI files were located
Total Processed: 70  Newly Processed: 0 Dup Slides or Already Loaded: 0  RESCANNED Slides 0ADRC50-11 is being processed /GLOBAL_SCRATCH/ADRC/ADRC50-11
1 SVS or NDPI files were located
Total Processed: 1  Newly Processed: 0 Dup Slides or Already Loaded: 0  RESCANNED Slides 0ADRC51-2 is being processed /GLOBAL_SCRATCH/ADRC/ADRC51-2
1 SVS or NDPI files were located
Total Processed: 1  Newly Processed: 0 Dup Slides or Already Loaded: 0  RESCANNED Slides 0ADRC51-134 is being processed /GLOBAL_SCRATCH/ADRC/ADRC51-134
11 SVS or NDPI files were located
Total Processed: 11  Newly Processed: 0 Dup Slides or Already Loaded: 0  RESCANNED Slides 0.ipynb_checkpoints is being processed /GLOBAL_SCRATCH/ADRC/.ipynb_checkpoints
0 SVS or NDPI files were located
SamMarch20 is being processed /GLOBAL_SCRATCH/ADRC/SamMarch20
28 SVS or NDPI files were located
Total Processed: 28  Newly Processed: 0 Dup Slides or Already Loaded: 0  RESCANNED Slides 0ADRC61-139 is being processed /GLOBAL_SCRATCH/ADRC/ADRC61-139
91 SVS or NDPI files were located
Total Processed: 91  Newly Processed: 0 Dup Slides or Already Loaded: 0  RESCANNED Slides 0ADRC61-125 is being processed /GLOBAL_SCRATCH/ADRC/ADRC61-125
150 SVS or NDPI files were located
Total Processed: 150  Newly Processed: 0 Dup Slides or Already Loaded: 0  RESCANNED Slides 0ADRC53-300 is being processed /GLOBAL_SCRATCH/ADRC/ADRC53-300
1 SVS or NDPI files were located
Total Processed: 1  Newly Processed: 0 Dup Slides or Already Loaded: 0  RESCANNED Slides 0ADRC56-112 is being processed /GLOBAL_SCRATCH/ADRC/ADRC56-112
21 SVS or NDPI files were located
Total Processed: 21  Newly Processed: 0 Dup Slides or Already Loaded: 0  RESCANNED Slides 0ADRC43-24 is being processed /GLOBAL_SCRATCH/ADRC/ADRC43-24
1 SVS or NDPI files were located
Total Processed: 1  Newly Processed: 0 Dup Slides or Already Loaded: 0  RESCANNED Slides 0ADRC61-73 is being processed /GLOBAL_SCRATCH/ADRC/ADRC61-73
89 SVS or NDPI files were located
Total Processed: 89  Newly Processed: 0 Dup Slides or Already Loaded: 0  RESCANNED Slides 0ADRC62-74 is being processed /GLOBAL_SCRATCH/ADRC/ADRC62-74
112 SVS or NDPI files were located
Total Processed: 47  Newly Processed: 0 Dup Slides or Already Loaded: 0  RESCANNED Slides 0Openslide returned an error /GLOBAL_SCRATCH/ADRC/ADRC62-74/Biels/ADRC62-74_A4_biels.ndpi
Openslide returned an error /GLOBAL_SCRATCH/ADRC/ADRC62-74/Biels/ADRC62-74_A4_biels.ndpi
SHIT IT DIED!
UNABLE TO OPEN FILE?? /GLOBAL_SCRATCH/ADRC/ADRC62-74/Biels/ADRC62-74_A4_biels.ndpi
Total Processed: 112  Newly Processed: 0 Dup Slides or Already Loaded: 0  RESCANNED Slides 0ADRC61-84 is being processed /GLOBAL_SCRATCH/ADRC/ADRC61-84
104 SVS or NDPI files were located
Total Processed: 103  Newly Processed: 0 Dup Slides or Already Loaded: 0  RESCANNED Slides 0
Verify failed with: (u"Can't validate JPEG for directory 0: Expected marker at 4294972165, found none",)
Verify failed with:
Openslide returned an error /GLOBAL_SCRATCH/ADRC/ADRC61-84/Tau/ADRC61-84_A15_US-tau.ndpi
Openslide returned an error /GLOBAL_SCRATCH/ADRC/ADRC61-84/Tau/ADRC61-84_A15_US-tau.ndpi
SHIT IT DIED!
UNABLE TO OPEN FILE?? /GLOBAL_SCRATCH/ADRC/ADRC61-84/Tau/ADRC61-84_A15_US-tau.ndpi
Total Processed: 104  Newly Processed: 0 Dup Slides or Already Loaded: 0  RESCANNED Slides 0ADRC62-14 is being processed /GLOBAL_SCRATCH/ADRC/ADRC62-14
1 SVS or NDPI files were located
Total Processed: 1  Newly Processed: 0 Dup Slides or Already Loaded: 0  RESCANNED Slides 0ADRC60-179 is being processed /GLOBAL_SCRATCH/ADRC/ADRC60-179
96 SVS or NDPI files were located
Total Processed: 96  Newly Processed: 0 Dup Slides or Already Loaded: 0  RESCANNED Slides 0ADRC49-04 is being processed /GLOBAL_SCRATCH/ADRC/ADRC49-04
18 SVS or NDPI files were located
Total Processed: 18  Newly Processed: 0 Dup Slides or Already Loaded: 0  RESCANNED Slides 0ADRC56-97 is being processed /GLOBAL_SCRATCH/ADRC/ADRC56-97
116 SVS or NDPI files were located
Total Processed: 116  Newly Processed: 0 Dup Slides or Already Loaded: 0  RESCANNED Slides 0ADRC51-06 is being processed /GLOBAL_SCRATCH/ADRC/ADRC51-06
64 SVS or NDPI files were located
Total Processed: 26  Newly Processed: 0 Dup Slides or Already Loaded: 0  RESCANNED Slides 0
 (u"Can't validate JPEG for directory 0: Expected marker at 4294972345, found none",)
Verify failed with:
Openslide returned an error /GLOBAL_SCRATCH/ADRC/ADRC51-06/BIELS/ADRC51-06_10_BIELS.ndpi
Openslide returned an error /GLOBAL_SCRATCH/ADRC/ADRC51-06/BIELS/ADRC51-06_10_BIELS.ndpi
SHIT IT DIED!
UNABLE TO OPEN FILE?? /GLOBAL_SCRATCH/ADRC/ADRC51-06/BIELS/ADRC51-06_10_BIELS.ndpi
Total Processed: 27  Newly Processed: 0 Dup Slides or Already Loaded: 0  RESCANNED Slides 0Openslide returned an error /GLOBAL_SCRATCH/ADRC/ADRC51-06/BIELS/ADRC51-06_8_BIELS.ndpi
Openslide returned an error /GLOBAL_SCRATCH/ADRC/ADRC51-06/BIELS/ADRC51-06_8_BIELS.ndpi
SHIT IT DIED!
UNABLE TO OPEN FILE?? /GLOBAL_SCRATCH/ADRC/ADRC51-06/BIELS/ADRC51-06_8_BIELS.ndpi
Total Processed: 37  Newly Processed: 0 Dup Slides or Already Loaded: 0  RESCANNED Slides 0Openslide returned an error /GLOBAL_SCRATCH/ADRC/ADRC51-06/To_Be_Sorted/ADRC51-06_8_S-M.ndpi
Openslide returned an error /GLOBAL_SCRATCH/ADRC/ADRC51-06/To_Be_Sorted/ADRC51-06_8_S-M.ndpi
SHIT IT DIED!
UNABLE TO OPEN FILE?? /GLOBAL_SCRATCH/ADRC/ADRC51-06/To_Be_Sorted/ADRC51-06_8_S-M.ndpi
Total Processed: 64  Newly Processed: 0 Dup Slides or Already Loaded: 0  RESCANNED Slides 0ADRC36-04 is being processed /GLOBAL_SCRATCH/ADRC/ADRC36-04
3 SVS or NDPI files were located
Total Processed: 3  Newly Processed: 0 Dup Slides or Already Loaded: 0  RESCANNED Slides 0NeuroPathology is being processed /GLOBAL_SCRATCH/ADRC/NeuroPathology
291 SVS or NDPI files were located
Total Processed: 207  Newly Processed: 207 Dup Slides or Already Loaded: 0  RESCANNED Slides 0
 (u"Can't validate JPEG for directory 0: Expected marker at 4294972181, found none",)
Verify failed with: (u"Can't validate JPEG for directory 0: Expected marker at 4294972180, found none",)
Verify failed with: (u"Can't validate JPEG for directory 0: Expected marker at 4294972358, found none",)
Verify failed with:
Openslide returned an error om tje StandardError block /GLOBAL_SCRATCH/ADRC/NeuroPathology/2015-12-18/1000871.svs
SHIT IT DIED!
insert into `corrupt_or_unreadable_ndpi_files` (full_file_name,filesize) Values ('/GLOBAL_SCRATCH/ADRC/NeuroPathology/2015-12-18/1000871.svs',4748214272) 
UNABLE TO OPEN FILE?? /GLOBAL_SCRATCH/ADRC/NeuroPathology/2015-12-18/1000871.svs
Total Processed: 291  Newly Processed: 289 Dup Slides or Already Loaded: 0  RESCANNED Slides 0ADRC59-164 is being processed /GLOBAL_SCRATCH/ADRC/ADRC59-164
115 SVS or NDPI files were located
Total Processed: 80  Newly Processed: 0 Dup Slides or Already Loaded: 0  RESCANNED Slides 0
 ("cannot identify image file '/GLOBAL_SCRATCH/ADRC/NeuroPathology/2015-12-18/1000871.svs'",)
Verify failed with:
Openslide returned an error /GLOBAL_SCRATCH/ADRC/ADRC59-164/To_Be_Sorted/ADRC59-164_1J.ndpi
Openslide returned an error /GLOBAL_SCRATCH/ADRC/ADRC59-164/To_Be_Sorted/ADRC59-164_1J.ndpi
SHIT IT DIED!
UNABLE TO OPEN FILE?? /GLOBAL_SCRATCH/ADRC/ADRC59-164/To_Be_Sorted/ADRC59-164_1J.ndpi
Total Processed: 115  Newly Processed: 0 Dup Slides or Already Loaded: 0  RESCANNED Slides 0ADRC56-18 is being processed /GLOBAL_SCRATCH/ADRC/ADRC56-18
78 SVS or NDPI files were located
Total Processed: 78  Newly Processed: 0 Dup Slides or Already Loaded: 0  RESCANNED Slides 0ADRC59-81 is being processed /GLOBAL_SCRATCH/ADRC/ADRC59-81
126 SVS or NDPI files were located
Total Processed: 126  Newly Processed: 0 Dup Slides or Already Loaded: 0  RESCANNED Slides 0ADRC62-110 is being processed /GLOBAL_SCRATCH/ADRC/ADRC62-110
1 SVS or NDPI files were located
Total Processed: 1  Newly Processed: 0 Dup Slides or Already Loaded: 0  RESCANNED Slides 0ADRC61-143 is being processed /GLOBAL_SCRATCH/ADRC/ADRC61-143
106 SVS or NDPI files were located
Total Processed: 106  Newly Processed: 0 Dup Slides or Already Loaded: 0  RESCANNED Slides 0ADRC47-60 is being processed /GLOBAL_SCRATCH/ADRC/ADRC47-60
20 SVS or NDPI files were located
Total Processed: 20  Newly Processed: 0 Dup Slides or Already Loaded: 0  RESCANNED Slides 0ADRC62-6 is being processed /GLOBAL_SCRATCH/ADRC/ADRC62-6
92 SVS or NDPI files were located
Total Processed: 92  Newly Processed: 0 Dup Slides or Already Loaded: 0  RESCANNED Slides 0
 (u"Can't validate JPEG for directory 0: Expected marker at 4294972165, found none",)
Verify failed with:
ADRC60-48 is being processed /GLOBAL_SCRATCH/ADRC/ADRC60-48
96 SVS or NDPI files were located
Openslide returned an error /GLOBAL_SCRATCH/ADRC/ADRC60-48/ADRC60-48_1B_AB.svs
Openslide returned an error /GLOBAL_SCRATCH/ADRC/ADRC60-48/ADRC60-48_1B_AB.svs
SHIT IT DIED!
UNABLE TO OPEN FILE?? /GLOBAL_SCRATCH/ADRC/ADRC60-48/ADRC60-48_1B_AB.svs
Total Processed: 96  Newly Processed: 0 Dup Slides or Already Loaded: 0  RESCANNED Slides 0ADRC61-97 is being processed /GLOBAL_SCRATCH/ADRC/ADRC61-97
98 SVS or NDPI files were located
Total Processed: 98  Newly Processed: 0 Dup Slides or Already Loaded: 0  RESCANNED Slides 0ADRC62-64 is being processed /GLOBAL_SCRATCH/ADRC/ADRC62-64
26 SVS or NDPI files were located
Total Processed: 26  Newly Processed: 0 Dup Slides or Already Loaded: 0  RESCANNED Slides 0ADRC54-125 is being processed /GLOBAL_SCRATCH/ADRC/ADRC54-125
56 SVS or NDPI files were located
Total Processed: 56  Newly Processed: 0 Dup Slides or Already Loaded: 0  RESCANNED Slides 0newcerebptstoload is being processed /GLOBAL_SCRATCH/ADRC/newcerebptstoload
19 SVS or NDPI files were located
Total Processed: 19  Newly Processed: 0 Dup Slides or Already Loaded: 0  RESCANNED Slides 0ADRC61-71 is being processed /GLOBAL_SCRATCH/ADRC/ADRC61-71
105 SVS or NDPI files were located
Total Processed: 105  Newly Processed: 0 Dup Slides or Already Loaded: 0  RESCANNED Slides 0ADRC60-110 is being processed /GLOBAL_SCRATCH/ADRC/ADRC60-110
87 SVS or NDPI files were located
Total Processed: 87  Newly Processed: 0 Dup Slides or Already Loaded: 0  RESCANNED Slides 0ADRC61-20 is being processed /GLOBAL_SCRATCH/ADRC/ADRC61-20
91 SVS or NDPI files were located
Total Processed: 91  Newly Processed: 0 Dup Slides or Already Loaded: 0  RESCANNED Slides 0ADRC56-06 is being processed /GLOBAL_SCRATCH/ADRC/ADRC56-06
129 SVS or NDPI files were located
Total Processed: 129  Newly Processed: 0 Dup Slides or Already Loaded: 0  RESCANNED Slides 0ADRC53-55 is being processed /GLOBAL_SCRATCH/ADRC/ADRC53-55
11 SVS or NDPI files were located
Total Processed: 11  Newly Processed: 0 Dup Slides or Already Loaded: 0  RESCANNED Slides 0ADRC59-8 is being processed /GLOBAL_SCRATCH/ADRC/ADRC59-8
90 SVS or NDPI files were located
Total Processed: 90  Newly Processed: 0 Dup Slides or Already Loaded: 0  RESCANNED Slides 0ADRC60-137 is being processed /GLOBAL_SCRATCH/ADRC/ADRC60-137
86 SVS or NDPI files were located
Total Processed: 86  Newly Processed: 0 Dup Slides or Already Loaded: 0  RESCANNED Slides 0ADRC60-160 is being processed /GLOBAL_SCRATCH/ADRC/ADRC60-160
118 SVS or NDPI files were located
Total Processed: 118  Newly Processed: 0 Dup Slides or Already Loaded: 0  RESCANNED Slides 0ADRC61-70 is being processed /GLOBAL_SCRATCH/ADRC/ADRC61-70
99 SVS or NDPI files were located
Total Processed: 99  Newly Processed: 0 Dup Slides or Already Loaded: 0  RESCANNED Slides 0ADRC61-75 is being processed /GLOBAL_SCRATCH/ADRC/ADRC61-75
100 SVS or NDPI files were located
Total Processed: 100  Newly Processed: 0 Dup Slides or Already Loaded: 0  RESCANNED Slides 0ADRC55-100 is being processed /GLOBAL_SCRATCH/ADRC/ADRC55-100
1 SVS or NDPI files were located
Total Processed: 1  Newly Processed: 0 Dup Slides or Already Loaded: 0  RESCANNED Slides 0ADRC56-160 is being processed /GLOBAL_SCRATCH/ADRC/ADRC56-160
22 SVS or NDPI files were located
Total Processed: 22  Newly Processed: 0 Dup Slides or Already Loaded: 0  RESCANNED Slides 0ADRC51-10 is being processed /GLOBAL_SCRATCH/ADRC/ADRC51-10
2 SVS or NDPI files were located
Total Processed: 2  Newly Processed: 0 Dup Slides or Already Loaded: 0  RESCANNED Slides 0ADRC39-44 is being processed /GLOBAL_SCRATCH/ADRC/ADRC39-44
8 SVS or NDPI files were located
Total Processed: 8  Newly Processed: 0 Dup Slides or Already Loaded: 0  RESCANNED Slides 0ADRC58-146 is being processed /GLOBAL_SCRATCH/ADRC/ADRC58-146
15 SVS or NDPI files were located
Total Processed: 15  Newly Processed: 0 Dup Slides or Already Loaded: 0  RESCANNED Slides 0ADRC60-88 is being processed /GLOBAL_SCRATCH/ADRC/ADRC60-88
85 SVS or NDPI files were located
Total Processed: 85  Newly Processed: 0 Dup Slides or Already Loaded: 0  RESCANNED Slides 0ADRC61-41 is being processed /GLOBAL_SCRATCH/ADRC/ADRC61-41
98 SVS or NDPI files were located
Total Processed: 98  Newly Processed: 0 Dup Slides or Already Loaded: 0  RESCANNED Slides 0ADRC62-62 is being processed /GLOBAL_SCRATCH/ADRC/ADRC62-62
53 SVS or NDPI files were located
Total Processed: 18  Newly Processed: 0 Dup Slides or Already Loaded: 0  RESCANNED Slides 0
 (u'Unsupported TIFF compression: 33005',)
Verify failed with:
Openslide returned an error /GLOBAL_SCRATCH/ADRC/ADRC62-62/Biels/ADRC62-62_A1_BIELS.ndpi
Openslide returned an error /GLOBAL_SCRATCH/ADRC/ADRC62-62/Biels/ADRC62-62_A1_BIELS.ndpi
SHIT IT DIED!
UNABLE TO OPEN FILE?? /GLOBAL_SCRATCH/ADRC/ADRC62-62/Biels/ADRC62-62_A1_BIELS.ndpi
Total Processed: 53  Newly Processed: 0 Dup Slides or Already Loaded: 0  RESCANNED Slides 0ADRC61-112 is being processed /GLOBAL_SCRATCH/ADRC/ADRC61-112
84 SVS or NDPI files were located
Total Processed: 28  Newly Processed: 0 Dup Slides or Already Loaded: 0  RESCANNED Slides 0
 (u"Can't validate JPEG for directory 0: Expected marker at 4294972165, found none",)
Verify failed with:
Openslide returned an error /GLOBAL_SCRATCH/ADRC/ADRC61-112/Biels/ADRC61-112_A6_biels.ndpi
Openslide returned an error /GLOBAL_SCRATCH/ADRC/ADRC61-112/Biels/ADRC61-112_A6_biels.ndpi
SHIT IT DIED!
UNABLE TO OPEN FILE?? /GLOBAL_SCRATCH/ADRC/ADRC61-112/Biels/ADRC61-112_A6_biels.ndpi
Total Processed: 84  Newly Processed: 0 Dup Slides or Already Loaded: 0  RESCANNED Slides 0ADRC55-194 is being processed /GLOBAL_SCRATCH/ADRC/ADRC55-194
82 SVS or NDPI files were located
Total Processed: 82  Newly Processed: 0 Dup Slides or Already Loaded: 0  RESCANNED Slides 0ADRC60-83 is being processed /GLOBAL_SCRATCH/ADRC/ADRC60-83
1 SVS or NDPI files were located
Total Processed: 1  Newly Processed: 0 Dup Slides or Already Loaded: 0  RESCANNED Slides 0ADRC50-41 is being processed /GLOBAL_SCRATCH/ADRC/ADRC50-41
3 SVS or NDPI files were located
Total Processed: 3  Newly Processed: 0 Dup Slides or Already Loaded: 0  RESCANNED Slides 0ADRC60-38 is being processed /GLOBAL_SCRATCH/ADRC/ADRC60-38
120 SVS or NDPI files were located
Total Processed: 120  Newly Processed: 0 Dup Slides or Already Loaded: 0  RESCANNED Slides 0ADRC61-60 is being processed /GLOBAL_SCRATCH/ADRC/ADRC61-60
95 SVS or NDPI files were located
Total Processed: 95  Newly Processed: 0 Dup Slides or Already Loaded: 0  RESCANNED Slides 0ADRC62-32 is being processed /GLOBAL_SCRATCH/ADRC/ADRC62-32
111 SVS or NDPI files were located
Total Processed: 111  Newly Processed: 0 Dup Slides or Already Loaded: 0  RESCANNED Slides 0ADRC54-76 is being processed /GLOBAL_SCRATCH/ADRC/ADRC54-76
11 SVS or NDPI files were located
Total Processed: 11  Newly Processed: 0 Dup Slides or Already Loaded: 0  RESCANNED Slides 0NickSlidesFeb20 is being processed /GLOBAL_SCRATCH/ADRC/NickSlidesFeb20
0 SVS or NDPI files were located
ADRC59-35 is being processed /GLOBAL_SCRATCH/ADRC/ADRC59-35
12 SVS or NDPI files were located
Total Processed: 12  Newly Processed: 0 Dup Slides or Already Loaded: 0  RESCANNED Slides 0ADRC47-61 is being processed /GLOBAL_SCRATCH/ADRC/ADRC47-61
3 SVS or NDPI files were located
Total Processed: 3  Newly Processed: 0 Dup Slides or Already Loaded: 0  RESCANNED Slides 0ADRC55-108 is being processed /GLOBAL_SCRATCH/ADRC/ADRC55-108
16 SVS or NDPI files were located
Total Processed: 16  Newly Processed: 0 Dup Slides or Already Loaded: 0  RESCANNED Slides 0ADRC61-83 is being processed /GLOBAL_SCRATCH/ADRC/ADRC61-83
61 SVS or NDPI files were located
Total Processed: 61  Newly Processed: 0 Dup Slides or Already Loaded: 0  RESCANNED Slides 0ADRC51-94 is being processed /GLOBAL_SCRATCH/ADRC/ADRC51-94
7 SVS or NDPI files were located
Total Processed: 7  Newly Processed: 0 Dup Slides or Already Loaded: 0  RESCANNED Slides 0ADRC59-91 is being processed /GLOBAL_SCRATCH/ADRC/ADRC59-91
111 SVS or NDPI files were located
Total Processed: 23  Newly Processed: 0 Dup Slides or Already Loaded: 0  RESCANNED Slides 0
 (u"Can't validate JPEG for directory 0: Expected marker at 4294972004, found none",)
/home/dagutman/anaconda2/envs/ADRCFlask/lib/python2.7/site-packages/PIL/TiffImagePlugin.py:694: UserWarning: Corrupt EXIF data.  Expecting to read 2 bytes but only got 0. 
  warnings.warn(str(msg))
Openslide returned an error om tje StandardError block /GLOBAL_SCRATCH/ADRC/ADRC59-91/HE/ADRC59-91_1F_HE.ndpi
SHIT IT DIED!
insert into `corrupt_or_unreadable_ndpi_files` (full_file_name,filesize) Values ('/GLOBAL_SCRATCH/ADRC/ADRC59-91/HE/ADRC59-91_1F_HE.ndpi',2710044672) 
UNABLE TO OPEN FILE?? /GLOBAL_SCRATCH/ADRC/ADRC59-91/HE/ADRC59-91_1F_HE.ndpi
Total Processed: 111  Newly Processed: 0 Dup Slides or Already Loaded: 0  RESCANNED Slides 0ADRC58-119 is being processed /GLOBAL_SCRATCH/ADRC/ADRC58-119
25 SVS or NDPI files were located
Total Processed: 25  Newly Processed: 0 Dup Slides or Already Loaded: 0  RESCANNED Slides 0ADRC53-95 is being processed /GLOBAL_SCRATCH/ADRC/ADRC53-95
13 SVS or NDPI files were located
Total Processed: 13  Newly Processed: 0 Dup Slides or Already Loaded: 0  RESCANNED Slides 0ADRC53-385 is being processed /GLOBAL_SCRATCH/ADRC/ADRC53-385
7 SVS or NDPI files were located
Total Processed: 7  Newly Processed: 0 Dup Slides or Already Loaded: 0  RESCANNED Slides 0ADRC60-151 is being processed /GLOBAL_SCRATCH/ADRC/ADRC60-151
109 SVS or NDPI files were located
Total Processed: 109  Newly Processed: 0 Dup Slides or Already Loaded: 0  RESCANNED Slides 0ADRC56-52 is being processed /GLOBAL_SCRATCH/ADRC/ADRC56-52
10 SVS or NDPI files were located
Total Processed: 10  Newly Processed: 0 Dup Slides or Already Loaded: 0  RESCANNED Slides 0ADRC60-81 is being processed /GLOBAL_SCRATCH/ADRC/ADRC60-81
205 SVS or NDPI files were located
Total Processed: 86  Newly Processed: 0 Dup Slides or Already Loaded: 0  RESCANNED Slides 0
Verify failed with: ("cannot identify image file '/GLOBAL_SCRATCH/ADRC/ADRC59-91/HE/ADRC59-91_1F_HE.ndpi'",)
Verify failed with:
Openslide returned an error /GLOBAL_SCRATCH/ADRC/ADRC60-81/HE/CHECK_FOR_DUPS/A10-81_1F_BLANK.ndpi
Openslide returned an error /GLOBAL_SCRATCH/ADRC/ADRC60-81/HE/CHECK_FOR_DUPS/A10-81_1F_BLANK.ndpi
SHIT IT DIED!
UNABLE TO OPEN FILE?? /GLOBAL_SCRATCH/ADRC/ADRC60-81/HE/CHECK_FOR_DUPS/A10-81_1F_BLANK.ndpi
Total Processed: 205  Newly Processed: 0 Dup Slides or Already Loaded: 0  RESCANNED Slides 0ADRC50-21 is being processed /GLOBAL_SCRATCH/ADRC/ADRC50-21
16 SVS or NDPI files were located
Total Processed: 16  Newly Processed: 0 Dup Slides or Already Loaded: 0  RESCANNED Slides 0ADRC50-14 is being processed /GLOBAL_SCRATCH/ADRC/ADRC50-14
49 SVS or NDPI files were located
Total Processed: 49  Newly Processed: 0 Dup Slides or Already Loaded: 0  RESCANNED Slides 0ADRC50-19 is being processed /GLOBAL_SCRATCH/ADRC/ADRC50-19
19 SVS or NDPI files were located
Total Processed: 19  Newly Processed: 0 Dup Slides or Already Loaded: 0  RESCANNED Slides 0ADRC60-56 is being processed /GLOBAL_SCRATCH/ADRC/ADRC60-56
94 SVS or NDPI files were located
Total Processed: 94  Newly Processed: 0 Dup Slides or Already Loaded: 0  RESCANNED Slides 0ADRC51-152 is being processed /GLOBAL_SCRATCH/ADRC/ADRC51-152
7 SVS or NDPI files were located
Total Processed: 7  Newly Processed: 0 Dup Slides or Already Loaded: 0  RESCANNED Slides 0ADRC61-59 is being processed /GLOBAL_SCRATCH/ADRC/ADRC61-59
92 SVS or NDPI files were located
Total Processed: 92  Newly Processed: 0 Dup Slides or Already Loaded: 0  RESCANNED Slides 0ADRC58-162 is being processed /GLOBAL_SCRATCH/ADRC/ADRC58-162
10 SVS or NDPI files were located
Total Processed: 10  Newly Processed: 0 Dup Slides or Already Loaded: 0  RESCANNED Slides 0ADRC54-99 is being processed /GLOBAL_SCRATCH/ADRC/ADRC54-99
13 SVS or NDPI files were located
Total Processed: 13  Newly Processed: 0 Dup Slides or Already Loaded: 0  RESCANNED Slides 0ADRC62-85 is being processed /GLOBAL_SCRATCH/ADRC/ADRC62-85
113 SVS or NDPI files were located
Total Processed: 44  Newly Processed: 0 Dup Slides or Already Loaded: 0  RESCANNED Slides 0
 (u"Can't validate JPEG for directory 0: Expected marker at 4294971805, found none",)
Verify failed with:
Openslide returned an error /GLOBAL_SCRATCH/ADRC/ADRC62-85/Biels/ADRC62-85_A11_biels.ndpi
Openslide returned an error /GLOBAL_SCRATCH/ADRC/ADRC62-85/Biels/ADRC62-85_A11_biels.ndpi
SHIT IT DIED!
UNABLE TO OPEN FILE?? /GLOBAL_SCRATCH/ADRC/ADRC62-85/Biels/ADRC62-85_A11_biels.ndpi
Total Processed: 46  Newly Processed: 0 Dup Slides or Already Loaded: 0  RESCANNED Slides 0Openslide returned an error /GLOBAL_SCRATCH/ADRC/ADRC62-85/Biels/ADRC62-85_A1_biels_ihp.ndpi
Openslide returned an error /GLOBAL_SCRATCH/ADRC/ADRC62-85/Biels/ADRC62-85_A1_biels_ihp.ndpi
SHIT IT DIED!
UNABLE TO OPEN FILE?? /GLOBAL_SCRATCH/ADRC/ADRC62-85/Biels/ADRC62-85_A1_biels_ihp.ndpi
Total Processed: 113  Newly Processed: 0 Dup Slides or Already Loaded: 0  RESCANNED Slides 0Neuro_Degen_Training_Slides is being processed /GLOBAL_SCRATCH/ADRC/Neuro_Degen_Training_Slides
152 SVS or NDPI files were located
Total Processed: 143  Newly Processed: 0 Dup Slides or Already Loaded: 0  RESCANNED Slides 0
 (u"Can't validate JPEG for directory 0: Expected marker at 4294971985, found none",)
Verify failed with: (u"Can't validate JPEG for directory 0: Expected marker at 4294972165, found none",)
Verify failed with:
Openslide returned an error /GLOBAL_SCRATCH/ADRC/Neuro_Degen_Training_Slides/PSP-27.ndpi
Openslide returned an error /GLOBAL_SCRATCH/ADRC/Neuro_Degen_Training_Slides/PSP-27.ndpi
SHIT IT DIED!
UNABLE TO OPEN FILE?? /GLOBAL_SCRATCH/ADRC/Neuro_Degen_Training_Slides/PSP-27.ndpi
Total Processed: 152  Newly Processed: 0 Dup Slides or Already Loaded: 0  RESCANNED Slides 0ADRC59-155 is being processed /GLOBAL_SCRATCH/ADRC/ADRC59-155
96 SVS or NDPI files were located
Total Processed: 96  Newly Processed: 0 Dup Slides or Already Loaded: 0  RESCANNED Slides 0ADRC61-81 is being processed /GLOBAL_SCRATCH/ADRC/ADRC61-81
86 SVS or NDPI files were located
Total Processed: 86  Newly Processed: 0 Dup Slides or Already Loaded: 0  RESCANNED Slides 0ADRC62-120 is being processed /GLOBAL_SCRATCH/ADRC/ADRC62-120
106 SVS or NDPI files were located
Total Processed: 10  Newly Processed: 0 Dup Slides or Already Loaded: 0  RESCANNED Slides 0
 (u"Can't validate JPEG for directory 0: Expected marker at 4294971812, found none",)
Verify failed with:
Openslide returned an error /GLOBAL_SCRATCH/ADRC/ADRC62-120/Uncategorized/ADRC62-120_A4_blank.ndpi
Openslide returned an error /GLOBAL_SCRATCH/ADRC/ADRC62-120/Uncategorized/ADRC62-120_A4_blank.ndpi
SHIT IT DIED!
UNABLE TO OPEN FILE?? /GLOBAL_SCRATCH/ADRC/ADRC62-120/Uncategorized/ADRC62-120_A4_blank.ndpi
Total Processed: 51  Newly Processed: 0 Dup Slides or Already Loaded: 0  RESCANNED Slides 0
 (u"Can't validate JPEG for directory 0: Expected marker at 4294972165, found none",)
Verify failed with:
Openslide returned an error /GLOBAL_SCRATCH/ADRC/ADRC62-120/Biels/ADRC62-120_A3_BIELS.ndpi
Openslide returned an error /GLOBAL_SCRATCH/ADRC/ADRC62-120/Biels/ADRC62-120_A3_BIELS.ndpi
SHIT IT DIED!
UNABLE TO OPEN FILE?? /GLOBAL_SCRATCH/ADRC/ADRC62-120/Biels/ADRC62-120_A3_BIELS.ndpi
Total Processed: 106  Newly Processed: 0 Dup Slides or Already Loaded: 0  RESCANNED Slides 0ADRC54-155 is being processed /GLOBAL_SCRATCH/ADRC/ADRC54-155
25 SVS or NDPI files were located
Total Processed: 25  Newly Processed: 0 Dup Slides or Already Loaded: 0  RESCANNED Slides 0Miscellaneous is being processed /GLOBAL_SCRATCH/ADRC/Miscellaneous
31 SVS or NDPI files were located
Total Processed: 31  Newly Processed: 0 Dup Slides or Already Loaded: 0  RESCANNED Slides 0ADRC60-129 is being processed /GLOBAL_SCRATCH/ADRC/ADRC60-129
112 SVS or NDPI files were located
Total Processed: 112  Newly Processed: 0 Dup Slides or Already Loaded: 0  RESCANNED Slides 0ADRC53-138 is being processed /GLOBAL_SCRATCH/ADRC/ADRC53-138
8 SVS or NDPI files were located
Total Processed: 8  Newly Processed: 0 Dup Slides or Already Loaded: 0  RESCANNED Slides 0ADRC57-48 is being processed /GLOBAL_SCRATCH/ADRC/ADRC57-48
23 SVS or NDPI files were located
Total Processed: 23  Newly Processed: 0 Dup Slides or Already Loaded: 0  RESCANNED Slides 0ADRC60-63 is being processed /GLOBAL_SCRATCH/ADRC/ADRC60-63
82 SVS or NDPI files were located
Total Processed: 82  Newly Processed: 0 Dup Slides or Already Loaded: 0  RESCANNED Slides 0NickMarch18 is being processed /GLOBAL_SCRATCH/ADRC/NickMarch18
0 SVS or NDPI files were located
ADRC54-48 is being processed /GLOBAL_SCRATCH/ADRC/ADRC54-48
1 SVS or NDPI files were located
Total Processed: 1  Newly Processed: 0 Dup Slides or Already Loaded: 0  RESCANNED Slides 0ADRC51-33 is being processed /GLOBAL_SCRATCH/ADRC/ADRC51-33
16 SVS or NDPI files were located
Total Processed: 16  Newly Processed: 0 Dup Slides or Already Loaded: 0  RESCANNED Slides 0
 (u"Can't validate JPEG for directory 0: Expected marker at 4294972345, found none",)

In [ ]:
#dsa_slide_db['DSA_Slide_Data'].delete_many({})
#print dsa_slide_db['DSA_Slide_Data'].count()

In [ ]:
### I now want to reformat all of this data to make it more useful for DSA ...

In [25]:
## Since this is pretty easy to recreate, I'm going to empty the current database
dsa_slide_db['DSA_Slide_Data'].delete_many({}) ### DELETE THE CURRENT DSA COLLECTION SINCE I AM GOING TO UPDATE IT
all_slides = dsa_slide_db['RawSlideData'].find()


FacetSearchFields = {}
FacetSearchFields['Annotated'] = False
FacetSearchFields['Annotated'] = False


for s in all_slides:
    slide_dict = {}
    #pt_id = s['slide_w_path'].split('/')[-1]
    slideGroup = s['slide_w_path'].split('/')[-3]  ### THIS WILL ALMOST CERTAINLY CHANGE FOR EACH IMAGE SET
    pt_id = s['slide_w_path'].split('/')[-1].split('.')[0]
    
    stain_type = 'UNK'
    stain_type = s['slide_w_path'].split('/')[-2]

    slide_dict = s.copy()  ### BEWARE!!!! I WAS originally passing a reference not a copy
    slide_dict.pop('_id',None)
    
    slide_dict['pt_id'] = pt_id
    slide_dict['stain_type'] = stain_type
    ### Obfuscating the global file path so everything is relative to some base path for the archive/
    slide_dict['thumbnail_image'] = '/thumbnail/' + s['slide_w_path'].replace(slide_root,'')
    slide_dict['iip_slide_w_path'] = "/fcgi-bin/iipsrvOpenslide.fcgi?DeepZoom=" +s['slide_w_path']+'.dzi'

    slide_dict['slideLabel_image'] = '/macroImage/' + s['slide_w_path'].replace(slide_root,'')
    
    slide_dict['slide_w_path'] = '/DZIMS/' + s['slide_w_path'].replace(slide_root,'')+'.dzi'
    slide_dict['slideGroup'] = slideGroup
    slide_dict['FacetSet'] = FacetSearchFields 
    slide_dict['slideLabel_image'] = '/labelImage/' + s['slide_w_path'].replace(slide_root,'')

    slide_dict['iip_thumbnail'] =  "/fcgi-bin/iipsrvOpenslide.fcgi?FIF=" +s['slide_w_path']+'&WID=200&CVT=jpeg'
    ## See fi I can find an aperioFile to match
    
    sln = slide_dict['slide_name'].replace('.ndpi','')
    if sln in xml_data_set:

        slide_dict['HasAperioXML'] = True
        slide_dict['AperioXMLUrl'] = xml_data_set[sln]['webPath']
    else:
        slide_dict['HasAperioXML'] = False


    
    dsa_slide_db['DSA_Slide_Data'].insert_one(slide_dict)

In [ ]:


In [23]:
dsa_slide_db['DSA_Slide_Data'].find_one({'HasAperioXML':True})


Out[23]:
{u'AperioXMLUrl': u'xmls/ADRC60-129/ADRC60-129_1F_Abeta.xml',
 u'FacetSet': {u'Annotated': False},
 u'HasAperioXML': True,
 u'_id': ObjectId('5755e6012f9b2e52995970a5'),
 u'file_size': 1688107315,
 u'height': 81408,
 u'iip_slide_w_path': u'/fcgi-bin/iipsrvOpenslide.fcgi?DeepZoom=/GLOBAL_SCRATCH/ADRC/ADRC60-129/aBeta/ADRC60-129_1F_Abeta.ndpi.dzi',
 u'orig_resolution': 40,
 u'prep_type': u'Unknown',
 u'pt_id': u'ADRC60-129_1F_Abeta',
 u'sld_properties': {u'hamamatsu_AHEX[0]': u'74270C57AC1B78130C3FAC0B781B083FAC0D7421083FAC0F70210847A40D703F086BA83370270847A41170230043A0137027084BA01B701F0843A4116C270C47A417781B0C47A80B742B1047AC177823104FAC197833104FA8237447186FB0396CB0',
  u'hamamatsu_AHEX[0]_fluorescence': u'004F0057005B0037003F00470043003F0047004B003F004B004F004B004F00730073007F0057004B004F004F0043004F004F004B0053004700430047004B004B004F003F00470043005700470053004F00570057005F005700630073007B00830F60',
  u'hamamatsu_AHEX[0]_ploidy': u'004F0057005B0037003F00470043003F0047004B003F004B004F004B004F00730073007F0057004B004F004F0043004F004F004B0053004700430047004B004B004F003F00470043005700470053004F00570057005F005700630073007B00830F60',
  u'hamamatsu_AHEX[1]': u'7C2B1857B41D8413183FB40B801F143FB40F8023143FB0137C231447AC0F78411867B0337C271447AC1378271043A815782B144BA81B7C1F1843AC1378271847AC19801B1847AC0D802F2047B01B84272053B01D84371C53B02780472473B43D54FE',
  u'hamamatsu_AHEX[1]_fluorescence': u'98271C5BB42398131C43B413981B1447B41794211447B0179021144FAC17903D146FB03B9027104FAC1990231047A81B9023104FA81F901D1447AC178C23144FAC1D941B144BAC1394251C47B01B981F1C53B01F982F1C53B027943F2473B43F954E',
  u'hamamatsu_AHEX[1]_ploidy': u'98271C5BB42398131C43B413981B1447B41794211447B0179021144FAC17903D146FB03B9027104FAC1990231047A81B9023104FA81F901D1447AC178C23144FAC1D941B144BAC1394251C47B01B981F1C53B01F982F1C53B027943F2473B43F954E',
  u'hamamatsu_AHEX[2]': u'004F0057005B0037003F00470043003F0047004B003F004B004F004B004F00730073007F0057004B004F004F0043004F004F004B0053004700430047004B004B004F003F00470043005700470053004F00570057005F005700630073007B00830F60',
  u'hamamatsu_AHEX[2]_fluorescence': u'004F0057005B0037003F00470043003F0047004B003F004B004F004B004F00730073007F0057004B004F004F0043004F004F004B0053004700430047004B004B004F003F00470043005700470053004F00570057005F005700630073007B00830F60',
  u'hamamatsu_AHEX[2]_ploidy': u'004F0057005B0037003F00470043003F0047004B003F004B004F004B004F00730073007F0057004B004F004F0043004F004F004B0053004700430047004B004B004F003F00470043005700470053004F00570057005F005700630073007B00830F60',
  u'hamamatsu_Created': u'2009/11/10',
  u'hamamatsu_MHLN[0]_24bit': u'_1357CJJHqKOOTjx',
  u'hamamatsu_MHLN[0]_36bit': u'_1357GGEHqKOOTjx',
  u'hamamatsu_MHLN[1]_24bit': u'_1357FHEHqKOOTjx',
  u'hamamatsu_MHLN[1]_36bit': u'_1359DEEHqKOOTjx',
  u'hamamatsu_MHLN[2]_24bit': u'_1357CJJHqKOOTjx',
  u'hamamatsu_MHLN[2]_36bit': u'_1357DEEHqKOOTjx',
  u'hamamatsu_Macro_S/N': u'1Y0257',
  u'hamamatsu_NDP_S/N': u'1Y0203',
  u'hamamatsu_Objective_Lens_Magnificant': u'35.16',
  u'hamamatsu_PSHV': u'228',
  u'hamamatsu_PSHV_10x': u'170',
  u'hamamatsu_PSHV_40x': u'228',
  u'hamamatsu_PSHV_ploidy': u'200',
  u'hamamatsu_PSHV_ploidy_10x': u'140',
  u'hamamatsu_Product': u'C9600-12',
  u'hamamatsu_Reference': u'A10-129-1F_Abeta_10.27.10',
  u'hamamatsu_SourceLens': u'40',
  u'hamamatsu_Updated': u'2011/12/15',
  u'hamamatsu_XOffsetFromSlideCentre': u'8563374',
  u'hamamatsu_YOffsetFromSlideCentre': u'818385',
  u'hamamatsu_YRNP[0]': u'0,0,0,0',
  u'hamamatsu_YRNP[1]': u'0,0,0,0',
  u'hamamatsu_YRNP[2]': u'0,0,0,0',
  u'hamamatsu_calibration_version': u'0',
  u'hamamatsu_ccd_height': u'0',
  u'hamamatsu_ccd_width': u'9280',
  u'hamamatsu_ccd_width_ploidy': u'9302',
  u'hamamatsu_coarse_focus_pitch': u'4870',
  u'hamamatsu_colorfilterID': u'0',
  u'hamamatsu_cube_kind': u'0',
  u'hamamatsu_exposure_barcode_macro': u'320',
  u'hamamatsu_exposure_slide_darkfield_macro': u'10',
  u'hamamatsu_exposure_slide_macro': u'20',
  u'hamamatsu_fine_focus_pitch': u'250',
  u'hamamatsu_focalplane_leftbottom': u'92401,691978,112623',
  u'hamamatsu_focalplane_lefttop': u'92401,491978,112204',
  u'hamamatsu_focalplane_rightbottom': u'492401,691978,113325',
  u'hamamatsu_focalplane_righttop': u'492401,491978,112970',
  u'hamamatsu_lane_shift_amount': u'1',
  u'hamamatsu_roi_barcode_macro': u'978,272,1258,594',
  u'hamamatsu_roi_slide_macro': u'54,243,1261,656',
  u'hamamatsu_slant_leftbottom': u'91970,693576,112627',
  u'hamamatsu_slant_lefttop': u'91970,493576,112169',
  u'hamamatsu_slant_rightbottom': u'491970,493576,112974',
  u'hamamatsu_slant_righttop': u'491970,693576,113367',
  u'hamamatsu_slide_tickness': u'0',
  u'hamamatsu_stage_center': u'182401,591978',
  u'hamamatsu_system_version': u'1.0',
  u'hamamatsu_target_white_intensity': u'235',
  u'hamamatsu_valid_DDKP': u'1',
  u'hamamatsu_valid_DLTP': u'0',
  u'hamamatsu_valid_DSHP': u'1',
  u'hamamatsu_variable_exposuretime': u'0',
  u'hamamatsu_zCoarse[0]': u'1,7392,0,100',
  u'hamamatsu_zCoarse[1]': u'1,1946,0,100',
  u'hamamatsu_zCoarse[2]': u'0,0,0,0',
  u'hamamatsu_zFine[0]': u'1,295680,0,100',
  u'hamamatsu_zFine[1]': u'1,77824,0,100',
  u'hamamatsu_zFine[2]': u'0,0,0,0',
  u'openslide_level-count': u'9',
  u'openslide_level[0]_downsample': u'1',
  u'openslide_level[0]_height': u'81408',
  u'openslide_level[0]_tile-height': u'8',
  u'openslide_level[0]_tile-width': u'3968',
  u'openslide_level[0]_width': u'134912',
  u'openslide_level[1]_downsample': u'2',
  u'openslide_level[1]_height': u'40704',
  u'openslide_level[1]_tile-height': u'4',
  u'openslide_level[1]_tile-width': u'1984',
  u'openslide_level[1]_width': u'67456',
  u'openslide_level[2]_downsample': u'4',
  u'openslide_level[2]_height': u'20352',
  u'openslide_level[2]_tile-height': u'8',
  u'openslide_level[2]_tile-width': u'992',
  u'openslide_level[2]_width': u'33728',
  u'openslide_level[3]_downsample': u'8',
  u'openslide_level[3]_height': u'10176',
  u'openslide_level[3]_tile-height': u'4',
  u'openslide_level[3]_tile-width': u'496',
  u'openslide_level[3]_width': u'16864',
  u'openslide_level[4]_downsample': u'16',
  u'openslide_level[4]_height': u'5088',
  u'openslide_level[4]_tile-height': u'8',
  u'openslide_level[4]_tile-width': u'248',
  u'openslide_level[4]_width': u'8432',
  u'openslide_level[5]_downsample': u'32',
  u'openslide_level[5]_height': u'2544',
  u'openslide_level[5]_tile-height': u'4',
  u'openslide_level[5]_tile-width': u'124',
  u'openslide_level[5]_width': u'4216',
  u'openslide_level[6]_downsample': u'64',
  u'openslide_level[6]_height': u'1272',
  u'openslide_level[6]_tile-height': u'1272',
  u'openslide_level[6]_tile-width': u'2108',
  u'openslide_level[6]_width': u'2108',
  u'openslide_level[7]_downsample': u'128',
  u'openslide_level[7]_height': u'636',
  u'openslide_level[7]_tile-height': u'636',
  u'openslide_level[7]_tile-width': u'1054',
  u'openslide_level[7]_width': u'1054',
  u'openslide_level[8]_downsample': u'256',
  u'openslide_level[8]_height': u'318',
  u'openslide_level[8]_tile-height': u'318',
  u'openslide_level[8]_tile-width': u'527',
  u'openslide_level[8]_width': u'527',
  u'openslide_mpp-x': u'0.22656727915354463',
  u'openslide_mpp-y': u'0.22753128555176336',
  u'openslide_objective-power': u'40',
  u'openslide_quickhash-1': u'935e559d105f411b47674905ec903b94532af1c66af4949de9bd29515915cf40',
  u'openslide_vendor': u'hamamatsu',
  u'tiff_DateTime': u'2012:06:19 18:53:45',
  u'tiff_Make': u'Hamamatsu',
  u'tiff_Model': u'C9600-12',
  u'tiff_ResolutionUnit': u'centimeter',
  u'tiff_Software': u'NDP.scan 2.3.25',
  u'tiff_XResolution': u'44137',
  u'tiff_YResolution': u'43950'},
 u'slideGroup': u'ADRC60-129',
 u'slideLabel_image': u'/labelImage/ADRC60-129/aBeta/ADRC60-129_1F_Abeta.ndpi',
 u'slide_md5': None,
 u'slide_name': u'ADRC60-129_1F_Abeta.ndpi',
 u'slide_w_path': u'/DZIMS/ADRC60-129/aBeta/ADRC60-129_1F_Abeta.ndpi.dzi',
 u'stain_type': u'aBeta',
 u'thumbnail_image': u'/thumbnail/ADRC60-129/aBeta/ADRC60-129_1F_Abeta.ndpi',
 u'width': 134912}

In [ ]:
print dsa_slide_db['DSA_Slide_Data'].distinct('slideGroup')

In [ ]:
cur= dsa_slide_db['DSA_Slide_Data'].find({'pt_id':'ADRC50-10'})
for c in cur:
    print c

In [ ]:
dsa_slide_db['DSA_Slide_Data'].find_one()