In [ ]:
#Parse the fileInfo sheet and return the fileIDs and sample grouping values
#     export_fileIds,myArray,fileInfo,filename = metatlas_func.get_FileInfo(myFileInfo)

#Add the group names to each file at NERSC
#     metatlas_func.addGroupInfoToFiles(client,fileInfo)

#get the specification for each compound
#     dictData = metatlas_func.getAtlasData(client,dictId)

#get the EIC, for each compound (in reality, it should get peak summary, MSMS, EIC, and Spectrum)
#     data = metatlas_func.getData(export_fileIds,dictData,myArray,client,polarity,extraTime)

#get the total intensity chromatogram for each file
#     ticData = metatlas_func.getAllTICS(export_fileIds,polarity,myArray,client)

#pickle everything in case NERSC shuts down
#     metatlas_func.saveData(myFolder,ticData,dictId,myExperimentID,dictData,data,fileInfo,export_fileIds)

In [ ]:
import json
import getpass
import re
import csv
import os
import numpy as np
import pickle
from matplotlib import pyplot as plt
from itertools import groupby
from scipy.cluster.hierarchy import linkage
from scipy.cluster.hierarchy import fcluster
import codecs

In [ ]:
# Build an authenticated HTTP session against the metatlas server at NERSC.
# NOTE(review): `metatlas_func` is never imported in this notebook; the helper
# functions are defined in the cells below, so presumably this cell expects the
# notebook to have been exported/installed as the `metatlas_func` module --
# confirm before a clean-kernel Run All.
import requests
client = requests.Session()
client = metatlas_func.authenticateUser(client,'bpb')
# A failed login still leaves the csrftoken cookie on the session; a successful
# one adds the session cookie as well (see the recorded curl example below),
# hence the >1 check.
if len(client.cookies)>1:
    print "Successfully authenticated"
else:
    print "Login failed.  Either try again or contact NERSC to reset your password.  Use http://nim.nersc.gov to manage user account settings."

In [ ]:
def authenticateUser(client,username):
    """Log `client` into the metatlas server as `username`.

    Prompts interactively for the password (never hardcode it), performs the
    Django CSRF dance (GET to obtain the token cookie, then POST the login
    form), and returns the same session object, now carrying auth cookies.
    """
    auth_url = 'https://metatlas.nersc.gov/client/login/'
    password = getpass.getpass()
    # The initial GET makes the server set the csrftoken cookie.
    client.get(auth_url)
    token = client.cookies['csrftoken']
    credentials = dict(username=username, password=password,
                       csrfmiddlewaretoken=token, next='/')
    # Django requires a matching Referer header on the login POST.
    client.post(auth_url, data=credentials, headers=dict(Referer=auth_url))
    return client

def shareExperiment(client,username,myExperimentID):
    """Grant `username` read/write access to the experiment `myExperimentID`."""
    share_url = 'https://metatlas.nersc.gov/api/experiment/%s/share/' % myExperimentID
    body = json.dumps({"user":username,"perms":["read","write"]})
    client.post(share_url, data=body)

def shareAtlas(client,username,dictId):
    """Grant `username` read/write access to the atlas (dict) `dictId`."""
    share_url = 'https://metatlas.nersc.gov/api/dict/%s/share/' % dictId
    body = json.dumps({"user":username,"perms":["read","write"]})
    client.post(share_url, data=body)

def makeFileInfoSheet(client,myExperimentID,finfo_filename):
    """Query the experiment's runs and write a tab-delimited fileInfo sheet.

    The sheet is hand-annotated by the user and then re-imported with
    get_FileInfo(), so every column named in the header row must be populated
    for every run.  Editable defaults are written: polarity 'pos', group
    'group1', inclusion_order = run index, normalization_factor 1.0,
    retention_correction 0.0.

    Side effects only: one HTTP GET plus the written file; returns None.
    """
    url = 'https://metatlas.nersc.gov/api/experiment/%s' % myExperimentID
    r = client.get(url)
    files = json.loads(r.content)
    # NOTE: built locally for bookkeeping parity with the sheet; not returned.
    fileInfo = {'fid':[],'name':[],'status':[]}
    fid = open(finfo_filename,'wb')
    fid.write('index\tstatus\tname\tfid\tpolarity\tgroup\tinclusion_order\tnormalization_factor\tretention_correction\n')
    for i,myRun in enumerate(files[u'runs']):
        splitPathToFile = os.path.split(myRun[u'in_file'])
        # BUG FIX: the original row wrote only 6 of the 9 header columns,
        # leaving inclusion_order / normalization_factor / retention_correction
        # blank, which breaks the int()/float() conversions in get_FileInfo().
        # Write sane defaults the user can edit.
        fid.write('%d\t%d\t%s\t%d\tpos\tgroup1\t%d\t%.1f\t%.1f\n' %
                  (i,myRun[u'pending'],splitPathToFile[1],myRun[u'_id'][u'file_id'],i,1.0,0.0))
        if myRun[u'pending'] == 0:
            fileInfo['fid'].append(myRun[u'_id'][u'file_id'])
            fileInfo['name'].append(splitPathToFile[1])
            fileInfo['status'].append(myRun[u'pending']) # only keep runs whose pending status is 0
    # (Removed unused `pathYouWant`, which also raised NameError when the
    # experiment had no runs.)
    fid.close()

def exportAtlas(client,atlasID,filename):
    """Download atlas `atlasID` and write its compounds to a tab-delimited file.

    Parameters
    ----------
    client : authenticated requests.Session
    atlasID : atlas ("dict") identifier on the metatlas server
    filename : path of the tab-delimited output file to create
    """
    url = 'https://metatlas.nersc.gov/api/dict/%s/' % atlasID
    r = client.get(url)
    dictData = json.loads(r.text)
    # columns exported for each compound, in this order
    myList = ['name','pubchem_id','formula','neutral_mass','mz','mz_threshold','adducts','rt_max','rt_min','rt_peak']
    # (Removed a redundant function-local `import csv`: csv is imported at the
    # top of the file and is not even used in this function.)
    fid = open(filename,'wb')  # binary mode: names are written utf-8 encoded
    for listItem in myList:
        fid.write('%s\t' % listItem)
    fid.write('\n')
    for i,compound in enumerate(dictData[u'compounds']):
        for listItem in myList:
            if listItem == 'name':
                # compound names may contain non-ascii characters
                fid.write('%s\t' % compound[listItem].encode('utf-8'))
            else:
                fid.write('%s\t' % compound[listItem])
        fid.write('\n')
    fid.close()

# import the fileInfo sheet with annotated group information, polarity, and plot order
def get_FileInfo(myFileInfo):
    """Parse the annotated, tab-delimited fileInfo sheet.

    Parameters
    ----------
    myFileInfo : path to the sheet written by makeFileInfoSheet() and
        annotated by the user (group, polarity, inclusion_order, ...).

    Returns
    -------
    (export_fileIds, myArray, fileInfo, filename) where export_fileIds is a
    numpy array of file ids sorted by inclusion_order, myArray is the
    hard-coded array name, and fileInfo maps column name -> list of values
    (numeric columns converted to int/float).
    """
    myArray = 'lcms_test_1' #files[u'runs'][0][u'_id'][u'array_name']
    filename = '%s' % (myFileInfo)
    # 'r' instead of the deprecated 'rU' (removed in Python 3.11)
    with open(filename,'r') as file_object:
        newfileInfo = list(csv.DictReader(file_object, dialect='excel-tab'))
    # .keys() / list comprehensions work on both Python 2 and 3
    # (the original iterkeys()/lazy map() were Python-2 only)
    keys = newfileInfo[0].keys()
    fileInfo = {key: [d[key] for d in newfileInfo] for key in keys}
    fileInfo['fid'] = [int(v) for v in fileInfo['fid']]
    fileInfo['index'] = [int(v) for v in fileInfo['index']]
    fileInfo['inclusion_order'] = [int(v) for v in fileInfo['inclusion_order']]
    fileInfo['status'] = [int(v) for v in fileInfo['status']]
    fileInfo['normalization_factor'] = [float(v) for v in fileInfo['normalization_factor']]
    fileInfo['retention_correction'] = [float(v) for v in fileInfo['retention_correction']]

    # file ids re-ordered by the user-specified inclusion_order
    idx = np.argsort(fileInfo['inclusion_order'])
    export_fileIds = np.asarray(fileInfo['fid'])[idx]
    return export_fileIds,myArray,fileInfo,filename

def addGroupInfoToFiles(client,fileInfo):
    """PATCH each run's server-side metadata with its annotated sample group."""
    for i, file_id in enumerate(fileInfo['fid']):
        payload = json.dumps({"sample_type": fileInfo['group'][i]})
        client.patch('https://metatlas.nersc.gov/api/metadata/lcms_test_1/%d/' % file_id,
                     data=payload)

def createNewAtlas(client,atlasName,sampleDescription,methodDescription):
    """Create an empty atlas on the server and return its id.

    The server requires a referer header pointing at the dict/create page.
    """
    body = json.dumps({"name":atlasName,"sample":sampleDescription,"method":methodDescription})
    client.headers.update({'referer': 'https://metatlas.nersc.gov/dict/create'})
    r = client.post('https://metatlas.nersc.gov/api/dict/', data=body)
    return json.loads(r.text)['id']


# curl 'https://metatlas.nersc.gov/api/dict/' -H 'Cookie: __utma=25799074.440104800.1398372604.1436306658.1436966664.89; __utmc=25799074; __utmz=25799074.1436966664.89.62.utmcsr=google|utmccn=(organic)|utmcmd=organic|utmctr=(not%20provided); _ga=GA1.2.440104800.1398372604; __utmt=1; csrftoken=4WbUyL4Sg6GH5H11w7ggPHnp1Poh3VGq; sessionid=i8s8b49woppom27dy8u0c9fhwyom6n15; __utma=250901914.440104800.1398372604.1437097534.1437176628.217; __utmb=250901914.4.9.1437176632539; __utmc=250901914; __utmz=250901914.1434753096.197.3.utmcsr=google|utmccn=(organic)|utmcmd=organic|utmctr=(not%20provided)' -H 'Origin: https://metatlas.nersc.gov' -H 'Accept-Encoding: gzip, deflate' -H 'Accept-Language: en-US,en;q=0.8' -H 'User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/43.0.2357.134 Safari/537.36' -H 'Content-Type: application/x-www-form-urlencoded; charset=UTF-8' -H 'Accept: */*' -H 'Referer: https://metatlas.nersc.gov/dict/create/' -H 'X-Requested-With: XMLHttpRequest' -H 'Connection: keep-alive' --data '{"name":"","sample":"","method":""}' --compressed

def addFromSpreadsheetToAtlas(client,filename,dictId):
    """Add compounds to atlas `dictId` from a well-formatted spreadsheet.

    Compound names not yet in the atlas are POSTed as new entries; names
    already present are PATCHed in place with the spreadsheet values.
    Matching is by exact compound name.
    """
    with open(filename,'rU') as file_object:
        sheetData = list(csv.DictReader(file_object, dialect='excel-tab'))
    url = 'https://metatlas.nersc.gov/api/dict/%s/' % dictId
    r = client.get(url)
    dictData = json.loads(r.text)

    for compound in sheetData:
        # strip stray 0xCA bytes (Mac-Excel exports sometimes contain them)
        compound['name'] = re.sub("\xca",'',compound['name'])
        # BUG FIX: startswith() guards against an empty name; the original
        # compound['name'][0] raised IndexError on blank rows.
        if compound['name'].startswith(' '):
            compound['name'] = compound['name'][1:]
        # list comprehension instead of indexing a filter() result, which is
        # lazy (and unindexable) on Python 3
        cID = [x for x in dictData[u'compounds'] if x[u'name']==compound['name']]
        if not cID:
            # no entry with that name yet: create one
            url = 'https://metatlas.nersc.gov/api/dict/%s/' % dictId
            r = client.post(url, data=json.dumps([compound]))
            print(r.text)
        else:
            # name already exists: update the existing compound in place
            url = 'https://metatlas.nersc.gov/api/compound/%s/' % cID[0][u'_id']
            r = client.patch(url, data=json.dumps(compound))
            print(r.text)

def getAtlasData(client,dictId):
    """Fetch the full atlas (compound specifications) for `dictId` as a dict."""
    r = client.get('https://metatlas.nersc.gov/api/dict/%s/' % dictId)
    return json.loads(r.text)

def getExperimentData(client,myExperimentID):
    """Fetch the experiment record (including its runs) as a dict."""
    r = client.get('https://metatlas.nersc.gov/api/experiment/%s' % myExperimentID)
    return json.loads(r.content)

def getEICForCompounds(compound,myArray,files_I_want,rtTol,client,polarity):
    """Query the server for the extracted-ion chromatogram of one compound.

    The m/z window is compound mz +/- mz_threshold ppm; the rt window is the
    compound's [rt_min, rt_max] widened by rtTol on each side.  Returns the
    server response as a numpy array, or [] when the response body is empty.
    """
    # file id list as a comma-separated string (a bare int is allowed too)
    if isinstance(files_I_want,int):
        fid_csv = str(files_I_want)
    else:
        fid_csv = ','.join(map(str, files_I_want))

    mz = float(compound[u'mz'])
    mzTol = float(compound[u'mz_threshold'])   # ppm tolerance
    mzMin = mz - mz*mzTol/1.0e6
    mzMax = mz + mz*mzTol/1.0e6
    rtMin = float(compound[u'rt_min'])-rtTol
    rtMax = float(compound[u'rt_max'])+rtTol
    rtPeak = float(compound[u'rt_peak'])       # parsed for parity; not sent in the query

    params = {'L':1,'P':polarity,'arrayname':myArray,'fileidlist':fid_csv,
              'max_mz':mzMax,'min_mz':mzMin,
              'min_rt':rtMin,'max_rt':rtMax,
              'nsteps':20000,'queryType':'XICofFile_mf'}
    r = client.get('https://metatlas.nersc.gov/api/run',params=params)
    if r.content:
        return np.asarray(json.loads(r.content))
    else:
        return []

def getData(export_fileIds,dictData,myArray,client,polarity,extraTime):
    """Fetch the EIC for every compound in the atlas across all files.

    Returns a list with one entry per compound (numpy array or [] -- see
    getEICForCompounds).  Sleeps between requests to avoid hammering the API.
    """
    import time
    data = []
    for i,compound in enumerate(dictData[u'compounds']):
        # print() form produces the same output on Python 2 and 3
        # (the original `print i, name` statement is Python-2 only)
        print('%d %s' % (i, compound['name']))
        data.append(getEICForCompounds(compound,myArray,export_fileIds,extraTime,client,polarity))
        time.sleep(4)  # simple rate limit between server queries
    return data

def getAllTICS(export_fileIds,polarity,myArray,client):
    """Retrieve a total-intensity chromatogram for every file in one query.

    Uses a wide-open m/z (100-2000) and rt (1-300) window so the XIC query
    effectively returns the TIC for each file id.  Returns a numpy array.
    """
    # file id list as a comma-separated string (a bare int is allowed too)
    if isinstance(export_fileIds,int):
        fid_csv = str(export_fileIds)
    else:
        fid_csv = ','.join(str(f) for f in export_fileIds)

    params = {'L':1,'P':polarity,'arrayname':myArray,'fileidlist':fid_csv,
              'max_mz':2000,'min_mz':100,
              'min_rt':1,'max_rt':300,
              'nsteps':20000,'queryType':'XICofFile_mf'}
    r = client.get('https://metatlas.nersc.gov/api/run',params=params)
    return np.asarray(json.loads(r.content))

def clusterRTCentroids(rt,cutoff):
    """Single-linkage cluster retention-time centroids within `cutoff` minutes.

    Parameters
    ----------
    rt : sequence of retention times (minutes)
    cutoff : maximum single-linkage distance for two rts to share a cluster

    Returns
    -------
    1-based flat cluster labels (one per element of rt).
    """
    from scipy.spatial.distance import squareform
    rt = np.asarray(rt, dtype=float)
    dists = np.abs(rt - rt[:, None])
    # BUG FIX: linkage() interprets a 2-D input as an observation matrix, not
    # as precomputed distances; pass the condensed (1-D) distance vector.
    Y = linkage(squareform(dists, checks=False), method='single')
    # BUG FIX: the default fcluster criterion is 'inconsistent', which does
    # not treat `cutoff` as a distance threshold; 'distance' does.
    C = fcluster(Y, cutoff, criterion='distance')
    return C

def makeDataMat(export_fileIds,data,fileInfo,dictData):
    """Build per-compound x per-file peak-area and retention-time matrices.

    Each row is a compound from the atlas; each column is a file, in
    export_fileIds order.  dataMat holds the summed EIC intensity inside the
    compound's retention-corrected [rt_min, rt_max] window; rtMat holds the
    intensity-weighted mean retention time of those points (0 when no signal).

    Returns (rowLabels, rowGroups, colLabels, colGroups, dataMat, rtMat).
    """
    dataMat = np.zeros((len(data),len(export_fileIds)))
    rtMat = np.zeros((len(data),len(export_fileIds)))
    rowLabels = []
    colLabels = []
    rowGroups = []
    colGroups = []

    # per-file retention correction, aligned with export_fileIds order
    rtCorr = []
    for f in export_fileIds:
        for i,f2 in enumerate(fileInfo['fid']):
            if f2 == f:
                rtCorr.append(fileInfo['retention_correction'][i])
    # column labels/groups, aligned with export_fileIds order
    for i,myFile in enumerate(export_fileIds):
        for j,fid in enumerate(fileInfo['fid']):
            if fid == myFile:
                colLabels.append(fileInfo['name'][j])
                colGroups.append(fileInfo['group'][j])

    for i,datum in enumerate(data):
        rowLabels.append(dictData[u'compounds'][i]['name'])
        # (unused mz/mz_threshold locals from the original were removed)
        rtMin = float(dictData[u'compounds'][i][u'rt_min'])
        rtMax = float(dictData[u'compounds'][i][u'rt_max'])
        rowGroups.append('Metabolite')
        for j,myFile in enumerate(export_fileIds):
            if datum.size>3:   # empty query results come back as []
                # BUG FIX: the original used np.logical_and(a, b, c); the third
                # positional argument of a binary ufunc is `out`, so the
                # rt_max condition was never applied as a mask.  Combine all
                # three conditions explicitly.
                idx = ((datum[:,2]==myFile)
                       & (datum[:,0]>=(rtMin+float(rtCorr[j])))
                       & (datum[:,0]<=(rtMax+float(rtCorr[j]))))
                if np.sum(idx)>0:
                    x1 = datum[:,0][idx]   # retention times inside the window
                    y1 = datum[:,1][idx]   # intensities inside the window
                    dataMat[i,j] = np.sum(y1)
                    if dataMat[i,j] > 0:
                        # intensity-weighted mean retention time
                        rtMat[i,j] = np.sum(np.multiply(x1,y1)) / np.sum(y1)
    return rowLabels,rowGroups,colLabels,colGroups,dataMat,rtMat

def calcGroupVals(colGroups,rowLabels,dataMat):
    """Summarize dataMat per sample group: mean, stdev, stderr, CV, and count.

    Parameters
    ----------
    colGroups : group label for each column of dataMat
    rowLabels : compound name for each row of dataMat
    dataMat : 2-D array, rows = compounds, columns = files

    Returns
    -------
    (uGroups, meanMat, stdevMat, cvMat, stderrMat, numinMat) -- one column per
    unique group (sorted, as np.unique returns them).
    """
    uGroups = np.unique(colGroups)
    meanMat = np.zeros((len(rowLabels),len(uGroups)))
    stdevMat = np.zeros((len(rowLabels),len(uGroups)))
    cvMat = np.zeros((len(rowLabels),len(uGroups)))
    stderrMat = np.zeros((len(rowLabels),len(uGroups)))
    numinMat = np.zeros((len(rowLabels),len(uGroups)))

    # column indices for each group: loop-invariant, so compute once instead
    # of once per compound row as the original did
    groupIdx = [[ii for ii, jj in enumerate(colGroups) if jj == gro] for gro in uGroups]

    for i,met in enumerate(rowLabels):
        for j,gro in enumerate(uGroups):
            idx = groupIdx[j]
            meanMat[i,j] = np.mean(dataMat[i,idx])
            stdevMat[i,j] = np.std(dataMat[i,idx])
            stderrMat[i,j] = stdevMat[i,j] / len(idx)**0.5
            numinMat[i,j] = len(idx)
            if meanMat[i,j] > 0:
                # coefficient of variation; left 0 for non-positive means
                cvMat[i,j] = stdevMat[i,j] / meanMat[i,j]
    return uGroups,meanMat,stdevMat,cvMat,stderrMat,numinMat

def exportCompoundAreas(myLabelString,uGroups,rowLabels,colLabels,dataMat,meanMat,rtMat,stdevMat,cvMat,stderrMat,numinMat,dictData):
    """Write the peak-height summary table to data/<label>/peakHeight_Table_<label>.tab.

    One header row, then one row per compound: the atlas fields in myList,
    then per-group Mean/STDEV/STDERR/CV/count blocks (one column per entry of
    uGroups), then per-file peak areas and retention times in colLabels order.
    The header-writing loops must stay in step with the data-row loops below.
    """
    output_filename = 'data/%s/peakHeight_Table_%s.tab' % (myLabelString,myLabelString) #re.sub('fileInfo','peakArea_Table_',re.sub('txt','tab',filename))
    export_filenames = []  # NOTE(review): built but never used in this function
    # atlas fields exported for each compound, in this order
    myList = ['name','pubchem_id','formula','neutral_mass','mz','mz_threshold','adducts','rt_max','rt_min','rt_peak']

    # utf-8 via codecs so non-ascii compound names round-trip
    with codecs.open(output_filename, 'w', encoding='utf-8') as fid:
        # --- header row ---
        for listItem in myList:
            fid.write('%s\t' % listItem)

        for j,gro in enumerate(uGroups):
            fid.write('Mean %s\t' % gro)

        for j,gro in enumerate(uGroups):
            fid.write('STDEV %s\t' % gro)

        for j,gro in enumerate(uGroups):
            fid.write('STDERR %s\t' % gro)

        for j,gro in enumerate(uGroups):
            fid.write('CV %s\t' % gro)
            
        for j,gro in enumerate(uGroups):
            fid.write('Num in %s\t' % gro)

        for filename in colLabels:
            fid.write('%s Peak Area\t' % filename)

        for filename in colLabels:
            fid.write('%s Retention Time in\t' % filename)

        fid.write('\n')

        # --- one data row per compound, same column order as the header ---
        for i,met in enumerate(rowLabels):
            compound = dictData[u'compounds'][i]
            for listItem in myList:
                fid.write('%s\t' % compound[listItem])

            for j,gro in enumerate(uGroups):
                fid.write('%5.2f\t' % meanMat[i,j])
            for j,gro in enumerate(uGroups):
                fid.write('%5.2f\t' % stdevMat[i,j])
            for j,gro in enumerate(uGroups):
                fid.write('%5.2f\t' % stderrMat[i,j])
            for j,gro in enumerate(uGroups):
                fid.write('%5.2f\t' % cvMat[i,j])
            for j,gro in enumerate(uGroups):
                fid.write('%5.2f\t' % numinMat[i,j])
                
            for j,f in enumerate(colLabels):
                fid.write('%5.2f\t' % dataMat[i,j])
            for j,f in enumerate(colLabels):
                fid.write('%5.2f\t' % rtMat[i,j])
            fid.write('\n')

def plotTICs(colLabels,export_fileIds,ticData,myLabelString):
    """Save one total-ion-chromatogram PDF per file under data/<label>/TICs/.

    ticData columns are [rt, intensity, file_id], the same layout used in
    makeDataMat; colLabels[i] must correspond to export_fileIds[i].
    """
    # Plot All the Tics
    for i,f in enumerate(colLabels):
        fig = plt.figure(1, figsize=(18, 8))   # reuse figure 1 on every pass
        ax = fig.add_subplot(111)
        # rows of ticData belonging to this file id
        x = ticData[ticData[:,2]==export_fileIds[i],0]
        y = ticData[ticData[:,2]==export_fileIds[i],1]
        # sort by time so the line plot is monotone in x
        idx = np.argsort(x)
        ax.plot(x[idx],y[idx])
        ax.set_xlabel('Time (min)')
        ax.set_ylabel('Magnitude (TIC)')
        # sanitize the filename: runs of non-alphanumerics become '_'
        fname = 'data/%s/TICs/TIC_%s %s%s' % (myLabelString,myLabelString,re.sub('[^A-Za-z0-9]+', '_', f),'.pdf')
        fig.savefig(fname)
        fig.clear()

def plotChromatograms(rowLabels,myLabelString,export_fileIds,fileInfo,dictData,numCols,data,equalaxis):
    # for each compound, make a chromatogram for each file
    # each filename is going to be a compound name
    # NOTE(review): this function is redefined later in this notebook; in a
    # top-to-bottom run the later definition silently shadows this one, so
    # this variant (the one that applies per-file retention_correction) never
    # executes.  Keep whichever version is intended and delete the other.
    plt.rcParams['pdf.fonttype']=42
    # plt.rcParams['pdf.useafm'] = True
    plt.rcParams['pdf.use14corefonts'] = True
    plt.rcParams['text.usetex'] = False
    plt.rcParams.update({'font.size': 12})
    plt.rcParams.update({'font.weight': 'bold'})
    plt.rcParams['axes.linewidth'] = 2 # set the value globally

    # one output PDF path per compound (sanitized compound name)
    export_filenames = []
    for i,cname in enumerate(rowLabels):
        export_filenames.append('data/%s/Chromatograms/Chromatograms_%s %s%s' % (myLabelString,myLabelString,re.sub('[^A-Za-z0-9]+', '_', cname),'.pdf'))

    # subplot title per file, aligned with export_fileIds order
    subplot_titles = []
    for i,myFile in enumerate(export_fileIds):
        for j,fid in enumerate(fileInfo['fid']):
            if fid == myFile:
                subplot_titles.append(fileInfo['name'][j].replace('.mzML',''))

    # %config InlineBackend.figure_format = 'png'
    # per-file retention correction, aligned with export_fileIds order
    rtCorr = []
    for f in export_fileIds:
        for i,f2 in enumerate(fileInfo['fid']):
            if f2 == f:
                rtCorr.append(fileInfo['retention_correction'][i])
    
    # NOTE(review): under Python 2, len(...)/numCols floor-divides when
    # numCols is an int, so ceil() can under-allocate rows -- confirm numCols
    # is passed as a float (the int(numCols) cast below suggests it is).
    nRows = int(np.ceil(len(export_fileIds)/numCols))

    for i,compound in enumerate(dictData[u'compounds']):
        fig, ax = plt.subplots(nRows, int(numCols),figsize=(8*numCols,nRows * 6))
        min_x_val = 1000000
        max_x_val = 0
        max_y_val = 0
        myname = dictData[u'compounds'][i]['name']
        for j,a in enumerate(ax.flat):
            # NOTE(review): rtCorr/subplot_titles have one entry per file;
            # for j >= len(export_fileIds) (unused grid slots) these index
            # accesses raise IndexError -- confirm the grid is exactly filled.
            # The 1e12 point appears to be a marker placed at the rt peak.
            a.plot(float(compound[u'rt_peak'])+rtCorr[j],1e12,'.')
            a.axvline(float(compound[u'rt_min'])+rtCorr[j],linewidth=2, color='k') #original rtMin
            a.axvline(float(compound[u'rt_max'])+rtCorr[j],linewidth=2, color='k') #original rtMax
            a.axvline(float(compound[u'rt_peak'])+rtCorr[j],linewidth=2, color='r') #original rtPeak
            a.set_xlabel('Time (min)',weight='bold')
            a.set_ylabel('Intensity (au)',weight='bold')
            a.set_title(subplot_titles[j],fontsize=12,weight='bold')
            if j<len(export_fileIds):
                if len(data[i])>3:
                    # EIC points for this compound belonging to file j
                    x1 = data[i][:,0][(data[i][:,2]==export_fileIds[j])]
                    y1 = data[i][:,1][(data[i][:,2]==export_fileIds[j])]
                    if x1.size>0:
    #                     if myname.startswith('IST'):
    #                         y1 = y1[:]
    #                     else:
    #                         y1 = y1[:] / fileInfo['normalization_factor'][j]
                        # sort by time and baseline-subtract the minimum
                        idx = np.argsort(x1)
                        x1 = x1[idx]
                        y1 = y1[idx]
                        y1 = y1 - np.min(y1)
                        a.plot(x1,y1,'k-',linewidth=2.0,alpha=1.0)
                        
                        # shade the retention-corrected integration window
                        myWhere = np.logical_and(x1>=(float(compound[u'rt_min'])+float(rtCorr[j])), x1<=(float(compound[u'rt_max'])+float(rtCorr[j])) )

                        a.fill_between(x1,0,y1,myWhere, facecolor='c', alpha=0.3) #new rtBounds
                        

                        
                        # track global axis limits across all subplots
                        if np.min(data[i][:,0])<min_x_val:
                            min_x_val = np.min(data[i][:,0])
                        if np.max(data[i][:,0])>max_x_val:
                            max_x_val = np.max(data[i][:,0])
                        if np.max(y1)>max_y_val:
                            max_y_val = np.max(y1)
        # common x axis everywhere; common y axis only when requested
        for j,a in enumerate(ax.flat):
            a.set_xlim([min_x_val,max_x_val])
        if equalaxis == 1:
            for j,a in enumerate(ax.flat):
                a.set_ylim([0,max_y_val])
        fig.tight_layout()        
        fig.savefig(export_filenames[i])
        fig.clear()
        plt.close('all')

def plotBoxPlots(myLabelString,rowLabels,colGroups,dataMat,scale,fontsize):
    """Save one per-group box plot PDF per compound under data/<label>/Boxplots/.

    NOTE(review): this function is redefined later in this notebook with an
    essentially identical body; in a top-to-bottom run the later definition
    shadows this one.
    """
    plt.rcParams['pdf.fonttype']=42
    # plt.rcParams['pdf.useafm'] = True
    plt.rcParams['pdf.use14corefonts'] = True
    plt.rcParams['text.usetex'] = False
    plt.rcParams.update({'font.size': 18})
    plt.rcParams.update({'font.weight': 'bold'})
    plt.rcParams['axes.linewidth'] = 2 # set the value globally
    for i,cname in enumerate(rowLabels):
        # (group, value) pairs; +1 presumably so a log-scale axis can cope
        # with zero peak areas -- TODO confirm
        myVals = []
        for j,mygroup in enumerate(colGroups):
            myVals.append((mygroup, dataMat[i,j]+1))
        # groupby requires its input sorted by the grouping key
        myVals = sorted(myVals, key=lambda x: x[0]) 
        data_to_plot = []
        groupName = []
        for key, group in groupby(myVals, lambda x: x[0]):
            # NOTE(review): zip(...)[1] indexing is Python-2 only (zip returns
            # an iterator on Python 3); would need list(zip(*group))[1] there.
            L = list(zip(*group)[1])
            data_to_plot.append(L)
            groupName.append(key)

        fig = plt.figure(1, figsize=(18, 18))
        ax = fig.add_subplot(111,yscale=scale)
        bp = ax.boxplot(data_to_plot)
        ax.set_xticklabels(groupName,rotation=40, ha='right',fontsize=fontsize)
        ax.set_title(cname)
        ax.grid('on',alpha=0.5)
        ax.set_ylabel('Peak Area')
        plt.tight_layout()
        # sanitized compound name in the output filename
        fig.savefig('data/%s/Boxplots/Boxplot_%s %s%s' % (myLabelString,myLabelString,re.sub('[^A-Za-z0-9]+', '', cname),'.pdf'))
        fig.clear()
    # plt.rcParams['ps.fonttype']=42
    # plt.rcParams['ps.useafm']= True


def plotChromatograms(rowLabels,myLabelString,export_fileIds,fileInfo,dictData,numCols,data,equalaxis):
    # for each compound, make a chromatogram for each file
    # each filename is going to be a compound name
    # NOTE(review): this is the second definition of plotChromatograms in the
    # notebook and the one that wins in a top-to-bottom run; unlike the first
    # it does NOT apply the per-file retention_correction.  Keep one variant.
    plt.rcParams['pdf.fonttype']=42
    # plt.rcParams['pdf.useafm'] = True
    plt.rcParams['pdf.use14corefonts'] = True
    plt.rcParams['text.usetex'] = False
    plt.rcParams.update({'font.size': 12})
    plt.rcParams.update({'font.weight': 'bold'})
    plt.rcParams['axes.linewidth'] = 2 # set the value globally

    # one output PDF path per compound (sanitized compound name)
    export_filenames = []
    for i,cname in enumerate(rowLabels):
        export_filenames.append('data/%s/Chromatograms/Chromatograms_%s %s%s' % (myLabelString,myLabelString,re.sub('[^A-Za-z0-9]+', '_', cname),'.pdf'))

    # subplot title per file, aligned with export_fileIds order
    subplot_titles = []
    for i,myFile in enumerate(export_fileIds):
        for j,fid in enumerate(fileInfo['fid']):
            if fid == myFile:
                subplot_titles.append(fileInfo['name'][j].replace('.mzML',''))

    # %config InlineBackend.figure_format = 'png' 
    
    # NOTE(review): under Python 2, len(...)/numCols floor-divides when
    # numCols is an int, so ceil() can under-allocate rows -- confirm numCols
    # is passed as a float (the int(numCols) cast below suggests it is).
    nRows = int(np.ceil(len(export_fileIds)/numCols))

    for i,compound in enumerate(dictData[u'compounds']):
        fig, ax = plt.subplots(nRows, int(numCols),figsize=(8*numCols,nRows * 6))
        min_x_val = 1000000
        max_x_val = 0
        max_y_val = 0
        myname = dictData[u'compounds'][i]['name']
        for j,a in enumerate(ax.flat):
            # only grid slots that correspond to an actual file are drawn
            if j<len(export_fileIds):
                a.set_xlabel('Time (min)',weight='bold')
                a.set_ylabel('Intensity (au)',weight='bold')
                a.set_title(subplot_titles[j],fontsize=12,weight='bold')
                a.axvline(float(compound[u'rt_min']),linewidth=2, color='k') #original rtMin
                a.axvline(float(compound[u'rt_max']),linewidth=2, color='k') #original rtMax
                a.axvline(float(compound[u'rt_peak']),linewidth=2, color='r') #original rtPeak
                if len(data[i])>3:
                    # EIC points for this compound belonging to file j
                    x1 = data[i][:,0][(data[i][:,2]==export_fileIds[j])]
                    y1 = data[i][:,1][(data[i][:,2]==export_fileIds[j])]
                    if x1.size>0:
    #                     if myname.startswith('IST'):
    #                         y1 = y1[:]
    #                     else:
    #                         y1 = y1[:] / fileInfo['normalization_factor'][j]
                        # sort by time and baseline-subtract the minimum
                        idx = np.argsort(x1)
                        x1 = x1[idx]
                        y1 = y1[idx]
                        y1 = y1 - np.min(y1)
                        a.plot(x1,y1,'k-',linewidth=2.0,alpha=1.0)

                        # shade the integration window (no rt correction here)
                        myWhere = np.logical_and((x1>=float(compound[u'rt_min'])), (x1<=float(compound[u'rt_max'])) )

                        a.fill_between(x1,0,y1,myWhere, facecolor='c', alpha=0.3) #new rtBounds
                        
                        # track global axis limits across all subplots
                        if np.min(data[i][:,0])<min_x_val:
                            min_x_val = np.min(data[i][:,0])
                        if np.max(data[i][:,0])>max_x_val:
                            max_x_val = np.max(data[i][:,0])
                        if np.max(y1)>max_y_val:
                            max_y_val = np.max(y1)
        # common x axis everywhere; common y axis only when requested
        for j,a in enumerate(ax.flat):
            a.set_xlim([min_x_val,max_x_val])
        if equalaxis == 1:
            for j,a in enumerate(ax.flat):
                a.set_ylim([0,max_y_val])
        fig.tight_layout() 
        # NOTE(review): Python-2-only print statement
        print export_filenames[i] 
        fig.savefig(export_filenames[i])
        fig.clear()
        plt.close('all')

def plotBoxPlots(myLabelString,rowLabels,colGroups,dataMat,scale,fontsize):
    """Save one per-group box plot PDF per compound under data/<label>/Boxplots/.

    NOTE(review): duplicate of the plotBoxPlots defined earlier in the
    notebook (this copy shadows it in a top-to-bottom run); the two bodies
    appear identical apart from whitespace -- delete one of them.
    """
    plt.rcParams['pdf.fonttype']=42
    # plt.rcParams['pdf.useafm'] = True
    plt.rcParams['pdf.use14corefonts'] = True
    plt.rcParams['text.usetex'] = False
    plt.rcParams.update({'font.size': 18})
    plt.rcParams.update({'font.weight': 'bold'})
    plt.rcParams['axes.linewidth'] = 2 # set the value globally
    for i,cname in enumerate(rowLabels):
        # (group, value) pairs; +1 presumably so a log-scale axis can cope
        # with zero peak areas -- TODO confirm
        myVals = []
        for j,mygroup in enumerate(colGroups):
            myVals.append((mygroup, dataMat[i,j]+1))
        # groupby requires its input sorted by the grouping key
        myVals = sorted(myVals, key=lambda x: x[0]) 

        data_to_plot = []
        groupName = []
        for key, group in groupby(myVals, lambda x: x[0]):
            # NOTE(review): zip(...)[1] indexing is Python-2 only (zip returns
            # an iterator on Python 3); would need list(zip(*group))[1] there.
            L = list(zip(*group)[1])
            data_to_plot.append(L)
            groupName.append(key)

        fig = plt.figure(1, figsize=(18, 18))
        ax = fig.add_subplot(111,yscale=scale)
        bp = ax.boxplot(data_to_plot)
        ax.set_xticklabels(groupName,rotation=40, ha='right',fontsize=fontsize)
        ax.set_title(cname)
        ax.grid('on',alpha=0.5)
        ax.set_ylabel('Peak Area')
        plt.tight_layout()
        # sanitized compound name in the output filename
        fig.savefig('data/%s/Boxplots/Boxplot_%s %s%s' % (myLabelString,myLabelString,re.sub('[^A-Za-z0-9]+', '', cname),'.pdf'))
        fig.clear()
    # plt.rcParams['ps.fonttype']=42
    # plt.rcParams['ps.useafm']= True


def saveData(myFolder,ticData,dictId,myExperimentID,dictData,data,fileInfo,export_fileIds):
    """Pickle the full analysis state to data/<myFolder>/dataset_<myFolder>.pkl
    so the session can be restored if NERSC (or the kernel) goes down.
    The target directory must already exist.
    """
    metatlas_data = {'myLabelString':myFolder,'ticData':ticData,'dictId':dictId,
                     'myExperimentID':myExperimentID,'dictData':dictData,
                     'data':data,'fileInfo':fileInfo,'export_fileIds':export_fileIds}
    # BUG FIX: the original handed an open() handle straight to pickle.dump
    # and never closed it; a context manager guarantees flush + close.
    with open('data/%s/dataset_%s.pkl' % (myFolder,myFolder), 'wb') as fid:
        pickle.dump(metatlas_data, fid)