In [ ]:
#Parse the fileInfo sheet and return the fileIDs and sample grouping values
# export_fileIds,myArray,fileInfo,filename = metatlas_func.get_FileInfo(myFileInfo)
#Add the group names to each file at NERSC
# metatlas_func.addGroupInfoToFiles(client,fileInfo)
#get the specification for each compound
# dictData = metatlas_func.getAtlasData(client,dictId)
#get the EIC, for each compound (in reality, it should get peak summary, MSMS, EIC, and Spectrum)
# data = metatlas_func.getData(export_fileIds,dictData,myArray,client,polarity,extraTime)
#get the total intensity chromatogram for each file
# ticData = metatlas_func.getAllTICS(export_fileIds,polarity,myArray,client)
#pickle everything in case NERSC shuts down
# metatlas_func.saveData(myFolder,ticData,dictId,myExperimentID,dictData,data,fileInfo,export_fileIds)
In [ ]:
import json
import getpass
import re
import csv
import os
import numpy as np
import pickle
from matplotlib import pyplot as plt
from itertools import groupby
from scipy.cluster.hierarchy import linkage
from scipy.cluster.hierarchy import fcluster
import codecs
In [ ]:
# Notebook cell: open an HTTP session and log in to the Metatlas server at NERSC.
import requests
client = requests.Session()
# authenticateUser prompts for the password and performs the CSRF-protected login
client = metatlas_func.authenticateUser(client,'bpb')
# a successful Django login leaves more than one cookie (csrftoken + sessionid) on the session
if len(client.cookies)>1:
    print "Successfully authenticated"
else:
    print "Login failed. Either try again or contact NERSC to reset your password. Use http://nim.nersc.gov to manage user account settings."
In [ ]:
def authenticateUser(client, username):
    """Log the given requests session in to metatlas.nersc.gov.

    Prompts interactively for the password, fetches the login page once to
    obtain the CSRF cookie, then posts the Django login form.  Returns the
    (now authenticated) session object.
    """
    password = getpass.getpass()
    authURL = 'https://metatlas.nersc.gov/client/login/'
    # The initial GET sets the csrftoken cookie we must echo back in the form.
    client.get(authURL)
    token = client.cookies['csrftoken']
    form = {
        'username': username,
        'password': password,
        'csrfmiddlewaretoken': token,
        'next': '/',
    }
    client.post(authURL, data=form, headers={'Referer': authURL})
    return client
def shareExperiment(client, username, myExperimentID):
    """Grant another user read/write access to the given experiment."""
    body = json.dumps({"user": username, "perms": ["read", "write"]})
    endpoint = 'https://metatlas.nersc.gov/api/experiment/%s/share/' % myExperimentID
    r = client.post(endpoint, data=body)
    # # print r.content
def shareAtlas(client, username, dictId):
    """Grant another user read/write access to the given atlas (dict)."""
    body = json.dumps({"user": username, "perms": ["read", "write"]})
    endpoint = 'https://metatlas.nersc.gov/api/dict/%s/share/' % dictId
    r = client.post(endpoint, data=body)
def makeFileInfoSheet(client, myExperimentID, finfo_filename):
    """Download the run list for an experiment and write a fileInfo template sheet.

    The sheet is tab-delimited with one row per run; the user is expected to
    hand-edit the polarity/group/inclusion_order/... columns before feeding it
    back to get_FileInfo.

    client          -- authenticated requests session
    myExperimentID  -- experiment id on the Metatlas server
    finfo_filename  -- path of the template sheet to write

    Returns the fileInfo dict (fid/name/status of completed runs).  The
    original version built this dict but discarded it.
    """
    url = 'https://metatlas.nersc.gov/api/experiment/%s' % myExperimentID
    r = client.get(url)
    files = json.loads(r.content)
    fileInfo = {'fid': [], 'name': [], 'status': []}
    # context manager so the sheet is closed even if a write fails
    with open(finfo_filename, 'w') as fid:
        fid.write('index\tstatus\tname\tfid\tpolarity\tgroup\tinclusion_order\tnormalization_factor\tretention_correction\n')
        for i, myRun in enumerate(files[u'runs']):
            splitPathToFile = os.path.split(myRun[u'in_file'])
            # BUG FIX: original rows omitted the last three header columns
            # (inclusion_order, normalization_factor, retention_correction),
            # so the generated template could not be parsed by get_FileInfo
            # without manual additions; write editable defaults instead.
            fid.write('%d\t%d\t%s\t%d\tpos\tgroup1\t%d\t1.0\t0.0\n'
                      % (i, myRun[u'pending'], splitPathToFile[1],
                         myRun[u'_id'][u'file_id'], i))
            if myRun[u'pending'] == 0:  # only keep runs whose processing finished
                fileInfo['fid'].append(myRun[u'_id'][u'file_id'])
                fileInfo['name'].append(splitPathToFile[1])
                fileInfo['status'].append(myRun[u'pending'])
    # NOTE(review): the original also kept splitPathToFile[0] in an unused
    # local (with a Windows-path TODO); dropped here as it had no effect.
    return fileInfo
def exportAtlas(client, atlasID, filename):
    """Download an atlas and export its compounds to a tab-delimited spreadsheet.

    client   -- authenticated requests session
    atlasID  -- id of the atlas (dict) on the Metatlas server
    filename -- path of the spreadsheet to write

    Columns are the fields in myList; compound names are UTF-8 encoded because
    they may contain non-ASCII characters.
    """
    url = 'https://metatlas.nersc.gov/api/dict/%s/' % atlasID
    r = client.get(url)
    dictData = json.loads(r.text)
    myList = ['name', 'pubchem_id', 'formula', 'neutral_mass', 'mz',
              'mz_threshold', 'adducts', 'rt_max', 'rt_min', 'rt_peak']
    # FIX: use a context manager so the handle is closed even on error;
    # also removed an unused function-local `import csv` (csv was never used).
    with open(filename, 'wb') as fid:
        for listItem in myList:
            fid.write('%s\t' % listItem)
        fid.write('\n')
        for compound in dictData[u'compounds']:
            for listItem in myList:
                if listItem == 'name':
                    # names may contain non-ASCII characters
                    fid.write('%s\t' % compound[listItem].encode('utf-8'))
                else:
                    fid.write('%s\t' % compound[listItem])
            fid.write('\n')
# import the fileInfo sheet with annotated group information, polarity, and plot order
def get_FileInfo(myFileInfo):
    """Parse an annotated fileInfo sheet and return file ids plus grouping info.

    myFileInfo -- path to the tab-delimited fileInfo sheet (columns: index,
                  status, name, fid, polarity, group, inclusion_order,
                  normalization_factor, retention_correction)

    Returns (export_fileIds, myArray, fileInfo, filename) where
    export_fileIds is a numpy array of the fid column sorted by
    inclusion_order, and fileInfo maps each column name to a list of values
    (numeric columns converted to int/float).
    """
    # hard-coded array name; was files[u'runs'][0][u'_id'][u'array_name']
    myArray = 'lcms_test_1'
    filename = '%s' % (myFileInfo)
    # FIX: 'rU' mode is deprecated and removed in Python 3.11; plain 'r' works
    # on both Python 2 and 3.
    with open(filename, 'r') as file_object:
        newfileInfo = list(csv.DictReader(file_object, dialect='excel-tab'))
    # FIX: iterkeys() and list-returning map() are Python-2-only; keys() and
    # list comprehensions behave identically on 2 and work on 3 as well
    # (np.argsort needs a sequence, not a lazy map object).
    keys = newfileInfo[0].keys()
    fileInfo = {key: [row[key] for row in newfileInfo] for key in keys}
    fileInfo['fid'] = [int(v) for v in fileInfo['fid']]
    fileInfo['index'] = [int(v) for v in fileInfo['index']]
    fileInfo['inclusion_order'] = [int(v) for v in fileInfo['inclusion_order']]
    fileInfo['status'] = [int(v) for v in fileInfo['status']]
    fileInfo['normalization_factor'] = [float(v) for v in fileInfo['normalization_factor']]
    fileInfo['retention_correction'] = [float(v) for v in fileInfo['retention_correction']]
    idx = np.argsort(fileInfo['inclusion_order'])
    export_fileIds = np.asarray(fileInfo['fid'])[idx]
    return export_fileIds, myArray, fileInfo, filename
def addGroupInfoToFiles(client, fileInfo):
    """PATCH each run's metadata record so sample_type matches the annotated group."""
    for position, file_id in enumerate(fileInfo['fid']):
        endpoint = 'https://metatlas.nersc.gov/api/metadata/lcms_test_1/%d/' % file_id
        payload = json.dumps({"sample_type": fileInfo['group'][position]})
        r = client.patch(endpoint, data=payload)
def createNewAtlas(client, atlasName, sampleDescription, methodDescription):
    """Create a new, empty atlas on the server and return its id.

    The API expects a JSON body of the form
    {"name":"","sample":"","method":""}.
    """
    body = json.dumps({
        "name": atlasName,
        "sample": sampleDescription,
        "method": methodDescription,
    })
    # the create endpoint checks the referer header
    client.headers.update({'referer': 'https://metatlas.nersc.gov/dict/create'})
    response = client.post('https://metatlas.nersc.gov/api/dict/', data=body)
    return json.loads(response.text)['id']
# curl 'https://metatlas.nersc.gov/api/dict/' -H 'Cookie: __utma=25799074.440104800.1398372604.1436306658.1436966664.89; __utmc=25799074; __utmz=25799074.1436966664.89.62.utmcsr=google|utmccn=(organic)|utmcmd=organic|utmctr=(not%20provided); _ga=GA1.2.440104800.1398372604; __utmt=1; csrftoken=4WbUyL4Sg6GH5H11w7ggPHnp1Poh3VGq; sessionid=i8s8b49woppom27dy8u0c9fhwyom6n15; __utma=250901914.440104800.1398372604.1437097534.1437176628.217; __utmb=250901914.4.9.1437176632539; __utmc=250901914; __utmz=250901914.1434753096.197.3.utmcsr=google|utmccn=(organic)|utmcmd=organic|utmctr=(not%20provided)' -H 'Origin: https://metatlas.nersc.gov' -H 'Accept-Encoding: gzip, deflate' -H 'Accept-Language: en-US,en;q=0.8' -H 'User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/43.0.2357.134 Safari/537.36' -H 'Content-Type: application/x-www-form-urlencoded; charset=UTF-8' -H 'Accept: */*' -H 'Referer: https://metatlas.nersc.gov/dict/create/' -H 'X-Requested-With: XMLHttpRequest' -H 'Connection: keep-alive' --data '{"name":"","sample":"","method":""}' --compressed
def addFromSpreadsheetToAtlas(client, filename, dictId):
    """Add compounds to an atlas from a well-formatted spreadsheet.

    New compound names are POSTed as new entries; names already present in the
    atlas are PATCHed with the spreadsheet values.

    client   -- authenticated requests session
    filename -- tab-delimited spreadsheet (same columns as exportAtlas)
    dictId   -- id of the target atlas
    """
    # FIX: 'rU' mode is deprecated/removed in modern Python; 'r' is equivalent here
    with open(filename, 'r') as file_object:
        sheetData = list(csv.DictReader(file_object, dialect='excel-tab'))
    url = 'https://metatlas.nersc.gov/api/dict/%s/' % dictId
    r = client.get(url)
    dictData = json.loads(r.text)
    for compound in sheetData:
        # strip the stray 0xCA character some spreadsheet exports insert
        compound['name'] = re.sub("\xca", '', compound['name'])
        # FIX: indexing name[0] raised IndexError on an empty name;
        # startswith is safe and still removes only one leading space.
        if compound['name'].startswith(' '):
            compound['name'] = compound['name'][1:]
        # FIX: filter(...) is not indexable on Python 3; an equivalent list
        # comprehension works on both 2 and 3.
        cID = [c for c in dictData[u'compounds'] if c[u'name'] == compound['name']]
        if not cID:
            # a new entry is created if that compound name doesn't exist
            url = 'https://metatlas.nersc.gov/api/dict/%s/' % dictId
            r = client.post(url, data=json.dumps([compound]))
            print(r.text)
        else:
            # edit the entry if it already exists
            url = 'https://metatlas.nersc.gov/api/compound/%s/' % cID[0][u'_id']
            r = client.patch(url, data=json.dumps(compound))
            print(r.text)
def getAtlasData(client, dictId):
    """Fetch and return the full atlas record (includes the compound list)."""
    response = client.get('https://metatlas.nersc.gov/api/dict/%s/' % dictId)
    return json.loads(response.text)
def getExperimentData(client, myExperimentID):
    """Fetch and return the experiment record (run list, etc.)."""
    response = client.get('https://metatlas.nersc.gov/api/experiment/%s' % myExperimentID)
    return json.loads(response.content)
def getEICForCompounds(compound, myArray, files_I_want, rtTol, client, polarity):
    """Query the extracted-ion chromatogram for one compound across files.

    compound     -- atlas compound dict (mz, mz_threshold in ppm, rt bounds)
    myArray      -- server-side array name
    files_I_want -- a single file id (int) or an iterable of ids
    rtTol        -- extra minutes added on each side of the rt window
    polarity     -- ionisation polarity flag passed through to the API

    Returns an Nx3 numpy array (rt, intensity, file id) or [] when the server
    returns an empty body.
    """
    if isinstance(files_I_want, int):
        id_list = str(files_I_want)
    else:
        id_list = ','.join(str(fid) for fid in files_I_want)
    center_mz = float(compound[u'mz'])
    ppm = float(compound[u'mz_threshold'])
    half_window = center_mz * ppm / 1.0e6  # ppm tolerance -> absolute m/z window
    params = {
        'L': 1,
        'P': polarity,
        'arrayname': myArray,
        'fileidlist': id_list,
        'max_mz': center_mz + half_window,
        'min_mz': center_mz - half_window,
        'min_rt': float(compound[u'rt_min']) - rtTol,
        'max_rt': float(compound[u'rt_max']) + rtTol,
        'nsteps': 20000,
        'queryType': 'XICofFile_mf',
    }
    response = client.get('https://metatlas.nersc.gov/api/run', params=params)
    if response.content:
        return np.asarray(json.loads(response.content))
    return []
def getData(export_fileIds, dictData, myArray, client, polarity, extraTime):
    """Fetch the EIC for every compound in the atlas across the given files.

    Returns a list with one entry per compound (the array returned by
    getEICForCompounds, or [] when the server sent nothing back).
    Sleeps between requests to avoid hammering the server.
    """
    import time
    data = []
    for i, compound in enumerate(dictData[u'compounds']):
        # FIX: the original used the Python-2-only statement
        # `print i, compound['name']`; this form produces the same output on
        # Python 2 and also works on Python 3, matching the print() calls
        # used elsewhere in this file.
        print('%d %s' % (i, compound['name']))
        data.append(getEICForCompounds(compound, myArray, export_fileIds,
                                       extraTime, client, polarity))
        time.sleep(4)  # throttle: one request every 4 s
    return data
def getAllTICS(export_fileIds, polarity, myArray, client):
    """Return the total-intensity chromatogram for every file.

    Uses the same XIC endpoint as getEICForCompounds but with a wide-open
    m/z (100-2000) and rt (1-300 min) window so everything is included.
    """
    if isinstance(export_fileIds, int):
        id_list = str(export_fileIds)
    else:
        id_list = ','.join(str(fid) for fid in export_fileIds)
    params = {
        'L': 1,
        'P': polarity,
        'arrayname': myArray,
        'fileidlist': id_list,
        'max_mz': 2000,
        'min_mz': 100,
        'min_rt': 1,
        'max_rt': 300,
        'nsteps': 20000,
        'queryType': 'XICofFile_mf',
    }
    response = client.get('https://metatlas.nersc.gov/api/run', params=params)
    return np.asarray(json.loads(response.content))
def clusterRTCentroids(rt, cutoff):
    """Cluster retention-time centroids with single-linkage hierarchical clustering.

    rt     -- sequence of retention times
    cutoff -- threshold passed to fcluster (default 'inconsistent' criterion)

    Returns the flat cluster label for each input value.
    NOTE(review): the full pairwise |rt_i - rt_j| matrix is passed to linkage
    as a 2-D array, so scipy treats its rows as observations — kept as-is to
    preserve the original behavior.
    """
    centers = np.asarray(rt)
    # symmetric, so the sign of the subtraction does not matter
    pairwise = np.abs(centers[:, None] - centers)
    tree = linkage(pairwise, method='single', metric='euclidean')
    return fcluster(tree, cutoff)
def makeDataMat(export_fileIds, data, fileInfo, dictData):
    """Build the data matrix: one row per metabolite, one column per run.

    export_fileIds -- file ids in plot/column order
    data           -- list of EIC arrays, one per compound (cols: rt, intensity, fid)
    fileInfo       -- parsed fileInfo sheet (fid/name/group/retention_correction)
    dictData       -- atlas dict with the u'compounds' list

    Returns (rowLabels, rowGroups, colLabels, colGroups, dataMat, rtMat) where
    dataMat[i, j] is the summed intensity of compound i in file j inside its
    retention-corrected rt window and rtMat[i, j] is the intensity-weighted
    mean retention time (0 when no signal).
    """
    dataMat = np.zeros((len(data), len(export_fileIds)))
    rtMat = np.zeros((len(data), len(export_fileIds)))
    rowLabels = []
    colLabels = []
    rowGroups = []
    colGroups = []
    # per-file retention-time corrections, aligned with export_fileIds order
    rtCorr = []
    for f in export_fileIds:
        for i, f2 in enumerate(fileInfo['fid']):
            if f2 == f:
                rtCorr.append(fileInfo['retention_correction'][i])
    # column labels/groups, once for all files (used below in clustergram)
    for i, myFile in enumerate(export_fileIds):
        for j, fid in enumerate(fileInfo['fid']):
            if fid == myFile:
                colLabels.append(fileInfo['name'][j])
                colGroups.append(fileInfo['group'][j])
    for i, datum in enumerate(data):
        rowLabels.append(dictData[u'compounds'][i]['name'])
        rtMin = float(dictData[u'compounds'][i][u'rt_min'])
        rtMax = float(dictData[u'compounds'][i][u'rt_max'])
        rowGroups.append('Metabolite')
        for j, myFile in enumerate(export_fileIds):
            if datum.size > 3:  # empty/failed queries produce tiny arrays
                # BUG FIX: the original called np.logical_and(a, b, c) — the
                # third positional argument is the out= parameter, so the
                # upper rt bound was silently ignored.  reduce() applies all
                # three conditions.
                idx = np.logical_and.reduce((
                    datum[:, 2] == myFile,
                    datum[:, 0] >= (rtMin + float(rtCorr[j])),
                    datum[:, 0] <= (rtMax + float(rtCorr[j])),
                ))
                if np.sum(idx) > 0:
                    x1 = datum[:, 0][idx]
                    y1 = datum[:, 1][idx]
                    dataMat[i, j] = np.sum(y1)
                    if dataMat[i, j] > 0:
                        # intensity-weighted mean retention time
                        rtMat[i, j] = np.sum(np.multiply(x1, y1)) / np.sum(y1)
    return rowLabels, rowGroups, colLabels, colGroups, dataMat, rtMat
def calcGroupVals(colGroups, rowLabels, dataMat):
    """Compute per-group summary statistics from the data matrix.

    colGroups -- group label for each column of dataMat
    rowLabels -- metabolite names (only the count is used)
    dataMat   -- metabolites x runs intensity matrix

    Returns (uGroups, meanMat, stdevMat, cvMat, stderrMat, numinMat): the
    sorted unique group labels and, for each metabolite/group cell, the mean,
    population standard deviation, coefficient of variation (0 when the mean
    is not positive), standard error, and member count.
    """
    uGroups = np.unique(colGroups)
    n_rows, n_groups = len(rowLabels), len(uGroups)
    meanMat = np.zeros((n_rows, n_groups))
    stdevMat = np.zeros((n_rows, n_groups))
    cvMat = np.zeros((n_rows, n_groups))
    stderrMat = np.zeros((n_rows, n_groups))
    numinMat = np.zeros((n_rows, n_groups))
    for col, label in enumerate(uGroups):
        # columns of dataMat belonging to this group
        members = [pos for pos, g in enumerate(colGroups) if g == label]
        for row in range(n_rows):
            vals = dataMat[row, members]
            meanMat[row, col] = np.mean(vals)
            stdevMat[row, col] = np.std(vals)
            stderrMat[row, col] = np.std(vals) / len(members) ** 0.5
            numinMat[row, col] = len(members)
            if meanMat[row, col] > 0:
                cvMat[row, col] = stdevMat[row, col] / meanMat[row, col]
    return uGroups, meanMat, stdevMat, cvMat, stderrMat, numinMat
def exportCompoundAreas(myLabelString, uGroups, rowLabels, colLabels, dataMat,
                        meanMat, rtMat, stdevMat, cvMat, stderrMat, numinMat,
                        dictData):
    """Write a tab-delimited summary table to data/<label>/peakHeight_Table_<label>.tab.

    Each row is a compound: atlas fields first, then per-group mean / stdev /
    stderr / CV / member count, then per-file peak area and retention time.
    Every cell is followed by a tab (including the last one before newline),
    matching the historical file format.
    """
    output_filename = 'data/%s/peakHeight_Table_%s.tab' % (myLabelString, myLabelString)
    export_filenames = []
    myList = ['name', 'pubchem_id', 'formula', 'neutral_mass', 'mz',
              'mz_threshold', 'adducts', 'rt_max', 'rt_min', 'rt_peak']
    with codecs.open(output_filename, 'w', encoding='utf-8') as fid:
        # header row: atlas fields, group statistics, then per-file columns
        header_cells = list(myList)
        header_cells += ['Mean %s' % gro for gro in uGroups]
        header_cells += ['STDEV %s' % gro for gro in uGroups]
        header_cells += ['STDERR %s' % gro for gro in uGroups]
        header_cells += ['CV %s' % gro for gro in uGroups]
        header_cells += ['Num in %s' % gro for gro in uGroups]
        header_cells += ['%s Peak Area' % fname for fname in colLabels]
        header_cells += ['%s Retention Time in' % fname for fname in colLabels]
        for cell in header_cells:
            fid.write('%s\t' % cell)
        fid.write('\n')
        for i in range(len(rowLabels)):
            compound = dictData[u'compounds'][i]
            for field in myList:
                fid.write('%s\t' % compound[field])
            # group statistics, in the same order as the header
            for mat in (meanMat, stdevMat, stderrMat, cvMat, numinMat):
                for j in range(len(uGroups)):
                    fid.write('%5.2f\t' % mat[i, j])
            # per-file values
            for mat in (dataMat, rtMat):
                for j in range(len(colLabels)):
                    fid.write('%5.2f\t' % mat[i, j])
            fid.write('\n')
def plotTICs(colLabels, export_fileIds, ticData, myLabelString):
    """Save one total-ion-chromatogram PDF per file under data/<label>/TICs/.

    colLabels      -- display name per file (same order as export_fileIds)
    export_fileIds -- file ids used to slice rows out of ticData
    ticData        -- Nx3 array (rt, intensity, file id) from getAllTICS
    myLabelString  -- label used to build the output path
    """
    for pos, label in enumerate(colLabels):
        fig = plt.figure(1, figsize=(18, 8))
        axes = fig.add_subplot(111)
        mask = ticData[:, 2] == export_fileIds[pos]
        times = ticData[mask, 0]
        intensities = ticData[mask, 1]
        order = np.argsort(times)  # server points are not guaranteed sorted
        axes.plot(times[order], intensities[order])
        axes.set_xlabel('Time (min)')
        axes.set_ylabel('Magnitude (TIC)')
        fname = 'data/%s/TICs/TIC_%s %s%s' % (myLabelString, myLabelString,
                                              re.sub('[^A-Za-z0-9]+', '_', label), '.pdf')
        fig.savefig(fname)
        fig.clear()
def plotChromatograms(rowLabels,myLabelString,export_fileIds,fileInfo,dictData,numCols,data,equalaxis):
    """Write one multi-panel chromatogram PDF per compound.

    For each compound a figure with one subplot per file (numCols columns) is
    created, the EIC trace is drawn with retention-corrected rt bounds/peak
    marked, and the figure is saved under data/<label>/Chromatograms/.

    NOTE(review): this definition is shadowed by a second plotChromatograms
    later in this file, so this version is dead code once the whole file runs.

    rowLabels       -- compound names
    myLabelString   -- label used to build output paths
    export_fileIds  -- file ids in subplot order
    fileInfo        -- parsed fileInfo sheet (fid/name/retention_correction...)
    dictData        -- atlas dict with the u'compounds' list
    numCols         -- subplot columns per figure
    data            -- list of EIC arrays, one per compound (cols: rt, intensity, fid)
    equalaxis       -- 1 forces a shared y-range across subplots
    """
    # for each compound, make a chromatogram for each file
    # each filename is going to be a compound name
    # embed fonts so the PDFs remain editable downstream
    plt.rcParams['pdf.fonttype']=42
    # plt.rcParams['pdf.useafm'] = True
    plt.rcParams['pdf.use14corefonts'] = True
    plt.rcParams['text.usetex'] = False
    plt.rcParams.update({'font.size': 12})
    plt.rcParams.update({'font.weight': 'bold'})
    plt.rcParams['axes.linewidth'] = 2 # set the value globally
    # one output PDF per compound; sanitize the compound name for the filesystem
    export_filenames = []
    for i,cname in enumerate(rowLabels):
        export_filenames.append('data/%s/Chromatograms/Chromatograms_%s %s%s' % (myLabelString,myLabelString,re.sub('[^A-Za-z0-9]+', '_', cname),'.pdf'))
    # subplot titles: file names in export order, minus the .mzML suffix
    subplot_titles = []
    for i,myFile in enumerate(export_fileIds):
        for j,fid in enumerate(fileInfo['fid']):
            if fid == myFile:
                subplot_titles.append(fileInfo['name'][j].replace('.mzML',''))
    # %config InlineBackend.figure_format = 'png'
    # per-file retention-time corrections, aligned with export_fileIds order
    rtCorr = []
    for f in export_fileIds:
        for i,f2 in enumerate(fileInfo['fid']):
            if f2 == f:
                rtCorr.append(fileInfo['retention_correction'][i])
    nRows = int(np.ceil(len(export_fileIds)/numCols))
    for i,compound in enumerate(dictData[u'compounds']):
        fig, ax = plt.subplots(nRows, int(numCols),figsize=(8*numCols,nRows * 6))
        min_x_val = 1000000
        max_x_val = 0
        max_y_val = 0
        myname = dictData[u'compounds'][i]['name']
        for j,a in enumerate(ax.flat):
            # NOTE(review): rtCorr[j] and subplot_titles[j] are indexed for
            # every axis here, so unused trailing axes (j >= number of files)
            # would raise IndexError; the later redefinition guards this.
            a.plot(float(compound[u'rt_peak'])+rtCorr[j],1e12,'.')
            a.axvline(float(compound[u'rt_min'])+rtCorr[j],linewidth=2, color='k') #original rtMin
            a.axvline(float(compound[u'rt_max'])+rtCorr[j],linewidth=2, color='k') #original rtMax
            a.axvline(float(compound[u'rt_peak'])+rtCorr[j],linewidth=2, color='r') #original rtPeak
            a.set_xlabel('Time (min)',weight='bold')
            a.set_ylabel('Intensity (au)',weight='bold')
            a.set_title(subplot_titles[j],fontsize=12,weight='bold')
            if j<len(export_fileIds):
                if len(data[i])>3:
                    # slice this file's trace out of the compound's EIC array
                    x1 = data[i][:,0][(data[i][:,2]==export_fileIds[j])]
                    y1 = data[i][:,1][(data[i][:,2]==export_fileIds[j])]
                    if x1.size>0:
                        # if myname.startswith('IST'):
                        #     y1 = y1[:]
                        # else:
                        #     y1 = y1[:] / fileInfo['normalization_factor'][j]
                        idx = np.argsort(x1)
                        x1 = x1[idx]
                        y1 = y1[idx]
                        y1 = y1 - np.min(y1)  # subtract the baseline minimum
                        a.plot(x1,y1,'k-',linewidth=2.0,alpha=1.0)
                        # shade the (retention-corrected) integration window
                        myWhere = np.logical_and(x1>=(float(compound[u'rt_min'])+float(rtCorr[j])), x1<=(float(compound[u'rt_max'])+float(rtCorr[j])) )
                        a.fill_between(x1,0,y1,myWhere, facecolor='c', alpha=0.3) #new rtBounds
                        # track global axis extents for the shared-limits pass below
                        if np.min(data[i][:,0])<min_x_val:
                            min_x_val = np.min(data[i][:,0])
                        if np.max(data[i][:,0])>max_x_val:
                            max_x_val = np.max(data[i][:,0])
                        if np.max(y1)>max_y_val:
                            max_y_val = np.max(y1)
        # second pass: apply the shared x (and optionally y) limits to all axes
        for j,a in enumerate(ax.flat):
            a.set_xlim([min_x_val,max_x_val])
        if equalaxis == 1:
            for j,a in enumerate(ax.flat):
                a.set_ylim([0,max_y_val])
        fig.tight_layout()
        fig.savefig(export_filenames[i])
        fig.clear()
    plt.close('all')
def plotBoxPlots(myLabelString, rowLabels, colGroups, dataMat, scale, fontsize):
    """Save one box plot per metabolite, grouped by sample group.

    NOTE(review): this definition is shadowed by an identical second copy
    later in the file; it is dead code once the whole file runs.

    myLabelString -- label used to build the output path
    rowLabels     -- metabolite names (one per row of dataMat)
    colGroups     -- group label for each column of dataMat
    dataMat       -- metabolites x runs intensity matrix
    scale         -- y-axis scale string passed to matplotlib (e.g. 'log')
    fontsize      -- tick-label font size
    """
    # embed fonts so the PDFs remain editable downstream
    plt.rcParams['pdf.fonttype'] = 42
    # plt.rcParams['pdf.useafm'] = True
    plt.rcParams['pdf.use14corefonts'] = True
    plt.rcParams['text.usetex'] = False
    plt.rcParams.update({'font.size': 18})
    plt.rcParams.update({'font.weight': 'bold'})
    plt.rcParams['axes.linewidth'] = 2  # set the value globally
    for i, cname in enumerate(rowLabels):
        myVals = []
        for j, mygroup in enumerate(colGroups):
            # +1 so a log-scaled axis tolerates zero intensities
            myVals.append((mygroup, dataMat[i, j] + 1))
        myVals = sorted(myVals, key=lambda x: x[0])  # groupby needs sorted input
        data_to_plot = []
        groupName = []
        for key, group in groupby(myVals, lambda x: x[0]):
            # FIX: the original `list(zip(*group)[1])` only works on Python 2,
            # where zip returns a list; this form is equivalent there and also
            # works on Python 3.
            data_to_plot.append([pair[1] for pair in group])
            groupName.append(key)
        fig = plt.figure(1, figsize=(18, 18))
        ax = fig.add_subplot(111, yscale=scale)
        bp = ax.boxplot(data_to_plot)
        ax.set_xticklabels(groupName, rotation=40, ha='right', fontsize=fontsize)
        ax.set_title(cname)
        ax.grid('on', alpha=0.5)
        ax.set_ylabel('Peak Area')
        plt.tight_layout()
        fig.savefig('data/%s/Boxplots/Boxplot_%s %s%s' % (myLabelString, myLabelString, re.sub('[^A-Za-z0-9]+', '', cname), '.pdf'))
        fig.clear()
# plt.rcParams['ps.fonttype']=42
# plt.rcParams['ps.useafm']= True
def plotChromatograms(rowLabels,myLabelString,export_fileIds,fileInfo,dictData,numCols,data,equalaxis):
    """Write one multi-panel chromatogram PDF per compound.

    Second definition in the file: it shadows the earlier plotChromatograms.
    Compared with the first copy, the per-axis work is entirely inside the
    j < len(export_fileIds) guard (so unused trailing axes are skipped) and
    no retention-time corrections are applied to the rt lines/window.

    rowLabels       -- compound names
    myLabelString   -- label used to build output paths
    export_fileIds  -- file ids in subplot order
    fileInfo        -- parsed fileInfo sheet (fid/name...)
    dictData        -- atlas dict with the u'compounds' list
    numCols         -- subplot columns per figure
    data            -- list of EIC arrays, one per compound (cols: rt, intensity, fid)
    equalaxis       -- 1 forces a shared y-range across subplots
    """
    # for each compound, make a chromatogram for each file
    # each filename is going to be a compound name
    # embed fonts so the PDFs remain editable downstream
    plt.rcParams['pdf.fonttype']=42
    # plt.rcParams['pdf.useafm'] = True
    plt.rcParams['pdf.use14corefonts'] = True
    plt.rcParams['text.usetex'] = False
    plt.rcParams.update({'font.size': 12})
    plt.rcParams.update({'font.weight': 'bold'})
    plt.rcParams['axes.linewidth'] = 2 # set the value globally
    # one output PDF per compound; sanitize the compound name for the filesystem
    export_filenames = []
    for i,cname in enumerate(rowLabels):
        export_filenames.append('data/%s/Chromatograms/Chromatograms_%s %s%s' % (myLabelString,myLabelString,re.sub('[^A-Za-z0-9]+', '_', cname),'.pdf'))
    # subplot titles: file names in export order, minus the .mzML suffix
    subplot_titles = []
    for i,myFile in enumerate(export_fileIds):
        for j,fid in enumerate(fileInfo['fid']):
            if fid == myFile:
                subplot_titles.append(fileInfo['name'][j].replace('.mzML',''))
    # %config InlineBackend.figure_format = 'png'
    nRows = int(np.ceil(len(export_fileIds)/numCols))
    for i,compound in enumerate(dictData[u'compounds']):
        fig, ax = plt.subplots(nRows, int(numCols),figsize=(8*numCols,nRows * 6))
        min_x_val = 1000000
        max_x_val = 0
        max_y_val = 0
        myname = dictData[u'compounds'][i]['name']
        for j,a in enumerate(ax.flat):
            # skip trailing axes beyond the number of files
            if j<len(export_fileIds):
                a.set_xlabel('Time (min)',weight='bold')
                a.set_ylabel('Intensity (au)',weight='bold')
                a.set_title(subplot_titles[j],fontsize=12,weight='bold')
                a.axvline(float(compound[u'rt_min']),linewidth=2, color='k') #original rtMin
                a.axvline(float(compound[u'rt_max']),linewidth=2, color='k') #original rtMax
                a.axvline(float(compound[u'rt_peak']),linewidth=2, color='r') #original rtPeak
                if len(data[i])>3:
                    # slice this file's trace out of the compound's EIC array
                    x1 = data[i][:,0][(data[i][:,2]==export_fileIds[j])]
                    y1 = data[i][:,1][(data[i][:,2]==export_fileIds[j])]
                    if x1.size>0:
                        # if myname.startswith('IST'):
                        #     y1 = y1[:]
                        # else:
                        #     y1 = y1[:] / fileInfo['normalization_factor'][j]
                        idx = np.argsort(x1)
                        x1 = x1[idx]
                        y1 = y1[idx]
                        y1 = y1 - np.min(y1)  # subtract the baseline minimum
                        a.plot(x1,y1,'k-',linewidth=2.0,alpha=1.0)
                        # shade the compound's rt window (no retention correction here)
                        myWhere = np.logical_and((x1>=float(compound[u'rt_min'])), (x1<=float(compound[u'rt_max'])) )
                        a.fill_between(x1,0,y1,myWhere, facecolor='c', alpha=0.3) #new rtBounds
                        # track global axis extents for the shared-limits pass below
                        if np.min(data[i][:,0])<min_x_val:
                            min_x_val = np.min(data[i][:,0])
                        if np.max(data[i][:,0])>max_x_val:
                            max_x_val = np.max(data[i][:,0])
                        if np.max(y1)>max_y_val:
                            max_y_val = np.max(y1)
        # second pass: apply the shared x (and optionally y) limits to all axes
        for j,a in enumerate(ax.flat):
            a.set_xlim([min_x_val,max_x_val])
        if equalaxis == 1:
            for j,a in enumerate(ax.flat):
                a.set_ylim([0,max_y_val])
        fig.tight_layout()
        print export_filenames[i]
        fig.savefig(export_filenames[i])
        fig.clear()
    plt.close('all')
def plotBoxPlots(myLabelString, rowLabels, colGroups, dataMat, scale, fontsize):
    """Save one box plot per metabolite, grouped by sample group.

    Second (identical) definition in the file; it shadows the earlier copy.

    myLabelString -- label used to build the output path
    rowLabels     -- metabolite names (one per row of dataMat)
    colGroups     -- group label for each column of dataMat
    dataMat       -- metabolites x runs intensity matrix
    scale         -- y-axis scale string passed to matplotlib (e.g. 'log')
    fontsize      -- tick-label font size
    """
    # embed fonts so the PDFs remain editable downstream
    plt.rcParams['pdf.fonttype'] = 42
    # plt.rcParams['pdf.useafm'] = True
    plt.rcParams['pdf.use14corefonts'] = True
    plt.rcParams['text.usetex'] = False
    plt.rcParams.update({'font.size': 18})
    plt.rcParams.update({'font.weight': 'bold'})
    plt.rcParams['axes.linewidth'] = 2  # set the value globally
    for i, cname in enumerate(rowLabels):
        myVals = []
        for j, mygroup in enumerate(colGroups):
            # +1 so a log-scaled axis tolerates zero intensities
            myVals.append((mygroup, dataMat[i, j] + 1))
        myVals = sorted(myVals, key=lambda x: x[0])  # groupby needs sorted input
        data_to_plot = []
        groupName = []
        for key, group in groupby(myVals, lambda x: x[0]):
            # FIX: the original `list(zip(*group)[1])` only works on Python 2,
            # where zip returns a list; this form is equivalent there and also
            # works on Python 3.
            data_to_plot.append([pair[1] for pair in group])
            groupName.append(key)
        fig = plt.figure(1, figsize=(18, 18))
        ax = fig.add_subplot(111, yscale=scale)
        bp = ax.boxplot(data_to_plot)
        ax.set_xticklabels(groupName, rotation=40, ha='right', fontsize=fontsize)
        ax.set_title(cname)
        ax.grid('on', alpha=0.5)
        ax.set_ylabel('Peak Area')
        plt.tight_layout()
        fig.savefig('data/%s/Boxplots/Boxplot_%s %s%s' % (myLabelString, myLabelString, re.sub('[^A-Za-z0-9]+', '', cname), '.pdf'))
        fig.clear()
# plt.rcParams['ps.fonttype']=42
# plt.rcParams['ps.useafm']= True
def saveData(myFolder, ticData, dictId, myExperimentID, dictData, data, fileInfo, export_fileIds):
    """Pickle the whole analysis state to data/<folder>/dataset_<folder>.pkl.

    Saved so a session can be restored if the NERSC connection is lost.
    The data/<folder>/ directory must already exist.
    """
    metatlas_data = {
        'myLabelString': myFolder,
        'ticData': ticData,
        'dictId': dictId,
        'myExperimentID': myExperimentID,
        'dictData': dictData,
        'data': data,
        'fileInfo': fileInfo,
        'export_fileIds': export_fileIds,
    }
    # FIX: the original passed an anonymous open(...) handle straight to
    # pickle.dump and never closed it; a context manager guarantees the file
    # is flushed and closed.
    with open('data/%s/dataset_%s.pkl' % (myFolder, myFolder), 'wb') as fid:
        pickle.dump(metatlas_data, fid)