In [1]:
# %load http://pmb-bordeaux.fr/scripts/PyODAM.py
import requests
import pandas as pd
def getDataFromODAM(dataset, subset='', query=''):
    """Fetch a tab-separated table from the ODAM web service as a DataFrame.

    See http://pmb-bordeaux.fr/odamsw/

    The request URL is ``<base>/<dataset>``, followed by ``/(<subset>)`` when
    ``subset`` is non-empty and ``/<query>`` when ``query`` is non-empty.

    Parameters
    ----------
    dataset : str
        Dataset name appended to the service base URL.
    subset : str, optional
        Subset expression; wrapped in parentheses in the URL.
    query : str, optional
        Trailing query path appended verbatim.

    Returns
    -------
    pandas.DataFrame
        One row per data line of the response. Rows containing a missing
        cell or the literal string ``'None'`` are removed. The frame carries
        an extra leading ``index`` column (from ``reset_index()``) holding
        each kept row's 1-based line position in the response. Columns that
        can be parsed as numbers are converted; the others stay as strings.

    Raises
    ------
    requests.HTTPError
        If the service answers with a 4xx/5xx status, instead of silently
        parsing the error page as if it were a data table.
    """
    headers = {'authorization': "Basic API Key Ommitted", 'accept': "text/csv"}
    urlcomp = 'http://pmb-bordeaux.fr/getdata/tsv/' + dataset
    if subset:
        urlcomp = urlcomp + '/(' + subset + ')'
    if query:
        urlcomp = urlcomp + '/' + query

    ## API Call to retrieve report
    rcomp = requests.get(urlcomp, headers=headers)
    rcomp.raise_for_status()

    ## Parse data into a DataFrame (split the payload only once)
    ## see https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.html
    rows = [line.split('\t') for line in rcomp.text.split('\n')]
    labels = rows[0]
    # The header row is deliberately kept in the frame and dropped afterwards:
    # it fixes the frame width (short rows, e.g. the empty one produced by a
    # trailing newline, are padded with missing values and removed by dropna)
    # and keeps the data rows labelled from 1.
    df = pd.DataFrame(rows, columns=labels)
    df.drop(df.index[0], inplace=True)
    df = df.mask(df.eq('None')).dropna().reset_index()

    ## Convert all variables (columns) to numeric when possible
    ## see http://queirozf.com/entries/pandas-dataframe-examples-column-operations
    for label in labels:
        try:
            df[label] = pd.to_numeric(df[label])
        except (ValueError, TypeError):
            # Column is not numeric: keep it unchanged.
            pass
    return df
def intersection(lst1, lst2):
    """Return the items of ``lst1`` that are also found in ``lst2``.

    The result is a new list that follows the order of ``lst1`` and keeps its
    duplicates; membership is tested with ``in`` against ``lst2``.
    """
    common = []
    for item in lst1:
        if item in lst2:
            common.append(item)
    return common
def getVarNum(dataframe):
    """Return the names of the columns of ``dataframe`` that are numeric.

    Each column is passed through ``pd.to_numeric``. Columns for which the
    conversion succeeds are listed in the result, in column order.

    Side effect: every successfully converted column is written back into
    ``dataframe``, so the caller's frame is modified in place.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Frame whose columns are probed (and converted when possible).

    Returns
    -------
    list
        Labels of the columns that could be converted to numeric.
    """
    varnum = []
    for column in dataframe.columns:
        try:
            dataframe[column] = pd.to_numeric(dataframe[column])
        except (ValueError, TypeError):
            # Not a numeric column: skip it, but let unrelated errors
            # (e.g. KeyboardInterrupt) propagate instead of hiding them.
            continue
        varnum.append(column)
    return varnum
def getSubsetFromODAM(dataset, subset='', query=''):
    """Fetch the data of one or more subsets together with their metadata tables.

    Five requests are made through ``getDataFromODAM``: one with the caller's
    ``query`` and one for each of the fixed queries ``'identifier'``,
    ``'factor'``, ``'quantitative'`` and ``'qualitative'``.

    Parameters
    ----------
    dataset : str
        Dataset name, forwarded to ``getDataFromODAM``.
    subset : str, optional
        Comma-separated subset names (e.g. ``'activome,qNMR_metabo'``).
    query : str, optional
        Query used for the ``'data'`` table.

    Returns
    -------
    dict
        Keys ``'data'``, ``'identifier'``, ``'factor'``, ``'quantitative'``
        and ``'qualitative'`` map to the corresponding DataFrames.
        ``'numvars'`` is a list: for each subset name, in order, the
        ``Attribute`` values of the ``'quantitative'`` table for that subset
        that are also numeric columns of the ``'data'`` table.
    """
    df1 = getDataFromODAM(dataset, subset, query)
    df2 = getDataFromODAM(dataset, subset, 'identifier')
    df3 = getDataFromODAM(dataset, subset, 'factor')
    df4 = getDataFromODAM(dataset, subset, 'quantitative')
    df5 = getDataFromODAM(dataset, subset, 'qualitative')

    # Keep only columns that have been converted to numeric (thus removing
    # columns with NA). The numeric column list does not depend on the subset
    # name, so it is computed once rather than on every loop iteration.
    numeric_columns = getVarNum(df1)
    numvars = []
    for name in subset.split(','):
        numvars = numvars + intersection(df4[df4.Subset == name]['Attribute'], numeric_columns)

    return {
        'data': df1,
        'identifier': df2,
        'factor': df3,
        'quantitative': df4,
        'qualitative': df5,
        'numvars': numvars,
    }
def convertDateToStr(DataNum):
    """Format day counts since 1899-12-30 as ``MM/DD/YYYY`` strings.

    ``DataNum`` is an iterable of numbers; each one is added, as a number of
    days, to the 1899-12-30 origin and the resulting timestamp is formatted.
    Returns a list of strings in the same order.
    """
    origin = pd.to_datetime('1899-12-30')
    dateStr = []
    for days in DataNum:
        moment = origin + pd.to_timedelta(days, 'D')
        dateStr.append(moment.strftime("%m/%d/%Y"))
    return dateStr
def convertTimeToStr(TimeNum):
    """Format day counts since 1899-12-30 as ``HH:MM`` time-of-day strings.

    ``TimeNum`` is an iterable of numbers; each one is added, as a (possibly
    fractional) number of days, to the 1899-12-30 origin and only the hour
    and minute of the resulting timestamp are kept.
    Returns a list of strings in the same order.
    """
    origin = pd.to_datetime('1899-12-30')
    timeStr = []
    for days in TimeNum:
        moment = origin + pd.to_timedelta(days, 'D')
        timeStr.append(moment.strftime("%H:%M"))
    return timeStr
In [2]:
# Get the subset list of a dataset: with no subset and no query,
# getDataFromODAM requests the dataset URL alone.
dataset = 'frim1'
meta = getDataFromODAM(dataset)
# 'index' is the column added by reset_index() inside getDataFromODAM.
meta[['index', 'LinkID','Subset','Identifier', 'Description']]
Out[2]:
In [3]:
# Get all values of the merged data subsets (both 'activome' and 'qNMR_metabo')
# for the specific 'sample' entry equal to 365
subset = 'activome,qNMR_metabo'
# NOTE: despite its name, df is the dict returned by getSubsetFromODAM, not a DataFrame.
df = getSubsetFromODAM(dataset, subset,'sample/365?limit=10')
# The 'data' entry is the DataFrame fetched with the query given above.
data = df['data']
# View all merged subset columns
data.columns
Out[3]:
In [4]:
# Convert both date and time values, stored in MS Excel numeric format, into strings
# (convertDateToStr yields MM/DD/YYYY, convertTimeToStr yields HH:MM).
data.HarvestDate = convertDateToStr(data.HarvestDate)
data.HarvestHour = convertTimeToStr(data.HarvestHour)
data
Out[4]:
In [5]:
# Display the variable list within the 'factor' category of a merged data subset
# (the table fetched by getSubsetFromODAM with the 'factor' query).
df['factor']
Out[5]:
In [6]:
# Display the variable list within the 'identifier' category of a merged data subset
# (the table fetched by getSubsetFromODAM with the 'identifier' query).
df['identifier']
Out[6]:
In [7]:
# Display the variable list within the 'quantitative' category of a merged data subset
quantitative = df['quantitative']
# .loc slices by label and includes both ends: rows labelled 0 through 10.
quantitative.loc[ 0:10, ]
Out[7]:
In [8]:
# Select the variables from the merged data belonging to the 'activome' data subset:
# the 'Attribute' values of the matching 'quantitative' rows are used as column labels.
data[quantitative[quantitative.Subset=='activome']['Attribute']]
Out[8]:
In [9]:
# Select the variables from the merged data belonging to the 'qNMR_metabo' data subset:
# the 'Attribute' values of the matching 'quantitative' rows are used as column labels.
data[quantitative[quantitative.Subset=='qNMR_metabo']['Attribute']]
Out[9]:
In [10]:
# Convert a sub-dataset (the 'activome' columns of the merged data) to a NumPy array
numpy_matrix = data[quantitative[quantitative.Subset=='activome']['Attribute']].to_numpy()
numpy_matrix
Out[10]:
In [ ]: