In [1]:
# %load http://pmb-bordeaux.fr/scripts/PyODAM.py
import requests
import pandas as pd

def getDataFromODAM(dataset, subset='', query=''):
    # See http://pmb-bordeaux.fr/odamsw/
    headers = {'authorization': "Basic API Key Ommitted", 'accept': "text/csv"}
    urlcomp = 'http://pmb-bordeaux.fr/getdata/tsv/'+dataset
    if subset:
        urlcomp = urlcomp+'/('+subset+')'
    if query:
        urlcomp = urlcomp+'/'+query

    ## API Call to retrieve report
    rcomp = requests.get(urlcomp, headers=headers)

    ## API Results
    data = rcomp.text

    ## Parse data into a DataFrame
    ## see https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.html
    labels = data.split('\n')[0].split('\t')
    df = pd.DataFrame([x.split('\t') for x in data.split('\n')], columns=labels)
    df.drop(df.index[0], inplace=True)
    df = df.mask(df.eq('None')).dropna().reset_index()

    ## Convert all variables (columns) to numeric when possible
    ## see http://queirozf.com/entries/pandas-dataframe-examples-column-operations
    for l in labels:
        try:
            df[l] = pd.to_numeric(df[l])
        except:
            pass

    # Return Data.frame
    return df

def intersection(lst1, lst2): 
    lst3 = [value for value in lst1 if value in lst2] 
    return lst3 

def getVarNum(dataframe):
    varnum=[]
    for l in dataframe.columns:
        try:
            dataframe[l] = pd.to_numeric(dataframe[l])
            varnum.append(l)
        except:
            pass
    return varnum

def getSubsetFromODAM(dataset, subset='', query=''):
    df1 = getDataFromODAM(dataset, subset, query)
    df2 = getDataFromODAM(dataset, subset, 'identifier')
    df3 = getDataFromODAM(dataset, subset, 'factor')
    df4 = getDataFromODAM(dataset, subset, 'quantitative')
    df5 = getDataFromODAM(dataset, subset, 'qualitative')

    # Keep only columns that have been converted to numeric (thus removing columns with NA)
    S = subset.split(',')
    numvars = []
    for s in S:
        numvars = numvars + intersection(df4[df4.Subset==s]['Attribute'], getVarNum(df1))

    list1, list2 = ['data', 'identifier', 'factor', 'quantitative', 'qualitative', 'numvars' ], \
                   [df1, df2, df3, df4, df5, numvars ]
    d = dict( zip( list1, list2 ))
    return(d)

def convertDateToStr(DataNum):
    dateStr = [ ( pd.to_datetime('1899-12-30') + pd.to_timedelta(x,'D') ).strftime("%m/%d/%Y") for x in DataNum ]
    return(dateStr)

def convertTimeToStr(TimeNum):
    timeStr = [ ( pd.to_datetime('1899-12-30') + pd.to_timedelta(x,'D') ).strftime("%H:%M") for x in TimeNum ]
    return(timeStr)

In [2]:
# Get the subset list of a dataset
dataset = 'frim1'
meta = getDataFromODAM(dataset)
meta[['index', 'LinkID','Subset','Identifier', 'Description']]


Out[2]:
index LinkID Subset Identifier Description
0 1 0 plants PlantID Plant features
1 2 1 samples SampleID Sample features
2 3 2 aliquots AliquotID Aliquots features
3 4 3 cellwall_metabo AliquotID Cell wall Compound quantifications
4 5 3 cellwall_metaboFW AliquotID Cell Wall Compound quantifications (FW)
5 6 3 activome AliquotID Activome Features
6 7 2 pools PoolID Pools of remaining pools
7 8 7 qMS_metabo PoolID MS Compounds quantification
8 9 7 qNMR_metabo PoolID NMR Compounds quantification
9 10 3 plato_hexosesP AliquotID Hexoses Phosphate
10 11 3 lipids_AG AliquotID Lipids AG
11 12 3 AminoAcid AliquotID Amino Acids

In [3]:
# Get all values of a merged data subsets ( both activome & qNMR_metabofor) the specific 'sample' entry equal to 365
subset = 'activome,qNMR_metabo'
df = getSubsetFromODAM(dataset, subset,'sample/365?limit=10')
data = df['data']

# View all merged subset columns
data.columns


Out[3]:
Index(['index', 'PlantID', 'Rank', 'PlantNum', 'Treatment', 'SampleID',
       'Truss', 'DevStage', 'FruitAge', 'HarvestDate', 'HarvestHour',
       'FruitPosition', 'FruitDiameter', 'FruitHeight', 'FruitFW', 'DW',
       'AliquotID', 'PGM', 'F16BP_Cyt', 'PyrK', 'CitS', 'PPI', 'AcoS', 'PFK',
       'FruS', 'F16BP_Stroma', 'GluS', 'ISODH_NAD', 'EnoS', 'ISODH_NADP',
       'PEPC', 'FBPA', 'SucCoALig', 'MALDH', 'AlaS', 'FumS', 'AspS',
       'GLUDH_NADP', 'GAPDH_NAD', 'GAPDH_NADP', 'GLUDH_NAD', 'TPI', 'PhoS',
       'NI', 'AciS', 'G6PDH', 'UGPS', 'SucS', 'MAL_NAD', 'ShiS', 'MAL_NADP',
       'PGI_tot', 'SolStarchS', 'AGPS', 'SucPhosphateS', 'PoolID', 'glucose',
       'saccharose', 'fructose', 'galactose', 'mannose', 'rhamnose', 'acetate',
       'chlorogenate', 'citrate', 'fumarate', 'galacturonate', 'malate',
       'quinate', 'alanine', 'asparagine', 'aspartate', 'GABA', 'glutamine',
       'glutamate', 'isoleucine', 'phenylalanine', 'tryptophane', 'tyrosine',
       'valine', 'pyroglutamate', 'trigonelline', 'choline', 'inositol'],
      dtype='object')

In [4]:
# Convert both data and time in MS Excel format into String
data.HarvestDate = convertDateToStr(data.HarvestDate)
data.HarvestHour = convertTimeToStr(data.HarvestHour)
data


Out[4]:
index PlantID Rank PlantNum Treatment SampleID Truss DevStage FruitAge HarvestDate ... glutamate isoleucine phenylalanine tryptophane tyrosine valine pyroglutamate trigonelline choline inositol
0 1 A17 A 17 Control 365 T6 FR.02 47DPA 09/02/2010 ... 45.478818 2.750447 5.153496 0.345287 1.624778 1.372135 24.046419 1.078428 5.936098 65.898711
1 2 A8 A 8 Control 365 T6 FR.02 47DPA 09/02/2010 ... 45.478818 2.750447 5.153496 0.345287 1.624778 1.372135 24.046419 1.078428 5.936098 65.898711
2 3 D3 D 210 Control 365 T6 FR.02 47DPA 09/02/2010 ... 45.478818 2.750447 5.153496 0.345287 1.624778 1.372135 24.046419 1.078428 5.936098 65.898711
3 4 E35 E 311 Control 365 T6 FR.02 47DPA 09/02/2010 ... 45.478818 2.750447 5.153496 0.345287 1.624778 1.372135 24.046419 1.078428 5.936098 65.898711
4 5 H11 H 356 Control 365 T6 FR.02 47DPA 09/02/2010 ... 45.478818 2.750447 5.153496 0.345287 1.624778 1.372135 24.046419 1.078428 5.936098 65.898711

5 rows × 84 columns


In [5]:
# Display the variable list within the 'factor' category of a merged data subset
df['factor']


Out[5]:
index Subset Attribute Description Type CV_Term_ID CV_Term_Name
0 1 plants Treatment Treatment applied on plants string http://www.ebi.ac.uk/efo/EFO_0000469 environmental factor
1 2 samples DevStage fruit development stage string http://purl.obolibrary.org/obo/PO_0001002 fruit development stage
2 3 samples FruitAge fruit age (dpa) string http://purl.obolibrary.org/obo/PO_0001002 fruit development stage

In [6]:
# Display the variable list within the 'identifier' category of a merged data subset
df['identifier']


Out[6]:
index Subset Attribute Description Type CV_Term_ID CV_Term_Name
0 1 plants PlantID Plant identifier string http://purl.obolibrary.org/obo/OBI_0001141 individual organism identifier
1 2 samples SampleID Pool of several harvests numeric http://www.ebi.ac.uk/efo/EFO_0005316 sample pooling
2 3 aliquots AliquotID Aliquot Identifier numeric http://www.ebi.ac.uk/efo/EFO_0005316 sample pooling
3 4 activome AliquotID Aliquot Identifier numeric http://purl.obolibrary.org/obo/IAO_0000578 centrally registered identifier
4 5 pools PoolID Pool of several samples string http://www.ebi.ac.uk/efo/EFO_0005316 sample pooling
5 6 qNMR_metabo PoolID Pool of several samples string http://www.ebi.ac.uk/efo/EFO_0005316 sample pooling

In [7]:
# Display the variable list within the 'quantitative' category of a merged data subset
quantitative = df['quantitative']
quantitative.loc[ 0:10, ]


Out[7]:
index Subset Attribute Description Type CV_Term_ID CV_Term_Name
0 1 samples FruitDiameter Fruit diameter (mm) numeric http://ncicb.nci.nih.gov/xml/owl/EVS/Thesaurus... Diameter
1 2 samples FruitHeight Fruit height (mm) numeric http://ncicb.nci.nih.gov/xml/owl/EVS/Thesaurus... Height
2 3 samples FruitFW Fruit Fresh Weight(g) numeric http://ncicb.nci.nih.gov/xml/owl/EVS/Thesaurus... Weight
3 4 samples DW Percentage of dry matter (% DW), measured afte... numeric
4 5 activome PGM Phosphoglucomutase numeric
5 6 activome F16BP_Cyt Fructose-1,6-bisphosphatase (cyt) numeric
6 7 activome PyrK Pyruvate kinase numeric
7 8 activome CitS Citrate synthase numeric
8 9 activome PPI Phosphofructokinase (PPi) numeric
9 10 activome AcoS Aconitase numeric
10 11 activome PFK Phosphofructokinase (ATP) numeric

In [8]:
# Select the variables from the merged data belongings to the 'activome' data subset
data[quantitative[quantitative.Subset=='activome']['Attribute']]


Out[8]:
PGM F16BP_Cyt PyrK CitS PPI AcoS PFK FruS F16BP_Stroma GluS ... G6PDH UGPS SucS MAL_NAD ShiS MAL_NADP PGI_tot SolStarchS AGPS SucPhosphateS
0 NA 10.92 304.68 6.12 576.56 145.05 73.97 73.88 44.38 26.46 ... 109.01 3233.62 NA 535.3 67.44 219.78 384.85 77.01 80.18 64.33
1 NA 10.92 304.68 6.12 576.56 145.05 73.97 73.88 44.38 26.46 ... 109.01 3233.62 NA 535.3 67.44 219.78 384.85 77.01 80.18 64.33
2 NA 10.92 304.68 6.12 576.56 145.05 73.97 73.88 44.38 26.46 ... 109.01 3233.62 NA 535.3 67.44 219.78 384.85 77.01 80.18 64.33
3 NA 10.92 304.68 6.12 576.56 145.05 73.97 73.88 44.38 26.46 ... 109.01 3233.62 NA 535.3 67.44 219.78 384.85 77.01 80.18 64.33
4 NA 10.92 304.68 6.12 576.56 145.05 73.97 73.88 44.38 26.46 ... 109.01 3233.62 NA 535.3 67.44 219.78 384.85 77.01 80.18 64.33

5 rows × 38 columns


In [9]:
# Select the variables from the merged data belongings to the 'qNMR_metabo' data subset
data[quantitative[quantitative.Subset=='qNMR_metabo']['Attribute']]


Out[9]:
glucose saccharose fructose galactose mannose rhamnose acetate chlorogenate citrate fumarate ... glutamate isoleucine phenylalanine tryptophane tyrosine valine pyroglutamate trigonelline choline inositol
0 756.688509 33.907515 975.485366 1.544911 4.150709 4.02276 4.296019 2.428279 182.777927 0.079999 ... 45.478818 2.750447 5.153496 0.345287 1.624778 1.372135 24.046419 1.078428 5.936098 65.898711
1 756.688509 33.907515 975.485366 1.544911 4.150709 4.02276 4.296019 2.428279 182.777927 0.079999 ... 45.478818 2.750447 5.153496 0.345287 1.624778 1.372135 24.046419 1.078428 5.936098 65.898711
2 756.688509 33.907515 975.485366 1.544911 4.150709 4.02276 4.296019 2.428279 182.777927 0.079999 ... 45.478818 2.750447 5.153496 0.345287 1.624778 1.372135 24.046419 1.078428 5.936098 65.898711
3 756.688509 33.907515 975.485366 1.544911 4.150709 4.02276 4.296019 2.428279 182.777927 0.079999 ... 45.478818 2.750447 5.153496 0.345287 1.624778 1.372135 24.046419 1.078428 5.936098 65.898711
4 756.688509 33.907515 975.485366 1.544911 4.150709 4.02276 4.296019 2.428279 182.777927 0.079999 ... 45.478818 2.750447 5.153496 0.345287 1.624778 1.372135 24.046419 1.078428 5.936098 65.898711

5 rows × 28 columns


In [10]:
# Convert a sub-data set to numpy format
numpy_matrix = data[quantitative[quantitative.Subset=='activome']['Attribute']].to_numpy()
numpy_matrix


Out[10]:
array([['NA', 10.92, 304.68, 6.12, 576.56, 145.05, 73.97, 73.88, 44.38,
        26.46, 222.35, 'NA', 46.9, 434.9, 1184.62, 85.38, 18335.08,
        1370.46, 'NA', 2731.5, 6.65, 1991.65, 1488.75, 24.54, 46454.49,
        4093.38, 88.17, 635.9575251, 109.01, 3233.62, 'NA', 535.3, 67.44,
        219.78, 384.85, 77.01, 80.18, 64.33],
       ['NA', 10.92, 304.68, 6.12, 576.56, 145.05, 73.97, 73.88, 44.38,
        26.46, 222.35, 'NA', 46.9, 434.9, 1184.62, 85.38, 18335.08,
        1370.46, 'NA', 2731.5, 6.65, 1991.65, 1488.75, 24.54, 46454.49,
        4093.38, 88.17, 635.9575251, 109.01, 3233.62, 'NA', 535.3, 67.44,
        219.78, 384.85, 77.01, 80.18, 64.33],
       ['NA', 10.92, 304.68, 6.12, 576.56, 145.05, 73.97, 73.88, 44.38,
        26.46, 222.35, 'NA', 46.9, 434.9, 1184.62, 85.38, 18335.08,
        1370.46, 'NA', 2731.5, 6.65, 1991.65, 1488.75, 24.54, 46454.49,
        4093.38, 88.17, 635.9575251, 109.01, 3233.62, 'NA', 535.3, 67.44,
        219.78, 384.85, 77.01, 80.18, 64.33],
       ['NA', 10.92, 304.68, 6.12, 576.56, 145.05, 73.97, 73.88, 44.38,
        26.46, 222.35, 'NA', 46.9, 434.9, 1184.62, 85.38, 18335.08,
        1370.46, 'NA', 2731.5, 6.65, 1991.65, 1488.75, 24.54, 46454.49,
        4093.38, 88.17, 635.9575251, 109.01, 3233.62, 'NA', 535.3, 67.44,
        219.78, 384.85, 77.01, 80.18, 64.33],
       ['NA', 10.92, 304.68, 6.12, 576.56, 145.05, 73.97, 73.88, 44.38,
        26.46, 222.35, 'NA', 46.9, 434.9, 1184.62, 85.38, 18335.08,
        1370.46, 'NA', 2731.5, 6.65, 1991.65, 1488.75, 24.54, 46454.49,
        4093.38, 88.17, 635.9575251, 109.01, 3233.62, 'NA', 535.3, 67.44,
        219.78, 384.85, 77.01, 80.18, 64.33]], dtype=object)

In [ ]: