In [40]:
library(ggplot2)

In [22]:
# Get the data
local_path = '/home/irockafe/Dropbox (MIT)/Alm_Lab/projects/'
project_path = paste('/revo_healthcare/data/processed/Husermet_MTBLS97/',
                    'positive_ion/',sep='')
total_path = paste(local_path, project_path,sep='')
# load feature table
# patients x features
feats = read.csv(paste(total_path,
               'Husermet_UPLCMS_positive_ion_mode_featuretable.csv',
                   sep=''))
metadata = read.csv(paste(total_path,
                    'Husermet_UPLCMS_positive_ion_mode_metadata.csv',
                    sep=''))

In [94]:
head(meta)
dim(metadata)
columns = colnames(metadata)
hist_columns = c(columns[2], tail(columns,28))
hist_columns


IdxAgeGenderBMISBPDBPCSmokerALBPROTCREAPLTRBCWBCTBILALPALTASTGGTPHOSLDH
1 60.00000M 25.01000132 78 N NA 72.0 93 NA NA NA 10.0 66.0 49 41 23.0 0.97 465
2 54.33333M 24.38000140 80 N NA 66.0 70 NA NA NA 24.0 70.0 25 19 36.0 1.02 332
3 56.75000M 28.87000142 76 E NA 70.0 106 NA NA NA 9.0 62.0 18 19 20.0 1.00 322
4 41.20000M 24.10000135 79 U 42 70.9 89 247 5 8 4.1 37.1 18 14 16.3 NA NA
5 55.75000M 28.90842160 98 N 44 NA 113 203 5 5 NA NA NA NA NA NA NA
6 35.08333M 27.28000110 60 E NA 69.0 77 NA NA NA 7.0 161.0 40 24 34.0 0.92 318
  1. 1189
  2. 31
  1. 'Age'
  2. 'BMI'
  3. 'SBP'
  4. 'DBP'
  5. 'CSmoker'
  6. 'ALB'
  7. 'PROT'
  8. 'CREA'
  9. 'SGLUC'
  10. 'SODIUM'
  11. 'K'
  12. 'CALCIUM'
  13. 'CHOL'
  14. 'TRIG'
  15. 'HDLC'
  16. 'LDLC'
  17. 'Ratio'
  18. 'UREA'
  19. 'HAEM'
  20. 'PLT'
  21. 'RBC'
  22. 'WBC'
  23. 'TBIL'
  24. 'ALP'
  25. 'ALT'
  26. 'AST'
  27. 'GGT'
  28. 'PHOS'
  29. 'LDH'

In [96]:
# Show me an age breakdown
# TODO make a gridplot of all the distributions
for (i in hist_columns){
    print(i)
    hist(metadata[[i]], xlab=i,  main=sprintf("Histogram of %s", i))
    }


[1] "Age"
[1] "BMI"
[1] "SBP"
[1] "DBP"
[1] "CSmoker"
Error in hist.default(metadata[[i]], xlab = i, main = sprintf("Histogram of %s", : 'x' must be numeric
Traceback:

1. hist(metadata[[i]], xlab = i, main = sprintf("Histogram of %s", 
 .     i))   # at line 5 of file <text>
2. hist.default(metadata[[i]], xlab = i, main = sprintf("Histogram of %s", 
 .     i))
3. stop("'x' must be numeric")

In [48]:
p <- ggplot(mpg, aes(displ, cty)) + geom_point()
p