In [1]:
using MLDemos, DataFrames


INFO: Recompiling stale cache file /Users/abhijithc/.julia/lib/v0.4/LightXML.ji for module LightXML.
INFO: Recompiling stale cache file /Users/abhijithc/.julia/lib/v0.4/HttpParser.ji for module HttpParser.
INFO: Recompiling stale cache file /Users/abhijithc/.julia/lib/v0.4/Requests.ji for module Requests.
INFO: Recompiling stale cache file /Users/abhijithc/.julia/lib/v0.4/MbedTLS.ji for module MbedTLS.
INFO: Recompiling stale cache file /Users/abhijithc/.julia/lib/v0.4/Codecs.ji for module Codecs.

In [2]:
terms = readtable("$(Pkg.dir())/MLDemos/data/drug/data.csv", header=true)


Out[2]:
DiseaseDrugTarget
1EpilepsyDoxaprostGonadotropin-Releasing Hormone Receptor
2GlaucomaDoxazosin MesylateMetabotropic Glutamate Receptor 1
3ArrhythmiaDoxepin HydrochlorideMetabotropic Glutamate Receptor 2
4Calculus UrinaryDoxycycline HydrochlorideMetabotropic Glutamate Receptor 3
5SeizuresDracotanoside DPancreatic Alpha-Amylase
6PruritisDragonamide BLysosomal Alpha-Glucosidase
7Cushings SyndromeDragonamide ECarbonic Anhydrase 3
8Deep Vein ThrombosisDrobulineAdenosine A3 Receptor
9Hypertension OcularDrospirenoneAtp-Binding Cassette Transporter Sub-Family C Member 8
10Arterial Occlusion PeripheralDroxinavir HydrochlorideMultidrug Resistance Protein 1
11Bipolar Affective DisorderDrupaninRenin
12Gilberts SyndromeKaad-CyclopamineGamma-Aminobutyric Acid Receptor Subunit Gamma-1
13HyperbilirubinemiaKabiramide KGamma-Aminobutyric Acid Receptor Subunit Gamma-3
14Rheumatoid ArthritisKadcoccilactone AGamma-Aminobutyric Acid Receptor Subunit Theta
15Rhinitis Seasonal AllergicKadcoccilactone CGamma-Aminobutyric Acid Receptor Subunit Rho-3
16Myeloma MultipleKadcoccilactone DGamma-Aminobutyric-Acid Receptor Subunit Alpha-1
17Opiate DependenceKadcoccilactone FNeuronal Acetylcholine Receptor Subunit Alpha-10
18Polycystic Ovarian SyndromeKadsulactone ACalcium-Activated Potassium Channel Subunit Alpha 1
19Transplant Bone MarrowKadsuphilol CMethylenetetrahydrofolate Reductase
20Central Nervous System DisorderKadsuphilol EGlutamate Receptor 1
21Deficiency Vitamin DKadsuracoccinic Acid AEstrogen Receptor Beta
22Nephropathy DiabeticKaempferitrinMicrotubule-Associated Protein 1a
23Infection RickettsiaKaempferolLow-Density Lipoprotein Receptor-Related Protein 2
24Shigellosis4-Medck ThiolactoneAtp-Sensitive Inward Rectifier Potassium Channel 1
25Nervousness4-MercaptopyridineGlutathione Peroxidase 6
26Anesthesia LocalN-Heptanoyl-L-Homoserine LactoneGlutathione S-Transferase Theta-1
27HypoglycemiaN-Heptyl-4-Sulfamoyl-BenzamideMaleylacetoacetate Isomerase
28Muscle RelaxationN-Heptylcarbamic Acid Quinolin-6-Yl EsterThioredoxin Domain-Containing Protein 12
29Arthritis PsoriaticN-HexanesulfonamideG Protein-Activated Inward Rectifier Potassium Channel 2
30Carcinoma BrainAb-MecaTissue-Type Plasminogen Activator
&vellip&vellip&vellip&vellip

In [3]:
drugs = dropna(terms[:Drug]);
targets = dropna(terms[:Target]);
diseases = dropna(terms[:Disease]);

In [43]:
size(drugs)


Out[43]:
(252,)

In [44]:
all = [drugs[1:100];targets[1:100];diseases[1:100]]


Out[44]:
300-element Array{UTF8String,1}:
 "Doxaprost"                 
 "Doxazosin Mesylate"        
 "Doxepin Hydrochloride"     
 "Doxycycline Hydrochloride" 
 "Dracotanoside D"           
 "Dragonamide B"             
 "Dragonamide E"             
 "Drobuline"                 
 "Drospirenone"              
 "Droxinavir Hydrochloride"  
 "Drupanin"                  
 "Kaad-Cyclopamine"          
 "Kabiramide K"              
 ⋮                           
 "Cardiac Dysrhythmia"       
 "Puberty Precocious"        
 "Aspergillosis"             
 "Meningitis Bacterial"      
 "Ankylosing Spondylitis"    
 "Pain Musculoskeletal"      
 "Hyperlipoproteinemia"      
 "Obstructive Airway Disease"
 "Iritis"                    
 "Carcinoma Endometrial"     
 "Anxiety Disorder"          
 "Somatoform Disorder"       

In [45]:
@time T = tdm(all, "PUBMED")


 Doxaprost [Title/Abstract] AND
 Doxazosin Mesylate [Title/Abstract] AND
 Doxepin Hydrochloride [Title/Abstract] AND
 Doxycycline Hydrochloride [Title/Abstract] AND
 Dracotanoside D [Title/Abstract] AND
 Dragonamide B [Title/Abstract] AND
 Dragonamide E [Title/Abstract] AND
 Drobuline [Title/Abstract] AND
 Drospirenone [Title/Abstract] AND
 Droxinavir Hydrochloride [Title/Abstract] AND
 Drupanin [Title/Abstract] AND
 Kaad-Cyclopamine [Title/Abstract] AND
 Kabiramide K [Title/Abstract] AND
 Kadcoccilactone A [Title/Abstract] AND
 Kadcoccilactone C [Title/Abstract] AND
 Kadcoccilactone D [Title/Abstract] AND
 Kadcoccilactone F [Title/Abstract] AND
 Kadsulactone A [Title/Abstract] AND
 Kadsuphilol C [Title/Abstract] AND
 Kadsuphilol E [Title/Abstract] AND
 Kadsuracoccinic Acid A [Title/Abstract] AND
 Kaempferitrin [Title/Abstract] AND
 Kaempferol [Title/Abstract] AND
 4-Medck Thiolactone [Title/Abstract] AND
 4-Mercaptopyridine [Title/Abstract] AND
 N-Heptanoyl-L-Homoserine Lactone [Title/Abstract] AND
 N-Heptyl-4-Sulfamoyl-Benzamide [Title/Abstract] AND
 N-Heptylcarbamic Acid Quinolin-6-Yl Ester [Title/Abstract] AND
 N-Hexanesulfonamide [Title/Abstract] AND
 Ab-Meca [Title/Abstract] AND
 Abaperidone [Title/Abstract] AND
 Abeohyousterone [Title/Abstract] AND
 Abiesadine B [Title/Abstract] AND
 Doxacurium [Title/Abstract] AND
 Doxaminol [Title/Abstract] AND
 Doxanthrine [Title/Abstract] AND
 Doxazolidine [Title/Abstract] AND
 Doxorubicin Analogue [Title/Abstract] AND
 Doxorubicin Trifluoroacetate [Title/Abstract] AND
 Dpdpe [Title/Abstract] AND
 Draconin A [Title/Abstract] AND
 Dragmacidin E [Title/Abstract] AND
 Dragonamide [Title/Abstract] AND
 Drimentine H [Title/Abstract] AND
 Droclidinium [Title/Abstract] AND
 Droloxifene [Title/Abstract] AND
 Drotaverine [Title/Abstract] AND
 Droxacin [Title/Abstract] AND
 Droxidopa [Title/Abstract] AND
 Droxypropine [Title/Abstract] AND
 Drummondin C [Title/Abstract] AND
 Dsm-121 [Title/Abstract] AND
 Dsm-131 [Title/Abstract] AND
 K-Strophanthoside [Title/Abstract] AND
 Kabiramide G [Title/Abstract] AND
 Kadangustin C [Title/Abstract] AND
 Kadangustin H [Title/Abstract] AND
 Kadangustin J [Title/Abstract] AND
 Kadlongilactone A [Title/Abstract] AND
 Kadsuphilins B [Title/Abstract] AND
 Kadsuphilol A [Title/Abstract] AND
 Kadsuphilol D [Title/Abstract] AND
 Kadsurenin C [Title/Abstract] AND
 Kadsurin [Title/Abstract] AND
 4-Methanesulfonyl-Benzamidine [Title/Abstract] AND
 N-Hexadecyl-4-Methoxybenzamide [Title/Abstract] AND
 N-Hexadecyl-4-Nitrobenzamide [Title/Abstract] AND
 N-Hexadecyl-N'-Methyl-Guanidine Trifluoroacetic Acid [Title/Abstract] AND
 N-Hexanoyldihydrosphingosine [Title/Abstract] AND
 N-Hexanoylspingosine [Title/Abstract] AND
 Dox-Saliform [Title/Abstract] AND
 Doxefazepam [Title/Abstract] AND
 Doxorubicin 8-(Menthoxycarbonyl)Octanoylhydrazone Hydrochloride [Title/Abstract] AND
 Doxorubicin-Pep42 [Title/Abstract] AND
 Doxylamine Succinate [Title/Abstract] AND
 Dracorhodin [Title/Abstract] AND
 Dracotanoside A [Title/Abstract] AND
 Draflazine [Title/Abstract] AND
 Dramedilol [Title/Abstract] AND
 Dronedarone Hydrochloride [Title/Abstract] AND
 Drymaritin [Title/Abstract] AND
 Dsm-123 [Title/Abstract] AND
 Dsm-124 [Title/Abstract] AND
 Dsm-73 [Title/Abstract] AND
 K76-Cooh [Title/Abstract] AND
 Kabiramide C [Title/Abstract] AND
 Kabiramide J [Title/Abstract] AND
 Kadangustin B [Title/Abstract] AND
 Kadangustin G [Title/Abstract] AND
 Kadcoccilactone E [Title/Abstract] AND
 Kadsulignan L [Title/Abstract] AND
 Kadsuphilol B [Title/Abstract] AND
 Kadsuralignan J [Title/Abstract] AND
 Kaempferol Diacyl Rhamnoside [Title/Abstract] AND
 Kaempferol-3-O-(2''-O-Galloyl)-Glucoside [Title/Abstract] AND
 N-Glycylaminomethyl-P-Methylphosphinic Acid [Title/Abstract] AND
 N-Heptyl-2-Mercapto-Thionicotinamide [Title/Abstract] AND
 N-Heptyl-N-Methyl-N-Nitrosoamine [Title/Abstract] AND
 N-Heptylpiperazin-1-Amine [Title/Abstract] AND
 N-Hexadecanyl Alpha-D-Galactopyranoside [Title/Abstract] AND
 Gonadotropin-Releasing Hormone Receptor [Title/Abstract] AND
 Metabotropic Glutamate Receptor 1 [Title/Abstract] AND
 Metabotropic Glutamate Receptor 2 [Title/Abstract] AND
 Metabotropic Glutamate Receptor 3 [Title/Abstract] AND
 Pancreatic Alpha-Amylase [Title/Abstract] AND
 Lysosomal Alpha-Glucosidase [Title/Abstract] AND
 Carbonic Anhydrase 3 [Title/Abstract] AND
 Adenosine A3 Receptor [Title/Abstract] AND
 Atp-Binding Cassette Transporter Sub-Family C Member 8 [Title/Abstract] AND
 Multidrug Resistance Protein 1 [Title/Abstract] AND
 Renin [Title/Abstract] AND
 Gamma-Aminobutyric Acid Receptor Subunit Gamma-1 [Title/Abstract] AND
 Gamma-Aminobutyric Acid Receptor Subunit Gamma-3 [Title/Abstract] AND
 Gamma-Aminobutyric Acid Receptor Subunit Theta [Title/Abstract] AND
 Gamma-Aminobutyric Acid Receptor Subunit Rho-3 [Title/Abstract] AND
 Gamma-Aminobutyric-Acid Receptor Subunit Alpha-1 [Title/Abstract] AND
 Neuronal Acetylcholine Receptor Subunit Alpha-10 [Title/Abstract] AND
 Calcium-Activated Potassium Channel Subunit Alpha 1 [Title/Abstract] AND
 Methylenetetrahydrofolate Reductase [Title/Abstract] AND
 Glutamate Receptor 1 [Title/Abstract] AND
 Estrogen Receptor Beta [Title/Abstract] AND
 Microtubule-Associated Protein 1a [Title/Abstract] AND
 Low-Density Lipoprotein Receptor-Related Protein 2 [Title/Abstract] AND
 Atp-Sensitive Inward Rectifier Potassium Channel 1 [Title/Abstract] AND
 Glutathione Peroxidase 6 [Title/Abstract] AND
 Glutathione S-Transferase Theta-1 [Title/Abstract] AND
 Maleylacetoacetate Isomerase [Title/Abstract] AND
 Thioredoxin Domain-Containing Protein 12 [Title/Abstract] AND
 G Protein-Activated Inward Rectifier Potassium Channel 2 [Title/Abstract] AND
 Tissue-Type Plasminogen Activator [Title/Abstract] AND
 Phosphatidylinositol 3-Kinase Regulatory Subunit Gamma [Title/Abstract] AND
 Chromaffin Granule Amine Transporter [Title/Abstract] AND
 Dihydrofolate Reductase [Title/Abstract] AND
 Small Conductance Calcium-Activated Potassium Channel Protein 2 [Title/Abstract] AND
 Prostaglandin E2 Receptor Ep4 Subtype [Title/Abstract] AND
 Tachykinin Receptor 1 [Title/Abstract] AND
 Vitamin D Receptor Interacting Protein [Title/Abstract] AND
 Penicillin Binding Protein 3 [Title/Abstract] AND
 Dna Gyrase [Title/Abstract] AND
 Phospholipase A2 [Title/Abstract] AND
 Beta-Tubulin [Title/Abstract] AND
 Calcium Dependent Atpase [Title/Abstract] AND
 Opioid Receptor Mu [Title/Abstract] AND
 N-Type Calcium Channel [Title/Abstract] AND
 Cyclooxygenase-3 [Title/Abstract] AND
 Erbb2 [Title/Abstract] AND
 Aldosterone Receptor [Title/Abstract] AND
 Udp-Glucuronosyltransferase [Title/Abstract] AND
 Opioid Receptor Sigma 1 [Title/Abstract] AND
 Crth2 Receptor [Title/Abstract] AND
 Ent2 [Title/Abstract] AND
 Cox-2 [Title/Abstract] AND
 Sphingomyelinase [Title/Abstract] AND
 Sulfonylurea Receptor 2b [Title/Abstract] AND
 Hiv-1 Integrase [Title/Abstract] AND
 Mu Opioid Receptor [Title/Abstract] AND
 Trace Amine-Associated Receptor 1 [Title/Abstract] AND
 5-Hydroxytryptamine 6 Receptor [Title/Abstract] AND
 Cytochrome B [Title/Abstract] AND
 Arachidonate 5-Lipoxygenase [Title/Abstract] AND
 Sodium Channel Protein Type 5 Subunit Alpha [Title/Abstract] AND
 Aldo-Keto Reductase Family 1 Member C3 [Title/Abstract] AND
 Gap Junction Alpha-1 Protein [Title/Abstract] AND
 Vascular Cell Adhesion Protein 1 [Title/Abstract] AND
 Myeloperoxidase [Title/Abstract] AND
 Tyrosine-Protein Kinase Fer [Title/Abstract] AND
 Peptidyl-Prolyl Cis-Trans Isomerase Fkbp4 [Title/Abstract] AND
 4-Hydroxy-Tetrahydrodipicolinate Synthase [Title/Abstract] AND
 Prostaglandin Reductase 1 [Title/Abstract] AND
 Xaa-Pro Dipeptidase [Title/Abstract] AND
 Hydroxyacid Oxidase 1 [Title/Abstract] AND
 Hypoxia-Inducible Factor 1-Alpha [Title/Abstract] AND
 Natriuretic Peptides A [Title/Abstract] AND
 Dna Polymerase I [Title/Abstract] AND
 Cysteinyl Leukotriene Receptor 2 [Title/Abstract] AND
 Sphingosine 1-Phosphate Receptor 1 [Title/Abstract] AND
 Alpha-Glucosidase [Title/Abstract] AND
 Induced Myeloid Leukemia Cell Differentiation Protein Mcl-1 [Title/Abstract] AND
 Macrophage Metalloelastase [Title/Abstract] AND
 Egl Nine Homolog 1 [Title/Abstract] AND
 Nad-Dependent Protein Deacetylase Sirtuin-1 [Title/Abstract] AND
 Serine/Threonine-Protein Kinase 17a [Title/Abstract] AND
 Hyaluronidase-1 [Title/Abstract] AND
 Probable Low Molecular Weight Protein-Tyrosine-Phosphatase [Title/Abstract] AND
 Serine/Threonine-Protein Kinase Brsk2 [Title/Abstract] AND
 Maternal Embryonic Leucine Zipper Kinase [Title/Abstract] AND
 Collagenase 3 [Title/Abstract] AND
 Aurora Kinase A [Title/Abstract] AND
 Melanin-Concentrating Hormone Receptor 1 [Title/Abstract] AND
 Chorismate Synthase [Title/Abstract] AND
 Kallikrein-8 [Title/Abstract] AND
 Protein Fimh [Title/Abstract] AND
 Ns3 [Title/Abstract] AND
 Mitogen-Activated Protein Kinase Kinase Kinase 8 [Title/Abstract] AND
 Galectin-1 [Title/Abstract] AND
 Carbonic Anhydrase 4 [Title/Abstract] AND
 Retinoic Acid Receptor Alpha [Title/Abstract] AND
 Retinoic Acid Receptor Gamma-1 [Title/Abstract] AND
 Retinoic Acid Receptor Rxr-Beta [Title/Abstract] AND
 Adenosine A1 Receptor [Title/Abstract] AND
 Epilepsy [Title/Abstract] AND
 Glaucoma [Title/Abstract] AND
 Arrhythmia [Title/Abstract] AND
 Calculus Urinary [Title/Abstract] AND
 Seizures [Title/Abstract] AND
 Pruritis [Title/Abstract] AND
 Cushings Syndrome [Title/Abstract] AND
 Deep Vein Thrombosis [Title/Abstract] AND
 Hypertension Ocular [Title/Abstract] AND
 Arterial Occlusion Peripheral [Title/Abstract] AND
 Bipolar Affective Disorder [Title/Abstract] AND
 Gilberts Syndrome [Title/Abstract] AND
 Hyperbilirubinemia [Title/Abstract] AND
 Rheumatoid Arthritis [Title/Abstract] AND
 Rhinitis Seasonal Allergic [Title/Abstract] AND
 Myeloma Multiple [Title/Abstract] AND
 Opiate Dependence [Title/Abstract] AND
 Polycystic Ovarian Syndrome [Title/Abstract] AND
 Transplant Bone Marrow [Title/Abstract] AND
 Central Nervous System Disorder [Title/Abstract] AND
 Deficiency Vitamin D [Title/Abstract] AND
 Nephropathy Diabetic [Title/Abstract] AND
 Infection Rickettsia [Title/Abstract] AND
 Shigellosis [Title/Abstract] AND
 Nervousness [Title/Abstract] AND
 Anesthesia Local [Title/Abstract] AND
 Hypoglycemia [Title/Abstract] AND
 Muscle Relaxation [Title/Abstract] AND
 Arthritis Psoriatic [Title/Abstract] AND
 Carcinoma Brain [Title/Abstract] AND
 Graft Versus Host Disease [Title/Abstract] AND
 Metastatic Breast Cancer [Title/Abstract] AND
 Amnesia [Title/Abstract] AND
 Pinworm [Title/Abstract] AND
 Dementia [Title/Abstract] AND
 Hyperthyroidism [Title/Abstract] AND
 Skin Disease [Title/Abstract] AND
 Hypogonadism Male Primary [Title/Abstract] AND
 Niemann Pick Disease [Title/Abstract] AND
 Inflammatory Bowel Diseases [Title/Abstract] AND
 Conjunctival Disease [Title/Abstract] AND
 Xerostomia [Title/Abstract] AND
 Pneumonia Pneumocystis Carinii [Title/Abstract] AND
 Gastritis [Title/Abstract] AND
 Hepatitis Chronic Active [Title/Abstract] AND
 Drug Hypersensitivity [Title/Abstract] AND
 Leukemias [Title/Abstract] AND
 Multiple Myeloma [Title/Abstract] AND
 Chronic Lymphocytic Leukemia [Title/Abstract] AND
 Renal Failure [Title/Abstract] AND
 Adult Respiratory Distress Syndrome [Title/Abstract] AND
 Anemia [Title/Abstract] AND
 Melanoma Malignant [Title/Abstract] AND
 Leukemia Chronic Myelogenous [Title/Abstract] AND
 Myelodysplastic Syndrome [Title/Abstract] AND
 Allergic Rhinitis [Title/Abstract] AND
 Urticaria [Title/Abstract] AND
 Proteinuria [Title/Abstract] AND
 Itching [Title/Abstract] AND
 Bone Disorder [Title/Abstract] AND
 Gonorrhea [Title/Abstract] AND
 Albuminuria [Title/Abstract] AND
 Infection Upper Respiratory Tract [Title/Abstract] AND
 Edema Pulmonary [Title/Abstract] AND
 Seizures Partial [Title/Abstract] AND
 Meningitis [Title/Abstract] AND
 Lichen Planus [Title/Abstract] AND
 Lupus Erythematosus Discoid [Title/Abstract] AND
 Necrobiosis Lipoidica [Title/Abstract] AND
 Nephrotic Syndrome [Title/Abstract] AND
 Conjunctivitis [Title/Abstract] AND
 Arthralgia Syndrome [Title/Abstract] AND
 Non Insulin Dependent Diabetes Mellitus [Title/Abstract] AND
 Chills [Title/Abstract] AND
 Malnutrition [Title/Abstract] AND
 Soft Tissue Infection [Title/Abstract] AND
 Muscle Weakness [Title/Abstract] AND
 Headache [Title/Abstract] AND
 Pain [Title/Abstract] AND
 Hepatitis B [Title/Abstract] AND
 Anxiety [Title/Abstract] AND
 Aggression [Title/Abstract] AND
 Depression [Title/Abstract] AND
 Stroke [Title/Abstract] AND
 Cerebrovascular Disorder [Title/Abstract] AND
 Pneumonia [Title/Abstract] AND
 Hypertriglyceridemia [Title/Abstract] AND
 Carcinoma Head [Title/Abstract] AND
 Cardiac Dysrhythmia [Title/Abstract] AND
 Puberty Precocious [Title/Abstract] AND
 Aspergillosis [Title/Abstract] AND
 Meningitis Bacterial [Title/Abstract] AND
 Ankylosing Spondylitis [Title/Abstract] AND
 Pain Musculoskeletal [Title/Abstract] AND
 Hyperlipoproteinemia [Title/Abstract] AND
 Obstructive Airway Disease [Title/Abstract] AND
 Iritis [Title/Abstract] AND
 Carcinoma Endometrial [Title/Abstract] AND
 Anxiety Disorder [Title/Abstract] AND
 Somatoform Disorder [Title/Abstract] AND
355.895514 seconds (425.84 k allocations: 668.239 MB, 0.01% gc time)
Out[45]:
300x27097379 sparse matrix with 2218 Int64 entries:
	[1       ,     7000]  =  1
	[268     ,   374316]  =  1
	[8       ,   692260]  =  1
	[245     ,   842879]  =  1
	[1       ,   935524]  =  1
	[212     ,  1055005]  =  1
	[298     ,  1152496]  =  1
	[245     ,  1152537]  =  1
	[290     ,  1276700]  =  1
	[198     ,  1332684]  =  1
	⋮
	[266     , 27097347]  =  1
	[5       , 27097348]  =  1
	[16      , 27097348]  =  1
	[111     , 27097354]  =  1
	[15      , 27097360]  =  1
	[15      , 27097363]  =  1
	[247     , 27097363]  =  1
	[96      , 27097368]  =  1
	[15      , 27097373]  =  1
	[281     , 27097379]  =  1
	[284     , 27097379]  =  1

In [47]:
@time tdmf=T .* log(size(T,2)./sum(T,2))


  0.369356 seconds (42 allocations: 206.779 MB, 5.86% gc time)
Out[47]:
300x27097379 sparse matrix with 2218 Float64 entries:
	[1       ,     7000]  =  16.4218
	[268     ,   374316]  =  14.8124
	[8       ,   692260]  =  17.1149
	[245     ,   842879]  =  15.3232
	[1       ,   935524]  =  16.4218
	[212     ,  1055005]  =  16.0163
	[298     ,  1152496]  =  15.3232
	[245     ,  1152537]  =  15.3232
	[290     ,  1276700]  =  14.8124
	[198     ,  1332684]  =  15.3232
	⋮
	[266     , 27097347]  =  14.8124
	[5       , 27097348]  =  14.8124
	[16      , 27097348]  =  14.8124
	[111     , 27097354]  =  14.8124
	[15      , 27097360]  =  14.8124
	[15      , 27097363]  =  14.8124
	[247     , 27097363]  =  14.8124
	[96      , 27097368]  =  14.8124
	[15      , 27097373]  =  14.8124
	[281     , 27097379]  =  14.8124
	[284     , 27097379]  =  14.8124

In [48]:
@time dtmf=T' .* log(size(T',1)./sum(T',1))


  0.107908 seconds (61 allocations: 164.844 KB)
Out[48]:
27097379x300 sparse matrix with 2218 Float64 entries:
	[7000    ,        1]  =  16.4218
	[935524  ,        1]  =  16.4218
	[22131637,        2]  =  14.8124
	[23385960,        2]  =  14.8124
	[23983168,        2]  =  14.8124
	[25007615,        2]  =  14.8124
	[25668796,        2]  =  14.8124
	[25840026,        2]  =  14.8124
	[26201344,        2]  =  14.8124
	[26328143,        2]  =  14.8124
	⋮
	[27096571,      299]  =  14.8124
	[26863248,      300]  =  14.8124
	[26919056,      300]  =  14.8124
	[26932754,      300]  =  14.8124
	[26944392,      300]  =  14.8124
	[26951025,      300]  =  14.8124
	[26984121,      300]  =  14.8124
	[26997020,      300]  =  14.8124
	[27037573,      300]  =  14.8124
	[27045631,      300]  =  14.8124
	[27064523,      300]  =  14.8124

In [49]:
function cosine_vectorized(i::SparseMatrixCSC{Float64, Int64}, j::SparseMatrixCSC{Float64, Int64})
    return sum(i .* j)/sqrt(sum(i.*i)*sum(j.*j))
end


Out[49]:
cosine_vectorized (generic function with 1 method)

In [91]:
array(terms[1,:])[:]


Out[91]:
3-element Array{UTF8String,1}:
 "Epilepsy"                               
 "Doxaprost"                              
 "Gonadotropin-Releasing Hormone Receptor"

In [92]:
dbsearch("PUBMED",array(terms[1,:])[:], "OR")


 Epilepsy [Title/Abstract] OR Doxaprost [Title/Abstract] OR Gonadotropin-Releasing Hormone Receptor [Title/Abstract] OR
Out[92]:
MLDemos.PubIds(Nullable(UTF8String["Epilepsy","Doxaprost","Gonadotropin-Releasing Hormone Receptor"]),Nullable([27097007,27096812,27096250,27095821,27095588,27095555,27095099,27095080,27095079,27094525]),10)

In [104]:
d=dbsearch("PUBMED",[terms[1,2]], "OR")


 Doxaprost [Title/Abstract] OR
Out[104]:
MLDemos.PubIds(Nullable(UTF8String["Doxaprost"]),Nullable([935524,7000]),2)

In [115]:
drug = get(d.pubids)


Out[115]:
2-element Array{Int64,1}:
 935524
   7000

In [114]:
dis = get(di.pubids)


Out[114]:
10-element Array{Int64,1}:
 27097007
 27096812
 27096250
 27095821
 27095588
 27095555
 27095099
 27095080
 27095079
 27094525

In [113]:
target=get(t.pubids)


Out[113]:
10-element Array{Int64,1}:
 27063262
 27045358
 26953247
 26920257
 26892063
 26645560
 26580281
 26550267
 26373374
 26345908

In [63]:
@time D = zeros(r,r)
for i = 1:r
    for j = i+1:r
        D[i,j] = cosine_vectorized(tdmf[:,i],tdmf[:,j])
    end
end


  0.000692 seconds (9 allocations: 703.422 KB)

In [64]:
Ds = sparse(D)


Out[64]:
300x300 sparse matrix with 44850 Float64 entries:
	[1  ,   2]  =  NaN
	[1  ,   3]  =  NaN
	[2  ,   3]  =  NaN
	[1  ,   4]  =  NaN
	[2  ,   4]  =  NaN
	[3  ,   4]  =  NaN
	[1  ,   5]  =  NaN
	[2  ,   5]  =  NaN
	[3  ,   5]  =  NaN
	[4  ,   5]  =  NaN
	⋮
	[289, 300]  =  NaN
	[290, 300]  =  NaN
	[291, 300]  =  NaN
	[292, 300]  =  NaN
	[293, 300]  =  NaN
	[294, 300]  =  NaN
	[295, 300]  =  NaN
	[296, 300]  =  NaN
	[297, 300]  =  NaN
	[298, 300]  =  NaN
	[299, 300]  =  NaN

In [62]:
Ds.nzval


Out[62]:
9670-element Array{Float64,1}:
   1.0
 NaN  
 NaN  
 NaN  
 NaN  
 NaN  
 NaN  
 NaN  
 NaN  
 NaN  
 NaN  
 NaN  
 NaN  
   ⋮  
 NaN  
 NaN  
 NaN  
 NaN  
 NaN  
 NaN  
 NaN  
 NaN  
 NaN  
 NaN  
 NaN  
   0.1

In [34]:
dtmf[:,2]


Out[34]:
27097348x1 sparse matrix with 10 Float64 entries:
	[22131637,        1]  =  14.8124
	[23385960,        1]  =  14.8124
	[23983168,        1]  =  14.8124
	[25007615,        1]  =  14.8124
	[25668796,        1]  =  14.8124
	[25840026,        1]  =  14.8124
	[26201344,        1]  =  14.8124
	[26328143,        1]  =  14.8124
	[26716887,        1]  =  14.8124
	[26735908,        1]  =  14.8124

In [35]:
all[11]


Out[35]:
"Gonadotropin-Releasing Hormone Receptor"

In [36]:
all[1]


Out[36]:
"Doxaprost"

In [37]:
all[21]


Out[37]:
"Epilepsy"

In [39]:
dtmf[:,11]


Out[39]:
27097348x1 sparse matrix with 10 Float64 entries:
	[26345908,        1]  =  14.8124
	[26373374,        1]  =  14.8124
	[26550267,        1]  =  14.8124
	[26580281,        1]  =  14.8124
	[26645560,        1]  =  14.8124
	[26892063,        1]  =  14.8124
	[26920257,        1]  =  14.8124
	[26953247,        1]  =  14.8124
	[27045358,        1]  =  14.8124
	[27063262,        1]  =  14.8124

In [40]:
dtmf[:,1]


Out[40]:
27097348x1 sparse matrix with 2 Float64 entries:
	[7000    ,        1]  =  16.4218
	[935524  ,        1]  =  16.4218

In [42]:
dbsearch("PUBMED", [all[11]], "AND")


 Gonadotropin-Releasing Hormone Receptor [Title/Abstract] AND
Out[42]:
MLDemos.PubIds(Nullable(UTF8String["Gonadotropin-Releasing Hormone Receptor"]),Nullable([27063262,27045358,26953247,26920257,26892063,26645560,26580281,26550267,26373374,26345908]),10)

In [ ]: