In [6]:
import sys
# Make the project's code directory importable for the local imports used by
# later cells.
# NOTE(review): this is a machine-specific absolute path; the notebook will not
# find the project modules on another machine unless it is adjusted.
sys.path.append('/Users/simon/git/lda/code/')

In [2]:
from ms2lda_feature_extraction import LoadMGF

In [3]:
# Path of the MGF file handed to the spectrum loader.
# NOTE(review): machine-specific absolute path under /home/simon, whereas the
# sys.path entry at the top of the notebook points under /Users/simon — confirm
# which machine this notebook is meant to run on.
mgf_file = '/home/simon/Dropbox/MolecularNetworking_GCF-MF_Crusemann_Duncan/Crusemann_only_Clutered_Data/METABOLOMICS-SNETS-c36f90ba-download_clustered_spectra-main.mgf'

In [4]:
# Intensity cut-off for MS2 peaks; it shares its name with the LoadMGF keyword
# argument. Presumably peaks below this value are discarded by the loader —
# TODO confirm the exact comparison and intensity scale in ms2lda_feature_extraction.
min_ms2_intensity = 25

In [5]:
# Build the loader from the `min_ms2_intensity` setting defined earlier rather
# than repeating the literal value, so changing the setting actually takes effect.
l = LoadMGF(min_ms2_intensity = min_ms2_intensity)
# load_spectra is given a list of MGF paths and returns three objects. Later
# cells use them as follows:
#   ms1      - sequence of entries exposing a `.name` attribute
#   ms2      - sequence of indexable records; fields 0, 2 and 3 are read
#   metadata - mapping from entry name to a dict holding (at least) 'parentmass'
ms1,ms2,metadata = l.load_spectra([mgf_file])


Filtering MS2 on intensity
85977 MS2 remaining

In [6]:
# Enable IPython's autoreload extension; mode 2 reloads imported modules before
# each cell executes, so edits to the project's .py files are picked up without
# restarting the kernel.
%load_ext autoreload
%autoreload 2

In [56]:
# One empty peak list per MS1 entry, keyed by the entry's name.
mols_as_dicts = {mol.name: [] for mol in ms1}

In [57]:
# Attach every MS2 record to its parent entry as a (field 0, field 2) pair;
# field 3 of the record is the parent object whose name is the dictionary key.
# Later cells unpack these pairs as (mz, intensity).
# Note: re-running this cell without re-creating mols_as_dicts appends the
# same pairs a second time.
for fragment in ms2:
    parent = fragment[3]
    peak = (fragment[0], fragment[2])
    mols_as_dicts[parent.name].append(peak)

In [58]:
# Order each molecule's peaks by their first element (ascending, stable).
for name in mols_as_dicts:
    mols_as_dicts[name].sort(key = lambda peak: peak[0])

In [59]:
# Rescale every spectrum so that its most intense peak has intensity 100.
for m in mols_as_dicts:
    spectrum = mols_as_dicts[m]
    # A molecule can end up with no peaks at all; max() of an empty sequence
    # raises ValueError, so leave such spectra as they are.
    if not spectrum:
        continue
    max_val = max(intensity for _, intensity in spectrum)
    # A zero base peak cannot be used as the divisor; leave the spectrum unchanged.
    if max_val == 0:
        continue
    # 100.0 forces float arithmetic, so integer intensities are not truncated.
    mols_as_dicts[m] = [(mz, 100.0*intensity/max_val) for mz, intensity in spectrum]

In [60]:
# Map each MS1 entry name to the 'parentmass' value stored in its metadata
# record (whatever `.get` returns when the key is absent — None for a plain dict).
parentmass = {mol.name: metadata[mol.name].get('parentmass') for mol in ms1}

Compute the alignment score for every pair of spectra


In [61]:
# Accumulator for the pairwise results: the scoring loop stores one
# (name1, name2, score, number of aligned peaks) tuple per pair in `scores`.
scores = []
# Number of MS1 entries; in the visible cells it is only used for the
# progress printout of the scoring loop.
n_mols = len(ms1)
# NOTE(review): numpy is not referenced by any live code in the visible cells
# (only by commented-out lines) — confirm whether it is still needed.
import numpy as np

In [120]:
from spectrum_alignment import score_alignment

# Tolerance passed to score_alignment (presumably an m/z matching tolerance —
# TODO confirm in spectrum_alignment).
tol = 0.5

# Start from an empty result list so that re-running this cell does not append
# a second copy of every row to the output of a previous run.
scores = []
n_done = 0
max_align = 0   # largest number of aligned peaks seen for any pair
max_score = 0   # largest score seen for any pair with at least one aligned peak

# Visit every unordered pair (m1, m2) exactly once: m2 only ranges over the
# entries that come after m1 in ms1.
for i,m1 in enumerate(ms1[:-1]):
    # Loop-invariant lookups for the inner loop.
    spectrum1 = mols_as_dicts[m1.name]
    parentmass1 = parentmass[m1.name]
    for m2 in ms1[i+1:]:
        score,alignment = score_alignment(spectrum1,mols_as_dicts[m2.name],parentmass1,parentmass[m2.name],tol)
        n_aligned = len(alignment)
        # Only pairs that share at least one aligned peak are recorded.
        if n_aligned > 0:
            scores.append((m1.name,m2.name,score,n_aligned))
            if n_aligned > max_align:
                max_align = n_aligned
            if score > max_score:
                max_score = score
    n_done += 1
    # Progress report every 100 outer iterations. The single pre-formatted
    # string prints identically under Python 2 and Python 3.
    if n_done % 100 == 0:
        print('%d %d' % (n_done,n_mols))


100 5930
200 5930
300 5930
400 5930
500 5930
600 5930
700 5930
800 5930
900 5930
1000 5930
1100 5930
1200 5930
1300 5930
1400 5930
1500 5930
1600 5930
1700 5930
1800 5930
1900 5930
2000 5930
2100 5930
2200 5930
2300 5930
2400 5930
2500 5930
2600 5930
2700 5930
2800 5930
2900 5930
3000 5930
3100 5930
3200 5930
3300 5930
3400 5930
3500 5930
3600 5930
3700 5930
3800 5930
3900 5930
4000 5930
4100 5930
4200 5930
4300 5930
4400 5930
4500 5930
4600 5930
4700 5930
4800 5930
4900 5930
5000 5930
5100 5930
5200 5930
5300 5930
5400 5930
5500 5930
5600 5930
5700 5930
5800 5930
5900 5930

In [121]:
# csv must be imported in this cell: on a fresh top-to-bottom run no earlier
# cell imports it, and the write below would fail with NameError.
import csv

# Persist the pairwise results, one CSV row per
# (name1, name2, score, number of aligned peaks) tuple.
# NOTE(review): this writes into the current working directory, while the cell
# that reloads the scores opens an absolute Dropbox path with the same file
# name — confirm both refer to the same file.
with open('crusemann_scores_05.csv','w') as f:
    writer = csv.writer(f)
    writer.writerows(scores)

In [10]:
import csv

# Reload the pairwise results written by the scoring step.
# csv.reader yields every field as a string, so the score and the aligned-peak
# count are converted back to float and int. Without the conversion, numeric
# threshold comparisons downstream would be made against strings.
scores = []
with open('/Users/simon/Dropbox/crusemann_scores_05.csv','r') as f:
    reader = csv.reader(f)
    for line in reader:
        # Skip blank rows (e.g. produced by newline translation on some platforms).
        if not line:
            continue
        name1,name2,score,n_aligned = line
        scores.append((name1,name2,float(score),int(n_aligned)))

In [11]:
from mol_networks import create_edge_dict,Network
# Thresholds handed to the network-building helpers below.
min_frag_overlap = 4
min_score = 0.5
max_component_size = 250
# Build the edge dictionary from the score rows. Presumably rows below either
# threshold are dropped — TODO confirm in mol_networks.create_edge_dict.
edge_dict = create_edge_dict(scores,min_frag_overlap = min_frag_overlap,min_score = min_score)
network = Network(edge_dict)
# Filter a copy rather than `network` itself (assumes copy() returns an
# independent object and top_k_filter modifies it in place — verify).
man_network = network.copy()
# NOTE(review): k=10 is a literal here, unlike the named thresholds above.
man_network.top_k_filter(k=10)
# Split the filtered network into components. Judging by the log output,
# components larger than max_component_size are cropped.
components = man_network.convert_to_components(max_component_size=max_component_size,min_score = min_score)


Found 1 components
Found 2 components
Found 3 components
Found 4 components
Found 5 components
Found 6 components
Found 7 components
Found 8 components
Found 9 components
Found 10 components
Found 11 components
Found 12 components
Found 13 components
Found 14 components
Found 15 components
Found 16 components
Found 17 components
Found 18 components
Found 19 components
Found 20 components
Found 21 components
Found 22 components
Found 23 components
Found 24 components
Found 25 components
Found 26 components
Found 27 components
Found 28 components
Found 29 components
Found 30 components
Found 31 components
Found 32 components
Found 33 components
Found 34 components
Found 35 components
Found 36 components
Found 37 components
Found 38 components
Found 39 components
Found 40 components
Found 41 components
Found 42 components
Found 43 components
Found 44 components
Found 45 components
Found 46 components
Found 47 components
Found 48 components
Found 49 components
Found 50 components
Found 51 components
Found 52 components
Found 53 components
Found 54 components
Found 55 components
Found 56 components
Found 57 components
Found 58 components
Found 59 components
Found 60 components
Found 61 components
Found 62 components
Found 63 components
Found 64 components
Found 65 components
Found 66 components
Found 67 components
Found 68 components
Found 69 components
Found 70 components
Found 71 components
Found 72 components
Found 73 components
Found 74 components
Found 75 components
Found 76 components
Found 77 components
Found 78 components
Found 79 components
Found 80 components
Found 81 components
Found 82 components
Found 83 components
Found 84 components
Found 85 components
Found 86 components
Found 87 components
Found 88 components
Found 89 components
Found 90 components
Found 91 components
Found 92 components
Found 93 components
Found 94 components
Found 95 components
Found 96 components
Found 97 components
Found 98 components
Found 99 components
Found 100 components
Found 101 components
Found 102 components
Found 103 components
Found 104 components
Found 105 components
Found 106 components
Found 107 components
Found 108 components
Found 109 components
Found 110 components
Found 111 components
Found 112 components
Found 113 components
Found 114 components
Found 115 components
Found 116 components
Found 117 components
Found 118 components
Found 119 components
Found 120 components
Found 121 components
Found 122 components
Found 123 components
Found 124 components
Found 125 components
Found 126 components
Found 127 components
Found 128 components
Found 129 components
Found 130 components
Found 131 components
Found 132 components
Found 133 components
Found 134 components
Found 135 components
Found 136 components
Found 137 components
Found 138 components
Found 139 components
Found 140 components
Found 141 components
Found 142 components
Found 143 components
Found 144 components
Found 145 components
Found 146 components
Found 147 components
Found 148 components
Found 149 components
Found 150 components
Found 151 components
Found 152 components
Found 153 components
Found 154 components
Found 155 components
Found 156 components
Found 157 components
Found 158 components
Found 159 components
Found 160 components
Found 161 components
Found 162 components
Found 163 components
Found 164 components
Found 165 components
Found 166 components
Found 167 components
Found 168 components
Found 169 components
Found 170 components
Found 171 components
Found 172 components
Found 173 components
Found 174 components
Found 175 components
Found 176 components
Found 177 components
Found 178 components
Found 179 components
Found 180 components
Found 181 components
Found 182 components
Found 183 components
Found 184 components
Found 185 components
Found 186 components
Found 187 components
Found 188 components
Found 189 components
Found 190 components
Found 191 components
Found 192 components
Found 193 components
Found 194 components
Found 195 components
Found 196 components
Found 197 components
Found 198 components
Found 199 components
Found 200 components
Found 201 components
Found 202 components
Found 203 components
Found 204 components
Found 205 components
Found 206 components
Found 207 components
Found 208 components
Found 209 components
Found 210 components
Found 211 components
Found 212 components
Found 213 components
Found 214 components
Found 215 components
Found 216 components
Found 217 components
Found 218 components
Found 219 components
Found 220 components
Found 221 components
Found 222 components
Found 223 components
Found 224 components
Found 225 components
Found 226 components
Found 227 components
Found 228 components
Found 229 components
Found 230 components
Found 231 components
Found 232 components
Found 233 components
Found 234 components
Found 235 components
Found 236 components
Found 237 components
Found 238 components
Found 239 components
Found 240 components
Found 241 components
Found 242 components
Found 243 components
Found 244 components
Found 245 components
Found 246 components
Found 247 components
Found 248 components
Found 249 components
Found 250 components
Found 251 components
Found 252 components
Found 253 components
Found 254 components
Found 255 components
Found 256 components
Found 257 components
Found 258 components
Found 259 components
Found 260 components
Found 261 components
Found 262 components
Found 263 components
Found 264 components
Found 265 components
Found 266 components
Found 267 components
Found 268 components
Found 269 components
Found 270 components
Found 271 components
Found 272 components
Found 273 components
Found 274 components
Found 275 components
Found 276 components
Found 277 components
Found 278 components
Found 279 components
Found 280 components
Found 281 components
Found 282 components
Found 283 components
Found 284 components
Found 285 components
Found 286 components
Found 287 components
Found 288 components
Found 289 components
Found 290 components
Found 291 components
Found 292 components
Found 293 components
Found 294 components
Found 295 components
Found 296 components
Found 297 components
Found 298 components
Found 299 components
Found 300 components
Found 301 components
Found 302 components
Found 303 components
Found 304 components
Found 305 components
Found 306 components
Found 307 components
Found 308 components
Found 309 components
Found 310 components
Found 311 components
Found 312 components
Found 313 components
Found 314 components
Found 315 components
Found 316 components
Found 317 components
Found 318 components
Found 319 components
Found 320 components
Found 321 components
Found 322 components
Found 323 components
Found 324 components
Found 325 components
Found 326 components
Found 327 components
Found 328 components
Found 329 components
Found 330 components
Found 331 components
Found 332 components
Found 333 components
Found 334 components
Found 335 components
Found 336 components
Found 337 components
Found 338 components
Found 339 components
Found 340 components
Found 341 components
Found 342 components
Found 343 components
Found 344 components
Found 345 components
Found 346 components
Found 347 components
Found 348 components
Found 349 components
Found 350 components
Found 351 components
Found 352 components
Found 353 components
Found 354 components
Found 355 components
Found 356 components
Found 357 components
Found 358 components
Found 359 components
Found 360 components
Found 361 components
Found 362 components
Found 363 components
Found 364 components
Found 365 components
Found 366 components
Found 367 components
Found 368 components
Found 369 components
Found 370 components
Found 371 components
Found 372 components
Found 373 components
Found 374 components
Found 375 components
Found 376 components
Found 377 components
Found 378 components
Found 379 components
Found 380 components
Found 381 components
Found 382 components
Found 383 components
Found 384 components
Found 385 components
Found 386 components
Found 387 components
Found 388 components
Found 389 components
Found 390 components
Found 391 components
Found 392 components
Found 393 components
Found 394 components
Found 395 components
Found 396 components
Found 397 components
Found 398 components
Found 399 components
Found 400 components
Found 401 components
Found 402 components
Found 403 components
Found 404 components
Found 405 components
Found 406 components
Found 407 components
Found 408 components
Found 409 components
Found 410 components
Found 411 components
Found 412 components
Found 413 components
Found 414 components
Found 415 components
Found 416 components
Found 417 components
Found 418 components
Found 419 components
Found 420 components
Found 421 components
Found 422 components
Found 423 components
Found 424 components
Found 425 components
Found 426 components
Found 427 components
Found 428 components
Found 429 components
Found 430 components
Found 431 components
Found 432 components
Found 433 components
Found 434 components
Found 435 components
Found 436 components
Found 437 components
Found 438 components
Found 439 components
Found 440 components
Found 441 components
Found 442 components
Found 443 components
Found 444 components
Found 445 components
Found 446 components
Found 447 components
Found 448 components
Found 449 components
Found 450 components
Found 451 components
Found 452 components
Found 453 components
Found 454 components
Found 455 components
Found 456 components
Found 457 components
Found 458 components
Found 459 components
Found 460 components
Found 461 components
Found 462 components
Found 463 components
Found 464 components
Found 465 components
Found 466 components
Found 467 components
Found 468 components
Found 469 components
Found 470 components
Found 471 components
Found 472 components
Found 473 components
Found 474 components
Found 475 components
Found 476 components
Found 477 components
Found 478 components
Found 479 components
Found 480 components
Found 481 components
Found 482 components
Found 483 components
Found 484 components
Found 485 components
Found 486 components
Found 487 components
Found 488 components
Found 489 components
Found 490 components
Found 491 components
Found 492 components
Found 493 components
Found 494 components
Found 495 components
Found 496 components
Found 497 components
Found 498 components
Found 499 components
Found 500 components
Found 501 components
Found 502 components
Found 503 components
Found 504 components
Found 505 components
Found 506 components
Found 507 components
Found 508 components
Found 509 components
Found 510 components
Found 511 components
Found 512 components
Found 513 components
Found 514 components
Found 515 components
Found 516 components
Found 517 components
Found 518 components
Found 519 components
Found 520 components
Found 521 components
Found 522 components
Found 523 components
Found 524 components
Found 525 components
Found 526 components
Found 527 components
Found 528 components
Found 529 components
Found 530 components
Found 531 components
Found 532 components
Found 533 components
Found 534 components
Found 535 components
Found 536 components
Found 537 components
Found 538 components
Found 539 components
Found 540 components
Found 541 components
Found 542 components
Found 543 components
Found 544 components
Found 545 components
Found 546 components
Found 547 components
Found 548 components
Found 549 components
Found 550 components
Found 551 components
Found 552 components
Found 553 components
Found 554 components
Found 555 components
Found 556 components
Found 557 components
Found 558 components
Found 559 components
Found 560 components
Found 561 components
Found 562 components
Found 563 components
Found 564 components
Found 565 components
Found 566 components
Found 567 components
Found 568 components
Found 569 components
Found 570 components
Found 571 components
Found 572 components
Found 573 components
Found 574 components
Found 575 components
Found 576 components
Found 577 components
Found 578 components
Found 579 components
Found 580 components
Found 581 components
Found 582 components
Found 583 components
Found 584 components
Found 585 components
Found 586 components
Found 587 components
Found 588 components
Found 589 components
Found 590 components
Found 591 components
Found 592 components
Found 593 components
Found 594 components
Found 595 components
Found 596 components
Found 597 components
Found 598 components
Found 599 components
Found 600 components
Found 601 components
Found 602 components
Found 603 components
Found 604 components
Found 605 components
Found 606 components
Found 607 components
Found 608 components
Found 609 components
Found 610 components
Found 611 components
Found 612 components
Found 613 components
Found 614 components
Found 615 components
Found 616 components
Found 617 components
Found 618 components
Found 619 components
Found 620 components
Found 621 components
Found 622 components
Found 623 components
Found 624 components
Found 625 components
Found 626 components
Found 627 components
Found 628 components
Found 629 components
Found 630 components
Found 631 components
Found 632 components
Found 633 components
Found 634 components
Found 635 components
Found 636 components
Found 637 components
Found 638 components
Found 639 components
Found 640 components
Found 641 components
Found 642 components
Found 643 components
Found 644 components
Found 645 components
Found 646 components
Found 647 components
Found 648 components
Found 649 components
Found 650 components
Found 651 components
Found 652 components
Found 653 components
Found 654 components
Found 655 components
Found 656 components
Found 657 components
Found 658 components
Found 659 components
Found 660 components
Found 661 components
Found 662 components
Found 663 components
Found 664 components
Found 665 components
Found 666 components
Found 667 components
Found 668 components
Found 669 components
Found 670 components
Found 671 components
Found 672 components
Found 673 components
Found 674 components
Found 675 components
Found 676 components
Found 677 components
Found 678 components
Found 679 components
Found 680 components
Found 681 components
Found 682 components
Found 683 components
Found 684 components
Found 685 components
Found 686 components
Found 687 components
Found 688 components
Found 689 components
Found 690 components
Found 691 components
Found 692 components
Found 693 components
Found 694 components
Found 695 components
Found 696 components
Found 697 components
Found 698 components
Found 699 components
Found 700 components
Found 701 components
Found 702 components
Found 703 components
Found 704 components
Found 705 components
Found 706 components
Found 707 components
Found 708 components
Found 709 components
Found 710 components
Found 711 components
Found 712 components
Found 713 components
Found 714 components
Found 715 components
Found 716 components
Found 717 components
Found 718 components
Found 719 components
Found 720 components
Found 721 components
Found 722 components
Found 723 components
Found 724 components
Found 725 components
Found 726 components
Found 727 components
Found 728 components
Found 729 components
Found 730 components
Found 731 components
Found 732 components
Found 733 components
Found 734 components
Found 735 components
Found 736 components
Found 737 components
Found 738 components
Found 739 components
Found 740 components
Found 741 components
Found 742 components
Found 743 components
Found 744 components
Found 745 components
Found 746 components
Found 747 components
Found 748 components
Found 749 components
Found 750 components
Found 751 components
Found 752 components
Found 753 components
Found 754 components
Found 755 components
Found 756 components
Found 757 components
Found 758 components
Found 759 components
Found 760 components
Found 761 components
Found 762 components
Found 763 components
Found 764 components
Found 765 components
Found 766 components
Found 767 components
Found 768 components
Found 769 components
Found 770 components
Found 771 components
Found 772 components
Found 773 components
Found 774 components
Found 775 components
Found 776 components
Found 777 components
Found 778 components
Found 779 components
Found 780 components
Found 781 components
Found 782 components
Found 783 components
Found 784 components
Found 785 components
Found 786 components
Found 787 components
Found 788 components
Found 789 components
Found 790 components
Found 791 components
Found 792 components
Found 793 components
Found 794 components
Found 795 components
Found 796 components
Found 797 components
Found 798 components
Found 799 components
Found 800 components
Found 801 components
Found 802 components
Found 803 components
Found 804 components
Found 805 components
Found 806 components
Found 807 components
Found 808 components
Found 809 components
Found 810 components
Found 811 components
Found 812 components
Found 813 components
Found 814 components
Found 815 components
Found 816 components
Found 817 components
Found 818 components
Found 819 components
Found 820 components
Found 821 components
Found 822 components
Found 823 components
Found 824 components
Found 825 components
Found 826 components
Found 827 components
Found 828 components
Found 829 components
Found 830 components
Found 831 components
Found 832 components
Found 833 components
Found 834 components
Found 835 components
Found 836 components
Found 837 components
Found 838 components
Found 839 components
Found 840 components
Found 841 components
Found 842 components
Found 843 components
Found 844 components
Found 845 components
Found 846 components
Found 847 components
Found 848 components
Found 849 components
Found 850 components
Found 851 components
Found 852 components
Found 853 components
Found 854 components
Found 855 components
Found 856 components
Found 857 components
Found 858 components
Found 859 components
Found 860 components
Found 861 components
Found 862 components
Found 863 components
Found 864 components
Found 865 components
Found 866 components
Found 867 components
Found 868 components
Found 869 components
Found 870 components
Found 871 components
Found 872 components
Found 873 components
Found 874 components
Found 875 components
Found 876 components
Found 877 components
Found 878 components
Found 879 components
Found 880 components
Found 881 components
Found 882 components
Found 883 components
Found 884 components
Found 885 components
Found 886 components
Found 887 components
Found 888 components
Found 889 components
Found 890 components
Found 891 components
Found 892 components
Found 893 components
Found 894 components
Found 895 components
Found 896 components
Found 897 components
Found 898 components
Found 899 components
Found 900 components
Found 901 components
Found 902 components
Found 903 components
Found 904 components
Found 905 components
Found 906 components
Found 907 components
Found 908 components
Found 909 components
Found 910 components
Found 911 components
Found 912 components
Found 913 components
Found 914 components
Found 915 components
Found 916 components
Found 917 components
Found 918 components
Found 919 components
Found 920 components
Found 921 components
Found 922 components
Found 923 components
Found 924 components
Found 925 components
Found 926 components
Found 927 components
Found 928 components
Found 929 components
Found 930 components
Found 931 components
Found 932 components
Found 933 components
Found 934 components
Found 935 components
Found 936 components
Found 937 components
Found 938 components
Found 939 components
Found 940 components
Found 941 components
Found 942 components
Found 943 components
Found 944 components
Found 945 components
Found 946 components
Found 947 components
Found 948 components
Found 949 components
Found 950 components
Found 951 components
Found 952 components
Found 953 components
Found 954 components
Found 955 components
Found 956 components
Found 957 components
Found 958 components
Found 959 components
Found 960 components
Found 961 components
Found 962 components
Found 963 components
Found 964 components
Found 965 components
Found 966 components
Found 967 components
Found 968 components
Found 969 components
Found 970 components
Found 971 components
Found 972 components
Found 973 components
Found 974 components
Found 975 components
Found 976 components
Found 977 components
Found 978 components
Found 979 components
Found 980 components
Found 981 components
Found 982 components
Found 983 components
Found 984 components
Found 985 components
Found 986 components
Found 987 components
Found 988 components
Found 989 components
Found 990 components
Found 991 components
Found 992 components
Found 993 components
Found 994 components
Found 995 components
Found 996 components
Found 997 components
Found 998 components
Found 999 components
Found 1000 components
Found 1001 components
Found 1002 components
Found 1003 components
Found 1004 components
Found 1005 components
Found 1006 components
Found 1007 components
Found 1008 components
Found 1009 components
Found 1010 components
Found 1011 components
Found 1012 components
Found 1013 components
Found 1014 components
Found 1015 components
Found 1016 components
Found 1017 components
Found 1018 components
Found 1019 components
Found 1020 components
Found 1021 components
Found 1022 components
Found 1023 components
Found 1024 components
Found 1025 components
Found 1026 components
Found 1027 components
Found 1028 components
Found 1029 components
Found 1030 components
Found 1031 components
Found 1032 components
Found 1033 components
Found 1034 components
Found 1035 components
Found 1036 components
Found 1037 components
Found 1038 components
Found 1039 components
Found 1040 components
Found 1041 components
Found 1042 components
Found 1043 components
Found 1044 components
Found 1045 components
Found 1046 components
Cropping components to have max size = 250
	1045 components ok, 1 need cropping
Cropping components to have max size = 250
	1046 components ok, 1 need cropping
Cropping components to have max size = 250
	1047 components ok, 1 need cropping
Cropping components to have max size = 250
	1048 components ok, 1 need cropping
Cropping components to have max size = 250
	1049 components ok, 1 need cropping
Cropping components to have max size = 250
	1050 components ok, 1 need cropping
Cropping components to have max size = 250
	1051 components ok, 1 need cropping
Cropping components to have max size = 250
	1052 components ok, 1 need cropping
Cropping components to have max size = 250
	1053 components ok, 1 need cropping
Cropping components to have max size = 250
	1054 components ok, 1 need cropping
Cropping components to have max size = 250
	1055 components ok, 1 need cropping
Cropping components to have max size = 250
	1056 components ok, 1 need cropping
Cropping components to have max size = 250
	1057 components ok, 1 need cropping
Cropping components to have max size = 250
	1058 components ok, 1 need cropping
Cropping components to have max size = 250
	1059 components ok, 1 need cropping
Cropping components to have max size = 250
	1060 components ok, 1 need cropping
Cropping components to have max size = 250
	1061 components ok, 1 need cropping
Cropping components to have max size = 250
	1062 components ok, 1 need cropping
Cropping components to have max size = 250
	1063 components ok, 1 need cropping
Cropping components to have max size = 250
	1064 components ok, 1 need cropping
Cropping components to have max size = 250
	1065 components ok, 1 need cropping
Cropping components to have max size = 250
	1066 components ok, 1 need cropping
Cropping components to have max size = 250
	1067 components ok, 1 need cropping
Cropping components to have max size = 250
	1068 components ok, 1 need cropping
Cropping components to have max size = 250
	1069 components ok, 1 need cropping
Cropping components to have max size = 250
	1070 components ok, 1 need cropping
Cropping components to have max size = 250
	1071 components ok, 1 need cropping
Cropping components to have max size = 250
	1072 components ok, 1 need cropping
Cropping components to have max size = 250
	1073 components ok, 1 need cropping
Cropping components to have max size = 250
	1074 components ok, 1 need cropping
Cropping components to have max size = 250
	1075 components ok, 1 need cropping
Cropping components to have max size = 250
	1076 components ok, 1 need cropping
Cropping components to have max size = 250
	1077 components ok, 1 need cropping
Cropping components to have max size = 250
	1078 components ok, 1 need cropping
Cropping components to have max size = 250
	1079 components ok, 1 need cropping
Cropping components to have max size = 250
	1080 components ok, 1 need cropping
Cropping components to have max size = 250
	1081 components ok, 1 need cropping
Cropping components to have max size = 250
	1082 components ok, 1 need cropping
Cropping components to have max size = 250
	1083 components ok, 1 need cropping
Cropping components to have max size = 250
	1084 components ok, 1 need cropping
Cropping components to have max size = 250
	1085 components ok, 1 need cropping
Cropping components to have max size = 250
	1086 components ok, 1 need cropping
Cropping components to have max size = 250
	1087 components ok, 1 need cropping
Cropping components to have max size = 250
	1088 components ok, 1 need cropping
Cropping components to have max size = 250
	1089 components ok, 1 need cropping
Cropping components to have max size = 250
	1090 components ok, 1 need cropping
Cropping components to have max size = 250
	1091 components ok, 1 need cropping
Cropping components to have max size = 250
	1092 components ok, 1 need cropping
Cropping components to have max size = 250
	1093 components ok, 1 need cropping
Cropping components to have max size = 250
	1094 components ok, 1 need cropping
Cropping components to have max size = 250
	1095 components ok, 1 need cropping
Cropping components to have max size = 250
	1096 components ok, 1 need cropping
Cropping components to have max size = 250
	1097 components ok, 1 need cropping
Cropping components to have max size = 250
	1098 components ok, 1 need cropping
Cropping components to have max size = 250
	1099 components ok, 1 need cropping
Cropping components to have max size = 250
	1100 components ok, 1 need cropping
Cropping components to have max size = 250
	1101 components ok, 1 need cropping
Cropping components to have max size = 250
	1102 components ok, 1 need cropping
Cropping components to have max size = 250
	1103 components ok, 1 need cropping
Cropping components to have max size = 250
	1104 components ok, 1 need cropping
Cropping components to have max size = 250
	1105 components ok, 1 need cropping
Cropping components to have max size = 250
	1106 components ok, 1 need cropping
Cropping components to have max size = 250
	1107 components ok, 1 need cropping
Cropping components to have max size = 250
	1108 components ok, 1 need cropping
Cropping components to have max size = 250
	1109 components ok, 1 need cropping
Cropping components to have max size = 250
	1110 components ok, 1 need cropping
Cropping components to have max size = 250
	1111 components ok, 1 need cropping
Cropping components to have max size = 250
	1112 components ok, 1 need cropping
Cropping components to have max size = 250
	1113 components ok, 1 need cropping
Cropping components to have max size = 250
	1114 components ok, 1 need cropping
Cropping components to have max size = 250
	1115 components ok, 1 need cropping
Cropping components to have max size = 250
	1116 components ok, 1 need cropping
Cropping components to have max size = 250
	1117 components ok, 1 need cropping
Cropping components to have max size = 250
	1118 components ok, 1 need cropping
Cropping components to have max size = 250
	1119 components ok, 1 need cropping
Cropping components to have max size = 250
	1120 components ok, 1 need cropping
Cropping components to have max size = 250
	1121 components ok, 1 need cropping
Cropping components to have max size = 250
	1122 components ok, 1 need cropping
Cropping components to have max size = 250
	1123 components ok, 1 need cropping
Cropping components to have max size = 250
	1124 components ok, 1 need cropping
Cropping components to have max size = 250
	1125 components ok, 1 need cropping
Cropping components to have max size = 250
	1126 components ok, 1 need cropping
Cropping components to have max size = 250
	1127 components ok, 1 need cropping
Cropping components to have max size = 250
	1128 components ok, 1 need cropping
Cropping components to have max size = 250
	1129 components ok, 1 need cropping
Cropping components to have max size = 250
	1130 components ok, 1 need cropping
Cropping components to have max size = 250
	1131 components ok, 1 need cropping
Cropping components to have max size = 250
	1132 components ok, 1 need cropping
Cropping components to have max size = 250
	1133 components ok, 1 need cropping
Cropping components to have max size = 250
	1134 components ok, 1 need cropping
Cropping components to have max size = 250
	1135 components ok, 1 need cropping
Cropping components to have max size = 250
	1136 components ok, 1 need cropping
Cropping components to have max size = 250
	1137 components ok, 1 need cropping
Cropping components to have max size = 250
	1138 components ok, 1 need cropping
Cropping components to have max size = 250
	1139 components ok, 1 need cropping
Cropping components to have max size = 250
	1140 components ok, 1 need cropping
Cropping components to have max size = 250
	1141 components ok, 1 need cropping
Cropping components to have max size = 250
	1142 components ok, 1 need cropping
Cropping components to have max size = 250
	1143 components ok, 1 need cropping
Cropping components to have max size = 250
	1144 components ok, 1 need cropping
Cropping components to have max size = 250
	1145 components ok, 1 need cropping
Cropping components to have max size = 250
	1146 components ok, 1 need cropping
Cropping components to have max size = 250
	1147 components ok, 1 need cropping
Cropping components to have max size = 250
	1148 components ok, 1 need cropping
Cropping components to have max size = 250
	1149 components ok, 1 need cropping
Cropping components to have max size = 250
	1150 components ok, 1 need cropping
Cropping components to have max size = 250
	1151 components ok, 1 need cropping
Cropping components to have max size = 250
	1152 components ok, 1 need cropping
Cropping components to have max size = 250
	1153 components ok, 1 need cropping
Cropping components to have max size = 250
	1154 components ok, 1 need cropping
Cropping components to have max size = 250
	1155 components ok, 1 need cropping
Cropping components to have max size = 250
	1156 components ok, 1 need cropping
Cropping components to have max size = 250
	1157 components ok, 1 need cropping
Cropping components to have max size = 250
	1158 components ok, 1 need cropping
Cropping components to have max size = 250
	1159 components ok, 1 need cropping
Cropping components to have max size = 250
	1160 components ok, 1 need cropping
Cropping components to have max size = 250
	1161 components ok, 1 need cropping
Cropping components to have max size = 250
	1162 components ok, 1 need cropping
Cropping components to have max size = 250
	1163 components ok, 1 need cropping
Cropping components to have max size = 250
	1164 components ok, 1 need cropping
Cropping components to have max size = 250
	1165 components ok, 1 need cropping
Cropping components to have max size = 250
	1166 components ok, 1 need cropping
Cropping components to have max size = 250
	1167 components ok, 1 need cropping
Cropping components to have max size = 250
	1168 components ok, 1 need cropping
Cropping components to have max size = 250
	1169 components ok, 1 need cropping
Cropping components to have max size = 250
	1170 components ok, 1 need cropping
Cropping components to have max size = 250
	1171 components ok, 1 need cropping
Cropping components to have max size = 250
	1172 components ok, 1 need cropping
Cropping components to have max size = 250
	1173 components ok, 1 need cropping
Cropping components to have max size = 250
	1174 components ok, 1 need cropping
Cropping components to have max size = 250
	1175 components ok, 1 need cropping
Cropping components to have max size = 250
	1176 components ok, 1 need cropping
Cropping components to have max size = 250
	1177 components ok, 1 need cropping
Cropping components to have max size = 250
	1178 components ok, 1 need cropping
Cropping components to have max size = 250
	1179 components ok, 1 need cropping
Cropping components to have max size = 250
	1180 components ok, 1 need cropping
Cropping components to have max size = 250
	1181 components ok, 1 need cropping
Cropping components to have max size = 250
	1182 components ok, 1 need cropping
Cropping components to have max size = 250
	1183 components ok, 1 need cropping
Cropping components to have max size = 250
	1184 components ok, 1 need cropping
Cropping components to have max size = 250
	1185 components ok, 1 need cropping
Cropping components to have max size = 250
	1186 components ok, 1 need cropping
Cropping components to have max size = 250
	1187 components ok, 1 need cropping
Cropping components to have max size = 250
	1188 components ok, 1 need cropping
Cropping components to have max size = 250
	1189 components ok, 1 need cropping
Cropping components to have max size = 250
	1190 components ok, 1 need cropping
Cropping components to have max size = 250
	1191 components ok, 1 need cropping
Cropping components to have max size = 250
	1192 components ok, 1 need cropping
Cropping components to have max size = 250
	1193 components ok, 1 need cropping
Cropping components to have max size = 250
	1194 components ok, 1 need cropping
Cropping components to have max size = 250
	1195 components ok, 1 need cropping
Cropping components to have max size = 250
	1196 components ok, 1 need cropping
Cropping components to have max size = 250
	1197 components ok, 1 need cropping
Cropping components to have max size = 250
	1198 components ok, 1 need cropping
Cropping components to have max size = 250
	1199 components ok, 1 need cropping
Cropping components to have max size = 250
	1200 components ok, 1 need cropping
Cropping components to have max size = 250
	1201 components ok, 1 need cropping
Cropping components to have max size = 250
	1202 components ok, 1 need cropping
Cropping components to have max size = 250
	1203 components ok, 1 need cropping
Cropping components to have max size = 250
	1204 components ok, 1 need cropping
Cropping components to have max size = 250
	1205 components ok, 1 need cropping
Cropping components to have max size = 250
	1206 components ok, 1 need cropping
Cropping components to have max size = 250
	1207 components ok, 1 need cropping
Cropping components to have max size = 250
	1208 components ok, 1 need cropping
Cropping components to have max size = 250
	1209 components ok, 1 need cropping
Cropping components to have max size = 250
	1210 components ok, 1 need cropping
Cropping components to have max size = 250
	1211 components ok, 1 need cropping
Cropping components to have max size = 250
	1212 components ok, 1 need cropping
Cropping components to have max size = 250
	1213 components ok, 1 need cropping
Cropping components to have max size = 250
	1214 components ok, 1 need cropping
Cropping components to have max size = 250
	1215 components ok, 1 need cropping
Cropping components to have max size = 250
	1216 components ok, 1 need cropping
Cropping components to have max size = 250
	1217 components ok, 1 need cropping
Cropping components to have max size = 250
	1218 components ok, 1 need cropping
Cropping components to have max size = 250
	1219 components ok, 1 need cropping
Cropping components to have max size = 250
	1220 components ok, 1 need cropping
Cropping components to have max size = 250
	1221 components ok, 1 need cropping
Cropping components to have max size = 250
	1222 components ok, 1 need cropping
Cropping components to have max size = 250
	1223 components ok, 1 need cropping
Cropping components to have max size = 250
	1224 components ok, 1 need cropping
Cropping components to have max size = 250
	1225 components ok, 1 need cropping
Cropping components to have max size = 250
	1226 components ok, 1 need cropping
Cropping components to have max size = 250
	1227 components ok, 1 need cropping
Cropping components to have max size = 250
	1228 components ok, 1 need cropping
Cropping components to have max size = 250
	1229 components ok, 1 need cropping
Cropping components to have max size = 250
	1230 components ok, 1 need cropping
Cropping components to have max size = 250
	1231 components ok, 1 need cropping
Cropping components to have max size = 250
	1232 components ok, 1 need cropping
Cropping components to have max size = 250
	1233 components ok, 1 need cropping
Cropping components to have max size = 250
	1234 components ok, 1 need cropping
Cropping components to have max size = 250
	1235 components ok, 1 need cropping
Cropping components to have max size = 250
	1236 components ok, 1 need cropping
Cropping components to have max size = 250
	1237 components ok, 1 need cropping
Cropping components to have max size = 250
	1238 components ok, 1 need cropping
Cropping components to have max size = 250
	1239 components ok, 1 need cropping
Cropping components to have max size = 250
	1240 components ok, 1 need cropping
Cropping components to have max size = 250
	1241 components ok, 1 need cropping
Cropping components to have max size = 250
	1242 components ok, 1 need cropping
Cropping components to have max size = 250
	1243 components ok, 1 need cropping
Cropping components to have max size = 250
	1244 components ok, 1 need cropping
Cropping components to have max size = 250
	1245 components ok, 1 need cropping
Cropping components to have max size = 250
	1246 components ok, 1 need cropping
Cropping components to have max size = 250
	1247 components ok, 1 need cropping
Cropping components to have max size = 250
	1248 components ok, 1 need cropping
---------------------------------------------------------------------------
KeyboardInterrupt                         Traceback (most recent call last)
<ipython-input-11-f8962012b78c> in <module>()
      7 man_network = network.copy()
      8 man_network.top_k_filter(k=10)
----> 9 components = man_network.convert_to_components(max_component_size=max_component_size,min_score = min_score)

/Users/simon/git/lda/code/mol_networks.py in convert_to_components(self, min_score, max_component_size)
    165                         new_crop = []
    166                         for component in needs_cropping:
--> 167                                 new_comp = component._cut(min_score = 0.5)
    168                                 if new_comp.n_nodes() > max_component_size:
    169                                         new_crop.append(new_comp)

/Users/simon/git/lda/code/mol_networks.py in _cut(self, min_score)
    136                         del self.edge_dict[lowest_pair[0]][lowest_pair[1]]
    137                         del self.edge_dict[lowest_pair[1]][lowest_pair[0]]
--> 138                         sub_network = self._extract_component(min_score = min_score,start_node = lowest_pair[0])
    139                         # check if that's the entire network and if it is, we need to remove another edge
    140                         if len(self.edge_dict) == 0:

/Users/simon/git/lda/code/mol_networks.py in _extract_component(self, min_score, start_node)
    104                         for target,score in self.edge_dict[mol].items():
    105                                 if target in component_nodes:
--> 106                                         new_edge_dict[mol][target] = score
    107 
    108                 self._remove_nodes(component_nodes)

KeyboardInterrupt: 

In [8]:
# Report how many components there are, then print each component's
# string form, with blank lines as separators.
n_components = len(components)
print("There are {} components".format(n_components))
print("")
print("")
for component in components:
    print("")
    print(component)


There are 1 components



Network has 2 nodes
d (degree = 1)	(o,c)
o (degree = 1)	(d,c)


In [113]:
import csv
# Dump the full network to CSV through network.write_network.
# NOTE(review): heads=True presumably asks for a header row — confirm in mol_networks.py.
with open('crusemann_edges.csv', 'w') as edges_file:
    network.write_network(csv.writer(edges_file), heads=True)

In [114]:
# Write every component into one CSV file. Only the first component is
# written with heads=True; all later ones are written with heads=False.
with open('crusemann_components.csv', 'w') as components_file:
    components_writer = csv.writer(components_file)
    for index, component in enumerate(components):
        component.write_network(components_writer, heads=(index == 0))

In [115]:
# Histogram of component sizes: maps n_nodes() -> number of components
# that have that many nodes.
component_sizes = {}
for component in components:
    size = component.n_nodes()
    component_sizes[size] = component_sizes.get(size, 0) + 1

# Print "size count" pairs in increasing size order. Sizes are unique dict
# keys, so sorting the (size, count) pairs orders them by size.
cs = sorted(component_sizes.items())
for s, c in cs:
    print("{} {}".format(s, c))

# Number of components with a size other than 1. Use .get() so that a run
# with no single-node components prints the full count instead of raising
# KeyError.
print(len(components) - component_sizes.get(1, 0))


1 814
2 191
3 66
4 42
5 18
6 13
7 8
8 7
9 7
10 1
11 2
12 5
13 6
14 3
15 6
16 1
17 1
18 2
19 2
20 5
21 1
23 2
24 2
25 1
26 2
27 1
28 1
29 1
30 1
32 3
33 1
34 1
35 1
38 1
43 1
49 1
51 1
53 2
61 1
70 1
76 1
104 1
188 1
249 2
250 1
418

Grab some data from the server


In [88]:
import requests

# NOTE(review): these look like experiment ids on the ms2lda.org server —
# confirm. Only crusemann_id is used to build the URL in this cell.
crusemann_id = 583
crusemann_mzdiff_id = 582
url = 'http://ms2lda.org/basicviz/get_all_doc_data/{}'.format(crusemann_id)
response = requests.get(url)
# Fail here with a clear HTTP error (4xx/5xx) instead of later, when the
# body is parsed with response.json().
response.raise_for_status()

In [102]:
# make a dictionary of doc names to motifs
doc_motifs = {}
motif_docs = {}
for doc_name,_,motifs in response.json():
    doc_motifs[doc_name] = {}
    for motif_name,p,o in motifs:
        doc_motifs[doc_name][motif_name] = (p,o)
        if not motif_name in motif_docs:
            motif_docs[motif_name] = {}
        motif_docs[motif_name][doc_name] = (p,o)

In [118]:
# Show the documents recorded for motif_24 with their stored value pairs,
# then keep only the document names for membership tests.
motif_24_docs = motif_docs['motif_24']
print(motif_24_docs)
doc_set = set(motif_24_docs)
print(doc_set)


{u'document_4619': (0.0198345629767357, 0.0816474035301373), u'document_231': (0.878934386174287, 0.0294346142772308), u'document_4575': (0.0230404701308814, 0.184359899108348), u'document_379': (0.0271346316759214, 0.0413725721216124), u'document_4180': (0.175198353809687, 0.0881036493495618), u'document_5313': (0.0222237492855581, 0.0430490175694649), u'document_4128': (0.0411489607385251, 0.0193940439650437), u'document_5335': (0.406814354163913, 0.0485022178882047), u'document_822': (0.124311025173957, 0.04145268784811), u'document_2984': (0.45645277743345, 0.232024047896673), u'document_4235': (0.269218681107128, 0.0909874174149197), u'document_4339': (0.0726479639748166, 0.233415192675697), u'document_2983': (0.402094703857634, 0.230675337718747), u'document_4258': (0.257957122177343, 0.0843458699555986), u'document_2074': (0.0447447550465354, 0.0414323307307675), u'document_2490': (0.854419774797443, 0.103779701085604), u'document_3121': (0.0289142218318227, 0.184379188362364), u'document_2173': (0.43163003186813, 0.0139332743011417), u'document_4834': (0.201530019918681, 0.230698368746054), u'document_4859': (0.44263095529538, 0.23601960920188), u'document_2393': (0.832015619456971, 0.0763294225951716), u'document_3118': (0.325997653361068, 0.246592812676274), u'document_4163': (0.634182083897895, 0.152559258029744), u'document_4623': (0.01835516522902, 0.08166180539422), u'document_4621': (0.0130129855693578, 0.0816205983092773), u'document_3119': (0.31283973322599, 0.250595424627159), u'document_4605': (0.0155542630006798, 0.0816588291900875), u'document_1326': (0.999917210444636, 0.0835224599620095), u'document_4578': (0.03869802280856, 0.249905849410097), u'document_4267': (0.187887672388057, 0.0816812570654548), u'document_4266': (0.203208090575559, 0.0852808569741126), u'document_4342': (0.0983832253527206, 0.233464582650455), u'document_4344': (0.0631731879723651, 0.184401831596287), u'document_4346': (0.441550298076893, 0.250607129901634), 
u'document_5732': (0.999951115743925, 0.14145770423989), u'document_5470': (0.939881528933847, 0.0247479038436384), u'document_863': (0.0886674100818688, 0.0414425323723557), u'document_4325': (0.258587713628704, 0.236469803699089), u'document_3114': (0.332460452778113, 0.25058686011876), u'document_3115': (0.425770763014282, 0.253051974799425), u'document_3116': (0.381548267294207, 0.266451017177581), u'document_3117': (0.253975591013062, 0.204303748223767), u'document_3270': (0.163621035304597, 0.184411652657246), u'document_3331': (0.0691306803730074, 0.247635498770915), u'document_3213': (0.0615970501783121, 0.246536539396883), u'document_4581': (0.0443535672775617, 0.213572914703249)}
set([u'document_4619', u'document_231', u'document_379', u'document_3116', u'document_5313', u'document_4623', u'document_5335', u'document_4342', u'document_2984', u'document_4235', u'document_4339', u'document_2983', u'document_4258', u'document_2074', u'document_2490', u'document_3121', u'document_2173', u'document_4859', u'document_2393', u'document_3331', u'document_4346', u'document_5732', u'document_4163', u'document_4128', u'document_4621', u'document_5470', u'document_4605', u'document_1326', u'document_4578', u'document_4267', u'document_4266', u'document_822', u'document_4344', u'document_4575', u'document_3118', u'document_3119', u'document_863', u'document_4325', u'document_3114', u'document_3115', u'document_4180', u'document_3117', u'document_3270', u'document_4834', u'document_3213', u'document_4581'])

In [119]:
# Copy the first three fields of each row of crusemann_components.csv to a
# new CSV, adding a 'motif_24' column that is 1 when both of the row's first
# two fields are in doc_set and 0 otherwise.
with open('crusemann_motif_24.csv', 'w') as out_file:
    out_writer = csv.writer(out_file)
    with open('crusemann_components.csv', 'r') as in_file:
        in_reader = csv.reader(in_file)
        header = next(in_reader)
        out_writer.writerow(header + ['motif_24'])  # headers
        for row in in_reader:
            doc1, doc2, score = row[0], row[1], row[2]
            both_in_motif = doc1 in doc_set and doc2 in doc_set
            out_writer.writerow([doc1, doc2, score, 1 if both_in_motif else 0])

In [ ]: