In [1]:
from goetia.signatures import SourmashSignature
from goetia.parsing import FastxParser
from goetia.alphabets import DNAN_SIMPLE
from goetia import libgoetia
import cppyy
import cppyy.ll
from sourmash._lowlevel import ffi, lib

In [2]:
def bridge(num, ksize, is_protein=False, dayhoff=False, hp=False, seed=42, max_hash=0):
    """Build a goetia signature and expose it as a sourmash ``MinHash``.

    A goetia-side (cppyy-managed) signature is created with
    ``SourmashSignature.Signature.build``; the pointer it hands out via
    ``_get_ptr()`` is then wrapped with ``MinHash._from_objptr`` so the same
    object can be used through the sourmash Python API.

    Parameters
    ----------
    num, ksize, is_protein, dayhoff, hp, seed, max_hash
        Forwarded positionally, in this order, to
        ``SourmashSignature.Signature.build``.

    Returns
    -------
    MinHash
        sourmash wrapper built from the goetia object's pointer. The goetia
        object itself is attached to the wrapper as ``_goetia_ref``.
    """
    from sourmash._lowlevel import ffi
    from sourmash import MinHash

    # Goetia-side, cppyy-managed KmerMinHash.
    goetia_sig = SourmashSignature.Signature.build(
        num, ksize, is_protein, dayhoff, hp, seed, max_hash
    )

    # Take the address of the pointer goetia hands out and reinterpret it as
    # the cffi pointer type that MinHash._from_objptr is given.
    raw_address = cppyy.ll.addressof(goetia_sig._get_ptr())
    minhash_ptr = ffi.cast('KmerMinHash *', raw_address)
    minhash = MinHash._from_objptr(minhash_ptr)

    # Stash the goetia object on the wrapper so callers can reach it from the
    # returned value (presumably this also keeps it alive for as long as the
    # wrapper exists).
    # NOTE(review): both sides now refer to the same underlying object --
    # confirm which one is responsible for freeing it.
    minhash._goetia_ref = goetia_sig

    return minhash

In [3]:
# Build the bridged signature: num=500, ksize=21, all other options at
# bridge()'s defaults.
sig = bridge(num=500, ksize=21)

In [4]:
# The processor is built from the goetia-side signature that bridge() attached
# to the sourmash wrapper, not from the wrapper itself.
goetia_signature = sig._goetia_ref
processor = SourmashSignature.Processor.build(goetia_signature)

In [5]:
# Feed the example FASTQ file through the processor in chunks, reporting
# progress only on iterations where `state.medium` is set (presumably a
# medium-granularity reporting interval -- confirm against goetia).
reads_path = '../examples/ecoli.1.fastq.gz'
parser = FastxParser[DNAN_SIMPLE].build(reads_path)
for n_reads, state in processor.chunked_process(parser):
    if not state.medium:
        continue
    print('Processed', n_reads, 'sequences.')


Processed 100000 sequences.
Processed 200000 sequences.
Processed 300000 sequences.
Processed 400000 sequences.
Processed 500000 sequences.
Processed 600000 sequences.
Processed 700000 sequences.
Processed 800000 sequences.
Processed 900000 sequences.
Processed 1000000 sequences.
Processed 1100000 sequences.
Processed 1200000 sequences.
Processed 1300000 sequences.
Processed 1400000 sequences.
Processed 1500000 sequences.
Processed 1600000 sequences.
Processed 1700000 sequences.
Processed 1800000 sequences.

In [7]:
# Read the hashes back through the sourmash-side wrapper returned by bridge();
# the processor above was built on the goetia object that this wrapper points at.
sig.get_hashes()


Out[7]:
[3205141980413,
 3236092822679,
 3703694776023,
 6514066417504,
 11713594549730,
 15214235558162,
 17551373616191,
 21150285087017,
 23555715885782,
 23587874902366,
 25771393241063,
 27362219888026,
 28337268266212,
 29520094084160,
 32603191963564,
 33355113951698,
 34635727113501,
 36460262049084,
 42387633352177,
 43341805525762,
 44060762338450,
 45971515261246,
 54267487195332,
 54928807149369,
 57980449935288,
 61762038119671,
 62764922061356,
 63144967958311,
 64532943199117,
 68707543473620,
 72019825479352,
 72830525677828,
 73076489911828,
 80389167076728,
 86709255682763,
 89217825112117,
 94292632250736,
 96823843735004,
 98055163335582,
 100178539101135,
 100743995926159,
 102616779995824,
 104113874982914,
 104177745548946,
 105077740230033,
 106776035873039,
 107138115093535,
 109925963465487,
 112565798999620,
 113269032034961,
 113769038976489,
 116289689660513,
 116589162149174,
 116925212033615,
 117793242025942,
 124408420690137,
 126141660684959,
 128463218921037,
 129269525023962,
 131359518570667,
 133535365812254,
 141197018150342,
 143323663918981,
 143345725674503,
 148554695608225,
 150497667828147,
 156200380958478,
 159311917533494,
 159377446980764,
 160953224652819,
 163259242334820,
 164907064434098,
 167043047663236,
 168562548773139,
 169624182697529,
 171696315934934,
 174448487999172,
 175896512464017,
 178066284855131,
 185135589226004,
 186037883513144,
 190599583623642,
 191598411457504,
 191796325080644,
 195539820147899,
 196066579242383,
 198176128092587,
 201265791500373,
 202660465829744,
 204848266864272,
 206236771068477,
 206930517751655,
 207152302967130,
 219127232708164,
 219664389204268,
 222298563998768,
 224407513972383,
 233296489277561,
 234488146968831,
 239945629602519,
 241463376410948,
 245718427631749,
 246554804629051,
 251532409189057,
 252789454998736,
 256163797083700,
 257814287504831,
 259088076671028,
 260247065310174,
 262297732220758,
 263511612897560,
 264950323314684,
 266297663386490,
 266747475753425,
 268748182387402,
 273169705301671,
 275037120221046,
 275533144384068,
 276550692192199,
 278740376483011,
 279975244530514,
 282631119565930,
 289412741225997,
 291938081838213,
 293968050377279,
 294053482644774,
 295102052892922,
 298150060019612,
 299021510237758,
 299076956740152,
 299928575343003,
 302140257136880,
 302608756655792,
 309739075521586,
 311935564800556,
 312258088334861,
 313805160671801,
 314449369471731,
 314668769989362,
 318427908817011,
 320800131693944,
 324019769911377,
 324230192022775,
 327506758448950,
 330633050071627,
 333559616386501,
 333578810836565,
 334815632608891,
 337941786314434,
 338179046030787,
 339034220533139,
 343034588177493,
 343647125950017,
 344614911667350,
 346304608688862,
 347197665990378,
 353351852845900,
 355068611980657,
 358505080983595,
 360289253603847,
 362836201801361,
 363600372848562,
 365593558798478,
 367745128085240,
 368915873239401,
 369171159957169,
 371819092004421,
 376057577697219,
 377374743504565,
 377510398200963,
 380476869989738,
 382644013236755,
 386621431482715,
 387132813395206,
 390993437097698,
 398701726680669,
 405896853301818,
 411750154928339,
 411954089848651,
 413913093469527,
 417183285987744,
 418142356576564,
 418809648539744,
 419966752631691,
 420513773821979,
 421984488183114,
 424077093440950,
 427807967849872,
 431689642481401,
 435107516686782,
 436440873577812,
 438382781212187,
 438854574541963,
 442379340315192,
 443275948871001,
 445237108469565,
 448223986562665,
 449954328588135,
 453670957441629,
 456979697855880,
 457672164093786,
 468082959264345,
 468864379056837,
 471959923966825,
 471980910724736,
 472429909944599,
 474125832709421,
 481883942840175,
 483348184811890,
 488767829120351,
 491199959981751,
 492623359996625,
 496814813028226,
 503172701324924,
 504842659335720,
 505868155534739,
 507915443189932,
 517330267815754,
 518593397944982,
 521630902714921,
 524560311807028,
 529925224871035,
 532224739114182,
 539073760627909,
 544702565975360,
 545034796114697,
 545138079913215,
 545847248881234,
 548308538650093,
 550353384378143,
 554127215600247,
 558330427857180,
 558735877332876,
 558987764977344,
 559120244606289,
 560801168910368,
 561066383177590,
 562607240181395,
 568000522973802,
 571241623320514,
 573945976256947,
 577022782136385,
 585844926888365,
 587051242722034,
 587510003855352,
 588381446639777,
 590514779151956,
 596695358152535,
 597514614862891,
 600045732091067,
 606127695939638,
 606392454750462,
 607883184243225,
 608678462596380,
 611156861902846,
 612022897882620,
 612498938365174,
 616379210356540,
 618575223361984,
 624463011933123,
 629452585573989,
 631127619358482,
 632723652775754,
 635136383123691,
 636625391324385,
 636729285540945,
 638282327722268,
 640450884391901,
 641497577649551,
 646501329465509,
 649936829809338,
 652324546143338,
 662293556334996,
 665020402838263,
 666962464978635,
 668892957381456,
 670478382943540,
 673908089802275,
 676172650048594,
 680389347622509,
 680518485888724,
 682622198819436,
 686313471895371,
 690623632202753,
 692569216538315,
 693994837834663,
 696766285662917,
 700059310672934,
 703469720615540,
 704993095450483,
 706599535253560,
 713348354559104,
 714286805523134,
 716601596337949,
 717718474299749,
 719942821921135,
 720705997422760,
 720755799589889,
 723541483862397,
 730627061744034,
 732902525224878,
 740607477961099,
 741338318683858,
 742449454393303,
 744465728949077,
 748493633127713,
 750864218690445,
 751099803827720,
 753153853702987,
 754935911314939,
 760199217039095,
 762700557485135,
 768617718185904,
 768911401584106,
 769954764926391,
 771166874256243,
 772022488682750,
 775026539738299,
 775825801107746,
 780277024378283,
 780927423520861,
 782651869345516,
 785648654295272,
 785842719842079,
 790853979660393,
 796516093461366,
 798868643812160,
 804804358917752,
 805332733234351,
 806065740141997,
 806540983549507,
 814171191501980,
 816017492775658,
 816503841678163,
 818088105703173,
 819228047714963,
 822342248772339,
 823416313395439,
 826565322720406,
 832385482837616,
 849299192264220,
 857278455817910,
 861763781830808,
 862976180790423,
 864132194105303,
 865206260926776,
 875134964763787,
 875634003469814,
 876228623607351,
 876369363914390,
 877463201362698,
 877591728721152,
 885105075142743,
 886507963472541,
 889716034582361,
 891472737961131,
 892684572200909,
 893070792300812,
 895399681539329,
 897011042032045,
 898355044186622,
 901931471834337,
 904388964458649,
 905107234139642,
 907310714922475,
 909557781804548,
 910260459544594,
 915177206215178,
 921915425077441,
 922005921377760,
 923446557520329,
 923547613745274,
 924120950251599,
 924337930096207,
 924694737817066,
 930469700962645,
 931049396265524,
 932095532759875,
 935307784828332,
 944610797262077,
 949779878662363,
 950790026596952,
 954033844040890,
 956777688150257,
 956794137753897,
 957664219980442,
 965770318045625,
 969727203646291,
 972326515385306,
 974078104867189,
 974096972791927,
 976819860882976,
 978797577436361,
 978850611402558,
 979847056089397,
 982466293871076,
 986418271184453,
 987105269848049,
 987890720037699,
 988438618333714,
 991336039265177,
 993865794987670,
 995696411194344,
 1000793834431881,
 1004403388656351,
 1010691614882200,
 1012693995966716,
 1014488286180932,
 1018355523411192,
 1021639750908720,
 1021838098774831,
 1024116778875076,
 1025475811135389,
 1026278892352268,
 1027375444638649,
 1030863354604216,
 1032677078924633,
 1034672844085632,
 1034854861581131,
 1035348009397075,
 1039026374005855,
 1040906481516572,
 1041944326585600,
 1043888546838874,
 1050381270791945,
 1051896060691813,
 1052015300730222,
 1052289371649832,
 1052577627164936,
 1053649852479242,
 1054172634769122,
 1056399742412383,
 1056815373995971,
 1062265454900130,
 1062640232478052,
 1072860534836871,
 1073290951532423,
 1074593646588826,
 1075365629697532,
 1078933252500127,
 1085464144855401,
 1086860360966183,
 1089910217236668,
 1093706599324609,
 1095072345551621,
 1099216006225077,
 1099512157992528,
 1104080838638807,
 1108166494080021,
 1110463687231669,
 1111983466948421,
 1113547510077440,
 1122474689470832,
 1124632343461454,
 1125038647685684,
 1130056091934672,
 1130787738375691,
 1133127975259728,
 1133560680778039,
 1134161017902675,
 1136441185432963,
 1138218115835429,
 1139436710861227,
 1140012540019696,
 1145125463956471,
 1147610223223647,
 1150373777852198,
 1150488693870878,
 1153526756527497,
 1154625347894374,
 1156519831795465,
 1156577842927566,
 1156936765056096,
 1158716490930212,
 1159121476698136,
 1164172811228611,
 1168824230275516,
 1171002909830033,
 1171605363066337,
 1171980783285280,
 1172164637492226,
 1173684745316974,
 1175050420824570,
 1175397545550648,
 1177183468310475,
 1178572981144597,
 1179786421592233,
 1184653719138484,
 1187867221021824,
 1188810923981199,
 1190733677603405,
 1193130973153197,
 1193194748379973,
 1199755153304757,
 1202707226789805,
 1202720293649839,
 1204751753802167,
 1205148966280707,
 1206109020850657,
 1206628672783665]

In [ ]: