In [1]:
from goetia.signatures import SourmashSignature
from goetia.parsing import FastxParser
from goetia.alphabets import DNAN_SIMPLE
from goetia import libgoetia
import cppyy
import cppyy.ll
from sourmash._lowlevel import ffi, lib
In [2]:
def bridge(num, ksize, is_protein=False, dayhoff=False, hp=False, seed=42, max_hash=0):
    """Build a goetia signature and wrap its pointer as a sourmash ``MinHash``.

    All arguments are forwarded positionally, in the order given, to
    ``SourmashSignature.Signature.build``.

    Returns:
        The object produced by ``MinHash._from_objptr``. The goetia-side
        signature it was created from is attached to it as ``_goetia_ref``.
    """
    from sourmash import MinHash
    from sourmash._lowlevel import ffi

    # Create the goetia-side, cppyy-managed KmerMinHash.
    goetia_sig = SourmashSignature.Signature.build(
        num, ksize, is_protein, dayhoff, hp, seed, max_hash
    )

    # Take the address of whatever _get_ptr() hands back and reinterpret it as
    # a cffi 'KmerMinHash *' so sourmash can wrap it.
    # NOTE(review): this presumably leaves cppyy and sourmash both pointing at
    # the same native object -- confirm which side is responsible for freeing it.
    raw_address = cppyy.ll.addressof(goetia_sig._get_ptr())
    minhash = MinHash._from_objptr(ffi.cast('KmerMinHash *', raw_address))

    # Stash the goetia object on the wrapper; callers read it back through
    # `_goetia_ref` (presumably this also keeps it alive as long as the
    # wrapper is -- TODO confirm).
    minhash._goetia_ref = goetia_sig
    return minhash
In [3]:
# num=500 and ksize=21; every other option keeps bridge()'s default.
sig = bridge(num=500, ksize=21)
In [4]:
# Build the processor around the goetia-side signature that bridge() attached
# to the wrapper as `_goetia_ref` (not around the sourmash wrapper itself).
processor = SourmashSignature.Processor.build(sig._goetia_ref)
In [5]:
# Open the example FASTQ with the DNAN_SIMPLE alphabet and stream it through
# the processor.
parser = FastxParser[DNAN_SIMPLE].build('../examples/ecoli.1.fastq.gz')
for n_reads, state in processor.chunked_process(parser):
    # Only report when the `medium` flag is set; in the recorded run this
    # produced one line per 100,000 sequences.
    if not state.medium:
        continue
    print('Processed', n_reads, 'sequences.')
Processed 100000 sequences.
Processed 200000 sequences.
Processed 300000 sequences.
Processed 400000 sequences.
Processed 500000 sequences.
Processed 600000 sequences.
Processed 700000 sequences.
Processed 800000 sequences.
Processed 900000 sequences.
Processed 1000000 sequences.
Processed 1100000 sequences.
Processed 1200000 sequences.
Processed 1300000 sequences.
Processed 1400000 sequences.
Processed 1500000 sequences.
Processed 1600000 sequences.
Processed 1700000 sequences.
Processed 1800000 sequences.
In [7]:
# Read the hashes back through the sourmash-side wrapper returned by bridge();
# the processing above was driven through `sig._goetia_ref`.
sig.get_hashes()
Out[7]:
[3205141980413,
3236092822679,
3703694776023,
6514066417504,
11713594549730,
15214235558162,
17551373616191,
21150285087017,
23555715885782,
23587874902366,
25771393241063,
27362219888026,
28337268266212,
29520094084160,
32603191963564,
33355113951698,
34635727113501,
36460262049084,
42387633352177,
43341805525762,
44060762338450,
45971515261246,
54267487195332,
54928807149369,
57980449935288,
61762038119671,
62764922061356,
63144967958311,
64532943199117,
68707543473620,
72019825479352,
72830525677828,
73076489911828,
80389167076728,
86709255682763,
89217825112117,
94292632250736,
96823843735004,
98055163335582,
100178539101135,
100743995926159,
102616779995824,
104113874982914,
104177745548946,
105077740230033,
106776035873039,
107138115093535,
109925963465487,
112565798999620,
113269032034961,
113769038976489,
116289689660513,
116589162149174,
116925212033615,
117793242025942,
124408420690137,
126141660684959,
128463218921037,
129269525023962,
131359518570667,
133535365812254,
141197018150342,
143323663918981,
143345725674503,
148554695608225,
150497667828147,
156200380958478,
159311917533494,
159377446980764,
160953224652819,
163259242334820,
164907064434098,
167043047663236,
168562548773139,
169624182697529,
171696315934934,
174448487999172,
175896512464017,
178066284855131,
185135589226004,
186037883513144,
190599583623642,
191598411457504,
191796325080644,
195539820147899,
196066579242383,
198176128092587,
201265791500373,
202660465829744,
204848266864272,
206236771068477,
206930517751655,
207152302967130,
219127232708164,
219664389204268,
222298563998768,
224407513972383,
233296489277561,
234488146968831,
239945629602519,
241463376410948,
245718427631749,
246554804629051,
251532409189057,
252789454998736,
256163797083700,
257814287504831,
259088076671028,
260247065310174,
262297732220758,
263511612897560,
264950323314684,
266297663386490,
266747475753425,
268748182387402,
273169705301671,
275037120221046,
275533144384068,
276550692192199,
278740376483011,
279975244530514,
282631119565930,
289412741225997,
291938081838213,
293968050377279,
294053482644774,
295102052892922,
298150060019612,
299021510237758,
299076956740152,
299928575343003,
302140257136880,
302608756655792,
309739075521586,
311935564800556,
312258088334861,
313805160671801,
314449369471731,
314668769989362,
318427908817011,
320800131693944,
324019769911377,
324230192022775,
327506758448950,
330633050071627,
333559616386501,
333578810836565,
334815632608891,
337941786314434,
338179046030787,
339034220533139,
343034588177493,
343647125950017,
344614911667350,
346304608688862,
347197665990378,
353351852845900,
355068611980657,
358505080983595,
360289253603847,
362836201801361,
363600372848562,
365593558798478,
367745128085240,
368915873239401,
369171159957169,
371819092004421,
376057577697219,
377374743504565,
377510398200963,
380476869989738,
382644013236755,
386621431482715,
387132813395206,
390993437097698,
398701726680669,
405896853301818,
411750154928339,
411954089848651,
413913093469527,
417183285987744,
418142356576564,
418809648539744,
419966752631691,
420513773821979,
421984488183114,
424077093440950,
427807967849872,
431689642481401,
435107516686782,
436440873577812,
438382781212187,
438854574541963,
442379340315192,
443275948871001,
445237108469565,
448223986562665,
449954328588135,
453670957441629,
456979697855880,
457672164093786,
468082959264345,
468864379056837,
471959923966825,
471980910724736,
472429909944599,
474125832709421,
481883942840175,
483348184811890,
488767829120351,
491199959981751,
492623359996625,
496814813028226,
503172701324924,
504842659335720,
505868155534739,
507915443189932,
517330267815754,
518593397944982,
521630902714921,
524560311807028,
529925224871035,
532224739114182,
539073760627909,
544702565975360,
545034796114697,
545138079913215,
545847248881234,
548308538650093,
550353384378143,
554127215600247,
558330427857180,
558735877332876,
558987764977344,
559120244606289,
560801168910368,
561066383177590,
562607240181395,
568000522973802,
571241623320514,
573945976256947,
577022782136385,
585844926888365,
587051242722034,
587510003855352,
588381446639777,
590514779151956,
596695358152535,
597514614862891,
600045732091067,
606127695939638,
606392454750462,
607883184243225,
608678462596380,
611156861902846,
612022897882620,
612498938365174,
616379210356540,
618575223361984,
624463011933123,
629452585573989,
631127619358482,
632723652775754,
635136383123691,
636625391324385,
636729285540945,
638282327722268,
640450884391901,
641497577649551,
646501329465509,
649936829809338,
652324546143338,
662293556334996,
665020402838263,
666962464978635,
668892957381456,
670478382943540,
673908089802275,
676172650048594,
680389347622509,
680518485888724,
682622198819436,
686313471895371,
690623632202753,
692569216538315,
693994837834663,
696766285662917,
700059310672934,
703469720615540,
704993095450483,
706599535253560,
713348354559104,
714286805523134,
716601596337949,
717718474299749,
719942821921135,
720705997422760,
720755799589889,
723541483862397,
730627061744034,
732902525224878,
740607477961099,
741338318683858,
742449454393303,
744465728949077,
748493633127713,
750864218690445,
751099803827720,
753153853702987,
754935911314939,
760199217039095,
762700557485135,
768617718185904,
768911401584106,
769954764926391,
771166874256243,
772022488682750,
775026539738299,
775825801107746,
780277024378283,
780927423520861,
782651869345516,
785648654295272,
785842719842079,
790853979660393,
796516093461366,
798868643812160,
804804358917752,
805332733234351,
806065740141997,
806540983549507,
814171191501980,
816017492775658,
816503841678163,
818088105703173,
819228047714963,
822342248772339,
823416313395439,
826565322720406,
832385482837616,
849299192264220,
857278455817910,
861763781830808,
862976180790423,
864132194105303,
865206260926776,
875134964763787,
875634003469814,
876228623607351,
876369363914390,
877463201362698,
877591728721152,
885105075142743,
886507963472541,
889716034582361,
891472737961131,
892684572200909,
893070792300812,
895399681539329,
897011042032045,
898355044186622,
901931471834337,
904388964458649,
905107234139642,
907310714922475,
909557781804548,
910260459544594,
915177206215178,
921915425077441,
922005921377760,
923446557520329,
923547613745274,
924120950251599,
924337930096207,
924694737817066,
930469700962645,
931049396265524,
932095532759875,
935307784828332,
944610797262077,
949779878662363,
950790026596952,
954033844040890,
956777688150257,
956794137753897,
957664219980442,
965770318045625,
969727203646291,
972326515385306,
974078104867189,
974096972791927,
976819860882976,
978797577436361,
978850611402558,
979847056089397,
982466293871076,
986418271184453,
987105269848049,
987890720037699,
988438618333714,
991336039265177,
993865794987670,
995696411194344,
1000793834431881,
1004403388656351,
1010691614882200,
1012693995966716,
1014488286180932,
1018355523411192,
1021639750908720,
1021838098774831,
1024116778875076,
1025475811135389,
1026278892352268,
1027375444638649,
1030863354604216,
1032677078924633,
1034672844085632,
1034854861581131,
1035348009397075,
1039026374005855,
1040906481516572,
1041944326585600,
1043888546838874,
1050381270791945,
1051896060691813,
1052015300730222,
1052289371649832,
1052577627164936,
1053649852479242,
1054172634769122,
1056399742412383,
1056815373995971,
1062265454900130,
1062640232478052,
1072860534836871,
1073290951532423,
1074593646588826,
1075365629697532,
1078933252500127,
1085464144855401,
1086860360966183,
1089910217236668,
1093706599324609,
1095072345551621,
1099216006225077,
1099512157992528,
1104080838638807,
1108166494080021,
1110463687231669,
1111983466948421,
1113547510077440,
1122474689470832,
1124632343461454,
1125038647685684,
1130056091934672,
1130787738375691,
1133127975259728,
1133560680778039,
1134161017902675,
1136441185432963,
1138218115835429,
1139436710861227,
1140012540019696,
1145125463956471,
1147610223223647,
1150373777852198,
1150488693870878,
1153526756527497,
1154625347894374,
1156519831795465,
1156577842927566,
1156936765056096,
1158716490930212,
1159121476698136,
1164172811228611,
1168824230275516,
1171002909830033,
1171605363066337,
1171980783285280,
1172164637492226,
1173684745316974,
1175050420824570,
1175397545550648,
1177183468310475,
1178572981144597,
1179786421592233,
1184653719138484,
1187867221021824,
1188810923981199,
1190733677603405,
1193130973153197,
1193194748379973,
1199755153304757,
1202707226789805,
1202720293649839,
1204751753802167,
1205148966280707,
1206109020850657,
1206628672783665]
In [ ]:
Content source: camillescott/boink
Similar notebooks: