In [1]:
import os
import sys
from variantannotation import annotate_batch
from variantannotation import myvariant_parsing_utils
from variantannotation import mongo_DB_export
from variantannotation import utilities
In [ ]:
filepath = ".../CSV to be tested"
csv_file = "test_file.csv"
vcf_file = "test_file_.vcf"
os.chdir(filepath)
ANNOVAR_PATH = '/database/annovar/'
IN_PATH = '.../file.vcf'
OUT_PATH = '.../annovar_results'
In [ ]:
#Run only if ANNOVAR is properly installed and the required databases have been downloaded
utilities.run_annovar(ANNOVAR_PATH, IN_PATH, OUT_PATH)
As soon as you run the scripts from variantannotation, the data will automatically be stored to the database. A database name and a collection name should be specified, and there must be a running MongoDB connection. The script will set up a client (through pymongo) to communicate between Python and the database.
In general, the shell command:
mongod --dbpath ../data/db
(where ../data/db is the designated directory in which the data will be stored) starts MongoDB. After this, the scripts will store data to that location automatically.
For pymongo, and for more information on how to set up a MongoDB database, see: https://docs.mongodb.com/getting-started/python/
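Before any of the annotation scripts are run, it can be useful to confirm that the MongoDB connection is actually available. The cell below is a minimal sketch, assuming MongoDB is running locally on the default port (27017); it is not part of the variantannotation package.
In [ ]:
#Quick connectivity check (illustrative; assumes a local MongoDB on the default port)
from pymongo import MongoClient

client = MongoClient('localhost', 27017)
#server_info() raises a connection error if the server is unreachable
print(client.server_info()['version'])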
In [5]:
chunksize = 1000
step = 0
#Get variant list. Should always be the first step after running ANNOVAR
open_file = myvariant_parsing_utils.VariantParsing()
list_file = open_file.get_variants_from_vcf(vcf_file)
#Name Collection & DB
collection_name = 'ANNOVAR_MyVariant_chunks'
db_name = 'My_Variant_Database'
#Run process, and export (export happens every time 1000 variants are processed and joined)
as_batch = annotate_batch.AnnotationMethods()
as_batch.by_chunks(list_file, chunksize, step, csv_file, collection_name, db_name)
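Once the chunked run has finished, the exported collection can be inspected directly with pymongo. The following is an illustrative check, not part of the package; it assumes the database and collection names used above and a local MongoDB on the default port.
In [ ]:
#Inspect the exported collection (illustrative)
from pymongo import MongoClient

client = MongoClient('localhost', 27017)
collection = client['My_Variant_Database']['ANNOVAR_MyVariant_chunks']
#Look at the field names of a single exported document
sample = collection.find_one()
print(sample.keys())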
In [6]:
#Get variant list. Should always be the first step after running ANNOVAR
open_file = myvariant_parsing_utils.VariantParsing()
list_file = open_file.get_variants_from_vcf(vcf_file)
#Run process, data saved to joint_list
as_one_file = annotate_batch.AnnotationMethods()
joint_list = as_one_file.full_file(list_file, csv_file)
#Name Collection & DB
collection_name = 'ANNOVAR_MyVariant_full'
db_name = 'My_Variant_Database'
#Export, all at once
exporting_function = mongo_DB_export.export
exporting_function(joint_list, collection_name, db_name)
In [7]:
#Get variant list from vcf file
open_file = myvariant_parsing_utils.VariantParsing()
list_file = open_file.get_variants_from_vcf(vcf_file)
#Run process
my_variants = annotate_batch.AnnotationMethods()
myvariant_data = my_variants.my_variant_at_once(list_file)
#Name Collection & DB
collection_name = 'My_Variant_Info_Collection_Full'
db_name = 'My_Variant_Database'
#Export
exporting_function = mongo_DB_export.export
exporting_function(myvariant_data, collection_name, db_name)
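For reference, the MyVariant.info annotations gathered above can also be retrieved directly with the myvariant Python client. The sketch below is independent of the pipeline and uses an example HGVS ID from the MyVariant.info documentation; it simply shows the kind of data being exported.
In [ ]:
#Direct MyVariant.info query (illustrative; not required by the pipeline above)
import myvariant

mv = myvariant.MyVariantInfo()
#getvariant() takes an HGVS-style variant ID
annotation = mv.getvariant('chr1:g.35367G>A')
print(annotation.keys())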
In [8]:
chunksize = 1000
step = 0
#Get variant list from vcf file
open_file = myvariant_parsing_utils.VariantParsing()
list_file = open_file.get_variants_from_vcf(vcf_file)
#Name Collection & DB
collection_name = 'My_Variant_Info_Collection_Chunks'
db_name = 'My_Variant_Database'
#Run process; export to MongoDB is built in (happens every time a chunk of 1000 variants is processed)
my_variants = annotate_batch.AnnotationMethods()
myvariant_data = my_variants.myvariant_chunks(list_file, chunksize, step, collection_name, db_name)
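As a final sanity check, the collections created in this notebook can be counted with a short pymongo snippet. This is illustrative only and assumes the local MongoDB and the database/collection names used above.
In [ ]:
#Count the documents exported to each collection above (illustrative)
from pymongo import MongoClient

client = MongoClient('localhost', 27017)
db = client['My_Variant_Database']
for name in ['ANNOVAR_MyVariant_chunks', 'ANNOVAR_MyVariant_full',
             'My_Variant_Info_Collection_Full', 'My_Variant_Info_Collection_Chunks']:
    #count_documents() requires pymongo 3.7+; on older versions use count()
    print(name, db[name].count_documents({}))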