In [1]:
%pylab inline
from IPython.display import HTML
from pypdb.pypdb import *
import pprint
In [2]:
# Keyword search: collect the PDB IDs returned for the term 'crispr'.
crispr_query = make_query('crispr')
crispr_results = do_search(crispr_query)

# BLAST the first ID that the search returned.
top_result = crispr_results[0]
blast_hits = get_blast2(top_result)

# blast_hits[0] is iterated as a sequence of PDB IDs: each one is described
# and the title of its entry is printed.
for hit_id in blast_hits[0]:
    hit_description = describe_pdb(hit_id)
    print(hit_description['title'])
In [3]:
# Estimate how PDB depositions are spread over the years from a random sample
# of entries, because describing every entry would cost one database call per
# PDB ID.
from random import Random

N_SAMPLES = 100   # number of entries to describe (one describe_pdb call each)
SAMPLE_SEED = 0   # fixed seed so that re-running the cell draws the same IDs

sampler = Random(SAMPLE_SEED)
all_pdbs = get_all()

# Keep the first four characters of every deposition date as an integer year
# (assumes the date string is year-first, e.g. 'YYYY-MM-DD' -- TODO confirm).
all_dates = array([
    int(describe_pdb(sampler.choice(all_pdbs))['deposition_date'][:4])
    for _ in range(N_SAMPLES)
])

# One bin per calendar year. Explicit edges min, min+1, ..., max+1 are used
# because asking for (max - min) equal bins yields one bin too few: the last
# bin is closed on the right and would merge the two most recent years, and a
# sample drawn from a single year would request zero bins.
year_edges = arange(all_dates.min(), all_dates.max() + 2)

figure()
subs_v_time = hist(all_dates, bins=year_edges)
# show() takes display options, not artists, so it is called without arguments.
show()

# Show power-law scaling: yearly counts against bin index on log-log axes.
figure()
subs_v_time_loglog = loglog(subs_v_time[0], '.')
show()
In [5]:
# Plot the number of new PDB entries per year for the search term 'crispr'.

# Perform search (at most 500 results) and reduce each deposition date to its
# year (assumes year-first date strings, e.g. 'YYYY-MM-DD' -- TODO confirm).
all_dates = find_dates('crispr', max_results=500)
all_dates = array([int(depdate[:4]) for depdate in all_dates])

# One bin per calendar year: edges are min, min+1, ..., max+1. Requesting
# (max - min) equal bins instead gives one bin too few, so the closed last bin
# merges the two most recent years (and zero bins are requested when every
# result falls in the same year).
year_edges = arange(all_dates.min(), all_dates.max() + 2)
subs_v_time = histogram(all_dates, bins=year_edges)

# Label each count with the LEFT edge of its bin, i.e. the year it counts;
# using the right edges would shift every count one year into the future.
dates, num_entries = subs_v_time[1][:-1], subs_v_time[0]
popgraph = fill_between(dates, 0, num_entries)

# Formatting the plots
xlim([dates[0], dates[-1]])
gca().xaxis.set_major_formatter(FormatStrFormatter('%d'))  # whole-number year ticks
xticks(fontweight='bold')
yticks(fontweight='bold')
xlabel('Year', fontweight='bold')
ylabel('New PDB entries', fontweight='bold')

# show() takes display options, not artists, so it is called without arguments
# and only after all formatting has been applied.
show()
In [15]:
# Count symmetry-search results for point group C1 in consecutive RMSD windows
# and plot the counts on a logarithmic y-axis.
point_group = 'C1'
max_distance = 5.0
npts = 20

# npts window start points from 0 to max_distance; dx is the window width.
# NOTE(review): the last window starts at max_distance and therefore extends
# to max_distance + dx -- confirm that this is intended.
dist_vals = linspace(0.0, max_distance, npts)
dx = dist_vals[1] - dist_vals[0]

# One search per window [dist_val, dist_val + dx]; each call's result is kept
# so that its length can be counted below.
all_ids = [
    do_protsym_search(point_group, min_rmsd=dist_val, max_rmsd=(dist_val + dx))
    for dist_val in dist_vals
]
counts = array([len(item) for item in all_ids])

# Title and axis labels must be set BEFORE show(): with the inline backend,
# show() renders and closes the current figure, so labels applied afterwards
# would end up on a new, empty figure.
semilogy(dist_vals, counts)
title('Total results versus RMSD')
xlabel('Radius (A)')
ylabel('Number of results')
show()
Out[15]:
In [56]:
# Search for PDB IDs matching the keyword 'swim'. The results are stored under
# their own names so that the CRISPR query and results from the earlier search
# cell are not overwritten by an unrelated query.
swim_query = make_query('swim')
swim_results = do_search(swim_query)

# Run BLAST on the top result
top_result = swim_results[0]
blast_hits = get_blast2(top_result)

# Print the taxa and the types reported for the first five BLAST hits
# (blast_hits[0] is sliced and passed on as a list of PDB IDs).
pprint.pprint(list_taxa(blast_hits[0][:5]))
pprint.pprint(list_types(blast_hits[0][:5]))
In [ ]: