In [21]:
import networkx as nx
In [22]:
In [22]:
def to_my_int(node_name):
node_name = node_name[:-1]
node_name = "".join(node_name.split('_'))
node_name = int(node_name)
return node_name
In [23]:
def print_nodes(nodes_list):
print(','.join([str(node) for node in nodes_list]))
In [24]:
def to_single_format(nodes_list):
return set([int(node[:-1]) for node in nodes_list])
def to_double_format(nodes_list):
nodes_1 = [str(node) + "+" for node in nodes_list]
nodes_2 = [str(node) + "-" for node in nodes_list]
return nodes_1 + nodes_2
In [25]:
def rev(node_name):
direction = node_name[-1]
if direction == "-":
rev_direction = "+"
else:
rev_direction = "-"
return node_name[:-1] + rev_direction
In [75]:
G = nx.DiGraph()
with open("/home/margarita/AU/meta_strains_project/final_algo/data/g5_r1/prelim_graph.gfa") as f:
for line in f:
line = line.split()
line_type = line[0]
# S 238024 ACCAATTAT KC:i:37210
if line_type == "S":
v_name = line[1]
v_length = len(line[2])
G.add_node(v_name + "+", length=v_length)
G.add_node(v_name + "-", length=v_length)
# L 238322 + 19590 - 55M
if line_type == "L":
v1 = line[1] + line[2]
v2 = line[3] + line[4]
G.add_edge(v1, v2)
G.add_edge(rev(v2), rev(v1))
print('Number of components:', nx.number_weakly_connected_components(G))
# remain only largest component
new_G = nx.DiGraph()
for g in nx.weakly_connected_component_subgraphs(G):
if new_G.number_of_nodes() < g.number_of_nodes():
new_G = g.copy()
G = new_G.copy()
In [76]:
self_loops = []
for node in G.nodes:
if node in list(nx.all_neighbors(G, node)):
self_loops.append(node)
print_nodes([node[:-1] for node in self_loops])
In [77]:
deleted_tips = []
tips = [node for node in G.nodes if (G.in_degree(node) == 0
and nx.get_node_attributes(G,'length')[node] <= 500)]
deleted_tips.extend(tips)
while len(tips) > 0:
G.remove_nodes_from(tips)
tips = [node for node in G.nodes if (G.in_degree(node) == 0
and nx.get_node_attributes(G,'length')[node] <= 500)]
deleted_tips.extend(tips)
deleted_tips = set([node for node in deleted_tips])
print_nodes([node for node in deleted_tips])
In [ ]: