In [1]:
import bacteriopop_utils
import feature_selection_utils
import load_data
import dynamic_mode_decomposition as dmd
import network_construction as net
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
In [2]:
# Read the project dataset through the local load_data module.
loaded_data = load_data.load_data()
# Keep the short alias `data` bound to the very same object.
data = loaded_data
In [3]:
# Show the dimensions of the loaded data.
loaded_data.shape
Out[3]:
In [4]:
# Preview the beginning of the loaded data.
loaded_data.head()
Out[4]:
In [5]:
# Build the feature table from the loaded data, keeping the six taxonomy
# columns plus 'oxygen' and 'abundance'.
# The default column list was:
# ['kingdom', 'phylum', 'class', 'order', 'family', 'genus', 'length', 'abundance', 'project']
extracted_features = bacteriopop_utils.extract_features(
    dataframe=loaded_data,
    column_list=[
        'kingdom', 'phylum', 'class', 'order', 'family', 'genus',
        'oxygen', 'abundance',
    ],
)
In [6]:
# Preview the beginning of the extracted feature table.
extracted_features.head()
Out[6]:
In [7]:
# Show the dimensions of the extracted feature table.
extracted_features.shape
Out[7]:
In [8]:
# Run the project's PCA helper on the first 100 rows only, requesting
# 10 components.
pca_results = feature_selection_utils.pca_bacteria(
    data=extracted_features.head(100),
    n_components=10,
)
In [9]:
# Inspect the `components_` attribute of the PCA result.
pca_results.components_
Out[9]:
Compute the correlation between each feature and the abundance target, using only a small subset of the data (the first 100 rows).
In [10]:
# Correlate every column of the feature table with the 'abundance' target
# using the "Pearson" method, restricted to the first 100 rows.
feature_selection_utils.calculate_features_target_correlation(
    data=extracted_features.head(100),
    features=extracted_features.columns.tolist(),
    target='abundance',
    method="Pearson",
)
Out[10]:
In [11]:
# First pass with the first argument set to 0 at the 'order' level.
# NOTE(review): the first argument looks like the abundance cutoff (a later
# cell passes p/100 there) and the third is an opaque boolean flag -- confirm
# both against dmd.find_fixed_adjacency_matrix.
mappings, nodes_list = dmd.find_fixed_adjacency_matrix(0,'order',False)
In [12]:
# Walk over every entry of `mappings` and look up the matching node list.
# Graph construction is currently commented out, so this loop only rebinds
# `key`, `Adj` and `nodes`.
for key, Adj in mappings.items():
    nodes = nodes_list[key]
    # g = net.create_one_graph(Adj,nodes,edge_treshhold=1e-10)
Note that "mappings" is called "linear mappings" in the code.
In [13]:
# Calculation parameters used by the cells below.
p = 1 # Abundance, in percent, that counts as significant.
adjacency_magnitude = 1.5 # Interaction-magnitude cutoff used to trim the adjacency matrices before plotting.
In [14]:
# Only look for bacteria that are p% of the population in at least 1 sample.
# Divide by 100.0 rather than 100 so the percent-to-fraction conversion is
# true division even where `/` on two ints truncates (Python 2); with p = 1
# the integer form would silently turn the threshold into 0.
mappings, nodes = dmd.find_fixed_adjacency_matrix(p / 100.0, 'order', True)
In [15]:
# Keep only the species that interact with another member with a magnitude
# greater than `adjacency_magnitude`.
mappings, nodes = net.reduce_all_adjacency_matrixes_in_dict(
    mappings,
    nodes,
    adjacency_magnitude,
)
In [16]:
# Convert the DMD results held in `mappings` from NumPy to pandas, passing
# `nodes` alongside (presumably used as the labels -- confirm in dmd).
# NOTE(review): this rebinds `mappings`, so re-running the cell feeds the
# already-converted results back into the converter.
mappings = dmd.DMD_results_dict_from_numpy_to_pandas(mappings,nodes)
# aggregate_adjacency_matrix_over_replicates is deprecated!
# std_mappings, avg_mappings, snr_mappings = dmd.aggregate_adjacency_matrix_over_replicates(mappings)
In [17]:
# List the keys of the converted mappings; the plotting cells index them
# with tuples such as ('Low', 1) and ('High', 1).
mappings.keys()
Out[17]:
In [18]:
# Plot one heatmap per low-oxygen replicate (1-4), passing ./plots/Low<i> as
# the output path with a '.png' file type.
# A fresh figure is opened immediately before each plot, so no empty trailing
# figure is created after the last replicate.
for i in range(1, 5):
    rep = ('Low', i)
    plt.figure(i)
    net.plot_heatmap(
        mappings[rep],
        'Low Oxygen Replicate {}: Most Significant Interactions'.format(i),
        './plots/Low{}'.format(i),
        file_type='.png',
        width=14,
        height=10,
    )
In [19]:
# Plot one heatmap per high-oxygen replicate (1-4), passing ./plots/High<i>
# as the output path with a '.png' file type.
# A fresh figure is opened immediately before each plot, so no empty trailing
# figure is created after the last replicate.
for i in range(1, 5):
    rep = ('High', i)
    plt.figure(i)
    net.plot_heatmap(
        mappings[rep],
        'High Oxygen Replicate {}: Most Significant Interactions'.format(i),
        './plots/High{}'.format(i),
        file_type='.png',
        width=14,
        height=10,
    )
Find the mean and standard deviation of the adjacency matrices across the low-oxygen replicates:
In [20]:
# Aggregate the four low-oxygen replicate matrices (replicates 1-4).
low_agg = net.aggregate_adjacency_matrices(
    [mappings[('Low', replicate)] for replicate in (1, 2, 3, 4)]
)
In [21]:
# Show which aggregates are available; the plotting cells read 'mean',
# 'standard deviation' and 'signal to noise'.
low_agg.keys()
Out[21]:
In [22]:
# Heatmap of the 'mean' aggregate over the low-oxygen replicates.
p_low_mean = net.plot_heatmap(
    low_agg['mean'],
    'Low Oxygen: Most Significant Interactions',
    './plots/poster/low_avg--FROM_PANELS',
    file_type='.png',
    width=14,
    height=10,
)
In [23]:
# Heatmap of the 'standard deviation' aggregate over the low-oxygen
# replicates. It gets its own title and output path so it is labelled
# correctly and (presumably, if plot_heatmap saves to this path) does not
# overwrite the mean heatmap's file.
p_low_std = net.plot_heatmap(
    low_agg['standard deviation'],
    'Low Oxygen: Standard Deviation of Interactions',
    './plots/poster/low_std--FROM_PANELS',
    file_type='.png',
    width=14,
    height=10,
)
In [28]:
# Heatmap of the 'signal to noise' aggregate over the low-oxygen replicates.
# It gets its own title and output path so it is labelled correctly and
# (presumably, if plot_heatmap saves to this path) does not overwrite the
# mean heatmap's file.
p_low_snr = net.plot_heatmap(
    low_agg['signal to noise'],
    'Low Oxygen: Signal to Noise of Interactions',
    './plots/poster/low_snr--FROM_PANELS',
    file_type='.png',
    width=14,
    height=10,
)
Find the mean and standard deviation of the adjacency matrices across the high-oxygen replicates:
In [24]:
# Aggregate the four high-oxygen replicate matrices (replicates 1-4).
high_agg = net.aggregate_adjacency_matrices(
    [mappings[('High', replicate)] for replicate in (1, 2, 3, 4)]
)
In [25]:
# Show which aggregates are available; the plotting cells read 'mean',
# 'standard deviation' and 'signal to noise'.
high_agg.keys()
Out[25]:
In [26]:
# Heatmap of the 'mean' aggregate over the high-oxygen replicates.
p_high_mean = net.plot_heatmap(
    high_agg['mean'],
    'High Oxygen: Most Significant Interactions',
    './plots/poster/high_avg--FROM_PANELS',
    file_type='.png',
    width=14,
    height=10,
)
In [27]:
# Heatmap of the 'standard deviation' aggregate over the high-oxygen
# replicates. It gets its own title and output path so it is labelled
# correctly and (presumably, if plot_heatmap saves to this path) does not
# overwrite the mean heatmap's file.
p_high_std = net.plot_heatmap(
    high_agg['standard deviation'],
    'High Oxygen: Standard Deviation of Interactions',
    './plots/poster/high_std--FROM_PANELS',
    file_type='.png',
    width=14,
    height=10,
)
In [29]:
# Heatmap of the 'signal to noise' aggregate over the high-oxygen replicates.
# It gets its own title and output path so it is labelled correctly and
# (presumably, if plot_heatmap saves to this path) does not overwrite the
# mean heatmap's file.
p_high_snr = net.plot_heatmap(
    high_agg['signal to noise'],
    'High Oxygen: Signal to Noise of Interactions',
    './plots/poster/high_snr--FROM_PANELS',
    file_type='.png',
    width=14,
    height=10,
)
In [ ]: