In [1]:
import pandas as pd
import numpy as np
from path import Path # pip install --user path.py
import re
from IPython.display import display
from pprint import pprint
import netCDF4
from IPython.core.debugger import Pdb
from collections import namedtuple
In [2]:
# Every immediate sub-directory of the results folder is handled as one benchmark run.
# NOTE(review): this is an absolute, machine-specific path; adjust it before
# re-running the notebook on another machine.
TEST_RESULT_DIRECTORIES = (
    Path("/home/shibbiry/Dropbox/documents/msu/bachelors_thesis_cluster_topology/test_results")
    .dirs()
)
In [4]:
# Show the discovered result directories so the reader can check what will be processed.
display(TEST_RESULT_DIRECTORIES)
In [5]:
def read_benchmark_hostnames(path_to_file):
    """Extract the short node names from a hosts file.

    Each non-blank line must start with a node name of the form ``n`` followed
    by exactly five digits and then a dot (e.g. ``n12345.some.domain``); only
    the ``nDDDDD`` part is kept.

    Parameters
    ----------
    path_to_file : object with a ``lines()`` method
        The file to read; ``lines()`` must return the file's lines.

    Returns
    -------
    generator of str
        The node names, in file order. The file is read immediately, but the
        lines are parsed lazily while the generator is consumed.

    Raises
    ------
    ValueError
        While iterating, if a non-blank line does not start with a node name
        in the expected format.
    """
    hostname_pattern = re.compile(r"^(n\d{5})\.")

    def _extract_hostname(line):
        # Fail with a message that names the offending line instead of the
        # AttributeError produced by calling .groups() on a failed match.
        match = hostname_pattern.match(line)
        if match is None:
            raise ValueError(
                "Unexpected line in hosts file {0}: {1!r}".format(path_to_file, line)
            )
        return match.group(1)

    lines = path_to_file.lines()
    # Blank lines (for example a trailing empty line) carry no hostname; skip them.
    return (_extract_hostname(line) for line in lines if line.strip())
In [6]:
# Bundle returned by import_data: the host names, the per-message-length
# median matrices and the list of message lengths.
TestResults = namedtuple("TestResults", "hostnames medians msg_lengths")
In [7]:
def import_data(directory):
    """Load the host names and median matrices of one benchmark run.

    Reads ``network_hosts.txt`` and ``network_median.nc`` from ``directory``.
    The run parameters stored in the NetCDF file are checked with ``assert``:
    the process count must equal the number of host names, the test type must
    be 1, and the message lengths must run from 0 up to (but excluding) 10000
    in steps of 100.

    Parameters
    ----------
    directory : object with a ``joinpath`` method
        Directory holding the two result files.

    Returns
    -------
    TestResults
        ``hostnames`` is a tuple of host names, ``medians`` is a ``pd.Panel``
        with one ``"message_len_<length>"`` item per message length (each a
        DataFrame indexed and labelled by host name), and ``msg_lengths`` is
        the list of message lengths.

    Raises
    ------
    AssertionError
        If the stored run parameters differ from the expected ones.

    NOTE(review): ``pd.Panel`` was removed in pandas 0.25, so this function
    needs an older pandas release -- confirm the environment.
    """
    hosts_file = directory.joinpath("network_hosts.txt")
    hostnames = tuple(read_benchmark_hostnames(hosts_file))

    with netCDF4.Dataset(directory.joinpath("network_median.nc"), "r") as dataset:
        step_len = dataset["step_length"][0]
        start_len = dataset["begin_mes_length"][0]
        end_len = dataset["end_mes_length"][0]

        # Sanity checks: refuse to continue on a run with unexpected parameters.
        assert len(hostnames) == dataset["proc_num"][0]
        assert dataset["test_type"][0] == 1
        assert start_len == 0
        assert end_len == 10000  # last message length should be 9900
        assert step_len == 100
        steps = (end_len - start_len) // step_len - 1
        assert start_len + (steps + 1) * step_len == end_len

        lengths = range(start_len, end_len, step_len)

        # One square host-by-host matrix per message length; the slices have
        # to be read while the dataset is still open.
        data = {}
        for index, length in enumerate(lengths):
            data["message_len_{0}".format(length)] = pd.DataFrame(
                dataset["data"][index], index=hostnames, columns=hostnames
            )

    panel = pd.Panel(data)
    return TestResults(hostnames=hostnames, medians=panel, msg_lengths=list(lengths))
In [8]:
def uniques_in_matrix(matrix):
    """Return the set of distinct cell values of a DataFrame.

    Parameters
    ----------
    matrix : pandas.DataFrame
        The table whose cells are collected.

    Returns
    -------
    frozenset
        Every value that occurs in at least one cell; empty for an empty frame.
    """
    # Read the cells positionally from the underlying array. A label-based
    # lookup (matrix[col].loc[row]) yields a Series/DataFrame instead of a
    # scalar as soon as a row or column label is repeated, and those are
    # unhashable; it is also much slower than a single pass over the array.
    return frozenset(matrix.values.flat)
In [9]:
# Self-check for uniques_in_matrix.
def test_uniques_in_matrix():
    """Check uniques_in_matrix on a 3x3 table holding the values 1..9.

    The table is built twice: once with column labels that differ from the row
    labels and once with identical row and column labels. Both must give the
    same set of values.
    """
    cells = [[1, 2, 3], [4, 5, 6], [7, 8, 9]]
    row_labels = ["i1", "i2", "i3"]

    distinct_labels = pd.DataFrame(cells, columns=["c1", "c2", "c3"], index=row_labels)
    uniques1 = uniques_in_matrix(distinct_labels)
    assert frozenset(range(1, 10)) == uniques1

    shared_labels = pd.DataFrame(cells, columns=row_labels, index=row_labels)
    uniques2 = uniques_in_matrix(shared_labels)
    assert uniques2 == uniques1
In [10]:
# Run the self-check right away; a failing assertion stops the notebook here.
test_uniques_in_matrix()
In [11]:
def count_unique_medians(medians):
    """Print the smallest and largest number of distinct values per matrix.

    For every matrix in ``medians`` the number of distinct cell values is
    counted; the minimum and the maximum count are printed together with the
    key of the first matrix that reaches each of them.

    Parameters
    ----------
    medians : container of matrices
        Must support ``len()``, positional access through ``.iloc[i]`` and
        ``keys()`` -- presumably the ``pd.Panel`` built by ``import_data``
        (confirm against the caller).

    Raises
    ------
    ValueError
        If ``medians`` contains no matrices (``min`` of an empty list).
    """
    uniques_counts = [
        len(uniques_in_matrix(medians.iloc[position]))
        for position in range(len(medians))
    ]
    min_count = min(uniques_counts)
    max_count = max(uniques_counts)
    labels = medians.keys()

    reports = (
        ("Minimum number of unique values in matrix is {0}. Message length = {1}.", min_count),
        ("Maximum number of unique values in matrix is {0}. Message length = {1}.", max_count),
    )
    for template, count in reports:
        # list.index returns the first position holding this count, which is
        # the matrix reported for ties.
        print(template.format(count, labels[uniques_counts.index(count)]))
In [12]:
# Report the spread of distinct median values for every benchmark run.
for directory in TEST_RESULT_DIRECTORIES:
    # Load first: the run name is printed only once its data has been imported.
    medians = import_data(directory).medians
    print(directory.basename())
    count_unique_medians(medians)