In [82]:
import numpy as np
import pandas as pd
import seaborn as sns
from scipy import signal
from scipy import interpolate
from scipy.stats import pearsonr
%matplotlib inline

In [257]:
# Read the capture as raw unsigned 16-bit samples and rescale to float32.
# Presumably 12-bit ADC counts -> millivolts at a 3300 mV reference; TODO confirm.
# NOTE(review): the following cell rebinds `df` to ./output.bin.
df = pd.DataFrame(
    np.fromfile("./browser_download.bin", dtype=np.uint16).astype(np.float32)
    * (3300 / 2**12)
)

In [368]:
# Read ./output.bin as raw unsigned 16-bit samples and rescale to float32.
# Presumably 10-bit ADC counts -> millivolts at a 5000 mV reference; TODO confirm.
df = pd.DataFrame(
    np.fromfile("./output.bin", dtype=np.uint16).astype(np.float32)
    * (5000 / 2**10)
)
# Quick look at the complete raw trace.
df.plot()


Out[368]:
<matplotlib.axes._subplots.AxesSubplot at 0x14127b710>

Группируем по миллисекундам и усредняем:


In [369]:
# Collapse every 1000 consecutive rows into their mean: integer-dividing the
# row index by 1000 gives the bucket each row belongs to.
df_r1000 = df.groupby(df.index // 1000).mean()

# NOTE(review): `sns.plt` relies on seaborn re-exporting matplotlib.pyplot;
# newer seaborn releases reportedly no longer expose it - confirm against the
# installed version.
fig, ax = sns.plt.subplots(figsize=(16, 6))
df_r1000.plot(ax=ax)


Out[369]:
<matplotlib.axes._subplots.AxesSubplot at 0x143f0a940>

Интересные нам всплески потребления кончаются где-то на 10000-й миллисекунде (их пять подряд, мы моргали лампочкой пять раз).


In [4]:
# Zoom in on the first 12000 averaged rows.
fig, ax = sns.plt.subplots(figsize=(16, 6))
df_r1000.iloc[:12000].plot(ax=ax)


Out[4]:
<matplotlib.axes._subplots.AxesSubplot at 0x11573f710>

Функции для парсинга событий из лога и поиска точки синхронизации:


In [223]:
import numpy as np
import pandas as pd
from scipy import signal
from scipy import interpolate
from scipy.stats import pearsonr

import logging
log = logging.getLogger(__name__)

def torch_status(lines):
    """
    Parse torch on/off events from log lines.

    A line containing ``newStatus=2`` is reported with status 1 and a line
    containing ``newStatus=1`` with status 0; every other line is skipped.
    The timestamp is the second whitespace-separated field of the line,
    parsed with the format ``%H:%M:%S.%f``.

    :param lines: iterable of log lines (for example an open text file)
    :return: generator of ``(datetime, status)`` tuples in input order
    :raises ValueError: if the timestamp field does not match the format
    :raises IndexError: if a matching line has fewer than two fields
    """
    # `datetime` is not imported by any visible cell, so the original body
    # raised NameError on a fresh kernel; import it locally.
    from datetime import datetime

    # Markers are tested in this order for every line; the first hit wins.
    markers = (("newStatus=2", 1), ("newStatus=1", 0))
    for line in lines:
        for marker, status in markers:
            if marker in line:
                yield datetime.strptime(line.split()[1], "%H:%M:%S.%f"), status
                break

def parse_torch_events(filename, sps=1000):
    """
    Read torch events from a log file and express their times as sample offsets.

    Each event timestamp is replaced by the rounded number of samples (at
    ``sps`` samples per second) elapsed since the first event in the file.

    :param filename: path of the event log to read
    :param sps: target sample rate, samples per second
    :return: DataFrame with integer ``offset`` and ``status`` columns
    """
    log.info("Parsing torch events...")
    with open(filename) as eventlog:
        # from_records drains the generator here, while the file is open.
        events = pd.DataFrame.from_records(
            torch_status(eventlog), columns=["offset", "status"])

    def to_sample_offset(timestamp):
        # Time since the first event, converted to a whole number of samples.
        elapsed = timestamp - events["offset"][0]
        return int(np.round(elapsed.total_seconds() * sps))

    events["offset"] = events["offset"].map(to_sample_offset)
    return events

def ref_signal(torch, trailing_zeros=1000):
    """
    Generate the square reference signal, one value per sample, followed by
    trailing zeroes.

    The torch status is held constant between events (zero-order
    interpolation) and sampled at every integer offset from 0 up to, but not
    including, the offset of the last event.

    :param torch: DataFrame with ``offset`` (sample index) and ``status``
        columns, as returned by ``parse_torch_events``
    :param trailing_zeros: number of zero samples appended after the signal
    :return: 1-D float array of length ``last offset + trailing_zeros``
    """
    log.info("Generating ref signal...")
    step = interpolate.interp1d(torch["offset"], torch["status"], kind="zero")
    n_samples = int(torch["offset"].values[-1])
    # Sample on the integer grid 0 .. n_samples-1. The previous
    # np.linspace(0, n, n) had a step of n/(n-1) and put its last point on the
    # final event itself, so the last sample carried the status that only
    # starts at offset n (off by one sample).
    sample_idx = np.arange(n_samples)
    return np.append(step(sample_idx), np.zeros(trailing_zeros))

def cross_correlate(sig, ref, first=30000):
    """
    Calculate the Pearson correlation between ``ref`` and every window of
    ``sig`` of the same length, for increasing lags.

    :param sig: signal to search in (sliceable sequence, e.g. Series/array)
    :param ref: reference signal to look for
    :param first: upper bound on the number of lags evaluated, starting at 0
    :return: DataFrame with one row per lag and columns ``corr`` (Pearson
        coefficient) and ``p_value``; empty if ``sig`` is shorter than ``ref``
    """
    log.info("Calculating cross-correlation...")
    window = len(ref)
    # A full window fits at lags 0 .. len(sig) - len(ref) *inclusive*; the
    # previous arange(len(sig) - len(ref)) skipped the last one, so
    # equal-length inputs produced no rows at all.
    n_lags = min(max(len(sig) - window + 1, 0), first)
    return pd.DataFrame.from_records(
        [tuple(pearsonr(sig[lag:lag + window], ref)) for lag in range(n_lags)],
        columns=["corr", "p_value"])

def sync(sig, eventlog, sps=1000, trailing_zeros=1000, first=30000):
    """
    Find the sample index in ``sig`` where the torch event log starts.

    Builds the reference square signal from the event log, cross-correlates
    it with ``sig`` and returns the lag with the highest Pearson coefficient.

    :param sig: measured signal, sampled at ``sps`` samples per second
    :param eventlog: path of the log file containing the torch events
    :param sps: sample rate used to convert event times into sample offsets
    :param trailing_zeros: zero samples appended to the reference signal
    :param first: maximum number of lags to evaluate
    :return: lag (sample index) of the best match
    :raises ValueError: if no lag yields a defined correlation
    :raises RuntimeError: if the p-value at the best lag exceeds 0.05
    """
    rs = ref_signal(
        parse_torch_events(eventlog, sps=sps),
        trailing_zeros=trailing_zeros)
    # `first` used to be accepted but never forwarded, so callers could not
    # change the search range.
    cc = cross_correlate(sig, rs, first=first)
    # pearsonr yields NaN for a constant window and np.argmax would then land
    # on that NaN; ignore undefined coefficients when picking the maximum.
    corr = cc["corr"].dropna()
    if corr.empty:
        raise ValueError("No valid correlation values to pick a sync point from")
    sync_point = int(corr.idxmax())
    p_value = cc["p_value"][sync_point]
    if p_value > 0.05:
        # %d truncated the float p-value to 0 in the message.
        raise RuntimeError("P-value is too big: %g" % p_value)
    log.info(
        "Pearson's coef: %.3f, p-value: %.3g",
        corr[sync_point],
        p_value)
    return sync_point

In [237]:
# Torch on/off events from the log, as sample offsets at 1000 samples/s.
# NOTE(review): `df_r1000` is built from ./output.bin in the cells above while
# the events come from browser_download.log - confirm both belong to the same
# capture.
te = parse_torch_events("browser_download.log", sps=1000)
# Square reference signal built from those events (default trailing zeros).
rs = ref_signal(te)
# Pearson correlation of the reference against column 0 of the averaged
# signal, one row per lag.
cc = cross_correlate(df_r1000[0], rs)

In [241]:
# Pick the lag with the highest correlation first: the reference trace below
# is shifted by `sync_point`, which the original cell only assigned after
# using it (NameError on a fresh kernel, stale value otherwise).
sync_point = np.argmax(cc["corr"])

fig, ax = sns.plt.subplots(figsize=(16, 6))

ax.plot(df_r1000[0][:20000], label="signal")
# Correlation and reference are scaled and offset by constants before being
# drawn on the same axes as the signal.
ax.plot(cc["corr"][:20000] * 1000 + 500, label="cross-correlation")
ax.plot(np.append(np.zeros(sync_point), rs * 500 + 500), label="reference")
# ax.plot(cc["p_value"][:20000] * 1000, label="p-value")
ax.axvline(sync_point)
ax.legend()


Out[241]:
<matplotlib.legend.Legend at 0x130b63ef0>

In [343]:
# Two interleaved point series on y = 0: even x in 0..28 and odd x in 1..29.
fig, ax = sns.plt.subplots(figsize=(10, 6))
even_x = np.arange(0, 30, 2)
odd_x = np.arange(1, 31, 2)
ax.scatter(even_x, np.zeros(15), label="Одно")
ax.scatter(odd_x, np.zeros(15), label="Другое", color="red")
ax.legend()


Out[343]:
<matplotlib.legend.Legend at 0x13a3dd0b8>