In [ ]:
%matplotlib inline
%config InlineBackend.figure_format = 'retina'

import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
plt.style.use('ggplot')

from datetime import datetime
from dateutil.parser import parse

In [ ]:
path_to_your_csv_file = "./motionai.csv"# <--- put in your filename / filepath to your csv-report

# Read the report. Malformed rows still raise an error (the pandas default);
# the `error_bad_lines` keyword is omitted because it only restated that
# default and was removed in pandas 2.0.
df = pd.read_csv(path_to_your_csv_file, low_memory=False)

# Drop messages from tests. np.where yields row *positions*, so translate them
# to index labels before dropping rather than relying on positions == labels.
test_msgs = np.where(df['testMode']==True)
df = df.drop(df.index[test_msgs[0]])

# Convert the creation time strings to datetime objects.
df["timestamp"] = df['createdAt'].apply(parse)

# Use the timestamps as the index of the dataframe (needed for resampling).
df.index = df.timestamp

# Drop columns that are not used by the analysis below.
df = df.drop(['_id', 'createdAt', 'updatedAt', '__v', 'archived', 'direction',
              'botType', 'botNickname', 'botOwner', 'botID',
              'enrichedData', 'result', 'updated_at', 'sid', 'testMode', 'hasSeen', 'to', 'from', 'from_metaData',
              'quickReplies', 'cardOptions', 'cards', 'media', 'mediaUrl'], axis=1)

# Rename the first two remaining columns (the module columns) to clarify their
# meaning. The labels are replaced by assigning a fresh list: writing into
# `df.columns.values` mutates the Index in place, which can leave its lookup
# caches stale and fails outright when the backing array is read-only.
renamed_columns = df.columns.tolist()
renamed_columns[:2] = ["module_in", "module_to"]
df.columns = renamed_columns

In [ ]:
# Print summary statistics of the number of messages per session.
session = df.groupby("session")
stats = session.text.count().describe()

# `describe()` returns a Series labelled count/mean/std/min/25%/50%/75%/max.
# Look the values up by label: integer keys on a labelled Series rely on a
# positional fallback that newer pandas versions deprecate.
max_messages = int(stats["max"])

summary_template = (
    "Your bot got %u messages (in) from %u unique sessions.\n"
    "The mean of messages per session was %u, the median %u.\n"
    "Min/max messages per session: %u / %u."
)
print(summary_template % (
    len(df),
    stats["count"],
    stats["mean"],
    stats["50%"],
    stats["min"],
    stats["max"],
))
print("")
print(stats)

In [ ]:
# Plot a histogram of how many sessions had a given number of messages.
# Uses `session` and `max_messages` computed in the statistics cell.

# Set the font size before the axes are created so all labels pick it up.
plt.rcParams.update({'font.size': 14})
fig, ax = plt.subplots(figsize=(16, 9))

# One bin per two messages, but never fewer than one bin: a report whose
# longest session has a single message would otherwise request 0 bins,
# which makes the histogram call raise.
bin_count = max(1, max_messages // 2)
ax.hist(session.text.count(), bins=bin_count, rwidth=.70)

# Keep the x-range non-degenerate when every session has only one message.
ax.set_xlim(1, max(2, max_messages))
ax.set_title('Sessions per # of Messages\n')
ax.set_ylabel('# Sessions')
ax.set_xlabel('# Messages')
plt.show()

In [ ]:
# Rank the incoming module ids by frequency and write the ranking to disk
# (comma-separated, columns "module id" and "counts").
top_modules = df["module_in"].value_counts()
top_modules.to_csv(
    "./top_modules.csv",
    encoding="utf8",
    header=["counts"],
    index_label="module id",
)

# Rank the user message texts by frequency and write the ranking to disk
# (tab-separated, since message texts may themselves contain commas).
top_user_messages = df["text"].value_counts()
top_user_messages.to_csv(
    "./top_user_messages.csv",
    encoding="utf8",
    sep="\t",
)

In [ ]:
# Bar chart of daily usage: number of distinct sessions seen on each calendar day.
byday = df["session"].resample('D').nunique()

# Create the axes explicitly, label it, then let pandas draw the bars onto it.
daily_fig, daily_ax = plt.subplots(figsize=(16, 9))
daily_ax.set_ylabel('# Sessions')
byday.plot(kind="bar", ax=daily_ax, rot=45, title="Usage by day\n", fontsize=10)

In [ ]:
# Line chart of hourly usage: number of distinct sessions seen in each hour.
# The hourly frequency is given as an offset object rather than the 'H' string
# alias, which newer pandas versions deprecate; the offset object is accepted
# by old and new versions alike.
byhour = df["session"].resample(pd.offsets.Hour()).nunique()

# Label the y-axis so the chart matches the daily usage chart.
hourly_fig, hourly_ax = plt.subplots(figsize=(16, 9))
hourly_ax.set_ylabel('# Sessions')
byhour.plot(kind="line", ax=hourly_ax, rot=45, title="Usage by hour\n", fontsize=10)