In [ ]:
import pandas as pd
import seaborn as sns
import matplotlib
import matplotlib.pyplot
%matplotlib inline
In [2]:
def get_http_sample(path='data/insider_threat/r1/http.csv'):
    """Load the HTTP activity log into a DataFrame indexed by timestamp.

    Parameters
    ----------
    path : str or path-like, optional
        CSV file to read. Defaults to the r1 HTTP log used by this notebook.

    Returns
    -------
    pandas.DataFrame
        Columns ``id``, ``user``, ``pc`` and ``url``, indexed by the parsed
        ``date`` column.

    Notes
    -----
    ``date`` values that cannot be parsed become ``NaT`` (``errors='coerce'``)
    instead of raising, so the index may contain ``NaT`` entries.
    """
    # NOTE(review): passing names= makes pandas treat the first line as data.
    # If the file actually has a header row, that row is read as a record and
    # its date is coerced to NaT -- confirm against the file.
    names = ['id', 'date', 'user', 'pc', 'url']
    df = pd.read_csv(path, names=names)
    df['date'] = pd.to_datetime(df['date'], errors='coerce')
    # Return the re-indexed frame directly rather than mutating with inplace=True.
    return df.set_index('date')
In [3]:
# Read the HTTP log from disk and preview its first five rows.
http_df = get_http_sample()
http_df.head(n=5)
Out[3]:
In [4]:
# Frame dimensions as (rows, columns); `date` is already the index here, not a column.
http_df.shape
Out[4]:
In [5]:
# Print index type, column dtypes, non-null counts and memory usage for the HTTP frame.
http_df.info()
In [6]:
# Non-null values per column for each calendar day of the HTTP log,
# then the mean of those daily counts within each month-end bin.
daily = (
    http_df
    .groupby(pd.Grouper(freq='D'))
    .count()
)
monthly = (
    daily
    .groupby(pd.Grouper(freq='M'))
    .mean()
)
In [7]:
# Line chart of the per-day counts in `daily`, one line per column.
daily.plot.line(figsize=(15, 5))
Out[7]:
In [8]:
# Line chart of the averaged daily counts held in `monthly`.
monthly.plot.line(figsize=(15, 5))
Out[8]:
The HTTP sample covers January 4, 2010 through May 14, 2011.
In [11]:
# Keep only the HTTP rows recorded for a single user, then preview them.
user_df = http_df.loc[http_df['user'].eq('DTAA/AEG0905')]
user_df.head()
Out[11]:
In [12]:
# (rows, columns) of the single-user slice.
user_df.shape
Out[12]:
In [13]:
# Non-null values per column for each calendar day of the selected user's
# records, then the mean of those daily counts within each month.
# Month-end bins ('M') are used so the result matches its name `monthly`
# and the equivalent aggregation cells for the other logs in this notebook.
# NOTE: pandas >= 2.2 spells the month-end alias 'ME'.
daily = user_df.groupby(pd.Grouper(freq='D')).count()
monthly = daily.groupby(pd.Grouper(freq='M')).mean()
In [15]:
# Line chart of the per-day counts in `daily`, one line per column.
daily.plot.line(figsize=(15, 5))
Out[15]:
In [16]:
# Line chart of the averaged daily counts held in `monthly`.
monthly.plot.line(figsize=(15, 5))
Out[16]:
In [17]:
def get_logon_sample(path='data/insider_threat/r1/logon.csv'):
    """Load the logon log into a DataFrame indexed by timestamp.

    Parameters
    ----------
    path : str or path-like, optional
        CSV file to read. Defaults to the r1 logon log used by this notebook.
        The file must have a header row that includes a ``date`` column.

    Returns
    -------
    pandas.DataFrame
        Every column of the file except ``date``, indexed by the parsed
        ``date`` column.

    Notes
    -----
    ``date`` values that cannot be parsed become ``NaT`` (``errors='coerce'``)
    instead of raising, so the index may contain ``NaT`` entries.
    """
    df = pd.read_csv(path)
    df['date'] = pd.to_datetime(df['date'], errors='coerce')
    # Return the re-indexed frame directly rather than mutating with inplace=True.
    return df.set_index('date')
In [18]:
# Read the logon log from disk and preview its first five rows.
logon_df = get_logon_sample()
logon_df.head(n=5)
Out[18]:
In [19]:
# (rows, columns) of the logon frame; `date` is the index, not a column.
logon_df.shape
Out[19]:
In [20]:
# Print index type, column dtypes, non-null counts and memory usage for the logon frame.
logon_df.info()
Logon data is from Jan 4, 2010 - May 14, 2011
In [21]:
# Summary statistics for the logon frame as produced by DataFrame.describe().
logon_df.describe()
Out[21]:
In [22]:
# Non-null values per column for each calendar day of the logon log,
# then the mean of those daily counts within each month-end bin.
daily = (
    logon_df
    .groupby(pd.Grouper(freq='D'))
    .count()
)
monthly = (
    daily
    .groupby(pd.Grouper(freq='M'))
    .mean()
)
In [23]:
# Line chart of the per-day counts in `daily`, one line per column.
daily.plot.line(figsize=(15, 5))
Out[23]:
In [24]:
# Line chart of the averaged daily counts held in `monthly`.
monthly.plot.line(figsize=(15, 5))
Out[24]:
In [25]:
# Keep only the logon rows recorded for a single user, then preview them.
user_df = logon_df.loc[logon_df['user'].eq('DTAA/CGM0994')]
user_df.head()
Out[25]:
In [26]:
# (rows, columns) of the single-user slice.
user_df.shape
Out[26]:
In [27]:
# Non-null values per column for each calendar day of the selected user's
# records, then the mean of those daily counts within each month-end bin.
daily = (
    user_df
    .groupby(pd.Grouper(freq='D'))
    .count()
)
monthly = (
    daily
    .groupby(pd.Grouper(freq='M'))
    .mean()
)
In [28]:
# Line chart of the per-day counts in `daily`, one line per column.
daily.plot.line(figsize=(15, 5))
Out[28]:
In [29]:
# Line chart of the averaged daily counts held in `monthly`.
monthly.plot.line(figsize=(15, 5))
Out[29]:
In [30]:
def get_device_sample(path='data/insider_threat/r1/device.csv'):
    """Load the device log into a DataFrame indexed by timestamp.

    Parameters
    ----------
    path : str or path-like, optional
        CSV file to read. Defaults to the r1 device log used by this notebook.
        The file must have a header row that includes a ``date`` column.

    Returns
    -------
    pandas.DataFrame
        Every column of the file except ``date``, indexed by the parsed
        ``date`` column.

    Notes
    -----
    ``date`` values that cannot be parsed become ``NaT`` (``errors='coerce'``)
    instead of raising, so the index may contain ``NaT`` entries.
    """
    df = pd.read_csv(path)
    df['date'] = pd.to_datetime(df['date'], errors='coerce')
    # Return the re-indexed frame directly rather than mutating with inplace=True.
    return df.set_index('date')
In [31]:
# Read the device log from disk and preview its first five rows.
device_df = get_device_sample()
device_df.head(n=5)
Out[31]:
In [32]:
# (rows, columns) of the device frame; `date` is the index, not a column.
device_df.shape
Out[32]:
In [33]:
# Summary statistics for the device frame as produced by DataFrame.describe().
device_df.describe()
Out[33]:
In [34]:
# Non-null values per column for each calendar day of the device log,
# then the mean of those daily counts within each month-end bin.
daily = (
    device_df
    .groupby(pd.Grouper(freq='D'))
    .count()
)
monthly = (
    daily
    .groupby(pd.Grouper(freq='M'))
    .mean()
)
In [35]:
# Preview the first rows of the averaged daily counts.
monthly.head()
Out[35]:
In [36]:
# Line chart of the per-day counts in `daily`, one line per column.
daily.plot.line(figsize=(15, 5))
Out[36]:
In [37]:
# Line chart of the averaged daily counts held in `monthly`.
monthly.plot.line(figsize=(15, 5))
Out[37]:
In [38]:
# Keep only the device rows recorded for a single user, then preview them.
user_df = device_df.loc[device_df['user'].eq('DTAA/MQL0135')]
user_df.head()
Out[38]:
In [39]:
# (rows, columns) of the single-user slice.
user_df.shape
Out[39]:
In [40]:
# Non-null values per column for each calendar day of the selected user's
# records, then the mean of those daily counts within each month-end bin.
daily = (
    user_df
    .groupby(pd.Grouper(freq='D'))
    .count()
)
monthly = (
    daily
    .groupby(pd.Grouper(freq='M'))
    .mean()
)
In [41]:
# Line chart of the per-day counts in `daily`, one line per column.
daily.plot.line(figsize=(15, 5))
Out[41]:
In [42]:
# Line chart of the averaged daily counts held in `monthly`.
monthly.plot.line(figsize=(15, 5))
Out[42]:
In [ ]: