In [1]:
%matplotlib inline
from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import precision_recall_fscore_support
from sklearn.linear_model import LinearRegression
import matplotlib.pylab as plt
import pandas as pd
import numpy as np
from pandas.tools.plotting import scatter_matrix
In [32]:
import json
from pprint import pprint

# NOTE(review): absolute, machine-specific location of the input file; adjust
# it before running the notebook on another machine.
DATA_PATH = '/Users/danielkershaw/PycharmProjects/DiffusionSimulation/data/twitter-geo-test'

# The file is read as one JSON document per line. `data` is overwritten on
# every iteration, so only the LAST record of the file ends up in `df`.
data = None
with open(DATA_PATH) as data_file:
    for line in data_file:
        if not line.strip():
            # A blank (e.g. trailing) line would make json.loads raise.
            continue
        data = json.loads(line)

if data is None:
    # Fail with a clear message instead of a NameError further down.
    raise ValueError("no JSON records found in %s" % DATA_PATH)

# presumably data["raw"] is itself JSON that read_json can parse into a table — TODO confirm
df = pd.read_json(data["raw"])
import datetime


def dt(X):
    """Convert an epoch timestamp given in milliseconds to a naive local-time datetime."""
    # fromtimestamp expects seconds, hence the division by 1000.
    seconds = float(X / 1000)
    return datetime.datetime.fromtimestamp(seconds)
# Turn the raw millisecond timestamps into datetime objects.
df['time'] = df['time'].apply(dt)

# Order the rows chronologically. DataFrame.sort() no longer exists in pandas;
# sort_values() is its replacement and returns a new, sorted frame.
df = df.sort_values(["time"])
df
Out[32]:
In [41]:
# NOTE: from this cell on, `dt` names a DataFrame and no longer the dt()
# timestamp helper defined in the loading cell.
# Index the rows by their timestamp and keep the per-day maximum of each column.
time_index = pd.DatetimeIndex(df['time'])
dt = df.set_index(time_index).resample('d').max()

# Daily calendar running from the first resampled day to 30 days after it.
first_day = dt.index[0]
idx = pd.date_range(first_day, first_day + datetime.timedelta(days=30))

# Re-align the daily frame onto that calendar (forward-filling), forward-fill
# any NaNs still present, then show the activated-user count.
dt_window = dt.reindex(idx, fill_value=0, method='ffill').fillna(method='ffill')
dt_window["numberActivatedUsers"]
Out[41]:
In [42]:
# Re-align the daily frame `dt` onto the `idx` calendar (forward-filling),
# forward-fill any NaNs still present, then show the activation count.
activations_window = dt.reindex(idx, fill_value=0, method='ffill').fillna(method='ffill')
activations_window["numberOfActivations"]
Out[42]:
In [180]:
import datetime


# Re-defined here because the name `dt` was rebound to a DataFrame by the
# daily-resampling cell earlier in the notebook.
def dt(X):
    """Convert an epoch timestamp given in milliseconds to a naive local-time datetime."""
    # fromtimestamp expects seconds, hence the division by 1000.
    seconds = float(X / 1000)
    return datetime.datetime.fromtimestamp(seconds)
# Convert only when the column is not already datetime64 (dtype kind 'M').
# The loading cell earlier in the notebook performs the same conversion, and
# applying dt() to values that are already Timestamps raises TypeError
# (Timestamp / 1000), which broke a top-to-bottom run of the notebook.
if df['time'].dtype.kind != 'M':
    df['time'] = df['time'].apply(dt)
In [181]:
# Preview the first rows of df.
df.head()
Out[181]:
In [198]:
def to_date(X):
    """Return the calendar date (time-of-day dropped) of the datetime-like ``X``.

    The previous body called ``X.day()``. ``day`` is an integer attribute of
    ``datetime``/``Timestamp`` objects, not a method, so that call raised
    ``TypeError``.

    NOTE(review): ``date()`` was chosen to match the function's name; if the
    day-of-month was intended, use ``X.day`` instead.
    """
    return X.date()
# Index the events by timestamp so they can be sliced and resampled by time.
dft = df.set_index(pd.DatetimeIndex(df['time']))

# Keep only the events in the 30 days following the earliest one.
# searchsorted returns integer positions and requires a sorted index (df is
# sorted by time in the loading cell), so the slice has to be positional:
# .iloc replaces the .ix indexer, which has been removed from pandas.
start = dft.index.searchsorted(dft.index[0])
end = dft.index.searchsorted(dft.index[0] + datetime.timedelta(days=30))
dft = dft.iloc[start:end]

# One row per event, each counting as a single activation.
dftt = pd.DataFrame(index=dft.index)
dftt["activations"] = 1

# Activations per calendar day. resample(...).sum() replaces the removed
# resample(..., how='sum') form; fillna(0) covers days without events.
dftt = dftt.resample('d').sum().fillna(0)

# Running total expressed as a fraction of all activations in the window.
dftt["activations"] = dftt["activations"].cumsum() / dftt["activations"].sum()
In [199]:
# Display dftt with its index moved into an ordinary column (dftt itself is not modified).
dftt.reset_index()
Out[199]:
In [187]:
# y: the "activations" column of dftt; x: 0-based position of each row.
y = dftt["activations"]
x = np.arange(len(y))
In [188]:
# Mean of the "activations" column of dftt.
dftt["activations"].mean()
Out[188]:
In [189]:
# Draw the (x, y) points as circle markers. Despite its name, `axes` holds the
# list of Line2D artists returned by plt.plot.
axes = plt.plot(x, y, 'o')

# Fix the view to x in [0, 30] and y in [0, 1]; as the last expression of the
# cell, the value returned by plt.axis is also what the cell displays.
axis_limits = (0, 30, 0, 1)
plt.axis(axis_limits)
Out[189]:
In [ ]: