In [2]:
# Enable IPython's autoreload extension in mode 2 so edited modules are
# re-imported before each cell runs (no kernel restart needed while iterating
# on the helper module).
%load_ext autoreload
%autoreload 2
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

# NOTE(review): this star import appears to be the only source of `pd`, `json`,
# `load_survey_dfs` and `join_survey_and_traces` used by the cells below --
# confirm, and consider importing those names explicitly.
from join_traces_and_survey import *


/Users/ellerywulczyn/miniconda3/lib/python3.5/site-packages/pandas/computation/__init__.py:19: UserWarning: The installed version of numexpr 2.4.4 is not supported in pandas and will be not be used

  UserWarning)

In [2]:
# Load both survey frames via the project helper and report how many rows each
# one has (the second frame is the "in EL" subset, per the helper's naming).
d_survey, d_survey_in_el = load_survey_dfs()
print('Num Survey Responses:', len(d_survey))
print('Num Survey Responses in EL:', len(d_survey_in_el))


Num Survey Responses: 35948
Num Survey Responses in EL: 32792

In [3]:
# Load the joined click-trace records from JSON into a DataFrame.
# The file is opened in a `with` block so the handle is closed as soon as the
# JSON has been parsed (the bare `open(...)` previously left it dangling).
with open('../../data/click_traces/rs3v3/join_data.json') as join_data_file:
    d_click_traces = pd.DataFrame(json.load(join_data_file))
print('Num Click Traces:', d_click_traces.shape[0])


Num Click Traces: 58290

In [4]:
# Join the in-EL survey responses with the click traces, then report how many
# responses ended up with a trace attached.
df = join_survey_and_traces(d_survey_in_el, d_click_traces)
print('Num Responses with a trace', len(df))


Join Size:  34795
Has click_request:  34795
Num Responses with a trace 29493

In [5]:
# Persist the joined responses as a tab-separated file, without the index and
# with datetimes formatted as "YYYY-MM-DD HH:MM:SS".
df.to_csv(
    '../../data/responses_with_traces.tsv',
    sep='\t',
    index=False,
    date_format='%Y-%m-%d %H:%M:%S',
)

In [6]:
# Show (rows, columns) of the joined frame that was just written to disk.
print(df.shape)


(29493, 17)

In [12]:
# Read the exported TSV back and show its (rows, columns) as a sanity check.
# NOTE(review): the saved outputs show 29493 rows in `df` before writing but
# 29484 rows here; the cells were also run out of order, so confirm whether the
# TSV round-trip really drops rows or the file came from a different run.
pd.read_csv(
    '../../data/responses_with_traces.tsv',
    sep='\t',
).shape


Out[12]:
(29484, 17)

Sample


In [9]:
# Load the sample click-trace records from JSON into a DataFrame.
# The file is opened in a `with` block so the handle is closed once parsing is
# done, instead of being left open by a bare `open(...)`.
with open('../../data/click_traces/rs3v3/sample_data.json') as sample_data_file:
    d_sample = pd.DataFrame(json.load(sample_data_file))

In [10]:
# Persist the sample traces as a tab-separated file, without the index and
# with datetimes formatted as "YYYY-MM-DD HH:MM:SS".
d_sample.to_csv(
    '../../data/random_trace_sample.tsv',
    sep='\t',
    index=False,
    date_format='%Y-%m-%d %H:%M:%S',
)