In [51]:
%load_ext autoreload
%autoreload 2
%matplotlib inline

import copy
import pandas as pd
import numpy as np
import datetime


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload

In [52]:
d = pd.read_csv('../../data/raw_responses.tsv', sep = '\t')

In [53]:
d = d[d['Timestamp'] > '2/29/2016 16:17:00']

In [54]:
d.head()


Out[54]:
Timestamp I am reading this article to Prior to visiting this article I am reading this article because This is you survey ID. Please do not modify.
206 2/29/2016 16:17:07 look up a specific fact or to get a quick answer. I was already familiar with the topic. the topic came up in a conversation. 0707bbe2a7e3ecbb
207 2/29/2016 16:17:46 look up a specific fact or to get a quick answer. I was not familiar with the topic and I am lea... Writing story and need a specific fact c662db883c7d389c
208 2/29/2016 16:17:48 get an overview of the topic. I was already familiar with the topic. I am bored or randomly exploring Wikipedia for... 0e7f539e27dcf10f
209 2/29/2016 16:17:58 look up a specific fact or to get a quick answer. I was not familiar with the topic and I am lea... I am bored or randomly exploring Wikipedia for... 8cb0d465ee08e2c6
210 2/29/2016 16:18:04 get an in-depth understanding of the topic. I was already familiar with the topic. I want to know more about a current event (e.g... 42be8c727b522606

In [55]:
d = d.fillna('no_response')

In [56]:
d_q = {'Timestamp': 'submit_timestamp',
       'I am reading this article to': 'raw_information_depth',
       'Prior to visiting this article':'raw_prior_knowledge',
       'I am reading this article because' :  'raw_motivation',
       'This is you survey ID. Please do not modify.' : 'token'
             }


d_a1 = {'look up a specific fact or to get a quick answer.': 'fact',
       'get an overview of the topic.': 'overview',
       'get an in-depth understanding of the topic.': 'in-depth',
       'no_response': 'no response'}

d_a2 = {'I was already familiar with the topic.': 'familiar',
       'I was not familiar with the topic and I am learning about it for the first time.': 'unfamiliar',
        'no_response': 'no response'}

d_a3 = {'I have a work or school-related assignment.' : 'work/school',
       'I need to make a personal decision based on this topic (e.g. to buy a  book, choose a travel destination).': 'personal decision',
       "I want to know more about a current event (e.g. a soccer game, a recent earthquake, somebody's death).": 'current event',
       "the topic was referenced in a piece of media (e.g. TV, radio, article, film, book).": 'media',
       "the topic came up in a conversation.": "conversation",
       "I am bored or randomly exploring Wikipedia for fun.": 'bored/random',
        'no_response': 'no response',
        "this topic is important to me and I want to learn more about it. (e.g., to learn about a culture).": 'intrinsic learning'
       }
d_a = {'raw_information_depth': d_a1, 'raw_prior_knowledge': d_a2,'raw_motivation': d_a3 }

In [59]:
def reformat_dt(s):
    from_pattern = "%m/%d/%Y %H:%M:%S"  
    to_pattern = "%Y-%m-%d %H:%M:%S"
    from_dt = datetime.datetime.strptime (s, from_pattern)
    return from_dt.strftime(to_pattern)
     

def recode_motivation(x):
        x = str(x)
        for k, v in d_a3.items():
            x = x.replace(k,v) 
        reasons = [e if e in d_a3.values() else 'other' for e in x.split(', ')]
        return '|'.join(set(reasons))
    
def recode_df(d):
    d = copy.deepcopy(d)
    
    d.columns = [d_q[c] for c in d.columns]
    
    d['submit_timestamp'] = d['submit_timestamp'].apply(reformat_dt)
    
    d['information depth'] = d['raw_information_depth'].apply(lambda x: d_a['raw_information_depth'].get(x, 'other'))
    d['prior knowledge'] = d['raw_prior_knowledge'].apply(lambda x: d_a['raw_prior_knowledge'].get(x, 'other'))
    
    
    d['motivation'] = d['raw_motivation'].apply(recode_motivation)

    return d

In [60]:
dr = recode_df(d)

In [61]:
dr['prior knowledge'].value_counts()


Out[61]:
familiar       18520
unfamiliar     17210
no response      963
Name: prior knowledge, dtype: int64

In [62]:
dr['information depth'].value_counts()


Out[62]:
overview       14748
fact           12472
in-depth        8573
no response      900
Name: information depth, dtype: int64

In [63]:
for i, r in dr[dr['motivation'] == 'other'][:10].iterrows():
    print (r['raw_motivation'])


Writing story and need a specific fact
I read Wikipedia all the time. It doesn't really feel like self-improvement, but it's a lot healthier than playing video games or going to the bar.
General education
DINDU NUFFINS WE WUZ KANGS AND SHIIEEETTTT
I saw the name in a compilation in my music collection and thought 'whatever happened to...?'
it is in my intrest
the topic randomly popped into my head 
attending an event featuring the subject of the article.
LOL XD JIM CARREY
I have created it

In [64]:
dr['motivation'].value_counts()


Out[64]:
media                                                                                             5594
work/school                                                                                       4679
intrinsic learning                                                                                4119
bored/random                                                                                      3755
conversation                                                                                      2966
other                                                                                             1709
current event                                                                                     1511
personal decision                                                                                 1039
no response                                                                                        760
bored/random|media                                                                                 721
intrinsic learning|work/school                                                                     610
intrinsic learning|media                                                                           562
intrinsic learning|bored/random                                                                    514
conversation|media                                                                                 374
conversation|bored/random                                                                          338
conversation|intrinsic learning                                                                    334
intrinsic learning|personal decision                                                               333
current event|media                                                                                309
current event|intrinsic learning                                                                   262
bored/random|intrinsic learning|media                                                              239
conversation|bored/random|media                                                                    205
current event|intrinsic learning|media                                                             199
personal decision|current event|conversation|intrinsic learning|media|bored/random|work/school     197
conversation|intrinsic learning|bored/random                                                       146
current event|bored/random|media                                                                   146
current event|bored/random                                                                         144
conversation|intrinsic learning|media                                                              137
other|intrinsic learning                                                                           133
bored/random|work/school                                                                           118
current event|intrinsic learning|bored/random                                                      117
                                                                                                  ... 
other|conversation|intrinsic learning|media|work/school                                              2
personal decision|other|bored/random|work/school                                                     1
personal decision|current event|bored/random|media|work/school                                       1
other|current event|bored/random|personal decision                                                   1
personal decision|conversation|other|media|work/school                                               1
personal decision|other|current event|conversation|work/school                                       1
personal decision|conversation|other|work/school                                                     1
other|conversation|bored/random|intrinsic learning|media                                             1
other|conversation|bored/random|media|personal decision                                              1
personal decision|current event|other|work/school                                                    1
other|current event|conversation                                                                     1
other|current event|bored/random|intrinsic learning|media                                            1
personal decision|conversation|other|intrinsic learning|work/school                                  1
personal decision|current event|other|intrinsic learning|work/school                                 1
personal decision|other|current event|conversation|intrinsic learning|bored/random|work/school       1
other|current event|conversation|intrinsic learning|media|bored/random|work/school                   1
other|conversation|intrinsic learning|media|bored/random|work/school                                 1
personal decision|other|current event|intrinsic learning|media|bored/random                          1
personal decision|other|current event|conversation|intrinsic learning|work/school                    1
other|current event|personal decision                                                                1
other|bored/random|intrinsic learning|media|personal decision                                        1
other|current event|bored/random|work/school                                                         1
personal decision|other|intrinsic learning|bored/random|work/school                                  1
personal decision|other|current event|conversation|intrinsic learning|media                          1
personal decision|other|current event|intrinsic learning|bored/random|work/school                    1
personal decision|current event|conversation|bored/random|work/school                                1
personal decision|other|conversation|intrinsic learning|media|bored/random                           1
personal decision|other|conversation|intrinsic learning|bored/random|work/school                     1
other|current event|conversation|personal decision                                                   1
other|conversation|personal decision                                                                 1
Name: motivation, dtype: int64

In [65]:
dr = dr.drop_duplicates('token')

In [66]:
dr.to_csv('../../data/responses.tsv', sep = '\t', index = False)

In [ ]: