notebook.community

Edit and run



In [1]:

    
import pandas as pd



In [2]:

    
questions = pd.DataFrame.from_csv('question_simple.csv', index_col=None)
questions.head()









    Out[2]:






  
    
      
      QuestionId
      QuestionCreation
      QuestionLastActivity
      AcceptedAnsId
      AcceptedDate
      QScore
      QVotes
    
  
  
    
      0
      1
      2011-10-25T19:44:21.590
      2015-07-13T23:45:46.583
      8
      2011-10-28T00:00:00.000
      25
      50
    
    
      1
      1
      2011-10-25T19:44:21.590
      2015-07-13T23:45:46.583
      8
      2011-10-28T00:00:00.000
      25
      50
    
    
      2
      1
      2011-10-25T19:44:21.590
      2015-07-13T23:45:46.583
      8
      2011-10-28T00:00:00.000
      25
      50
    
    
      3
      1
      2011-10-25T19:44:21.590
      2015-07-13T23:45:46.583
      8
      2011-10-28T00:00:00.000
      25
      50
    
    
      4
      2
      2011-10-25T19:44:21.647
      2014-12-16T20:44:35.923
      11
      2011-10-26T00:00:00.000
      18
      40



In [3]:

    
a_questions = pd.DataFrame.from_csv('question_votes.csv', index_col=None)
a_questions[a_questions.AcceptedAnsId.notnull()].head()









    Out[3]:






  
    
      
      QuestionId
      QuestionCreation
      QuestionLastActivity
      AcceptedAnsId
      AcceptedDate
      VoteType
      QVoteCreation
    
  
  
    
      0
      1
      2011-10-25T19:44:21.590
      2015-07-13T23:45:46.583
      8
      2011-10-28T00:00:00.000
      2
      2011-10-25T19:44:21.590
    
    
      1
      1
      2011-10-25T19:44:21.590
      2015-07-13T23:45:46.583
      8
      2011-10-28T00:00:00.000
      2
      2011-10-25T19:44:21.590
    
    
      2
      1
      2011-10-25T19:44:21.590
      2015-07-13T23:45:46.583
      8
      2011-10-28T00:00:00.000
      2
      2011-10-25T19:44:21.590
    
    
      3
      1
      2011-10-25T19:44:21.590
      2015-07-13T23:45:46.583
      8
      2011-10-28T00:00:00.000
      2
      2011-10-25T19:44:21.590
    
    
      4
      1
      2011-10-25T19:44:21.590
      2015-07-13T23:45:46.583
      8
      2011-10-28T00:00:00.000
      2
      2011-10-25T19:44:21.590



In [4]:

    
get_votes_qv = lambda df: pd.Series((df.VoteType==2).cumsum() + (df.VoteType==3).cumsum(),name='QVotes')
get_score_qv = lambda df: pd.Series((df.VoteType==2).cumsum() - (df.VoteType==3).cumsum(),name='QScore')

predictors_qvotes = ['QuestionId','QuestionCreation','QuestionLastActivity','AcceptedAnsId','AcceptedDate','QVoteCreation']
f_q = lambda df: pd.concat([df[cname] for cname in df.columns.values.tolist() if cname in predictors_qvotes]+[get_score_qv(df),get_votes_qv(df)],axis=1)
a_questions = a_questions.sort_values(by='QVoteCreation').groupby(['QuestionId']).apply(f_q)
a_questions.head(10)









    Out[4]:






  
    
      
      QuestionId
      QuestionCreation
      QuestionLastActivity
      AcceptedAnsId
      AcceptedDate
      QVoteCreation
      QScore
      QVotes
    
  
  
    
      0
      1
      2011-10-25T19:44:21.590
      2015-07-13T23:45:46.583
      8
      2011-10-28T00:00:00.000
      2011-10-25T19:44:21.590
      1
      1
    
    
      60045
      1
      2011-10-25T19:44:21.590
      2015-07-13T23:45:46.583
      8
      2011-10-28T00:00:00.000
      2011-10-25T19:44:21.590
      2
      2
    
    
      60044
      1
      2011-10-25T19:44:21.590
      2015-07-13T23:45:46.583
      8
      2011-10-28T00:00:00.000
      2011-10-25T19:44:21.590
      3
      3
    
    
      60043
      1
      2011-10-25T19:44:21.590
      2015-07-13T23:45:46.583
      8
      2011-10-28T00:00:00.000
      2011-10-25T19:44:21.590
      4
      4
    
    
      60042
      1
      2011-10-25T19:44:21.590
      2015-07-13T23:45:46.583
      8
      2011-10-28T00:00:00.000
      2011-10-25T19:44:21.590
      5
      5
    
    
      60041
      1
      2011-10-25T19:44:21.590
      2015-07-13T23:45:46.583
      8
      2011-10-28T00:00:00.000
      2011-10-25T19:44:21.590
      6
      6
    
    
      60040
      1
      2011-10-25T19:44:21.590
      2015-07-13T23:45:46.583
      8
      2011-10-28T00:00:00.000
      2011-10-25T19:44:21.590
      7
      7
    
    
      60039
      1
      2011-10-25T19:44:21.590
      2015-07-13T23:45:46.583
      8
      2011-10-28T00:00:00.000
      2011-10-25T19:44:21.590
      8
      8
    
    
      60038
      1
      2011-10-25T19:44:21.590
      2015-07-13T23:45:46.583
      8
      2011-10-28T00:00:00.000
      2011-10-25T19:44:21.590
      9
      9
    
    
      60037
      1
      2011-10-25T19:44:21.590
      2015-07-13T23:45:46.583
      8
      2011-10-28T00:00:00.000
      2011-10-25T19:44:21.590
      10
      10



In [5]:

    
a_votes = pd.DataFrame.from_csv('votes-answers.csv', index_col=None)
a_votes = pd.merge(a_votes, a_questions, how='inner', on=['QuestionId'],suffixes=['_v', '_q'])
a_votes.head()









    Out[5]:






  
    
      
      VoteId
      VoteCreation
      AnsCreation
      VoteType
      AnsId
      QuestionId
      AnsWordCount
      QuestionCreation
      QuestionLastActivity
      AcceptedAnsId
      AcceptedDate
      QVoteCreation
      QScore
      QVotes
    
  
  
    
      0
      3
      2011-10-25T00:00:00.000
      2011-10-25T19:47:31.947
      2
      5
      3
      74
      2011-10-25T19:44:55.973
      2015-12-30T05:21:40.827
      5
      2011-10-25T00:00:00.000
      2011-10-25T19:44:55.973
      1
      1
    
    
      1
      3
      2011-10-25T00:00:00.000
      2011-10-25T19:47:31.947
      2
      5
      3
      74
      2011-10-25T19:44:55.973
      2015-12-30T05:21:40.827
      5
      2011-10-25T00:00:00.000
      2011-10-25T19:44:55.973
      2
      2
    
    
      2
      3
      2011-10-25T00:00:00.000
      2011-10-25T19:47:31.947
      2
      5
      3
      74
      2011-10-25T19:44:55.973
      2015-12-30T05:21:40.827
      5
      2011-10-25T00:00:00.000
      2011-10-25T19:44:55.973
      3
      3
    
    
      3
      3
      2011-10-25T00:00:00.000
      2011-10-25T19:47:31.947
      2
      5
      3
      74
      2011-10-25T19:44:55.973
      2015-12-30T05:21:40.827
      5
      2011-10-25T00:00:00.000
      2011-10-25T19:44:55.973
      4
      4
    
    
      4
      3
      2011-10-25T00:00:00.000
      2011-10-25T19:47:31.947
      2
      5
      3
      74
      2011-10-25T19:44:55.973
      2015-12-30T05:21:40.827
      5
      2011-10-25T00:00:00.000
      2011-10-25T19:44:55.973
      5
      5



In [6]:

    
predictors_raw_votans =['VoteId','VoteCreation','AnsCreation','VoteType','AnsId','QuestionId','AnsWordCount','QuestionCreation','AcceptedAnsId','AcceptedDate']

valid_qavotes = lambda df: df[df.VoteCreation>=df.QVoteCreation]
#Use twice valid_qavotes, could use once to improve efficiency, but check correctness of index selection
get_max_qv = lambda df: valid_qavotes(df).loc[valid_qavotes(df).QVotes.idxmax(),['QScore','QVotes']].squeeze()
get_latest_qv = lambda df : pd.Series([0,0],index=['QScore','QVotes']) if not (df.VoteCreation>=df.QVoteCreation).any() else get_max_qv(df)
get_head = lambda df: [df[cname].iloc[0] for cname in df.columns.values.tolist() if cname in predictors_raw_votans]
get_qv = lambda df : pd.Series(get_head(df),index=predictors_raw_votans).append(get_latest_qv(df)).to_frame()

a_votes = a_votes.sort_values(by='VoteCreation').groupby(['VoteId']).apply(get_qv).unstack(level=-1).reset_index(level=[0],drop=True)
a_votes.drop(a_votes.columns[[0]], axis=1, inplace=True)
a_votes.columns = a_votes.columns.droplevel()
a_votes.head()









    Out[6]:






  
    
      
      VoteCreation
      AnsCreation
      VoteType
      AnsId
      QuestionId
      AnsWordCount
      QuestionCreation
      AcceptedAnsId
      AcceptedDate
      QScore
      QVotes
    
  
  
    
      0
      2011-10-25T00:00:00.000
      2011-10-25T19:47:31.947
      2
      5
      3
      74
      2011-10-25T19:44:55.973
      5
      2011-10-25T00:00:00.000
      0
      0
    
    
      1
      2011-10-25T00:00:00.000
      2011-10-25T19:47:31.947
      2
      5
      3
      74
      2011-10-25T19:44:55.973
      5
      2011-10-25T00:00:00.000
      0
      0
    
    
      2
      2011-10-25T00:00:00.000
      2011-10-25T19:47:31.947
      2
      5
      3
      74
      2011-10-25T19:44:55.973
      5
      2011-10-25T00:00:00.000
      0
      0
    
    
      3
      2011-10-25T00:00:00.000
      2011-10-25T19:47:31.947
      2
      5
      3
      74
      2011-10-25T19:44:55.973
      5
      2011-10-25T00:00:00.000
      0
      0
    
    
      4
      2011-10-25T00:00:00.000
      2011-10-25T19:47:31.947
      2
      5
      3
      74
      2011-10-25T19:44:55.973
      5
      2011-10-25T00:00:00.000
      0
      0



In [7]:

    
date_placeholder = '2016-07-20T00:00:00.000' #Date After Data Set Collection
#a_votes.loc[a_votes.AcceptedDate == 'None','AcceptedDate'] = pd.to_datetime(date_placeholder)
a_votes['AcceptedDate'].fillna(pd.to_datetime(date_placeholder),inplace=True) 
a_votes['AcceptedAge'] = (pd.to_datetime(a_votes.AcceptedDate,format='%Y-%m-%d %H:%M:%S.%f')
                  -pd.to_datetime(a_votes.QuestionCreation,format='%Y-%m-%d %H:%M:%S.%f')).apply(lambda x: x.astype('timedelta64[D]').item().days)
a_votes['AcceptedAge'] = a_votes['AcceptedAge'] + 1

a_votes.loc[a_votes.AcceptedDate == pd.to_datetime(date_placeholder), 'AcceptedAge'] = -1
a_votes['Age'] = (pd.to_datetime(a_votes.VoteCreation,format='%Y-%m-%d %H:%M:%S.%f')
                  -pd.to_datetime(a_votes.QuestionCreation,format='%Y-%m-%d %H:%M:%S.%f')).apply(lambda x: x.astype('timedelta64[D]').item().days)
a_votes['Age'] = a_votes['Age'] + 1
a_votes.head()









    Out[7]:






  
    
      
      VoteCreation
      AnsCreation
      VoteType
      AnsId
      QuestionId
      AnsWordCount
      QuestionCreation
      AcceptedAnsId
      AcceptedDate
      QScore
      QVotes
      AcceptedAge
      Age
    
  
  
    
      0
      2011-10-25T00:00:00.000
      2011-10-25T19:47:31.947
      2
      5
      3
      74
      2011-10-25T19:44:55.973
      5
      2011-10-25T00:00:00.000
      0
      0
      0
      0
    
    
      1
      2011-10-25T00:00:00.000
      2011-10-25T19:47:31.947
      2
      5
      3
      74
      2011-10-25T19:44:55.973
      5
      2011-10-25T00:00:00.000
      0
      0
      0
      0
    
    
      2
      2011-10-25T00:00:00.000
      2011-10-25T19:47:31.947
      2
      5
      3
      74
      2011-10-25T19:44:55.973
      5
      2011-10-25T00:00:00.000
      0
      0
      0
      0
    
    
      3
      2011-10-25T00:00:00.000
      2011-10-25T19:47:31.947
      2
      5
      3
      74
      2011-10-25T19:44:55.973
      5
      2011-10-25T00:00:00.000
      0
      0
      0
      0
    
    
      4
      2011-10-25T00:00:00.000
      2011-10-25T19:47:31.947
      2
      5
      3
      74
      2011-10-25T19:44:55.973
      5
      2011-10-25T00:00:00.000
      0
      0
      0
      0



In [8]:

    
a_votes.drop(a_votes.columns[[0, 1, 6, 8]], axis=1, inplace=True)



In [9]:

    
get_score = lambda df: sum(df.VoteType==2) - sum(df.VoteType==3)
get_votes = lambda df: sum(df.VoteType==2) + sum(df.VoteType==3)

predictors = ['QuestionId','AnsWordCount','AcceptedAnsId','AcceptedAge','QScore',
              'QVotes','Score','Votes','Upvotes','Downvotes']
f = lambda df: pd.Series([df.QuestionId.iloc[0],df.AnsWordCount.iloc[0],df.AcceptedAnsId.iloc[0],df.AcceptedAge.iloc[0],
                          df.QScore.iloc[0],df.QVotes.iloc[0],get_score(df),get_votes(df),sum(df.VoteType==2),sum(df.VoteType==3)],index = predictors)
a_groups = a_votes.sort_values(by='Age').groupby(['AnsId','Age']).apply(f)
a_groups = a_groups.reset_index(level=[0,1],drop=False)
a_groups.head()









    Out[9]:






  
    
      
      AnsId
      Age
      QuestionId
      AnsWordCount
      AcceptedAnsId
      AcceptedAge
      QScore
      QVotes
      Score
      Votes
      Upvotes
      Downvotes
    
  
  
    
      0
      5
      0
      3
      74
      5
      0
      0
      0
      10
      10
      10
      0
    
    
      1
      5
      1
      3
      74
      5
      0
      480
      480
      1
      1
      1
      0
    
    
      2
      5
      2
      3
      74
      5
      0
      480
      480
      1
      1
      1
      0
    
    
      3
      5
      9
      3
      74
      5
      0
      480
      480
      1
      1
      1
      0
    
    
      4
      5
      16
      3
      74
      5
      0
      480
      480
      1
      1
      1
      0



In [10]:

    
cum_votes = lambda df: pd.Series(df['Votes'].cumsum(),name='CumVotes')
cum_score = lambda df: pd.Series(df['Score'].cumsum(),name='CumScore')

get_cumulative =lambda df: pd.concat([df[cname] for cname in df.columns.values.tolist()] + [cum_votes(df),cum_score(df)],axis=1)
ff = lambda df: get_cumulative(df.sort_values(by='Age'))
a_groups_c = a_groups.groupby(['AnsId']).apply(ff).reset_index(level=[0],drop=True)
a_groups_c.head()









    Out[10]:






  
    
      
      AnsId
      Age
      QuestionId
      AnsWordCount
      AcceptedAnsId
      AcceptedAge
      QScore
      QVotes
      Score
      Votes
      Upvotes
      Downvotes
      CumVotes
      CumScore
    
  
  
    
      0
      5
      0
      3
      74
      5
      0
      0
      0
      10
      10
      10
      0
      10
      10
    
    
      1
      5
      1
      3
      74
      5
      0
      480
      480
      1
      1
      1
      0
      11
      11
    
    
      2
      5
      2
      3
      74
      5
      0
      480
      480
      1
      1
      1
      0
      12
      12
    
    
      3
      5
      9
      3
      74
      5
      0
      480
      480
      1
      1
      1
      0
      13
      13
    
    
      4
      5
      16
      3
      74
      5
      0
      480
      480
      1
      1
      1
      0
      14
      14



In [11]:

    
prior_quality = float(a_groups_c['Upvotes'].sum())/(a_groups_c['Upvotes'].sum() + a_groups_c['Downvotes'].sum())
a_groups_c['ReScore'] = (a_groups_c['CumScore']+prior_quality)/(a_groups_c['CumVotes']+1.0)
a_groups_c['QReScore'] = a_groups_c['QScore']/(a_groups_c['QVotes']+1.0)
a_groups_c.head()









    Out[11]:






  
    
      
      AnsId
      Age
      QuestionId
      AnsWordCount
      AcceptedAnsId
      AcceptedAge
      QScore
      QVotes
      Score
      Votes
      Upvotes
      Downvotes
      CumVotes
      CumScore
      ReScore
      QReScore
    
  
  
    
      0
      5
      0
      3
      74
      5
      0
      0
      0
      10
      10
      10
      0
      10
      10
      0.998846
      0.000000
    
    
      1
      5
      1
      3
      74
      5
      0
      480
      480
      1
      1
      1
      0
      11
      11
      0.998942
      0.997921
    
    
      2
      5
      2
      3
      74
      5
      0
      480
      480
      1
      1
      1
      0
      12
      12
      0.999024
      0.997921
    
    
      3
      5
      9
      3
      74
      5
      0
      480
      480
      1
      1
      1
      0
      13
      13
      0.999093
      0.997921
    
    
      4
      5
      16
      3
      74
      5
      0
      480
      480
      1
      1
      1
      0
      14
      14
      0.999154
      0.997921



In [12]:

    
votes_com_f = a_groups_c



In [13]:

    
from itertools import izip
def rank_ans(df,score_only,re_score):
    rk_name = "ReScore_rank" if re_score else "AnsRank"
    def rank_iter():
        cache = {}
        accepted = 0
        for row in df.itertuples():
            if re_score:
                cache[row.AnsId] = row.ReScore
            else :
                cache[row.AnsId] = row.Score
            # rank, nb_ans
            if (not score_only) and row.AcceptedAge>-1 and (row.AnsId == row.AcceptedAnsId) and row.Age >=row.AcceptedAge:
                accepted = 1
                if row.AnsId in cache:
                    del cache[row.AnsId]
                yield (1,len(cache)+accepted,row.Index)
            else :
                rank = sorted(cache, key= lambda k:cache[k],reverse=True).index(row.AnsId) + 1 + accepted
                yield (rank,len(cache)+accepted,row.Index)
            
    ranks, ans_counts, indices = izip(*list(rank_iter())) #TODO: optimize for the future
    return [pd.Series(ranks,name=rk_name, index=indices), pd.Series(ans_counts,name="Ans_count", index=indices)]

predictors = ['QuestionId','AnsId','AnsWordCount','AcceptedAnsId','Age',
              'Score','Votes','Upvotes','Downvotes','CumScore','CumVotes','QScore'
              ,'QVotes','ReScore','QReScore','AnsRank','ReScore_rank']
get_ranks = lambda df,score_only=False,re_score=False: pd.concat(
    [df[cname] for cname in df.columns.values.tolist() if cname in predictors] + rank_ans(df,score_only,re_score),axis=1)
sort_age_score = lambda df: df.sort_values(by=['Age','Score'],ascending=[True,False])



In [14]:

    
votes_com_f = votes_com_f.groupby(['QuestionId']).apply(
    lambda df: get_ranks(sort_age_score(df))).reset_index(drop=True)
votes_com_f = votes_com_f.groupby(['QuestionId']).apply(
    lambda df: get_ranks(sort_age_score(df),score_only=True,re_score=True)).reset_index(drop=True)

votes_com_f['Pbias'] = 1.0/votes_com_f['AnsRank']
votes_com_f['DRank'] = votes_com_f['AnsRank'] - votes_com_f['ReScore_rank']
votes_com_f.head()









    Out[14]:






  
    
      
      AnsId
      Age
      QuestionId
      AnsWordCount
      AcceptedAnsId
      QScore
      QVotes
      Score
      Votes
      Upvotes
      Downvotes
      CumVotes
      CumScore
      ReScore
      QReScore
      AnsRank
      ReScore_rank
      Ans_count
      Pbias
      DRank
    
  
  
    
      0
      22
      0
      1
      76
      8
      0
      0
      4
      4
      4
      0
      4
      4
      0.997462
      0
      1
      1
      1
      1.000000
      0
    
    
      1
      8
      0
      1
      108
      8
      0
      0
      2
      2
      2
      0
      2
      2
      0.995769
      0
      2
      2
      2
      0.500000
      0
    
    
      2
      29
      0
      1
      106
      8
      0
      0
      2
      2
      2
      0
      2
      2
      0.995769
      0
      3
      3
      3
      0.333333
      0
    
    
      3
      31
      0
      1
      130
      8
      0
      0
      1
      1
      1
      0
      1
      1
      0.993654
      0
      4
      4
      4
      0.250000
      0
    
    
      4
      37
      0
      1
      82
      8
      0
      0
      1
      1
      1
      0
      1
      1
      0.993654
      0
      4
      4
      5
      0.250000
      0



In [15]:

    
#AnsRank and Ans_count define unique EPbias
sum_by_rank = lambda df: df.groupby('AnsRank').apply(
    lambda df: pd.Series([df.Votes.sum()],name='EPbias').to_frame()).unstack(level=-1).reset_index(level=0,drop=False)
get_ratio = lambda df: sum_by_rank(df).EPbias/(sum_by_rank(df).EPbias.sum())
ratio_per_rank = lambda df: pd.concat([sum_by_rank(df).AnsRank, get_ratio(df)],axis=1)
get_position_bias = lambda df: pd.merge(df,ratio_per_rank(df),how='inner',on=['AnsRank'])

votes = votes_com_f.groupby(['Ans_count']).apply(get_position_bias).reset_index(level=[0,1],drop=True)
votes.columns.values[-1] = "EPbias"
votes.head()









    Out[15]:






  
    
      
      AnsId
      Age
      QuestionId
      AnsWordCount
      AcceptedAnsId
      QScore
      QVotes
      Score
      Votes
      Upvotes
      ...
      CumVotes
      CumScore
      ReScore
      QReScore
      AnsRank
      ReScore_rank
      Ans_count
      Pbias
      DRank
      EPbias
    
  
  
    
      0
      22
      0
      1
      76
      8
      0
      0
      4
      4
      4
      ...
      4
      4
      0.997462
      0.000000
      1
      1
      1
      1
      0
      1
    
    
      1
      10
      0
      2
      24
      11
      0
      0
      11
      11
      11
      ...
      11
      11
      0.998942
      0.000000
      1
      1
      1
      1
      0
      1
    
    
      2
      5
      0
      3
      74
      5
      0
      0
      10
      10
      10
      ...
      10
      10
      0.998846
      0.000000
      1
      1
      1
      1
      0
      1
    
    
      3
      5
      1
      3
      74
      5
      480
      480
      1
      1
      1
      ...
      11
      11
      0.998942
      0.997921
      1
      1
      1
      1
      0
      1
    
    
      4
      5
      2
      3
      74
      5
      480
      480
      1
      1
      1
      ...
      12
      12
      0.999024
      0.997921
      1
      1
      1
      1
      0
      1
    
  

5 rows × 21 columns



In [16]:

    
test_epbias = votes.groupby(['Ans_count','AnsRank']).first().reset_index(
    level=[0,1],drop=False)[['Ans_count','AnsRank','EPbias']]
test_epbias.to_csv('EPbiasbyAnsCountRank.csv')



In [17]:

    
#GET NULL say yay!!!
tt = votes.groupby(['AnsId','Age']).Votes.count().reset_index(level=[0,1],drop=False)
tt[tt.Votes>2].head()









    Out[17]:






  
    
      
      AnsId
      Age
      Votes



In [18]:

    
votes.to_csv(path_or_buf='AnsVotes_TSeries.csv')



In [ ]:



In [ ]:



In [ ]:



In [ ]:

	QuestionId	QuestionCreation	QuestionLastActivity	AcceptedAnsId	AcceptedDate	QScore	QVotes
0	1	2011-10-25T19:44:21.590	2015-07-13T23:45:46.583	8	2011-10-28T00:00:00.000	25	50
1	1	2011-10-25T19:44:21.590	2015-07-13T23:45:46.583	8	2011-10-28T00:00:00.000	25	50
2	1	2011-10-25T19:44:21.590	2015-07-13T23:45:46.583	8	2011-10-28T00:00:00.000	25	50
3	1	2011-10-25T19:44:21.590	2015-07-13T23:45:46.583	8	2011-10-28T00:00:00.000	25	50
4	2	2011-10-25T19:44:21.647	2014-12-16T20:44:35.923	11	2011-10-26T00:00:00.000	18	40

	VoteId	VoteCreation	AnsCreation	VoteType	AnsId	QuestionId	AnsWordCount	QuestionCreation	QuestionLastActivity	AcceptedAnsId	AcceptedDate	QVoteCreation	QScore	QVotes
0	3	2011-10-25T00:00:00.000	2011-10-25T19:47:31.947	2	5	3	74	2011-10-25T19:44:55.973	2015-12-30T05:21:40.827	5	2011-10-25T00:00:00.000	2011-10-25T19:44:55.973	1	1
1	3	2011-10-25T00:00:00.000	2011-10-25T19:47:31.947	2	5	3	74	2011-10-25T19:44:55.973	2015-12-30T05:21:40.827	5	2011-10-25T00:00:00.000	2011-10-25T19:44:55.973	2	2
2	3	2011-10-25T00:00:00.000	2011-10-25T19:47:31.947	2	5	3	74	2011-10-25T19:44:55.973	2015-12-30T05:21:40.827	5	2011-10-25T00:00:00.000	2011-10-25T19:44:55.973	3	3
3	3	2011-10-25T00:00:00.000	2011-10-25T19:47:31.947	2	5	3	74	2011-10-25T19:44:55.973	2015-12-30T05:21:40.827	5	2011-10-25T00:00:00.000	2011-10-25T19:44:55.973	4	4
4	3	2011-10-25T00:00:00.000	2011-10-25T19:47:31.947	2	5	3	74	2011-10-25T19:44:55.973	2015-12-30T05:21:40.827	5	2011-10-25T00:00:00.000	2011-10-25T19:44:55.973	5	5

	AnsId	Age	QuestionId	AnsWordCount	AcceptedAnsId	QScore	QVotes	Score	Votes	Upvotes
0	5	0	3	74	5	0	0	10	10	10
1	5	1	3	74	5	480	480	1	1	1
2	5	2	3	74	5	480	480	1	1	1
3	5	9	3	74	5	480	480	1	1	1
4	5	16	3	74	5	480	480	1	1	1

	AnsId	Age	QuestionId	AnsWordCount	AcceptedAnsId	QScore	QVotes	Score	Votes	Upvotes	CumVotes	CumScore
0	5	0	3	74	5	0	0	10	10	10	10	10
1	5	1	3	74	5	480	480	1	1	1	11	11
2	5	2	3	74	5	480	480	1	1	1	12	12
3	5	9	3	74	5	480	480	1	1	1	13	13
4	5	16	3	74	5	480	480	1	1	1	14	14

	AnsId	Age	QuestionId	AnsWordCount	AcceptedAnsId	QScore	QVotes	Score	Votes	Upvotes	CumVotes	CumScore	ReScore	QReScore
0	5	0	3	74	5	0	0	10	10	10	10	10	0.998846	0.000000
1	5	1	3	74	5	480	480	1	1	1	11	11	0.998942	0.997921
2	5	2	3	74	5	480	480	1	1	1	12	12	0.999024	0.997921
3	5	9	3	74	5	480	480	1	1	1	13	13	0.999093	0.997921
4	5	16	3	74	5	480	480	1	1	1	14	14	0.999154	0.997921

	AnsId	QuestionId	AnsWordCount	AcceptedAnsId	Score	Votes	Upvotes	CumVotes	CumScore	ReScore	AnsRank	ReScore_rank	Ans_count	Pbias
0	22	1	76	8	4	4	4	4	4	0.997462	1	1	1	1.000000
1	8	1	108	8	2	2	2	2	2	0.995769	2	2	2	0.500000
2	29	1	106	8	2	2	2	2	2	0.995769	3	3	3	0.333333
3	31	1	130	8	1	1	1	1	1	0.993654	4	4	4	0.250000
4	37	1	82	8	1	1	1	1	1	0.993654	4	4	5	0.250000

	AnsId	Age	QuestionId	AnsWordCount	AcceptedAnsId	QScore	QVotes	Score	Votes	Upvotes
0	5	0	3	74	5	0	0	10	10	10
1	5	1	3	74	5	480	480	1	1	1
2	5	2	3	74	5	480	480	1	1	1
3	5	9	3	74	5	480	480	1	1	1
4	5	16	3	74	5	480	480	1	1	1

	AnsId	Age	QuestionId	AnsWordCount	AcceptedAnsId	QScore	QVotes	Score	Votes	Upvotes	CumVotes	CumScore
0	5	0	3	74	5	0	0	10	10	10	10	10
1	5	1	3	74	5	480	480	1	1	1	11	11
2	5	2	3	74	5	480	480	1	1	1	12	12
3	5	9	3	74	5	480	480	1	1	1	13	13
4	5	16	3	74	5	480	480	1	1	1	14	14

	AnsId	Age	QuestionId	AnsWordCount	AcceptedAnsId	QScore	QVotes	Score	Votes	Upvotes
0	5	0	3	74	5	0	0	10	10	10
1	5	1	3	74	5	480	480	1	1	1
2	5	2	3	74	5	480	480	1	1	1
3	5	9	3	74	5	480	480	1	1	1
4	5	16	3	74	5	480	480	1	1	1

	AnsId	Age	QuestionId	AnsWordCount	AcceptedAnsId	QScore	QVotes	Score	Votes	Upvotes	CumVotes	CumScore
0	5	0	3	74	5	0	0	10	10	10	10	10
1	5	1	3	74	5	480	480	1	1	1	11	11
2	5	2	3	74	5	480	480	1	1	1	12	12
3	5	9	3	74	5	480	480	1	1	1	13	13
4	5	16	3	74	5	480	480	1	1	1	14	14