In [1]:
import pandas as pd
import numpy as np
from math import log
In [2]:
# January
# Load each January snapshot ('<YYYY-MM-DD>_userdat.csv') and normalise its
# 'registered' column: the text after the last comma is dropped, and entries
# containing 'Today' / 'Yesterday' are replaced by the snapshot date / the day
# before it, formatted as 'Month DD, YYYY'.  Rows with any missing value are
# then discarded.  The labels are derived from the file date instead of being
# typed by hand (the hand-typed 'yesterday' labels carried the year 1114).
jan_frames = []
for snap_date in pd.to_datetime(['2014-01-07', '2014-01-11', '2014-01-20', '2014-01-28']):
    snapshot = pd.read_csv(snap_date.strftime('%Y-%m-%d') + '_userdat.csv', sep=",")
    today_label = snap_date.strftime('%B %d, %Y')
    yesterday_label = (snap_date - pd.Timedelta(days=1)).strftime('%B %d, %Y')
    raw = snapshot['registered']
    # Missing values stay missing here and are removed by dropna() below.
    cleaned = raw.str.rsplit(',', n=1).str[0]
    cleaned = cleaned.mask(raw.str.contains('Yesterday', na=False, regex=False), yesterday_label)
    cleaned = cleaned.mask(raw.str.contains('Today', na=False, regex=False), today_label)
    snapshot['registered'] = cleaned
    jan_frames.append(snapshot.dropna())
data0107, data0111, data0120, data0128 = jan_frames
In [3]:
# Stack the four January snapshots, oldest first, in a single concat call.
df_new = pd.concat([data0107, data0111, data0120, data0128])
In [4]:
# Display the concatenated January snapshots.
df_new
Out[4]:
In [5]:
# One row per username for January: the last occurrence in df_new wins.
df_1 = df_new.drop_duplicates(subset=['username'], keep='last')
In [6]:
# February
# Load each February snapshot ('<YYYY-MM-DD>_userdat.csv') and normalise its
# 'registered' column: the text after the last comma is dropped, and entries
# containing 'Today' / 'Yesterday' are replaced by the snapshot date / the day
# before it, formatted as 'Month DD, YYYY'.  Rows with any missing value are
# then discarded.  Each frame is built from its own file (previously data0217
# was overwritten with the January 20 snapshot), and the labels are derived
# from the file date (the hand-typed 'yesterday' labels carried year 1114).
feb_frames = []
for snap_date in pd.to_datetime(['2014-02-08', '2014-02-17', '2014-02-21', '2014-02-24']):
    snapshot = pd.read_csv(snap_date.strftime('%Y-%m-%d') + '_userdat.csv', sep=",")
    today_label = snap_date.strftime('%B %d, %Y')
    yesterday_label = (snap_date - pd.Timedelta(days=1)).strftime('%B %d, %Y')
    raw = snapshot['registered']
    # Missing values stay missing here and are removed by dropna() below.
    cleaned = raw.str.rsplit(',', n=1).str[0]
    cleaned = cleaned.mask(raw.str.contains('Yesterday', na=False, regex=False), yesterday_label)
    cleaned = cleaned.mask(raw.str.contains('Today', na=False, regex=False), today_label)
    snapshot['registered'] = cleaned
    feb_frames.append(snapshot.dropna())
data0208, data0217, data0221, data0224 = feb_frames

# Stack the snapshots oldest first and keep the last row seen per username.
df_new = pd.concat(feb_frames)
df_2 = df_new.drop_duplicates(['username'], keep='last')
In [7]:
# March
# Load each March snapshot ('<YYYY-MM-DD>_userdat.csv') and normalise its
# 'registered' column: the text after the last comma is dropped, and entries
# containing 'Today' / 'Yesterday' are replaced by the snapshot date / the day
# before it, formatted as 'Month DD, YYYY'.  Rows with any missing value are
# then discarded.  The labels are derived from the file date instead of being
# typed by hand (the hand-typed 'yesterday' labels carried the year 1114).
mar_frames = []
for snap_date in pd.to_datetime(['2014-03-03', '2014-03-07', '2014-03-10']):
    snapshot = pd.read_csv(snap_date.strftime('%Y-%m-%d') + '_userdat.csv', sep=",")
    today_label = snap_date.strftime('%B %d, %Y')
    yesterday_label = (snap_date - pd.Timedelta(days=1)).strftime('%B %d, %Y')
    raw = snapshot['registered']
    # Missing values stay missing here and are removed by dropna() below.
    cleaned = raw.str.rsplit(',', n=1).str[0]
    cleaned = cleaned.mask(raw.str.contains('Yesterday', na=False, regex=False), yesterday_label)
    cleaned = cleaned.mask(raw.str.contains('Today', na=False, regex=False), today_label)
    snapshot['registered'] = cleaned
    mar_frames.append(snapshot.dropna())
data0303, data0307, data0310 = mar_frames

# Stack the snapshots oldest first and keep the last row seen per username.
df_new = pd.concat(mar_frames)
df_3 = df_new.drop_duplicates(['username'], keep='last')
In [8]:
# April
# Load each April snapshot ('<YYYY-MM-DD>_userdat.csv') and normalise its
# 'registered' column: the text after the last comma is dropped, and entries
# containing 'Today' / 'Yesterday' are replaced by the snapshot date / the day
# before it, formatted as 'Month DD, YYYY'.  Rows with any missing value are
# then discarded.  The labels are derived from the file date instead of being
# typed by hand (the hand-typed 'yesterday' labels carried the year 1114).
apr_frames = []
for snap_date in pd.to_datetime(['2014-04-07', '2014-04-16', '2014-04-21', '2014-04-28']):
    snapshot = pd.read_csv(snap_date.strftime('%Y-%m-%d') + '_userdat.csv', sep=",")
    today_label = snap_date.strftime('%B %d, %Y')
    yesterday_label = (snap_date - pd.Timedelta(days=1)).strftime('%B %d, %Y')
    raw = snapshot['registered']
    # Missing values stay missing here and are removed by dropna() below.
    cleaned = raw.str.rsplit(',', n=1).str[0]
    cleaned = cleaned.mask(raw.str.contains('Yesterday', na=False, regex=False), yesterday_label)
    cleaned = cleaned.mask(raw.str.contains('Today', na=False, regex=False), today_label)
    snapshot['registered'] = cleaned
    apr_frames.append(snapshot.dropna())
data0407, data0416, data0421, data0428 = apr_frames

# Stack the snapshots oldest first and keep the last row seen per username.
df_new = pd.concat(apr_frames)
df_4 = df_new.drop_duplicates(['username'], keep='last')
In [9]:
# May
# Load each May snapshot ('<YYYY-MM-DD>_userdat.csv') and normalise its
# 'registered' column: the text after the last comma is dropped, and entries
# containing 'Today' / 'Yesterday' are replaced by the snapshot date / the day
# before it, formatted as 'Month DD, YYYY'.  Rows with any missing value are
# then discarded.  The labels are derived from the file date instead of being
# typed by hand (the hand-typed 'yesterday' labels carried the year 1114).
may_frames = []
for snap_date in pd.to_datetime(['2014-05-03', '2014-05-10', '2014-05-17',
                                 '2014-05-24', '2014-05-29']):
    snapshot = pd.read_csv(snap_date.strftime('%Y-%m-%d') + '_userdat.csv', sep=",")
    today_label = snap_date.strftime('%B %d, %Y')
    yesterday_label = (snap_date - pd.Timedelta(days=1)).strftime('%B %d, %Y')
    raw = snapshot['registered']
    # Missing values stay missing here and are removed by dropna() below.
    cleaned = raw.str.rsplit(',', n=1).str[0]
    cleaned = cleaned.mask(raw.str.contains('Yesterday', na=False, regex=False), yesterday_label)
    cleaned = cleaned.mask(raw.str.contains('Today', na=False, regex=False), today_label)
    snapshot['registered'] = cleaned
    may_frames.append(snapshot.dropna())
data0503, data0510, data0517, data0524, data0529 = may_frames

# Stack the snapshots oldest first and keep the last row seen per username.
df_new = pd.concat(may_frames)
df_5 = df_new.drop_duplicates(['username'], keep='last')
In [10]:
# June
# Load each June snapshot ('<YYYY-MM-DD>_userdat.csv') and normalise its
# 'registered' column: the text after the last comma is dropped, and entries
# containing 'Today' / 'Yesterday' are replaced by the snapshot date / the day
# before it, formatted as 'Month DD, YYYY'.  Rows with any missing value are
# then discarded.  The labels are derived from the file date instead of being
# typed by hand (the hand-typed 'yesterday' labels carried the year 1114).
jun_frames = []
for snap_date in pd.to_datetime(['2014-06-04', '2014-06-07', '2014-06-11',
                                 '2014-06-19', '2014-06-24', '2014-06-30']):
    snapshot = pd.read_csv(snap_date.strftime('%Y-%m-%d') + '_userdat.csv', sep=",")
    today_label = snap_date.strftime('%B %d, %Y')
    yesterday_label = (snap_date - pd.Timedelta(days=1)).strftime('%B %d, %Y')
    raw = snapshot['registered']
    # Missing values stay missing here and are removed by dropna() below.
    cleaned = raw.str.rsplit(',', n=1).str[0]
    cleaned = cleaned.mask(raw.str.contains('Yesterday', na=False, regex=False), yesterday_label)
    cleaned = cleaned.mask(raw.str.contains('Today', na=False, regex=False), today_label)
    snapshot['registered'] = cleaned
    jun_frames.append(snapshot.dropna())
data0604, data0607, data0611, data0619, data0624, data0630 = jun_frames

# Stack the snapshots oldest first and keep the last row seen per username.
df_new = pd.concat(jun_frames)
df_6 = df_new.drop_duplicates(['username'], keep='last')
In [11]:
# July
# Load each July snapshot ('<YYYY-MM-DD>_userdat.csv') and normalise its
# 'registered' column: the text after the last comma is dropped, and entries
# containing 'Today' / 'Yesterday' are replaced by the snapshot date / the day
# before it, formatted as 'Month DD, YYYY'.  Rows with any missing value are
# then discarded.  The labels are derived from the file date instead of being
# typed by hand (the hand-typed 'yesterday' labels carried the year 1114).
jul_frames = []
for snap_date in pd.to_datetime(['2014-07-05', '2014-07-20', '2014-07-26', '2014-07-30']):
    snapshot = pd.read_csv(snap_date.strftime('%Y-%m-%d') + '_userdat.csv', sep=",")
    today_label = snap_date.strftime('%B %d, %Y')
    yesterday_label = (snap_date - pd.Timedelta(days=1)).strftime('%B %d, %Y')
    raw = snapshot['registered']
    # Missing values stay missing here and are removed by dropna() below.
    cleaned = raw.str.rsplit(',', n=1).str[0]
    cleaned = cleaned.mask(raw.str.contains('Yesterday', na=False, regex=False), yesterday_label)
    cleaned = cleaned.mask(raw.str.contains('Today', na=False, regex=False), today_label)
    snapshot['registered'] = cleaned
    jul_frames.append(snapshot.dropna())
data0705, data0720, data0726, data0730 = jul_frames

# Stack the snapshots oldest first and keep the last row seen per username.
df_new = pd.concat(jul_frames)
df_7 = df_new.drop_duplicates(['username'], keep='last')
In [12]:
# August
# Load each August snapshot ('<YYYY-MM-DD>_userdat.csv') and normalise its
# 'registered' column: the text after the last comma is dropped, and entries
# containing 'Today' / 'Yesterday' are replaced by the snapshot date / the day
# before it, formatted as 'Month DD, YYYY'.  Rows with any missing value are
# then discarded.  The labels are derived from the file date instead of being
# typed by hand (the hand-typed 'yesterday' labels carried the year 1114).
aug_frames = []
for snap_date in pd.to_datetime(['2014-08-03', '2014-08-09', '2014-08-14',
                                 '2014-08-22', '2014-08-27', '2014-08-31']):
    snapshot = pd.read_csv(snap_date.strftime('%Y-%m-%d') + '_userdat.csv', sep=",")
    today_label = snap_date.strftime('%B %d, %Y')
    yesterday_label = (snap_date - pd.Timedelta(days=1)).strftime('%B %d, %Y')
    raw = snapshot['registered']
    # Missing values stay missing here and are removed by dropna() below.
    cleaned = raw.str.rsplit(',', n=1).str[0]
    cleaned = cleaned.mask(raw.str.contains('Yesterday', na=False, regex=False), yesterday_label)
    cleaned = cleaned.mask(raw.str.contains('Today', na=False, regex=False), today_label)
    snapshot['registered'] = cleaned
    aug_frames.append(snapshot.dropna())
data0803, data0809, data0814, data0822, data0827, data0831 = aug_frames

# Stack the snapshots oldest first and keep the last row seen per username.
df_new = pd.concat(aug_frames)
df_8 = df_new.drop_duplicates(['username'], keep='last')
In [13]:
# September
# Load each September snapshot ('<YYYY-MM-DD>_userdat.csv') and normalise its
# 'registered' column: the text after the last comma is dropped, and entries
# containing 'Today' / 'Yesterday' are replaced by the snapshot date / the day
# before it, formatted as 'Month DD, YYYY'.  Rows with any missing value are
# then discarded.  The labels are derived from the file date instead of being
# typed by hand (the hand-typed 'yesterday' labels carried the year 1114).
sep_frames = []
for snap_date in pd.to_datetime(['2014-09-05', '2014-09-11', '2014-09-15',
                                 '2014-09-24', '2014-09-26', '2014-09-30']):
    snapshot = pd.read_csv(snap_date.strftime('%Y-%m-%d') + '_userdat.csv', sep=",")
    today_label = snap_date.strftime('%B %d, %Y')
    yesterday_label = (snap_date - pd.Timedelta(days=1)).strftime('%B %d, %Y')
    raw = snapshot['registered']
    # Missing values stay missing here and are removed by dropna() below.
    cleaned = raw.str.rsplit(',', n=1).str[0]
    cleaned = cleaned.mask(raw.str.contains('Yesterday', na=False, regex=False), yesterday_label)
    cleaned = cleaned.mask(raw.str.contains('Today', na=False, regex=False), today_label)
    snapshot['registered'] = cleaned
    sep_frames.append(snapshot.dropna())
data0905, data0911, data0915, data0924, data0926, data0930 = sep_frames

# Stack the snapshots oldest first and keep the last row seen per username.
df_new = pd.concat(sep_frames)
df_9 = df_new.drop_duplicates(['username'], keep='last')
In [14]:
# October
# Load each October snapshot ('<YYYY-MM-DD>_userdat.csv') and normalise its
# 'registered' column: the text after the last comma is dropped, and entries
# containing 'Today' / 'Yesterday' are replaced by the snapshot date / the day
# before it, formatted as 'Month DD, YYYY'.  Rows with any missing value are
# then discarded.  The labels are derived from the file date instead of being
# typed by hand (the hand-typed 'yesterday' labels carried the year 1114).
oct_frames = []
for snap_date in pd.to_datetime(['2014-10-04', '2014-10-08', '2014-10-11', '2014-10-15',
                                 '2014-10-17', '2014-10-24', '2014-10-27', '2014-10-31']):
    snapshot = pd.read_csv(snap_date.strftime('%Y-%m-%d') + '_userdat.csv', sep=",")
    today_label = snap_date.strftime('%B %d, %Y')
    yesterday_label = (snap_date - pd.Timedelta(days=1)).strftime('%B %d, %Y')
    raw = snapshot['registered']
    # Missing values stay missing here and are removed by dropna() below.
    cleaned = raw.str.rsplit(',', n=1).str[0]
    cleaned = cleaned.mask(raw.str.contains('Yesterday', na=False, regex=False), yesterday_label)
    cleaned = cleaned.mask(raw.str.contains('Today', na=False, regex=False), today_label)
    snapshot['registered'] = cleaned
    oct_frames.append(snapshot.dropna())
(data1004, data1008, data1011, data1015,
 data1017, data1024, data1027, data1031) = oct_frames

# Stack the snapshots oldest first and keep the last row seen per username.
df_new = pd.concat(oct_frames)
df_10 = df_new.drop_duplicates(['username'], keep='last')
In [15]:
# November
# Load each November snapshot ('<YYYY-MM-DD>_userdat.csv') and normalise its
# 'registered' column: the text after the last comma is dropped, and entries
# containing 'Today' / 'Yesterday' are replaced by the snapshot date / the day
# before it, formatted as 'Month DD, YYYY'.  Rows with any missing value are
# then discarded.  The labels are derived from the file date (so the day
# before November 01 becomes 'October 31, 2014'); the hand-typed 'yesterday'
# labels carried the year 1114.
nov_frames = []
for snap_date in pd.to_datetime(['2014-11-01', '2014-11-04', '2014-11-06']):
    snapshot = pd.read_csv(snap_date.strftime('%Y-%m-%d') + '_userdat.csv', sep=",")
    today_label = snap_date.strftime('%B %d, %Y')
    yesterday_label = (snap_date - pd.Timedelta(days=1)).strftime('%B %d, %Y')
    raw = snapshot['registered']
    # Missing values stay missing here and are removed by dropna() below.
    cleaned = raw.str.rsplit(',', n=1).str[0]
    cleaned = cleaned.mask(raw.str.contains('Yesterday', na=False, regex=False), yesterday_label)
    cleaned = cleaned.mask(raw.str.contains('Today', na=False, regex=False), today_label)
    snapshot['registered'] = cleaned
    nov_frames.append(snapshot.dropna())
data1101, data1104, data1106 = nov_frames

# Stack the snapshots oldest first and keep the last row seen per username.
df_new = pd.concat(nov_frames)
df_11 = df_new.drop_duplicates(['username'], keep='last')
In [ ]:
In [113]:
# Start the wide table: usernames present in both January and February.
df = df_1.merge(df_2, on='username')
In [114]:
# Inner-join the March table onto the running wide table by username.
df = df.merge(df_3, on='username')
In [115]:
# Inner-join the April table by username.  Explicit suffixes are required:
# the default '_x'/'_y' pair was already used by an earlier merge in this
# chain, so reusing it would create duplicate column labels, which recent
# pandas rejects.  Column order is unchanged, so the positional rename of
# df.columns later in the notebook still lines up.
df = pd.merge(df, df_4, on='username', suffixes=('_3', '_4'))
In [116]:
# Inner-join the May table onto the running wide table by username.
df = df.merge(df_5, on='username')
In [117]:
# Inner-join the June table by username.  Explicit suffixes are required:
# the default '_x'/'_y' pair was already used by an earlier merge in this
# chain, so reusing it would create duplicate column labels, which recent
# pandas rejects.  Column order is unchanged, so the positional rename of
# df.columns later in the notebook still lines up.
df = pd.merge(df, df_6, on='username', suffixes=('_5', '_6'))
In [118]:
# Inner-join the July table onto the running wide table by username.
df = df.merge(df_7, on='username')
In [119]:
# Inner-join the August table by username.  Explicit suffixes are required:
# the default '_x'/'_y' pair was already used by an earlier merge in this
# chain, so reusing it would create duplicate column labels, which recent
# pandas rejects.  Column order is unchanged, so the positional rename of
# df.columns later in the notebook still lines up.
df = pd.merge(df, df_8, on='username', suffixes=('_7', '_8'))
In [120]:
# Inner-join the September table onto the running wide table by username.
df = df.merge(df_9, on='username')
In [121]:
# Inner-join the October table by username.  Explicit suffixes are required:
# the default '_x'/'_y' pair was already used by an earlier merge in this
# chain, so reusing it would create duplicate column labels, which recent
# pandas rejects.  Column order is unchanged, so the positional rename of
# df.columns later in the notebook still lines up.
df = pd.merge(df, df_10, on='username', suffixes=('_9', '_10'))
In [122]:
# Inner-join the November table onto the running wide table by username.
df = df.merge(df_11, on='username')
In [123]:
# Display the wide table produced by the chain of username merges.
df
Out[123]:
In [124]:
# Positional rename: 'username' followed by the same six fields for each of
# the eleven merged tables, suffixed 1..11 in merge order.
df.columns = ['username'] + [
    field + str(month)
    for month in range(1, 12)
    for field in ('pos', 'posts', 'ppd', 'up', 'down', 'registered')
]
In [ ]:
In [125]:
# Slice the '...1' columns out of the wide table under generic names.
data = {
    target: df[source]
    for target, source in [('username', 'username'), ('position', 'pos1'),
                           ('posts', 'posts1'), ('ppd', 'ppd1'), ('up', 'up1'),
                           ('down', 'down1'), ('registered', 'registered1')]
}
merge1 = pd.DataFrame(data)
merge1 = merge1.loc[:, ['username', 'position', 'posts', 'ppd', 'up', 'down', 'registered']]
merge1
Out[125]:
In [126]:
# Slice the '...2' columns out of the wide table under generic names.
data = {
    target: df[source]
    for target, source in [('username', 'username'), ('position', 'pos2'),
                           ('posts', 'posts2'), ('ppd', 'ppd2'), ('up', 'up2'),
                           ('down', 'down2'), ('registered', 'registered2')]
}
merge2 = pd.DataFrame(data)
merge2 = merge2.loc[:, ['username', 'position', 'posts', 'ppd', 'up', 'down', 'registered']]
In [127]:
# Slice the '...3' columns out of the wide table under generic names.
data = {
    target: df[source]
    for target, source in [('username', 'username'), ('position', 'pos3'),
                           ('posts', 'posts3'), ('ppd', 'ppd3'), ('up', 'up3'),
                           ('down', 'down3'), ('registered', 'registered3')]
}
merge3 = pd.DataFrame(data)
merge3 = merge3.loc[:, ['username', 'position', 'posts', 'ppd', 'up', 'down', 'registered']]
In [128]:
# Slice the '...4' columns out of the wide table under generic names.
data = {
    target: df[source]
    for target, source in [('username', 'username'), ('position', 'pos4'),
                           ('posts', 'posts4'), ('ppd', 'ppd4'), ('up', 'up4'),
                           ('down', 'down4'), ('registered', 'registered4')]
}
merge4 = pd.DataFrame(data)
merge4 = merge4.loc[:, ['username', 'position', 'posts', 'ppd', 'up', 'down', 'registered']]
In [129]:
# Slice the '...5' columns out of the wide table under generic names.
data = {
    target: df[source]
    for target, source in [('username', 'username'), ('position', 'pos5'),
                           ('posts', 'posts5'), ('ppd', 'ppd5'), ('up', 'up5'),
                           ('down', 'down5'), ('registered', 'registered5')]
}
merge5 = pd.DataFrame(data)
merge5 = merge5.loc[:, ['username', 'position', 'posts', 'ppd', 'up', 'down', 'registered']]
In [130]:
# Slice the '...6' columns out of the wide table under generic names.
data = {
    target: df[source]
    for target, source in [('username', 'username'), ('position', 'pos6'),
                           ('posts', 'posts6'), ('ppd', 'ppd6'), ('up', 'up6'),
                           ('down', 'down6'), ('registered', 'registered6')]
}
merge6 = pd.DataFrame(data)
merge6 = merge6.loc[:, ['username', 'position', 'posts', 'ppd', 'up', 'down', 'registered']]
In [131]:
# Slice the '...7' columns out of the wide table under generic names.
data = {
    target: df[source]
    for target, source in [('username', 'username'), ('position', 'pos7'),
                           ('posts', 'posts7'), ('ppd', 'ppd7'), ('up', 'up7'),
                           ('down', 'down7'), ('registered', 'registered7')]
}
merge7 = pd.DataFrame(data)
merge7 = merge7.loc[:, ['username', 'position', 'posts', 'ppd', 'up', 'down', 'registered']]
In [132]:
# Slice the '...8' columns out of the wide table under generic names.
data = {
    target: df[source]
    for target, source in [('username', 'username'), ('position', 'pos8'),
                           ('posts', 'posts8'), ('ppd', 'ppd8'), ('up', 'up8'),
                           ('down', 'down8'), ('registered', 'registered8')]
}
merge8 = pd.DataFrame(data)
merge8 = merge8.loc[:, ['username', 'position', 'posts', 'ppd', 'up', 'down', 'registered']]
In [133]:
# Slice the '...9' columns out of the wide table under generic names.
data = {
    target: df[source]
    for target, source in [('username', 'username'), ('position', 'pos9'),
                           ('posts', 'posts9'), ('ppd', 'ppd9'), ('up', 'up9'),
                           ('down', 'down9'), ('registered', 'registered9')]
}
merge9 = pd.DataFrame(data)
merge9 = merge9.loc[:, ['username', 'position', 'posts', 'ppd', 'up', 'down', 'registered']]
In [134]:
# Slice the '...10' columns out of the wide table under generic names.
data = {
    target: df[source]
    for target, source in [('username', 'username'), ('position', 'pos10'),
                           ('posts', 'posts10'), ('ppd', 'ppd10'), ('up', 'up10'),
                           ('down', 'down10'), ('registered', 'registered10')]
}
merge10 = pd.DataFrame(data)
merge10 = merge10.loc[:, ['username', 'position', 'posts', 'ppd', 'up', 'down', 'registered']]
In [135]:
# Slice the '...11' columns out of the wide table under generic names.
data = {
    target: df[source]
    for target, source in [('username', 'username'), ('position', 'pos11'),
                           ('posts', 'posts11'), ('ppd', 'ppd11'), ('up', 'up11'),
                           ('down', 'down11'), ('registered', 'registered11')]
}
merge11 = pd.DataFrame(data)
merge11 = merge11.loc[:, ['username', 'position', 'posts', 'ppd', 'up', 'down', 'registered']]
merge11
Out[135]:
In [136]:
# Raw score: absolute difference between the 'up' and 'down' columns.
merge11['score'] = (merge11['up'] - merge11['down']).abs()
merge11
Out[136]:
In [137]:
# Score = log10 of the absolute 'up' - 'down' difference, floored at 1 so a
# zero difference scores 0 instead of -inf.  Computed from 'up'/'down' in one
# vectorised step (DataFrame.set_value no longer exists in current pandas),
# which also makes the cell safe to re-run.
merge11['score'] = np.log10(np.maximum((merge11['up'] - merge11['down']).abs(), 1))
In [138]:
# Highest score first.
merge11_sort = merge11.sort_values(by='score', ascending=False)
In [139]:
# NOTE(review): 'DoctorClu' is excluded from every ranking; the reason is not
# visible in this notebook.
# .copy() so that adding 'rank' writes to an independent frame.
merge11_sort = merge11_sort[merge11_sort.username != 'DoctorClu'].copy()
# 1-based ranks sized from the filtered frame instead of a hard-coded row
# count; `rank` stays defined because later cells reuse it.
rank = range(1, len(merge11_sort) + 1)
merge11_sort['rank'] = rank
In [140]:
# Usernames in the first 20 rows of merge11_sort.
top20 = merge11_sort['username'].iloc[:20].values
In [141]:
# Score = log10 of the absolute 'up' - 'down' difference, floored at 1 so a
# zero difference scores 0 instead of -inf (vectorised; DataFrame.set_value
# no longer exists in current pandas).
merge10['score'] = np.log10(np.maximum((merge10['up'] - merge10['down']).abs(), 1))
merge10_sort = merge10.sort_values(by='score', ascending=False)
# NOTE(review): 'DoctorClu' is excluded from every ranking; reason not visible here.
# .copy() so that adding 'rank' writes to an independent frame.
merge10_sort = merge10_sort[merge10_sort.username != 'DoctorClu'].copy()
# 1-based rank sized from this frame rather than a shared fixed-length range.
merge10_sort['rank'] = range(1, len(merge10_sort) + 1)
In [142]:
# Show the first 50 rows of merge10_sort.
merge10_sort[:50]
Out[142]:
In [143]:
# Score = log10 of the absolute 'up' - 'down' difference, floored at 1 so a
# zero difference scores 0 instead of -inf (vectorised; DataFrame.set_value
# no longer exists in current pandas).
merge9['score'] = np.log10(np.maximum((merge9['up'] - merge9['down']).abs(), 1))
merge9_sort = merge9.sort_values(by='score', ascending=False)
# NOTE(review): 'DoctorClu' is excluded from every ranking; reason not visible here.
# .copy() so that adding 'rank' writes to an independent frame.
merge9_sort = merge9_sort[merge9_sort.username != 'DoctorClu'].copy()
# 1-based rank sized from this frame rather than a shared fixed-length range.
merge9_sort['rank'] = range(1, len(merge9_sort) + 1)
In [144]:
# Show the first 20 rows of merge9_sort.
merge9_sort[:20]
Out[144]:
In [145]:
# Score = log10 of the absolute 'up' - 'down' difference, floored at 1 so a
# zero difference scores 0 instead of -inf (vectorised; DataFrame.set_value
# no longer exists in current pandas).
merge8['score'] = np.log10(np.maximum((merge8['up'] - merge8['down']).abs(), 1))
merge8_sort = merge8.sort_values(by='score', ascending=False)
# NOTE(review): 'DoctorClu' is excluded from every ranking; reason not visible here.
# .copy() so that adding 'rank' writes to an independent frame.
merge8_sort = merge8_sort[merge8_sort.username != 'DoctorClu'].copy()
# 1-based rank sized from this frame rather than a shared fixed-length range.
merge8_sort['rank'] = range(1, len(merge8_sort) + 1)
In [146]:
# Show the first 20 rows of merge8_sort.
merge8_sort[:20]
Out[146]:
In [147]:
# Score = log10 of the absolute 'up' - 'down' difference, floored at 1 so a
# zero difference scores 0 instead of -inf (vectorised; DataFrame.set_value
# no longer exists in current pandas).
merge7['score'] = np.log10(np.maximum((merge7['up'] - merge7['down']).abs(), 1))
merge7_sort = merge7.sort_values(by='score', ascending=False)
# NOTE(review): 'DoctorClu' is excluded from every ranking; reason not visible here.
# .copy() so that adding 'rank' writes to an independent frame.
merge7_sort = merge7_sort[merge7_sort.username != 'DoctorClu'].copy()
# 1-based rank sized from this frame rather than a shared fixed-length range.
merge7_sort['rank'] = range(1, len(merge7_sort) + 1)
In [148]:
# Show the first 20 rows of merge7_sort.
merge7_sort[:20]
Out[148]:
In [149]:
# Score = log10 of the absolute 'up' - 'down' difference, floored at 1 so a
# zero difference scores 0 instead of -inf (vectorised; DataFrame.set_value
# no longer exists in current pandas).
merge6['score'] = np.log10(np.maximum((merge6['up'] - merge6['down']).abs(), 1))
merge6_sort = merge6.sort_values(by='score', ascending=False)
# NOTE(review): 'DoctorClu' is excluded from every ranking; reason not visible here.
# .copy() so that adding 'rank' writes to an independent frame.
merge6_sort = merge6_sort[merge6_sort.username != 'DoctorClu'].copy()
# 1-based rank sized from this frame rather than a shared fixed-length range.
merge6_sort['rank'] = range(1, len(merge6_sort) + 1)
In [150]:
# Show the first 30 rows of merge6_sort.
merge6_sort[:30]
Out[150]:
In [151]:
# Score = log10 of the absolute 'up' - 'down' difference, floored at 1 so a
# zero difference scores 0 instead of -inf (vectorised; DataFrame.set_value
# no longer exists in current pandas).
merge5['score'] = np.log10(np.maximum((merge5['up'] - merge5['down']).abs(), 1))
merge5_sort = merge5.sort_values(by='score', ascending=False)
# NOTE(review): 'DoctorClu' is excluded from every ranking; reason not visible here.
# .copy() so that adding 'rank' writes to an independent frame.
merge5_sort = merge5_sort[merge5_sort.username != 'DoctorClu'].copy()
# 1-based rank sized from this frame rather than a shared fixed-length range.
merge5_sort['rank'] = range(1, len(merge5_sort) + 1)
In [55]:
# Score = log10 of the absolute 'up' - 'down' difference, floored at 1 so a
# zero difference scores 0 instead of -inf (vectorised; DataFrame.set_value
# no longer exists in current pandas).
merge4['score'] = np.log10(np.maximum((merge4['up'] - merge4['down']).abs(), 1))
merge4_sort = merge4.sort_values(by='score', ascending=False)
# NOTE(review): 'DoctorClu' is excluded from every ranking; reason not visible here.
# .copy() so that adding 'rank' writes to an independent frame.
merge4_sort = merge4_sort[merge4_sort.username != 'DoctorClu'].copy()
# 1-based rank sized from this frame rather than a shared fixed-length range.
merge4_sort['rank'] = range(1, len(merge4_sort) + 1)
In [56]:
# Score = log10 of the absolute 'up' - 'down' difference, floored at 1 so a
# zero difference scores 0 instead of -inf (vectorised; DataFrame.set_value
# no longer exists in current pandas).
merge3['score'] = np.log10(np.maximum((merge3['up'] - merge3['down']).abs(), 1))
merge3_sort = merge3.sort_values(by='score', ascending=False)
# NOTE(review): 'DoctorClu' is excluded from every ranking; reason not visible here.
# .copy() so that adding 'rank' writes to an independent frame.
merge3_sort = merge3_sort[merge3_sort.username != 'DoctorClu'].copy()
# 1-based rank sized from this frame rather than a shared fixed-length range.
merge3_sort['rank'] = range(1, len(merge3_sort) + 1)
In [57]:
# Score = log10 of the absolute 'up' - 'down' difference, floored at 1 so a
# zero difference scores 0 instead of -inf (vectorised; DataFrame.set_value
# no longer exists in current pandas).
merge2['score'] = np.log10(np.maximum((merge2['up'] - merge2['down']).abs(), 1))
merge2_sort = merge2.sort_values(by='score', ascending=False)
# NOTE(review): 'DoctorClu' is excluded from every ranking; reason not visible here.
# .copy() so that adding 'rank' writes to an independent frame.
merge2_sort = merge2_sort[merge2_sort.username != 'DoctorClu'].copy()
# 1-based rank sized from this frame rather than a shared fixed-length range.
merge2_sort['rank'] = range(1, len(merge2_sort) + 1)
In [58]:
# Score = log10 of the absolute 'up' - 'down' difference, floored at 1 so a
# zero difference scores 0 instead of -inf (vectorised; DataFrame.set_value
# no longer exists in current pandas).
merge1['score'] = np.log10(np.maximum((merge1['up'] - merge1['down']).abs(), 1))
merge1_sort = merge1.sort_values(by='score', ascending=False)
# NOTE(review): 'DoctorClu' is excluded from every ranking; reason not visible here.
# .copy() so that adding 'rank' writes to an independent frame.
merge1_sort = merge1_sort[merge1_sort.username != 'DoctorClu'].copy()
# 1-based rank sized from this frame rather than a shared fixed-length range.
merge1_sort['rank'] = range(1, len(merge1_sort) + 1)
In [59]:
# Rank history of top20[0]: pull that username's 'rank' out of every sorted
# table (merge1_sort .. merge11_sort, in that order) and join the pieces.
# A table with no matching username contributes an empty piece.
monthly_sorted = [merge1_sort, merge2_sort, merge3_sort, merge4_sort,
                  merge5_sort, merge6_sort, merge7_sort, merge8_sort,
                  merge9_sort, merge10_sort, merge11_sort]
a, b, c, d, e, f, g, h, i, j, k = [
    frame['rank'].values[frame['username'].values == top20[0]]
    for frame in monthly_sorted
]
top1rank = np.concatenate([a, b, c, d, e, f, g, h, i, j, k])
In [60]:
# Rank history of top20[1]: pull that username's 'rank' out of every sorted
# table (merge1_sort .. merge11_sort, in that order) and join the pieces.
# A table with no matching username contributes an empty piece.
monthly_sorted = [merge1_sort, merge2_sort, merge3_sort, merge4_sort,
                  merge5_sort, merge6_sort, merge7_sort, merge8_sort,
                  merge9_sort, merge10_sort, merge11_sort]
a, b, c, d, e, f, g, h, i, j, k = [
    frame['rank'].values[frame['username'].values == top20[1]]
    for frame in monthly_sorted
]
top2rank = np.concatenate([a, b, c, d, e, f, g, h, i, j, k])
In [61]:
# Rank history of top20[2]: pull that username's 'rank' out of every sorted
# table (merge1_sort .. merge11_sort, in that order) and join the pieces.
# A table with no matching username contributes an empty piece.
monthly_sorted = [merge1_sort, merge2_sort, merge3_sort, merge4_sort,
                  merge5_sort, merge6_sort, merge7_sort, merge8_sort,
                  merge9_sort, merge10_sort, merge11_sort]
a, b, c, d, e, f, g, h, i, j, k = [
    frame['rank'].values[frame['username'].values == top20[2]]
    for frame in monthly_sorted
]
top3rank = np.concatenate([a, b, c, d, e, f, g, h, i, j, k])
In [62]:
# Rank history of top20[3]: pull that username's 'rank' out of every sorted
# table (merge1_sort .. merge11_sort, in that order) and join the pieces.
# A table with no matching username contributes an empty piece.
monthly_sorted = [merge1_sort, merge2_sort, merge3_sort, merge4_sort,
                  merge5_sort, merge6_sort, merge7_sort, merge8_sort,
                  merge9_sort, merge10_sort, merge11_sort]
a, b, c, d, e, f, g, h, i, j, k = [
    frame['rank'].values[frame['username'].values == top20[3]]
    for frame in monthly_sorted
]
top4rank = np.concatenate([a, b, c, d, e, f, g, h, i, j, k])
In [63]:
# Rank history of top20[4]: pull that username's 'rank' out of every sorted
# table (merge1_sort .. merge11_sort, in that order) and join the pieces.
# A table with no matching username contributes an empty piece.
monthly_sorted = [merge1_sort, merge2_sort, merge3_sort, merge4_sort,
                  merge5_sort, merge6_sort, merge7_sort, merge8_sort,
                  merge9_sort, merge10_sort, merge11_sort]
a, b, c, d, e, f, g, h, i, j, k = [
    frame['rank'].values[frame['username'].values == top20[4]]
    for frame in monthly_sorted
]
top5rank = np.concatenate([a, b, c, d, e, f, g, h, i, j, k])
In [64]:
# Rank histories for top20[5] .. top20[8] (top6rank .. top9rank).
# For each of those usernames, the 'rank' value is pulled out of every sorted
# table (merge1_sort .. merge11_sort, in that order) and the pieces are joined.
# A table with no matching username contributes an empty piece, so that
# history comes out shorter than 11 entries.
monthly_sorted = [merge1_sort, merge2_sort, merge3_sort, merge4_sort,
                  merge5_sort, merge6_sort, merge7_sort, merge8_sort,
                  merge9_sort, merge10_sort, merge11_sort]
rank_histories = []
for slot in range(5, 9):
    # a..k are rebound on every pass, as in the original stanzas, in case
    # another cell still reads them.
    a, b, c, d, e, f, g, h, i, j, k = [
        frame['rank'].values[frame['username'].values == top20[slot]]
        for frame in monthly_sorted
    ]
    rank_histories.append(np.concatenate([a, b, c, d, e, f, g, h, i, j, k]))
top6rank, top7rank, top8rank, top9rank = rank_histories
In [65]:
# Rank histories for top20[9] .. top20[12] (top10rank .. top13rank).
# For each of those usernames, the 'rank' value is pulled out of every sorted
# table (merge1_sort .. merge11_sort, in that order) and the pieces are joined.
# A table with no matching username contributes an empty piece, so that
# history comes out shorter than 11 entries.
monthly_sorted = [merge1_sort, merge2_sort, merge3_sort, merge4_sort,
                  merge5_sort, merge6_sort, merge7_sort, merge8_sort,
                  merge9_sort, merge10_sort, merge11_sort]
rank_histories = []
for slot in range(9, 13):
    # a..k are rebound on every pass, as in the original stanzas, in case
    # another cell still reads them.
    a, b, c, d, e, f, g, h, i, j, k = [
        frame['rank'].values[frame['username'].values == top20[slot]]
        for frame in monthly_sorted
    ]
    rank_histories.append(np.concatenate([a, b, c, d, e, f, g, h, i, j, k]))
top10rank, top11rank, top12rank, top13rank = rank_histories
In [66]:
# Rank histories for top20[13] .. top20[19] (top14rank .. top20rank).
# For each of those usernames, the 'rank' value is pulled out of every sorted
# table (merge1_sort .. merge11_sort, in that order) and the pieces are joined.
# A table with no matching username contributes an empty piece, so that
# history comes out shorter than 11 entries.
monthly_sorted = [merge1_sort, merge2_sort, merge3_sort, merge4_sort,
                  merge5_sort, merge6_sort, merge7_sort, merge8_sort,
                  merge9_sort, merge10_sort, merge11_sort]
rank_histories = []
for slot in range(13, 20):
    # a..k are rebound on every pass, as in the original stanzas, in case
    # another cell still reads them.
    a, b, c, d, e, f, g, h, i, j, k = [
        frame['rank'].values[frame['username'].values == top20[slot]]
        for frame in monthly_sorted
    ]
    rank_histories.append(np.concatenate([a, b, c, d, e, f, g, h, i, j, k]))
(top14rank, top15rank, top16rank, top17rank,
 top18rank, top19rank, top20rank) = rank_histories
In [67]:
# One column per top20 entry, holding that entry's rank history.
# pd.DataFrame needs every history to have the same length.
rank_columns = [top1rank, top2rank, top3rank, top4rank, top5rank,
                top6rank, top7rank, top8rank, top9rank, top10rank,
                top11rank, top12rank, top13rank, top14rank, top15rank,
                top16rank, top17rank, top18rank, top19rank, top20rank]
data = {top20[slot]: history for slot, history in enumerate(rank_columns)}
totalRank = pd.DataFrame(data)
In [68]:
# Label each row with its month. This has to be an ordered list: the previous
# set literal has no defined order (and newer pandas versions reject sets
# outright), so the labels could not be relied on to line up with the rows.
# NOTE(review): assumes the rows are in merge1 .. merge11 order and that those
# correspond to 2014-01 .. 2014-11 -- confirm.
totalRank['date'] = ['201401', '201402', '201403', '201404', '201405', '201406',
                     '201407', '201408', '201409', '201410', '201411']
In [69]:
# Write the rank table to 'totalrank.tsv' as tab-separated UTF-8 text.
# 'date' is still an ordinary column here (set_index happens in a later cell),
# so pandas also writes the default row index as the first column.
totalRank.to_csv('totalrank.tsv', sep='\t', encoding='utf-8')
In [70]:
# Use the month labels as the row index. Guarded so that re-running this cell
# after 'date' has already become the index is a no-op instead of a KeyError.
if 'date' in totalRank.columns:
    totalRank = totalRank.set_index('date')
In [71]:
# Show top20, whose entries are used as the column keys of totalRank.
top20
Out[71]:
In [72]:
# Show the assembled rank table (one column per top20 entry).
totalRank
Out[72]:
In [73]:
# Spot-check a single history: the concatenated ranks for top20[2].
top3rank
Out[73]:
In [74]:
# Score histories for top20[0] .. top20[16] (top1score .. top17score).
# For each of those usernames, the 'score' value is pulled out of every sorted
# table (merge1_sort .. merge11_sort, in that order) and the pieces are joined.
# A table with no matching username contributes an empty piece, so that
# history comes out shorter than 11 entries.
monthly_sorted = [merge1_sort, merge2_sort, merge3_sort, merge4_sort,
                  merge5_sort, merge6_sort, merge7_sort, merge8_sort,
                  merge9_sort, merge10_sort, merge11_sort]
score_histories = []
for slot in range(17):
    # a..k are rebound on every pass, as in the original stanzas, so the
    # statements that follow this block see the same values as before.
    a, b, c, d, e, f, g, h, i, j, k = [
        frame['score'].values[frame['username'].values == top20[slot]]
        for frame in monthly_sorted
    ]
    score_histories.append(np.concatenate([a, b, c, d, e, f, g, h, i, j, k]))
(top1score, top2score, top3score, top4score, top5score, top6score,
 top7score, top8score, top9score, top10score, top11score, top12score,
 top13score, top14score, top15score, top16score, top17score) = score_histories
a = merge1_sort['score'].values[merge1_sort['username'].values == top20[17]]
b = merge2_sort['score'].values[merge2_sort['username'].values == top20[17]]
c = merge3_sort['score'].values[merge3_sort['username'].values == top20[17]]
d = merge4_sort['score'].values[merge4_sort['username'].values == top20[17]]
e = merge5_sort['score'].values[merge5_sort['username'].values == top20[17]]
f = merge6_sort['score'].values[merge6_sort['username'].values == top20[17]]
g = merge7_sort['score'].values[merge7_sort['username'].values == top20[17]]
h = merge8_sort['score'].values[merge8_sort['username'].values == top20[17]]
i = merge9_sort['score'].values[merge9_sort['username'].values == top20[17]]
j = merge10_sort['score'].values[merge10_sort['username'].values == top20[17]]
k = merge11_sort['score'].values[merge11_sort['username'].values == top20[17]]
top18score = np.concatenate([a,b,c,d,e,f,g,h,i,j,k])
a = merge1_sort['score'].values[merge1_sort['username'].values == top20[18]]
b = merge2_sort['score'].values[merge2_sort['username'].values == top20[18]]
c = merge3_sort['score'].values[merge3_sort['username'].values == top20[18]]
d = merge4_sort['score'].values[merge4_sort['username'].values == top20[18]]
e = merge5_sort['score'].values[merge5_sort['username'].values == top20[18]]
f = merge6_sort['score'].values[merge6_sort['username'].values == top20[18]]
g = merge7_sort['score'].values[merge7_sort['username'].values == top20[18]]
h = merge8_sort['score'].values[merge8_sort['username'].values == top20[18]]
i = merge9_sort['score'].values[merge9_sort['username'].values == top20[18]]
j = merge10_sort['score'].values[merge10_sort['username'].values == top20[18]]
k = merge11_sort['score'].values[merge11_sort['username'].values == top20[18]]
top19score = np.concatenate([a,b,c,d,e,f,g,h,i,j,k])
a = merge1_sort['score'].values[merge1_sort['username'].values == top20[19]]
b = merge2_sort['score'].values[merge2_sort['username'].values == top20[19]]
c = merge3_sort['score'].values[merge3_sort['username'].values == top20[19]]
d = merge4_sort['score'].values[merge4_sort['username'].values == top20[19]]
e = merge5_sort['score'].values[merge5_sort['username'].values == top20[19]]
f = merge6_sort['score'].values[merge6_sort['username'].values == top20[19]]
g = merge7_sort['score'].values[merge7_sort['username'].values == top20[19]]
h = merge8_sort['score'].values[merge8_sort['username'].values == top20[19]]
i = merge9_sort['score'].values[merge9_sort['username'].values == top20[19]]
j = merge10_sort['score'].values[merge10_sort['username'].values == top20[19]]
k = merge11_sort['score'].values[merge11_sort['username'].values == top20[19]]
top20score = np.concatenate([a,b,c,d,e,f,g,h,i,j,k])
In [75]:
# Per-user score arrays, listed in the same order as the entries of top20.
_top_scores = [top1score, top2score, top3score, top4score, top5score,
               top6score, top7score, top8score, top9score, top10score,
               top11score, top12score, top13score, top14score, top15score,
               top16score, top17score, top18score, top19score, top20score]

# Map each top-20 username to its score array; one DataFrame column per user.
data = {top20[rank]: scores for rank, scores in enumerate(_top_scores)}
totalScore = pd.DataFrame(data)
In [76]:
# Persist the score table as tab-separated, UTF-8 encoded text.
totalScore.to_csv(path_or_buf='totalscore.tsv', sep='\t', encoding='utf-8')
In [77]:
# Distinct values of the 'position' column of df_1, in order of first appearance.
# Later cells index this array by position (memberType[0], memberType[1], ...),
# so their meaning depends on the row order of df_1.
memberType = df_1['position'].unique()
In [78]:
# Frames df_1 .. df_11, in the order their counts are stored in the lists below.
_member_frames = [df_1, df_2, df_3, df_4, df_5, df_6, df_7, df_8, df_9, df_10, df_11]

# For each frame: keep the rows whose position equals the given memberType
# entry, then take the non-null count of the first column (count().values[0]).

# "newbie" per the original label -> memberType[1]
member1 = [frame[frame['position'] == memberType[1]].count().values[0]
           for frame in _member_frames]

# "jr. member" per the original label -> memberType[0]
member2 = [frame[frame['position'] == memberType[0]].count().values[0]
           for frame in _member_frames]
In [79]:
member1
Out[79]:
In [80]:
member2
Out[80]:
In [81]:
memberType
Out[81]:
In [82]:
# Frames df_1 .. df_11, in the order their counts are stored in member3.
_member_frames = [df_1, df_2, df_3, df_4, df_5, df_6, df_7, df_8, df_9, df_10, df_11]

# "FULL. member" per the original label -> memberType[3].
# One entry per frame: non-null count of the first column among matching rows.
member3 = [frame[frame['position'] == memberType[3]].count().values[0]
           for frame in _member_frames]
In [83]:
member3
Out[83]:
In [84]:
# Frames df_1 .. df_11, in the order their counts are stored in member4.
_member_frames = [df_1, df_2, df_3, df_4, df_5, df_6, df_7, df_8, df_9, df_10, df_11]

# Counts for memberType[5].  The original comment read "jr. member", the same
# label as member2 -- presumably a copy-paste leftover; check memberType[5]
# for the actual position name.
member4 = [frame[frame['position'] == memberType[5]].count().values[0]
           for frame in _member_frames]
In [85]:
member4
Out[85]:
In [86]:
# Frames df_1 .. df_11, in the order their counts are stored in member5.
_member_frames = [df_1, df_2, df_3, df_4, df_5, df_6, df_7, df_8, df_9, df_10, df_11]

# Counts for memberType[7].  The original comment read "jr. member", the same
# label as member2 -- presumably a copy-paste leftover; check memberType[7]
# for the actual position name.
member5 = [frame[frame['position'] == memberType[7]].count().values[0]
           for frame in _member_frames]
In [87]:
member5
Out[87]:
In [88]:
# Frames df_1 .. df_11, in the order their counts are stored in member6.
_member_frames = [df_1, df_2, df_3, df_4, df_5, df_6, df_7, df_8, df_9, df_10, df_11]

# Counts for memberType[2].  The original comment read "jr. member", the same
# label as member2 -- presumably a copy-paste leftover; check memberType[2]
# for the actual position name.
member6 = [frame[frame['position'] == memberType[2]].count().values[0]
           for frame in _member_frames]
In [93]:
member6
Out[93]:
In [90]:
In [91]:
# "Other" bucket for df_1: rows whose position is one of the memberType
# entries at indices 4, 6, 8, 9 and 10 (those not used by member1..member6).
other1 = sum(
    df_1[df_1['position'] == memberType[idx]].count().values[0]
    for idx in (4, 6, 8, 9, 10)
)
In [92]:
other1
Out[92]:
In [165]:
# "Other" bucket for df_2: rows whose position is one of the memberType
# entries at indices 4, 6, 8, 9 and 10 (those not used by member1..member6).
other2 = sum(
    df_2[df_2['position'] == memberType[idx]].count().values[0]
    for idx in (4, 6, 8, 9, 10)
)
other2
Out[165]:
In [167]:
# "Other" bucket for df_3: rows whose position is one of the memberType
# entries at indices 4, 6, 8, 9 and 10 (those not used by member1..member6).
other3 = sum(
    df_3[df_3['position'] == memberType[idx]].count().values[0]
    for idx in (4, 6, 8, 9, 10)
)
other3
Out[167]:
In [169]:
# "Other" bucket for df_4: rows whose position is one of the memberType
# entries at indices 4, 6, 8, 9 and 10 (those not used by member1..member6).
other4 = sum(
    df_4[df_4['position'] == memberType[idx]].count().values[0]
    for idx in (4, 6, 8, 9, 10)
)
other4
Out[169]:
In [170]:
# "Other" bucket for df_5: rows whose position is one of the memberType
# entries at indices 4, 6, 8, 9 and 10 (those not used by member1..member6).
other5 = sum(
    df_5[df_5['position'] == memberType[idx]].count().values[0]
    for idx in (4, 6, 8, 9, 10)
)
other5
Out[170]:
In [181]:
# "Other" bucket for df_6: rows whose position is one of the memberType
# entries at indices 4, 6, 8, 9 and 10 (those not used by member1..member6).
other6 = sum(
    df_6[df_6['position'] == memberType[idx]].count().values[0]
    for idx in (4, 6, 8, 9, 10)
)
other6
Out[181]:
In [182]:
# "Other" bucket for df_7: rows whose position is one of the memberType
# entries at indices 4, 6, 8, 9 and 10 (those not used by member1..member6).
other7 = sum(
    df_7[df_7['position'] == memberType[idx]].count().values[0]
    for idx in (4, 6, 8, 9, 10)
)
other7
Out[182]:
In [183]:
# "Other" bucket for df_8: rows whose position is one of the memberType
# entries at indices 4, 6, 8, 9 and 10 (those not used by member1..member6).
other8 = sum(
    df_8[df_8['position'] == memberType[idx]].count().values[0]
    for idx in (4, 6, 8, 9, 10)
)
other8
Out[183]:
In [184]:
# "Other" bucket for df_9: rows whose position is one of the memberType
# entries at indices 4, 6, 8, 9 and 10 (those not used by member1..member6).
other9 = sum(
    df_9[df_9['position'] == memberType[idx]].count().values[0]
    for idx in (4, 6, 8, 9, 10)
)
other9
Out[184]:
In [185]:
# "Other" bucket for df_10: rows whose position is one of the memberType
# entries at indices 4, 6, 8, 9 and 10 (those not used by member1..member6).
other10 = sum(
    df_10[df_10['position'] == memberType[idx]].count().values[0]
    for idx in (4, 6, 8, 9, 10)
)
other10
Out[185]:
In [186]:
# "Other" bucket for df_11: rows whose position is one of the memberType
# entries at indices 4, 6, 8, 9 and 10 (those not used by member1..member6).
other11 = sum(
    df_11[df_11['position'] == memberType[idx]].count().values[0]
    for idx in (4, 6, 8, 9, 10)
)
other11
Out[186]:
In [94]:
# Stack the eleven monthly frames (merge1 .. merge11, in that order) into one
# frame.  A single pd.concat replaces the chain of DataFrame.append calls:
# append is deprecated/removed in newer pandas and re-copies the accumulated
# rows on every call.  Row order and the original index labels are kept.
#
# (A per-month 'monthRecorded' column -- 'January' for merge1 through
# 'November' for merge11 -- was drafted for this step but is left disabled.)
result = pd.concat([merge1, merge2, merge3, merge4, merge5, merge6,
                    merge7, merge8, merge9, merge10, merge11])

# Distinct position values present across all months.
result['position'].unique()
Out[94]:
In [95]:
# Remove Newbies, Jr. Members, and the irregular users "DoctorClu" and "Cirrus".
# Usernames are anonymized through one shared dictionary so that a given user
# receives the same alias in every month.

# Every distinct username occurring anywhere in the combined frame.
uniqueUsername = set(result.username)

# username -> "User<N>".  N follows the set's iteration order, so the numbering
# itself carries no meaning.
anonUserNames = {username: "User" + str(i)
                 for i, username in enumerate(uniqueUsername)}


def anonymize(originalUsername):
    """Return the alias stored in anonUserNames for ``originalUsername``.

    Raises KeyError when the name is not a key of anonUserNames.
    """
    return anonUserNames[originalUsername]


def _drop_excluded_and_anonymize(frame):
    """Return a filtered, anonymized copy of one monthly frame.

    Rows with position "Newbie" or "Jr. Member", or username "DoctorClu" or
    "Cirrus", are dropped; the remaining usernames are replaced by their
    aliases.  ``frame`` itself is not modified.
    """
    keep = (
        (frame.position != "Newbie")
        & (frame.position != "Jr. Member")
        & (frame.username != "DoctorClu")
        & (frame.username != "Cirrus")
    )
    # Explicit copy: assigning a column on a filtered slice would otherwise
    # raise SettingWithCopyWarning.
    cleaned = frame[keep].copy()
    cleaned['username'] = cleaned['username'].apply(anonymize)
    return cleaned


# NOTE: only the entries of ``merges`` are replaced; the variables
# merge1 .. merge11 keep referring to the unfiltered, non-anonymized frames.
merges = [merge1, merge2, merge3, merge4, merge5, merge6,
          merge7, merge8, merge9, merge10, merge11]
merges = [_drop_excluded_and_anonymize(frame) for frame in merges]
In [98]:
# First 20 rows of merge11_sort.  An explicit copy is taken because the
# username column of top20result is overwritten later; assigning to a bare
# slice would raise SettingWithCopyWarning.
top20result = merge11_sort[:20].copy()
In [108]:
# Anonymize the usernames of top20result with a mapping built from that frame
# alone.
# NOTE: this rebinds the globals uniqueUsername, anonUserNames and anonymize,
# so aliases produced here are numbered independently of any mapping built
# earlier in the notebook (the same "User<N>" label may denote a different
# user elsewhere).
uniqueUsername = set(top20result.username)

# username -> "User<N>", with N following the set's iteration order.
anonUserNames = {username: "User" + str(i)
                 for i, username in enumerate(uniqueUsername)}


def anonymize(originalUsername):
    """Return the alias stored in anonUserNames for ``originalUsername``.

    Raises KeyError when the name is not a key of anonUserNames.
    """
    return anonUserNames[originalUsername]


# assign() builds a new frame rather than writing a column into what may be a
# slice of merge11_sort, which avoids SettingWithCopyWarning.
top20result = top20result.assign(
    username=top20result['username'].apply(anonymize)
)
In [109]:
top20result
Out[109]:
In [112]:
Out[112]:
In [ ]: