In [1]:
import pandas as pd
import math
# Load the precomputed matrix from pearsons_corr.csv; the first CSV column becomes the row index.
# Presumably a game-by-game Pearson correlation matrix (per the file name) — TODO confirm with the file's producer.
df = pd.read_csv("pearsons_corr.csv", encoding='utf-8', index_col=0)

In [2]:
# Preview the first five rows of the loaded matrix (rich display, no print needed).
df.head()


Out[2]:
神明的一天世界(God's One Day World) ! That Bastard Is Trying To Steal Our Gold ! "Glow Ball" - The billiard puzzle game #SelfieTennis #SkiJump #killallzombies $1 Ride 'n Verlore Verstand - Arcane Raise - -- none -- [Not currently available] ... ファイナルファンタジーXIV: 新生エオルゼア (JP version) 丛林守望者(Ranger of the jungle) 乖離性ミリオンアーサー VR 侠客风云传(Tale of Wuxia) 大海战 Navy Field IV 天使帝國四《Empire of Angels IV》 画境(Picturesque) VR 神楽道中記(KaguraDouchuuki) 軒轅劍外傳穹之扉(The Gate of Firmament) 軒轅劍外傳穹之扉音樂精選集(Sound Collection of Xuan-Yuan Sword EX:The Gate of Firmament)
神明的一天世界(God's One Day World) 1.000000 0.446981 0.255948 -0.001260 -0.001202 0.120596 0.286055 0.511553 0.168028 0.362105 ... -0.001520 0.648984 -0.001202 0.066986 0.149641 0.102985 0.484835 0.198169 0.082066 0.012757
! That Bastard Is Trying To Steal Our Gold ! 0.446981 1.000000 0.318198 -0.001043 -0.000994 0.166967 0.287437 0.514008 0.189452 0.414878 ... -0.001258 0.499728 -0.000994 0.039776 0.145112 0.030090 0.473657 0.108262 0.031560 0.016011
"Glow Ball" - The billiard puzzle game 0.255948 0.318198 1.000000 -0.002209 -0.002107 0.057979 0.249759 0.258954 0.237199 0.212174 ... -0.002665 0.263664 -0.002107 0.010296 0.187531 0.019505 0.220938 0.050064 0.010558 0.005421
#SelfieTennis -0.001260 -0.001043 -0.002209 1.000000 0.285861 -0.000284 -0.002649 0.022147 -0.001173 -0.000772 ... -0.000315 -0.001093 0.285861 -0.000568 -0.001121 -0.000377 -0.000902 -0.000273 0.033480 0.068853
#SkiJump -0.001202 -0.000994 -0.002107 0.285861 1.000000 -0.000271 -0.002526 -0.000980 -0.001118 -0.000736 ... 0.078781 -0.001042 1.000000 -0.000541 -0.001069 -0.000360 -0.000860 -0.000260 0.035180 0.072246

5 rows × 12950 columns


In [5]:
# Build a lookup from game name to its node weight, read from a tab-separated
# file with exactly three fields per line: game, an ignored middle field, weight.
# Weights are kept as strings; consumers convert them with float() when needed.
# NOTE(review): an earlier run printed 12951 entries while the matrix preview
# reports 12950 columns — possibly a header row is stored as a "game"; confirm.
games_nodes_size = {}
with open("nodes_weights.csv", encoding='utf-8') as f:
    # Iterate the file lazily instead of materialising every line via readlines().
    for line in f:
        # Drop the line terminator so the weight is not stored with a trailing "\n".
        line = line.rstrip("\n")
        if not line:
            continue  # tolerate blank lines (e.g. an empty line at end of file)
        # Plain tab split (no CSV quote handling), so game names are taken verbatim.
        # A named throwaway is used instead of "_" to leave IPython's "_" alone.
        game, _unused, weight = line.split("\t")
        games_nodes_size[game] = weight
print(len(games_nodes_size))


12951

In [6]:
# Export a pruned, tab-separated edge list (Source, Target, Weight) for Gephi.
#
# For every row of `df` (one source game per row):
#   1. compute a per-row cut-off of mean + 2 * standard deviation;
#   2. keep every other game whose value is not below that cut-off;
#   3. square the kept values and rank targets by squared value, highest first;
#   4. write at most float(games_nodes_size[source]) edges (truncated to an int).
#
# Relies on `df` and `games_nodes_size` defined in earlier cells; raises KeyError
# if a row label of `df` is missing from `games_nodes_size`.
# The handle is still named `file` and is closed automatically by the `with`
# block, even if the loop raises part-way through.
with open("pearsons_cleaned_for_gephi_squared.csv", 'w', encoding='utf') as file:
    file.write("Source\tTarget\tWeight\n")

    for game_number, (source, row) in enumerate(df.iterrows()):
        # Coarse progress indicator: one line per 1000 processed rows.
        if game_number % 1000 == 0:
            print(game_number)

        threshold = row.mean() + 2 * row.std()

        # Series.items() replaces Series.iteritems(), which was removed in pandas 2.0.
        # The filter is spelled "not weight < threshold" (rather than ">=") so that
        # values which fail the "<" comparison, such as NaN, are treated exactly as
        # before. Self-edges are dropped.
        edges_w = {
            target: weight ** 2
            for target, weight in row.items()
            if not (source == target) and not (weight < threshold)
        }

        # sorted() is stable, so ties keep their column order.
        ranked_targets = sorted(edges_w, key=edges_w.__getitem__, reverse=True)

        # Cap the number of outgoing edges by the node weight of the source game.
        # max(0, ...) keeps a negative weight from turning into a "drop the last k"
        # slice; such a weight yields no edges, as with range() on a negative count.
        max_edges = max(0, int(min(len(edges_w), float(games_nodes_size[source]))))

        for target in ranked_targets[:max_edges]:
            file.write(source + "\t" + target + "\t" + str(edges_w[target]) + "\n")


0
1000
2000
3000
4000
5000
6000
7000
8000
9000
10000
11000
12000

In [9]:
# Safety net: make sure the export file handle is closed. Calling close() on an
# already-closed file object is a no-op, so this is harmless after a clean run.
file.close()

In [ ]: