notebook.community

Edit and run



In [1]:

    
import pandas as pd
import math
# Load the pairwise Pearson correlation matrix; the first CSV column
# supplies the row labels (index).
df = pd.read_csv(
    "pearsons_corr.csv",
    index_col=0,
    encoding='utf-8',
)



In [2]:

    
# Preview the first rows to sanity-check that the matrix loaded with labels
# on both the index and the columns.
df.head()









    Out[2]:






  
    
      
      神明的一天世界(God's One Day World)
      ! That Bastard Is Trying To Steal Our Gold !
      "Glow Ball" - The billiard puzzle game
      #SelfieTennis
      #SkiJump
      #killallzombies
      $1 Ride
      'n Verlore Verstand
      - Arcane Raise -
      -- none -- [Not currently available]
      ...
      ファイナルファンタジーXIV: 新生エオルゼア (JP version)
      丛林守望者（Ranger of the jungle）
      乖離性ミリオンアーサー VR
      侠客风云传(Tale of Wuxia)
      大海战 Navy Field IV
      天使帝國四《Empire of Angels IV》
      画境(Picturesque) VR
      神楽道中記(KaguraDouchuuki)
      軒轅劍外傳穹之扉(The Gate of Firmament)
      軒轅劍外傳穹之扉音樂精選集(Sound Collection of Xuan-Yuan Sword EX：The Gate of Firmament)
    
  
  
    
      神明的一天世界(God's One Day World)
      1.000000
      0.446981
      0.255948
      -0.001260
      -0.001202
      0.120596
      0.286055
      0.511553
      0.168028
      0.362105
      ...
      -0.001520
      0.648984
      -0.001202
      0.066986
      0.149641
      0.102985
      0.484835
      0.198169
      0.082066
      0.012757
    
    
      ! That Bastard Is Trying To Steal Our Gold !
      0.446981
      1.000000
      0.318198
      -0.001043
      -0.000994
      0.166967
      0.287437
      0.514008
      0.189452
      0.414878
      ...
      -0.001258
      0.499728
      -0.000994
      0.039776
      0.145112
      0.030090
      0.473657
      0.108262
      0.031560
      0.016011
    
    
      "Glow Ball" - The billiard puzzle game
      0.255948
      0.318198
      1.000000
      -0.002209
      -0.002107
      0.057979
      0.249759
      0.258954
      0.237199
      0.212174
      ...
      -0.002665
      0.263664
      -0.002107
      0.010296
      0.187531
      0.019505
      0.220938
      0.050064
      0.010558
      0.005421
    
    
      #SelfieTennis
      -0.001260
      -0.001043
      -0.002209
      1.000000
      0.285861
      -0.000284
      -0.002649
      0.022147
      -0.001173
      -0.000772
      ...
      -0.000315
      -0.001093
      0.285861
      -0.000568
      -0.001121
      -0.000377
      -0.000902
      -0.000273
      0.033480
      0.068853
    
    
      #SkiJump
      -0.001202
      -0.000994
      -0.002107
      0.285861
      1.000000
      -0.000271
      -0.002526
      -0.000980
      -0.001118
      -0.000736
      ...
      0.078781
      -0.001042
      1.000000
      -0.000541
      -0.001069
      -0.000360
      -0.000860
      -0.000260
      0.035180
      0.072246
    
  

5 rows × 12950 columns



In [5]:

    
# Build a lookup from game title to node weight.
#
# "nodes_weights.csv" is read as tab-separated text with exactly three fields
# per line: the game title, a middle field that is ignored, and the weight.
# Weights are stored as strings (without the line terminator); consumers are
# expected to convert them with float() where a number is needed.
games_nodes_size = {}
with open("nodes_weights.csv", encoding='utf-8') as f:
    # Iterate the file lazily instead of loading every line with readlines().
    for line in f:
        # Strip the line terminator so the weight is not stored as "<value>\n".
        line = line.rstrip("\r\n")
        if not line:
            # Tolerate blank lines (e.g. an empty last line) instead of
            # failing the three-way unpack below with ValueError.
            continue
        game, _, weight = line.split("\t")
        games_nodes_size[game] = weight
print(len(games_nodes_size))



In [6]:

    
# Export a sparsified edge list (Source, Target, Weight) for Gephi from the
# correlation matrix `df`.
#
# For every row of `df` (one source game):
#   1. keep only correlations that are at least two standard deviations above
#      that row's mean, excluding the game's correlation with itself;
#   2. square the surviving correlations to obtain the edge weights;
#   3. write the strongest edges first, capped at the game's node weight
#      taken from `games_nodes_size`.
#
# The `with` block guarantees the output file is closed even if a row raises
# (for example a KeyError for a game missing from `games_nodes_size`). The
# handle keeps the name `file`, so a later `file.close()` stays a harmless
# no-op. 'utf' is a Python codec alias for UTF-8.
with open("pearsons_cleaned_for_gephi_squared.csv", 'w', encoding='utf') as file:
    file.write("Source\tTarget\tWeight\n")

    for game_number, (source, row) in enumerate(df.iterrows()):
        # Lightweight progress indicator: one line every 1000 games.
        if game_number % 1000 == 0:
            print(game_number)

        threshold = row.mean() + 2 * row.std()

        # Vectorised filter instead of a Python-level loop over every column.
        # NaN correlations compare False against the threshold, so they are
        # dropped here rather than being written out as "nan" edges.
        strong = row[row >= threshold]

        # target -> squared correlation; the self-edge is excluded.
        # (Series.items() replaces iteritems(), which pandas 2.0 removed.)
        edges_w = {
            target: weight ** 2
            for target, weight in strong.items()
            if target != source
        }

        # Strongest edges first; sorted() is stable, so ties keep column order.
        sorted_targets = sorted(edges_w, key=edges_w.__getitem__, reverse=True)

        # Cap the number of edges at the node weight (truncated to an int).
        max_edges = int(min(len(edges_w), float(games_nodes_size[source])))
        # max(..., 0) keeps a negative cap meaning "no edges", as range() did.
        for target in sorted_targets[:max(max_edges, 0)]:
            file.write(source + "\t" + target + "\t" + str(edges_w[target]) + "\n")



In [9]:

    
# Close the export file handle; calling close() on an already-closed file
# is a no-op, so this is safe to run more than once.
file.close()



In [ ]:

	神明的一天世界(God's One Day World)	! That Bastard Is Trying To Steal Our Gold !	"Glow Ball" - The billiard puzzle game	#SelfieTennis	#SkiJump	#killallzombies	$1 Ride	'n Verlore Verstand	- Arcane Raise -	-- none -- [Not currently available]	...	ファイナルファンタジーXIV: 新生エオルゼア (JP version)	丛林守望者（Ranger of the jungle）	乖離性ミリオンアーサー VR	侠客风云传(Tale of Wuxia)	大海战 Navy Field IV	天使帝國四《Empire of Angels IV》	画境(Picturesque) VR	神楽道中記(KaguraDouchuuki)	軒轅劍外傳穹之扉(The Gate of Firmament)	軒轅劍外傳穹之扉音樂精選集(Sound Collection of Xuan-Yuan Sword EX：The Gate of Firmament)
神明的一天世界(God's One Day World)	1.000000	0.446981	0.255948	-0.001260	-0.001202	0.120596	0.286055	0.511553	0.168028	0.362105	...	-0.001520	0.648984	-0.001202	0.066986	0.149641	0.102985	0.484835	0.198169	0.082066	0.012757
! That Bastard Is Trying To Steal Our Gold !	0.446981	1.000000	0.318198	-0.001043	-0.000994	0.166967	0.287437	0.514008	0.189452	0.414878	...	-0.001258	0.499728	-0.000994	0.039776	0.145112	0.030090	0.473657	0.108262	0.031560	0.016011
"Glow Ball" - The billiard puzzle game	0.255948	0.318198	1.000000	-0.002209	-0.002107	0.057979	0.249759	0.258954	0.237199	0.212174	...	-0.002665	0.263664	-0.002107	0.010296	0.187531	0.019505	0.220938	0.050064	0.010558	0.005421
#SelfieTennis	-0.001260	-0.001043	-0.002209	1.000000	0.285861	-0.000284	-0.002649	0.022147	-0.001173	-0.000772	...	-0.000315	-0.001093	0.285861	-0.000568	-0.001121	-0.000377	-0.000902	-0.000273	0.033480	0.068853
#SkiJump	-0.001202	-0.000994	-0.002107	0.285861	1.000000	-0.000271	-0.002526	-0.000980	-0.001118	-0.000736	...	0.078781	-0.001042	1.000000	-0.000541	-0.001069	-0.000360	-0.000860	-0.000260	0.035180	0.072246