In [1]:
import pandas as pd
import math
# Load the correlation table. index_col=0 turns the first CSV column into the
# row index, so each row is addressed by its label.
# NOTE(review): presumably a square game-by-game Pearson correlation matrix
# (row labels == column labels) -- confirm against the file.
df = pd.read_csv("pearsons_corr.csv", encoding='utf-8', index_col=0)
In [2]:
# Preview the first rows to sanity-check the load.
df.head()
Out[2]:
In [5]:
# Build a lookup from game name to node weight.
#
# "nodes_weights.csv" is read as tab-separated text with exactly three fields
# per line; the first is the game, the third is the weight, and the middle one
# is ignored. A line with a different field count raises ValueError.
games_nodes_size = {}
with open("nodes_weights.csv", encoding='utf-8') as f:
    # Iterate the file lazily instead of materialising it with readlines().
    for line in f:
        # Drop the line terminator so the stored weight is a clean token
        # rather than e.g. "12\n".
        line = line.rstrip("\n")
        if not line:
            # Empty lines (typically a trailing blank line at EOF) carry no
            # record and would otherwise break the three-way unpack below.
            continue
        game, _, weight = line.split("\t")
        # The weight is kept as a string; consumers convert it when needed.
        games_nodes_size[game] = weight
print(len(games_nodes_size))
In [6]:
# Export the strongest correlations of every row of `df` as a tab-separated
# edge list (Source, Target, Weight) for Gephi.
#
# For each row (the "source"):
#   * the cut-off is mean + 2 * std of that row, computed over the whole row
#     (the source's own column is part of the statistics when present);
#   * every other column whose value is at or above the cut-off is a candidate;
#   * candidates are ranked by squared weight, highest first, and at most
#     float(games_nodes_size[source]) of them are written, with the squared
#     weight as the edge weight.
#
# Raises KeyError if a source is missing from `games_nodes_size`.
game_number = 0
# The context manager closes the file even when a row raises part-way through.
# The handle keeps the name `file` because a later cell still refers to it.
with open("pearsons_cleaned_for_gephi_squared.csv", 'w', encoding='utf-8') as file:
    file.write("Source\tTarget\tWeight\n")
    for source, row in df.iterrows():
        if game_number % 1000 == 0:
            # Coarse progress indicator: one line per 1000 processed rows.
            print(game_number)

        threshold = row.mean() + 2 * row.std()

        # Squared weight of every candidate target, keyed by target label.
        squared_weights = {}
        # Series.items() replaces Series.iteritems(), which pandas 2.0 removed.
        for target, weight in row.items():
            # `not (weight >= threshold)` rejects NaN as well; the previous
            # `weight < threshold` test let NaN through, producing "nan" edges
            # and an unreliable sort. A NaN threshold (e.g. a row with fewer
            # than two valid values) therefore yields no edges for that row.
            if source == target or not (weight >= threshold):
                continue
            squared_weights[target] = weight ** 2

        ranked_targets = sorted(
            squared_weights, key=squared_weights.__getitem__, reverse=True
        )

        # Cap the number of edges by the source's node weight; a negative cap
        # is clamped to zero so that nothing is written in that case.
        edge_limit = int(min(len(ranked_targets), float(games_nodes_size[source])))
        for target in ranked_targets[:max(edge_limit, 0)]:
            file.write(source + "\t" + target + "\t" + str(squared_weights[target]) + "\n")

        game_number += 1
In [9]:
# Safety net: make sure the export file handle is closed. If the export cell
# completed normally the handle is already closed and this call is a no-op;
# it only matters when that cell was interrupted before closing the file.
file.close()
In [ ]: