In [1]:
import pandas as pd
import numpy as np
In [2]:
# Edge list: the first CSV column becomes the index (edge source) and
# column '1' is read below as the edge target.
# DataFrame.from_csv was removed in pandas 1.0; read_csv(index_col=0) replaces it.
# Date parsing (a from_csv default) is left off because the index is used as
# plain node ids in the cells below.
origin_df = pd.read_csv("networks/real2.csv", index_col=0)
# Upper bound on node ids kept in the adjacency matrix; cells below read `ceil`,
# so it must actually be defined for a fresh-kernel run.
ceil = 1000
In [3]:
# Unique ids seen on the source side (the index) and on the target side (column '1').
source = set(origin_df.index.unique())
target = set(origin_df['1'].unique())
# A node is anything that appears on either end of an edge.
nodes = source.union(target)
len(nodes)
Out[3]:
In [4]:
# Keep at most `ceil` nodes. Sorting first makes the selection deterministic
# (plain set iteration order is arbitrary) and keeps the smallest ids.
nodes = sorted(nodes)[:ceil]
len_nodes = len(nodes)
In [5]:
# Display the number of nodes kept.
len_nodes
Out[5]:
In [11]:
# Square all-zero grid, one row and one column per node.
matrix = np.zeros(shape=(len_nodes, len_nodes))
# Label both axes with the node ids and store the cells as int32.
new_df = pd.DataFrame(
    data=matrix,
    index=nodes,
    columns=nodes,
    dtype=np.int32,
)
In [15]:
# Fill the adjacency matrix: new_df.loc[i, j] = 1 for every edge i -> j.
# The .ix indexer was removed in pandas 1.0; .loc is the label-based equivalent.
# NOTE(review): iterating range(len_nodes) assumes node labels are the
# integers 0..len_nodes-1 — confirm against the data.
for i in range(len_nodes):
    # skip ids that never appear as an edge source (not in origin_df's index)
    if i in source:
        # .loc yields a scalar for a single edge and a Series for several;
        # flatten() normalises both to a 1-D array of target ids
        outgoing_nodes = np.array(origin_df.loc[i, '1']).flatten()
        # only targets below the cap are recorded
        for j in outgoing_nodes[outgoing_nodes < ceil]:
            new_df.loc[i, j] = 1
In [16]:
# df2.ix[:, 102].tail();
In [21]:
# Count zero vs. non-zero entries in the column labelled 389.
# (.ix was removed in pandas 1.0; .loc selects the same column by label.)
np.bincount(new_df.loc[:, 389] > 0)
Out[21]:
In [142]:
# Write the frame to 'input_test.csv'.
# NOTE(review): `df2` is never assigned in the visible cells — presumably an
# earlier name for `new_df`; confirm before re-running on a fresh kernel.
df2.to_csv('input_test.csv')
In [ ]:
In [99]:
# Rows with a non-zero entry in the column labelled 370.
# (.ix was removed in pandas 1.0; .loc selects the same column by label.)
# NOTE(review): `df2` is not assigned in the visible cells — confirm its origin.
df2[df2.loc[:, 370] > 0]
Out[99]:
In [105]:
# Rows with a non-zero entry in the column labelled 369.
# (.ix was removed in pandas 1.0; .loc selects the same column by label.)
# NOTE(review): `df2` is not assigned in the visible cells — confirm its origin.
df2[df2.loc[:, 369] > 0]
Out[105]:
In [110]:
# Histogram of column sums: entry k counts the columns whose sum equals k.
# NOTE(review): `df2` is not assigned in the visible cells — confirm its origin.
np.bincount(df2.sum(axis=0))
Out[110]:
In [111]:
import pickle
In [112]:
# Small throwaway Series used by the pickling experiments below.
a = pd.Series([1,2,3,3])
In [113]:
# Serialise the Series to 'test2.p' using pandas' own pickle helper.
a.to_pickle('test2.p')
In [ ]:
# Round-trip `a` through 'filename.pickle' using the newest pickle protocol.
with open('filename.pickle', 'wb') as handle:
    pickle.Pickler(handle, protocol=pickle.HIGHEST_PROTOCOL).dump(a)

# Read the object straight back into `b`.
with open('filename.pickle', 'rb') as handle:
    b = pickle.Unpickler(handle).load()
In [122]:
# Lists the pickle module's attributes; the trailing ';' suppresses the cell output.
dir(pickle);
In [121]:
# pickle.loads() expects a bytes object, not a file handle; use pickle.load()
# on the open file, inside `with` so the handle is closed afterwards.
with open('test2.p', 'rb') as handle:
    b = pickle.load(handle)
In [123]:
# Round-trip `a` through 'a.p' using the default pickle protocol.
with open('a.p', 'wb') as handle:
    pickle.Pickler(handle).dump(a)

# Read the object straight back into `b`.
with open('a.p', 'rb') as handle:
    b = pickle.Unpickler(handle).load()
In [143]:
# NOTE: unpickling can execute arbitrary code — only load pickle files you
# produced yourself.
with open('page_rank_results.p', 'rb') as handle:
    pr = pickle.load(handle)
# Show the results from largest to smallest. Series.order() was removed from
# pandas; sort_values() is its replacement.
# NOTE(review): presumes `pr` is a pandas Series — confirm.
pr.sort_values(ascending=False)
Out[143]:
In [129]:
import pandas
import numpy
def to_matrix(origin_df, ceil=1000):
    """Build a 0/1 adjacency matrix from an edge-list DataFrame.

    Parameters
    ----------
    origin_df : pandas.DataFrame
        One row per edge: the index holds the source node and column '1'
        holds the target node.
    ceil : number or None, default 1000
        Edges whose target is >= ``ceil`` are ignored (the target still gets
        a row and a column, but the edge is not recorded). Pass ``None`` to
        record every edge.

    Returns
    -------
    pandas.DataFrame
        Square int32 frame whose rows and columns are the sorted unique
        nodes; cell ``[source, target]`` is 1 when that edge was recorded,
        0 otherwise.
    """
    # get unique nodes; sorted so the axis order is deterministic (a bare set
    # is unordered and is not a valid index for current pandas)
    source_nodes = set(origin_df.index.unique())
    target_nodes = set(origin_df['1'].unique())
    nodes = sorted(source_nodes | target_nodes)

    # map each node label to its row/column position so labels do not have
    # to be the integers 0..n-1
    position = {node: k for k, node in enumerate(nodes)}

    # create the zero matrix directly with the final dtype
    adjacency = numpy.zeros((len(nodes), len(nodes)), dtype=numpy.int32)

    # walk the edge list once and mark every edge that passes the cap
    for src, dst in zip(origin_df.index, origin_df['1']):
        if ceil is None or dst < ceil:
            adjacency[position[src], position[dst]] = 1

    return pandas.DataFrame(adjacency, index=nodes, columns=nodes)
In [ ]:
# Build the adjacency matrix for the full edge list.
# DataFrame.from_csv was removed in pandas 1.0; read_csv(index_col=0) keeps
# the first CSV column as the index (edge source).
mt = to_matrix(pd.read_csv("networks/real2.csv", index_col=0))
In [32]:
# Load 'input_test.csv': the first column becomes the index and every value
# is read as int32.
test_df = pd.read_csv('input_test.csv',dtype=np.int32, index_col=0)
In [33]:
# Preview the first rows of the reloaded frame.
test_df.head()
Out[33]:
In [34]:
# Show the dtype of each column after loading.
test_df.dtypes
Out[34]:
In [36]:
# NOTE(review): this assigns the column index to itself, so the labels are
# unchanged — confirm what was intended here.
new_df.columns = new_df.columns
In [41]:
# Inspect the column labels as an array. pandas Index objects expose this as
# the `.values` attribute; there is no `value()` method, so the original call
# raised AttributeError.
new_df.columns.values
In [ ]: