In [ ]:
import sframe as sf

In [ ]:
import sframe as sf
from gensim import corpora, models, similarities
from gensim.utils import simple_preprocess

class MyCorpus(object):
    """Streamed corpus over a tab-separated file of labelled sentences.

    The headerless TSV is auto-named by sframe: column 'X1' holds the raw
    text, column 'X2' the class label. Iterating the corpus yields one
    tokenized document (list of lowercase word tokens) per row.
    """

    def __init__(self, path):
        # Headerless TSV; sframe assigns the default column names X1, X2, ...
        self.data = sf.SFrame.read_csv(path, sep="\t", header=False)

    def __iter__(self):
        # Tokenize lazily so the corpus can be scanned multiple times
        # without holding all documents in memory.
        return (simple_preprocess(sentence) for sentence in self.data['X1'])

    def klass(self):
        # Class labels, aligned row-for-row with the iterated documents.
        return self.data['X2']

In [ ]:
# Load the labelled tweet corpus. NOTE(review): hard-coded relative path —
# presumably the notebook is run from the project root; confirm.
tweets = MyCorpus("Bases/Prontas/superbow/sentic.patter.en-superbow2013.txt")

In [ ]:
# NOTE(review): `.stack` is referenced but never called, so this cell is a
# no-op apart from displaying the bound method's repr (SFrame.stack also
# requires a column argument). Confirm what inspection was intended here.
tweets.data.stack

In [ ]:
# Build the bag-of-words vocabulary from the tokenized tweets, vectorize
# every document, then fit a TF-IDF weighting over the resulting corpus.
dictionary = corpora.Dictionary(tweets)

corpus = []
for text in tweets:
    corpus.append(dictionary.doc2bow(text))

tfidf = models.TfidfModel(corpus)

In [ ]:
G = sf.SGraph()

# Each text is a node of the graph; the text's class is stored as a node
# attribute. Collect the vertices first and add them in a single call:
# SGraph.add_vertices returns a new graph each time, so adding one vertex
# per call rebuilds the graph on every iteration (quadratic in corpus size).
vertices = [sf.Vertex(k, attr={'klass': v})
            for k, v in enumerate(tweets.klass())]
G = G.add_vertices(vertices)

In [ ]:
# Nearest-neighbour index over the TF-IDF vectors; num_best=11 returns each
# query tweet itself plus its 10 closest neighbours.
index = similarities.Similarity('tmp', tfidf[corpus],
                                num_features=len(dictionary.keys()),
                                num_best=11)

for tid, tweet in enumerate(tweets):
    # Link each tweet to its nearest neighbours, skipping the self-match;
    # the cosine similarity becomes the edge weight.
    edges = [sf.Edge(tid, neighbour_id, attr={'weight': weight})
             for neighbour_id, weight in index[tfidf[dictionary.doc2bow(tweet)]]
             if neighbour_id != tid]

    # Workaround for the "0.0 neutral" case (original author's note): some
    # tweets yield edges that add_edges rejects, so they are skipped rather
    # than aborting the whole build. Narrowed from a bare `except:` so
    # KeyboardInterrupt/SystemExit still propagate.
    try:
        G = G.add_edges(edges)
    except Exception:
        pass

In [ ]:


In [ ]:
G = sf.SGraph()

# Each text is a node of the graph; the text's class is a node attribute.
# NOTE(review): this cell duplicates the graph construction earlier in the
# notebook and rebinds G, discarding any edges added in between — confirm
# which version is the intended one.
# Vertices are collected first and added in a single call: SGraph.add_vertices
# returns a new graph each time, so one call per vertex is quadratic.
klass = tweets.klass()
vertices = [sf.Vertex(k, attr={'klass': label})
            for k, label in enumerate(klass)]
G = G.add_vertices(vertices)

In [ ]: