notebook.community

Edit and run



In [2]:

    
from SPARQLWrapper import SPARQLWrapper, JSON
from configuracao import *
import string, networkx as x, nltk as k
import __builtin__

PREFIX="""PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX ops: <http://purl.org/socialparticipation/ops#>
PREFIX opa: <http://purl.org/socialparticipation/opa#>
PREFIX foaf: <http://xmlns.com/foaf/0.1/>
PREFIX dc: <http://purl.org/dc/terms/>
PREFIX tsioc: <http://rdfs.org/sioc/types#>
PREFIX sioc: <http://rdfs.org/sioc/ns#>
PREFIX schema: <http://schema.org/>"""



In [3]:

    
q="SELECT ?comentario ?titulo ?texto WHERE \
               {?comentario dc:type tsioc:Comment.\
              OPTIONAL {?comentario dc:title ?titulo . }\
               OPTIONAL {?comentario schema:text ?texto .}}"
sparql=SPARQLWrapper(URL_ENDPOINT_)
sparql.setQuery(PREFIX+q)
sparql.setReturnFormat(JSON)
results = sparql.query().convert()
msgs_=results["results"]["bindings"]
msgs=[mm for mm in msgs_ if ("titulo" not in mm.keys()) or
       (("teste de stress" not in mm["titulo"]["value"].lower())
       and ("comunidade de desenvolvedores e nesse caso, quanto mais"
          not in mm["texto"]["value"].lower()))]
palavras=string.join([i["texto"]["value"].lower() for i in msgs])
palavras = ''.join(ch for ch in palavras if ch not in EXCLUDE)
palavras_=palavras.split()
palavras__=[pp for pp in palavras_ if pp not in STOPWORDS]
fdist_=k.FreqDist(palavras__)
# escolhendo as 400 palavras mais incidentes para referência
palavras_escolhidas=fdist_.keys()[:400]
__builtin__.palavras_escolhidas=palavras_escolhidas
__builtin__.fdist_=fdist_



In [ ]: