Here is an example of using the RAKE (Rapid Automatic Keyword Extraction) algorithm to extract the top three key phrases from a given text.
In [ ]:
import sys
sys.path.insert(0, '../utils')
import rake

# How many of the leading key phrases to display.
TOP_N = 3

query = "From the engineering side, we've also been working on the ability to parallelize training of neural network"
rake1 = rake.Rake("../utils/SmartStoplist.txt")

# Each entry of the result is indexed with [0] to obtain the phrase text.
# NOTE(review): presumably entries are (phrase, score) pairs sorted by
# descending score -- confirm against ../utils/rake.py.
vals = rake1.run(query)

# Slice instead of indexing vals[0], vals[1], vals[2] directly so that a text
# yielding fewer than TOP_N phrases prints what is available rather than
# raising IndexError. Single-argument print(...) produces the same output on
# Python 2 and Python 3.
for entry in vals[:TOP_N]:
    print(entry[0])
Here is an example of a simple keyword search that finds relevant links in news, social media, and TV content, using both a short and a long query.
In [ ]:
# -*- coding: utf-8 -*-
import sys
sys.path.insert(0, '../search')
import SimpleKeywordSearch

xlimerec = SimpleKeywordSearch.XlimeAdvancedRecommender()
long_query = 'From the engineering side, we have also been working on the ability to parallelize training of neural network models over multiple GPU cards simultaneously.'
short_query = 'GPU cards'

# Run the recommender for the short query first, then the long one, printing
# a banner followed by whatever the recommender returned. After the loop,
# `messagelist` holds the long-query result, as before.
# Single-argument print(...) behaves identically on Python 2 and Python 3.
for header, query_text in (
    ('..........Results of Short query..........', short_query),
    ('..........Results of Long query..........', long_query),
):
    messagelist = xlimerec.recommender(query_text)
    print(header)
    print(messagelist)
Here is an example of comparing two documents (long or short); the comparison works both monolingually and cross-lingually.
In [ ]:
# -*- coding: utf-8 -*-
import sys
sys.path.insert(0, '../docsim')
import CompareTwoDocs

# One English and one German snippet, i.e. a cross-lingual comparison.
text1 = "I do not speak english"
text2 = "München bietet in seinen diversen Abteilungen immer"

comparedoc = CompareTwoDocs.CompareDocContent()
score = comparedoc.compare(text1, text2)  # Cross-lingual

# Build the output as a single string: a parenthesised two-item print would
# emit a tuple on Python 2, while the bare print statement is a SyntaxError on
# Python 3. '%s %s' reproduces the original "label, space, value" layout.
label = '[Score between 0 and 1]: '
print('%s %s' % (label, score))
In [ ]:
# -*- coding: utf-8 -*-
import pymongo
from pymongo import MongoClient
import re
class CountMongo:
    """Print the number of documents stored in each configured collection.

    The configuration mapping passed to the constructor is read with plain
    indexing (a missing key raises ``KeyError``) for the following keys:
    ``MongoDBPath``, ``MongoDBUserName``, ``MongoDBPassword``,
    ``MongoDBStorage`` and the five ``KafkaTopic*`` keys listed in
    ``_TOPIC_KEYS``.
    """

    # Config keys whose values name the collections to count, in the order
    # the counts appear in the printed report.
    _TOPIC_KEYS = (
        'KafkaTopicTVMetadata',
        'KafkaTopicSocialMedia',
        'KafkaTopicNews',
        'KafkaTopicASR',
        'KafkaTopicSubtitles',
    )

    _REPORT_LABEL = "Total Docs in Collection [TV Metadata], [Social Media],[News],[TV ASR],[TV SubTitles]:: "

    def __init__(self, configdic):
        """Keep a reference to the configuration mapping ``configdic``."""
        self.configdict = configdic

    def count(self):
        """Connect to MongoDB and print one document count per collection.

        Does nothing when ``MongoDBPath`` is the empty string. When both
        ``MongoDBUserName`` and ``MongoDBPassword`` are non-empty, the client
        authenticates with ``MongoDBStorage`` as the credential source before
        counting. Returns ``None``.
        """
        cfg = self.configdict
        if cfg['MongoDBPath'] == "":
            return

        client = MongoClient(cfg['MongoDBPath'])
        try:
            if cfg['MongoDBUserName'] != "" and cfg['MongoDBPassword'] != "":
                # NOTE(review): Database.authenticate() and Cursor.count() are
                # legacy PyMongo APIs (removed in PyMongo 4); they are kept so
                # the cell keeps working with the driver it was written for.
                client.the_database.authenticate(
                    cfg['MongoDBUserName'],
                    cfg['MongoDBPassword'],
                    source=cfg['MongoDBStorage'],
                )
            storedb = client[cfg['MongoDBStorage']]
            # Resolve every collection first (so a missing config key fails
            # before any query is issued), then count.
            collections = [storedb[cfg[key]] for key in self._TOPIC_KEYS]
            totals = [coll.find().count() for coll in collections]
        finally:
            # The client was previously never closed, leaking its connections.
            client.close()

        # One pre-built string keeps the output identical on Python 2 and 3:
        # the label, a separating space, then the space-separated counts.
        print(self._REPORT_LABEL + " " + " ".join(str(n) for n in totals))
def parse_config(lines):
    """Build a settings dict from an iterable of ``key=value`` lines.

    Lines without an ``=`` are ignored. Each remaining line is split on the
    FIRST ``=`` only, so values that themselves contain ``=`` (for example a
    MongoDB URI with ``?replicaSet=rs0``) are kept whole. Surrounding
    whitespace, including the trailing newline, is stripped from both the key
    and the value. A later line with the same key overrides an earlier one.
    """
    settings = {}
    for line in lines:
        if '=' not in line:
            continue
        name, _, value = line.partition('=')
        settings[name.strip()] = value.strip()
    return settings


def main(config='../config/Config.conf'):
    """Read the configuration file at ``config`` and print collection counts.

    The parsed settings are handed to ``CountMongo``, whose ``count()`` does
    the actual reporting.
    """
    with open(config) as config_file:
        configdict = parse_config(config_file)
    testmongo = CountMongo(configdict)
    testmongo.count()


# Run the report only when this code is executed directly, not on import.
if __name__ == "__main__":
    main()
In [ ]:
In [ ]: