In [26]:
from textblob.classifiers import NaiveBayesClassifier
# Whole tokens removed from every message: the "-" bullet marker and the
# filler words "the", "and" and "from". Matching is case-sensitive and applies
# to complete whitespace-separated tokens only, so words that merely contain
# one of these strings (e.g. "android", "bathe") are left intact.
_STOP_TOKENS = frozenset(["-", "the", "and", "from"])


def _tokenize(text):
    """Split ``text`` on any whitespace and drop the tokens in ``_STOP_TOKENS``.

    Splitting with no separator argument also discards newlines and never
    yields empty-string tokens.
    """
    return [token for token in text.split() if token not in _STOP_TOKENS]


def correct_format_training(line):
    """Turn one labelled line of the form ``"<message>, <label>"`` into a pair.

    The label is the text after the LAST ``", "`` so that a message which
    itself contains ``", "`` keeps all of its words and its real label.

    Args:
        line: One raw line of labelled text, with or without a trailing newline.

    Returns:
        A ``(words, label)`` tuple where ``words`` is the list of message
        tokens with stop tokens removed and ``label`` is the stripped label.

    Raises:
        IndexError: If ``line`` contains no ``", "`` separator. The exception
            type matches what indexing the split result raised before.
    """
    message, separator, label = line.strip().rpartition(", ")
    if not separator:
        raise IndexError("no ', ' label separator in line: %r" % (line,))
    return (_tokenize(message), label.strip())


def correct_format_production(line):
    """Turn one unlabelled line into its list of message tokens.

    Args:
        line: One raw line of text, with or without a trailing newline.

    Returns:
        The whitespace-separated tokens of ``line`` with stop tokens removed;
        an empty list for a blank line.
    """
    return _tokenize(line)
def _read_labelled_examples(path):
    """Parse every non-blank line of the file at ``path`` with
    ``correct_format_training`` and return the results as a list.

    Blank lines (such as a trailing newline at the end of the file) carry no
    label and are skipped instead of being passed to the parser.
    """
    with open(path, "r") as ins:
        return [correct_format_training(line) for line in ins if line.strip()]


# Train the classifier on the labelled training file.
training_set = _read_labelled_examples("training_data/training_shuffled_data.txt")
NBC = NaiveBayesClassifier(training_set)

# Quick smoke test: print the label predicted for a sample message.
print(NBC.classify("Refactoring something or other"))

# Score the classifier on the validation file; as the last expression of the
# cell, the accuracy value is shown as the cell output.
validation_set = _read_labelled_examples("training_data/validation_shuffled_data.txt")
NBC.accuracy(validation_set)
Out[26]:
In [40]:
import pickle
# Persist the trained classifier to disk. The ``with`` block closes the file
# even if pickling raises part-way through.
with open('my_classifier.pickle', 'wb') as f:
    pickle.dump(NBC, f)
In [41]:
# Reload the classifier from disk. The ``with`` block closes the file even if
# unpickling raises.
# NOTE(review): pickle.load can execute arbitrary code while loading; only
# open pickle files that come from a trusted source.
with open('my_classifier.pickle', 'rb') as f:
    pickled_NBC = pickle.load(f)
In [44]:
# Check on the reloaded classifier: the cell output shows whether the single
# word "Refactoring" is classified with the label "Unknown".
"Unknown" == pickled_NBC.classify("Refactoring")
Out[44]:
In [ ]:
from github import Github
from random import randint
import os

# Credentials are read from the environment so they are never stored in the
# notebook; a missing variable fails immediately with a KeyError.
g = Github(os.environ["GITHUB_USERNAME"], os.environ["GITHUB_PASSWORD"])
input_string = "facebook/react" #Replaced with user given string
repo = g.get_repo(input_string, False)
root_dir = repo.get_git_tree(sha="master", recursive=True)

# One four-slot counter list per path found in the repository tree.
fileHash = {}
for entry in root_dir.tree:
    fileHash[entry.path] = [0, 0, 0, 0]

# For every tracked path, walk its commits and bump one of the four counters.
# The slot is currently picked at random as a stand-in for the classifier.
# NOTE(review): this issues a separate commits query for each path, which may
# use up the API rate limit on a large repository - confirm before running.
for key in fileHash:
    commits = repo.get_commits(path=key)
    for commit in commits:
        fileHash[key][randint(0, 3)] += 1 #Replace with model results.

# Show the rate-limit information reported by the client after the run.
print(g.rate_limiting)