In [26]:
    
from textblob.classifiers import NaiveBayesClassifier
def correct_format_training(line):
    """Turn one labelled line of training text into a ``(words, label)`` tuple.

    The line is first cleaned by applying a fixed sequence of substring
    replacements (newline and "- " are removed; "the ", " and" and " from"
    are each collapsed to a single space).  The cleaned text is then cut on
    ", ": the first piece is split on single spaces to give the word list and
    the second piece is returned as the label.

    Because the word list comes from ``split(" ")``, runs of spaces left by
    the replacements show up as empty strings in the result.

    Raises:
        IndexError: if the cleaned line contains no ", " separator.
    """
    substitutions = (
        ("\n", ""),
        ("- ", ""),
        ("the ", " "),
        (" and", " "),
        (" from", " "),
    )
    cleaned = line
    for old, new in substitutions:
        cleaned = cleaned.replace(old, new)

    fields = cleaned.split(", ")
    return (fields[0].split(" "), fields[1])
def correct_format_production(line):
    """Turn one unlabelled line of text into a list of words.

    Applies a fixed sequence of substring replacements (newline and "- " are
    removed; "the ", " and" and " from" are each collapsed to a single space)
    and then splits the result on single spaces.  Consecutive spaces left by
    the replacements therefore yield empty strings in the returned list.
    """
    substitutions = (
        ("\n", ""),
        ("- ", ""),
        ("the ", " "),
        (" and", " "),
        (" from", " "),
    )
    cleaned = line
    for old, new in substitutions:
        cleaned = cleaned.replace(old, new)
    return cleaned.split(" ")
# Build the training set: one (words, label) tuple per line of the file.
# The lines are labelled, so they go through correct_format_training; the
# notebook defines no plain `correct_format` helper, and calling that name
# raises NameError on a fresh kernel.
with open("training_data/training_shuffled_data.txt", "r") as ins:
    training_set = [correct_format_training(line) for line in ins]

NBC = NaiveBayesClassifier(training_set)
# Single-argument print(...) behaves the same on Python 2 and Python 3.
print(NBC.classify("Refactoring something or other"))

# NBC.accuracy needs (features, label) pairs, so the validation file is
# parsed with the same labelled-line formatter as the training file.
with open("training_data/validation_shuffled_data.txt", "r") as ins:
    validation_set = [correct_format_training(line) for line in ins]

# Last expression of the cell: the accuracy value is the cell's output.
NBC.accuracy(validation_set)
    
    
    Out[26]:
In [40]:
    
import pickle
# Serialize the trained classifier to disk.  The `with` block guarantees the
# file handle is closed even if pickle.dump raises part-way through.
with open('my_classifier.pickle', 'wb') as f:
    pickle.dump(NBC, f)
    
In [41]:
    
# Read the classifier back from disk.  The `with` block closes the file even
# if unpickling fails.
# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only load pickles that were produced by a trusted source.
with open('my_classifier.pickle', 'rb') as f:
    pickled_NBC = pickle.load(f)
    
In [44]:
    
# Classify the text "Refactoring" with the reloaded classifier and compare the
# predicted label against "Unknown"; the resulting boolean is the cell output.
pickled_NBC.classify("Refactoring") == "Unknown"
    
    Out[44]:
In [ ]:
    
from github import Github
from random import randint
# NOTE(review): placeholder credentials. Real credentials should be read from
# the environment (or prompted for) rather than stored in the notebook.
g = Github("username", "password")
input_string = "facebook/react" #Replaced with user given string
repo = g.get_repo(input_string, False)
root_dir = repo.get_git_tree(sha="master", recursive=True)

# One four-slot counter list per path found in the recursive git tree.
# The loop variable is not called `file`, which would shadow the Python 2
# builtin of that name.
fileHash = {}
for entry in root_dir.tree:
    fileHash[entry.path] = [0,0,0,0]

# For every path, walk the commits that touched it and bump one counter per
# commit.  Iterate fileHash (the dict built just above): the name `magicHash`
# is never defined in this notebook and raised NameError.
for key in fileHash:
    commits = repo.get_commits(path=key)
    for commit in commits:
        fileHash[key][randint(0,3)]+= 1 #Replace with model results.

# Single-argument print(...) behaves the same on Python 2 and Python 3.
print(g.rate_limiting)