In this notebook I will explore some of the results obtained by running the run_lda.py
file!
There are 3 main things that I do, each of which produces one of the pickled outputs below:
LDA_identified_bad_handles.pkl
verified_handles_lda.pkl
final_database_lda_verified.pkl
In [1]:
%run helper_functions.py
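This notebook relies on pickle_object and unpickle_object from helper_functions.py. A minimal sketch of what those helpers are assumed to do (the real definitions live in helper_functions.py; note that pickle_object is assumed to append the .pkl suffix itself):

import pickle

def pickle_object(obj, name):
    # assumed behaviour: serialise obj to '<name>.pkl' in the working directory
    with open("{}.pkl".format(name), "wb") as f:
        pickle.dump(obj, f)

def unpickle_object(filename):
    # assumed behaviour: load and return the object stored in the given .pkl file
    with open(filename, "rb") as f:
        return pickle.load(f)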
In [2]:
lst = unpickle_object("2nd_degree_connections_LDA_complete.pkl")
In [20]:
lst[8] #an example of a bad handle dictionary
Out[20]:
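Based on how lst is used below, each element is a dictionary keyed by a single Twitter handle, and its value is a sub-dictionary that only contains an 'LDA' key (a collection of topic words) when the LDA analysis succeeded. A hypothetical sketch of the structure (handle names and the 'tweets' key are invented for illustration):

# a 'bad' entry: the sub-dictionary has no 'LDA' key
{'some_bad_handle': {'tweets': [...]}}

# a 'good' entry: the sub-dictionary includes an 'LDA' topic list
{'some_good_handle': {'tweets': [...], 'LDA': ['machine', 'data', ...]}}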
In [3]:
handle_names = []
for dictionary in lst:
    name = list(dictionary.keys())
    handle_names.append(name)
In [4]:
handle_names = sum(handle_names, [])
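Each element appended above is itself a list of keys (typically one handle per dictionary), so sum(handle_names, []) flattens the list of lists into one flat list of handles. An equivalent and generally faster way to flatten would be:

from itertools import chain

handle_names = list(chain.from_iterable(handle_names))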
In [5]:
#an example of finding which users in my LDA results tweet about "machine" --> alluding to "machine learning"
for cnt, handle in enumerate(handle_names):
    try:
        topics = lst[cnt][handle]['LDA']
        if "machine" in topics:
            print(handle)
    except KeyError:
        # handles without an 'LDA' entry are skipped
        pass
In [7]:
# handles to be removed as they do not have a valid LDA analysis
handle_to_remove = []
for cnt, handle in enumerate(handle_names):
    sub_dict = lst[cnt][handle]
    if "LDA" not in sub_dict:
        handle_to_remove.append(handle)

# positions of the bad handles within handle_names
indicies = []
for handle in handle_to_remove:
    index = handle_names.index(handle)
    indicies.append(index)
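The same bookkeeping can be done in a single pass by recording the index alongside the handle, which also avoids the repeated handle_names.index lookups (a sketch, assuming each handle appears only once):

handle_to_remove = []
indicies = []
for i, handle in enumerate(handle_names):
    if "LDA" not in lst[i][handle]:
        handle_to_remove.append(handle)
        indicies.append(i)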
In [14]:
#extracting the valid LDA handles
bad_indices = frozenset(indicies)  # build the lookup set once, outside the comprehension
verified_handles_lda = [v for i, v in enumerate(handle_names) if i not in bad_indices]
In [34]:
handle_to_remove[:5] #a peek at the 'bad handles'
Out[34]:
In [17]:
pickle_object(verified_handles_lda, "verified_handles_lda")
In [18]:
pickle_object(handle_to_remove, "LDA_identified_bad_handles")
In [30]:
#extracting the appropriate dictionaries to be used in TF-IDF analysis
final_database_lda_verified = [v for i, v in enumerate(lst) if i not in bad_indices]
In [33]:
pickle_object(final_database_lda_verified, "final_database_lda_verified")
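For the downstream TF-IDF step, these outputs can be reloaded with the same helper; assuming pickle_object writes files with a .pkl suffix, that would look like:

verified_handles_lda = unpickle_object("verified_handles_lda.pkl")
lda_identified_bad_handles = unpickle_object("LDA_identified_bad_handles.pkl")
final_database_lda_verified = unpickle_object("final_database_lda_verified.pkl")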