In [1]:
%matplotlib inline
import networkx as nx
import csv
import re
import pandas as pd
import numpy as np
import math
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from networkx.algorithms.connectivity import minimum_st_edge_cut
from networkx.algorithms.flow import shortest_augmenting_path
from sklearn.cluster import KMeans
In [2]:
def L1(x, y):
    """Return the L1 (Manhattan) distance between two equal-length vectors.

    Parameters
    ----------
    x, y : sequences of numbers of equal length.

    Returns
    -------
    The sum of absolute element-wise differences, or None (after printing a
    message) if the lengths differ.
    """
    if len(x) != len(y):
        # Bug fix: the original returned the undefined name `Null`, which
        # raised NameError instead of signalling the error gracefully.
        print('vectors must be equal length for L1')
        return None
    # Sum of absolute element-wise differences.
    return sum(math.fabs(a - b) for a, b in zip(x, y))
In [3]:
# Preview the first few raw lines of the training CSV alongside their parse.
i = 0
with open('training.1600000.processed.noemoticon.csv') as f_in:
    for line in f_in:
        # Bug fix: csv.reader(line) iterates the *characters* of the string,
        # so each character was parsed as its own CSV record.  Wrapping the
        # line in a list makes the reader see one line and yield its fields.
        print(list(csv.reader([line], skipinitialspace=True)))
        print(line)
        i += 1
        if i > 3:
            break
In [14]:
"""0 - the polarity of the tweet (0 = negative, 2 = neutral, 4 = positive)
1 - the id of the tweet (2087)
2 - the date of the tweet (Sat May 16 23:58:44 UTC 2009)
3 - the query. If there is no query, then this value is NO_QUERY.
4 - the user that tweeted
5 - the text of the tweet"""
cols = ['polarity','tweetID','date','Query','UserID','text']
df = pd.read_csv('training.1600000.processed.noemoticon.csv',names=cols,encoding='latin-1')#names=m_cols ,
In [4]:
# Build an undirected "mention" graph straight from the raw CSV text:
# nodes are users, edges connect an author to each @handle in their tweet.
G=nx.Graph()
m=0  # number of lines whose text field contains an '@'
n=0  # number of lines processed (progress counter)
# with open('training.1600000.processed.noemoticon.csv', encoding='latin-1') as f_in:
with open('training.1600000.processed.noemoticon.csv') as f_in:
    for line in f_in:
        # csv.reader(line) iterates the line character-by-character.  Because
        # the fields are quoted, each quoted field is reassembled into one
        # record (with '\n' between the original characters), so indices 8 and
        # 10 land on the user and text fields respectively.
        # NOTE(review): this depends on the exact quoting of this file —
        # verify before reusing on any other input.
        lineX = list(csv.reader(line, skipinitialspace=True))
        G.add_node(lineX[8][0])
        if '@' in lineX[10][0]:
            m+=1
        # Split the text on anything that is not a letter, '_' or '@'.
        for t in re.split('[^a-zA-Z\_\@]', lineX[10][0]):
            if t!='' and t[0]=='@' and t!='@':
                # Edge from the author to the mentioned handle (without '@').
                G.add_edge(lineX[8][0],t[1:])
        n+=1
        if n%100000==0:
            print(n)  # progress indicator every 100k lines
print(nx.number_of_nodes(G))
In [6]:
print(nx.number_of_edges(G))
In [ ]:
In [7]:
"""0 - the polarity of the tweet (0 = negative, 2 = neutral, 4 = positive)
1 - the id of the tweet (2087)
2 - the date of the tweet (Sat May 16 23:58:44 UTC 2009)
3 - the query. If there is no query, then this value is NO_QUERY.
4 - the user that tweeted
5 - the text of the tweet"""
cols = ['polarity','tweetID','date','Query','UserID','text']
df = pd.read_csv('training.1600000.processed.noemoticon.csv',names=cols,encoding='latin-1')#names=m_cols ,
In [15]:
# Rebuild the mention graph from the parsed DataFrame (cleaner than the raw
# character-level CSV hack used earlier).
G = nx.Graph()  # fresh graph so that re-running this cell doesn't accumulate state
m = 0  # tweets whose text contains an '@'
n = 0  # rows processed
for index, row in df.iterrows():
    # Fix: label-based access instead of positional row[4]/row[5] —
    # integer positional indexing of a Series is deprecated and removed in
    # pandas 2.x; the columns are named 'UserID' and 'text'.
    user = row['UserID']
    text = row['text']
    G.add_node(user)
    if '@' in text:
        m += 1
        # Raw string avoids invalid-escape-sequence warnings; \_ and \@ in the
        # original were just _ and @.
        for t in re.split(r'[^a-zA-Z_@]', text):
            if t != '' and t[0] == '@':
                # Edge from author to mentioned handle (drop the '@').
                G.add_edge(user, t[1:])
    n += 1
In [16]:
len(G)
Out[16]:
In [8]:
float(nx.number_of_edges(G))/float(nx.number_of_nodes(G))
Out[8]:
In [ ]:
In [16]:
# Build DegDic: degree value -> number of nodes with that degree.
# Fix: wrap in dict(...) — on networkx 2.x, nx.degree(G) returns a DegreeView
# which has no .values(); dict(...) works on both 1.x and 2.x.
DegList = list(dict(nx.degree(G)).values())
DegDic = {}
for D in DegList:
    # Count occurrences of each degree value.
    DegDic[D] = DegDic.get(D, 0) + 1
In [19]:
# Degree histogram with a log-scaled count axis — the degree distribution of
# a mention graph is heavy-tailed, so a linear y axis would hide the tail.
plt.yscale('log')
plt.ylabel('Log Count')
plt.title('Log plot of Degree Distribution of Graph')
plt.xscale('linear')
plt.xlabel('Degree')
plt.hist(DegList,bins=100)
Out[19]:
In [20]:
# Print the hub accounts: nodes with degree above 3000.
# Fix: wrap in dict(...) — on networkx 2.x, nx.degree(G) returns a DegreeView
# which has no .items(); dict(...) works on both 1.x and 2.x.
DegList = list(dict(nx.degree(G)).items())
for D in DegList:
    if D[1] > 3000:
        print(D)  # (node, degree)
In [21]:
# Log-log degree distribution: an approximately straight line would suggest
# a power-law (scale-free) degree distribution.
plt.title('Log-Log of Degree Distribution of Graph')
plt.ylabel('Log Count')
plt.xlabel('Log Degree')
DegList = sorted(DegDic.items())  # (degree, count) pairs ordered by degree
Xlist, Ylist = zip(*DegList)
# NOTE(review): basex/basey were renamed to `base` in matplotlib 3.3+ — this
# call will TypeError on modern matplotlib; update when upgrading.
plt.loglog(Xlist,Ylist, basex=np.e, basey=np.e)
# Free the (potentially large) coordinate tuples.
del Xlist
del Ylist
In [ ]:
In [27]:
del DegDic
del DegList
In [9]:
# Largest connected component of the mention graph.
# Fix: nx.connected_component_subgraphs() was removed in networkx 2.4.
# max over connected_components + G.subgraph works on both 1.x and 2.x;
# .copy() yields a mutable graph (2.x subgraphs are frozen views), which the
# later self-loop-removal cell requires.
LargestCC = G.subgraph(max(nx.connected_components(G), key=len)).copy()
print(nx.number_of_nodes(LargestCC))
In [120]:
# Drop self-loops (k-core / centrality analysis below assumes none).
# Fix: Graph.selfloop_edges() was removed in networkx 2.x; the module-level
# nx.selfloop_edges() is the current API.  list(...) avoids mutating the
# graph while iterating over the edge view.
LargestCC.remove_edges_from(list(nx.selfloop_edges(LargestCC)))
# Scaler used to map centrality scores into a common [50, 800] range.
scaler = MinMaxScaler((50,800))
In [ ]:
# How fast does the largest component shrink as we demand a higher minimum
# degree?  Record the size of the k-core for k = 2..9.
CoreCounts = []
for k_val in range(2, 10):
    CoreCounts.append(nx.number_of_nodes(nx.k_core(LargestCC, k_val)))
In [ ]:
plt.yscale('log')
plt.plot(range(2,10),CoreCounts)
In [37]:
#WOWWWWWWWW worth including
Out[37]:
In [10]:
core7 = nx.k_core(LargestCC,7)
In [11]:
# Betweenness centrality of every node in the 7-core, scaled into [50, 800].
# Fix: MinMaxScaler.fit_transform requires a 2-D array; the original passed
# the 1-D vector, which raises ValueError on modern scikit-learn.  The
# [:, np.newaxis] reshape matches the eigenvector/closeness cells below.
Bcent = np.array(list(nx.betweenness_centrality(core7, normalized=True).values()))
scaledBC = scaler.fit_transform(Bcent[:, np.newaxis])
In [12]:
# Eigenvector centrality (numpy implementation) scaled into the same
# [50, 800] range; [:, np.newaxis] reshapes the 1-D vector to the (n, 1)
# column MinMaxScaler expects.
Ecent = np.array(list(nx.eigenvector_centrality_numpy(core7).values()))
scaledEC = scaler.fit_transform(Ecent[:,np.newaxis])
In [ ]:
# Closeness centrality scaled into the same [50, 800] range, reshaped to the
# (n, 1) column MinMaxScaler expects.
Ccent = np.array(list(nx.closeness_centrality(core7).values()))
scaledCC = scaler.fit_transform(Ccent[:,np.newaxis])
In [17]:
# Compare the three scaled centrality vectors by their pairwise L1 distance.
# NOTE(review): %d truncates the float total; %f shows the per-node average.
L1_dist = [L1(scaledCC,scaledBC),L1(scaledEC,scaledBC),L1(scaledEC,scaledCC)]
print("""From the three measures we have explored, Eigenvalue centality
Betweenness centality and Closeness centality. We can now evaluate the
L1 distance between the measures""")
D = L1_dist[0]
print("The L1 distance between Closeness centality and Betweenness Centrality is %d implying average distance of %f"%\
(D, D*1.0/nx.number_of_nodes(core7)))
D = L1_dist[1]
print("The L1 distance between Eigenvalue centality and Betweenness Centrality is %d implying average distance of %f"%\
(D, D*1.0/nx.number_of_nodes(core7)))
D = L1_dist[2]
print("The L1 distance between Closeness centality and Eigenvalue Centrality is %d implying average distance of %f"%\
(D, D*1.0/nx.number_of_nodes(core7)))
In [18]:
plt.yscale('log')
plt.hist(scaledBC)
Out[18]:
In [19]:
plt.yscale('log')
plt.hist(scaledEC)
Out[19]:
In [21]:
plt.yscale('log')
plt.hist(scaledCC)
Out[21]:
In [11]:
# Spectral bisection: the sign of each entry of the Fiedler vector (the
# eigenvector of the second-smallest Laplacian eigenvalue) splits the 7-core
# into two communities; s holds 0/1 labels.
f = nx.fiedler_vector(core7)
s = np.where(f > 0, 1, 0)
In [12]:
colors = ['#d7191c', '#2b83ba']
node_colors = [colors[s[v]] for v in range(nx.number_of_nodes(core7))]
nx.draw(core7, node_color=node_colors,node_size=10)
In [55]:
# Full eigendecomposition of the graph Laplacian of the 7-core.
# Fix: the Laplacian is symmetric, so use eigh — np.linalg.eig can return
# spurious complex components from floating-point asymmetry; eigh guarantees
# real eigenvalues and is faster on symmetric input.
L = nx.laplacian_matrix(core7).todense()
w, v = np.linalg.eigh(L)
v = np.array(v)
# eigh already returns eigenvalues in ascending order; worder is kept so the
# downstream cells that index by it keep working unchanged.
worder = np.argsort(w)
#pos = {i: np.array([f[0], f[1]]) for i, f in enumerate(zip(v[:,worder[1]], v[:,worder[2]]))}
#pos = {i: np.array([f[0], f[1]]) for i, f in enumerate(zip(v[:,worder[1]], v[:,worder[2]]))}
In [56]:
# Spectral embedding: scale each eigenvector by its eigenvalue and order the
# columns by increasing eigenvalue — column 0 is the trivial (constant)
# eigenvector, so the clustering cells below use columns 1..k.
X = v @ np.diag(w)
X = X[:,worder]
In [57]:
# Elbow-method data: k-means inertia on the 2-D spectral embedding
# (columns 1 and 2) for k = 2..10.
error = np.zeros(9)
for idx, k in enumerate(range(2, 11)):
    kmeans = KMeans(init='k-means++', n_clusters=k, n_init=10)
    kmeans.fit_predict(X[:, 1:3])
    error[idx] = kmeans.inertia_
In [58]:
plt.plot(range(2,11),error)
Out[58]:
In [68]:
# Final clustering: 6 clusters (chosen from the elbow plot above) on the
# 2-D spectral embedding.
kmeans = KMeans(init='k-means++', n_clusters=6, n_init=10)
kmeans.fit_predict(X[:,1:3])
centroids = kmeans.cluster_centers_
labels = kmeans.labels_   # cluster id per node, in core7 node order
error = kmeans.inertia_   # final within-cluster sum of squares
In [70]:
# Draw the 7-core with one colour per k-means cluster label.
colors = ['#d7191c', '#ffffbf', '#2b83ba', 'green','orange','maroon']
node_colors = [colors[labels[i]] for i in range(nx.number_of_nodes(core7))]
nx.draw(core7, node_color=node_colors,node_size=10)
In [23]:
import sklearn
# Fix: Python 2 print statement -> print() function (SyntaxError on Python 3).
print(sklearn.__version__)
In [18]:
# Fix: a bare `import` with no module name is a SyntaxError — removed.
from sklearn import mixture
mixture.GaussianMixture()
In [67]:
# TF-IDF document-term matrix of the tweet texts.
# NOTE(review): TfidfVectorizer is imported and TextList is defined in other
# cells — this cell only runs after them; confirm execution order on a fresh
# kernel.
vectorizer = TfidfVectorizer(stop_words='english', min_df=8, max_df=0.8)
dtm = vectorizer.fit_transform(TextList)
del TextList  # free memory; the raw text list is no longer needed
# NOTE(review): get_feature_names() was removed in scikit-learn 1.2 — use
# get_feature_names_out() on modern versions.
terms = vectorizer.get_feature_names()
print("Finished")
In [5]:
df = pd.read_csv('training.1600000.processed.noemoticon.csv',names=cols,encoding='latin-1')#names=m_cols ,
In [25]:
prefix = './trainingandtestdata/'
testfile = prefix + 'testdata.manual.2009.06.14.csv'
trainfile = prefix + 'training.1600000.processed.noemoticon.csv'
In [27]:
df = pd.read_csv(trainfile,names=cols,encoding='latin-1')#names=m_cols ,
In [28]:
len(df)
Out[28]:
In [29]:
df.head()
Out[29]:
In [41]:
df_small = df.iloc[:600000]
In [31]:
len(df_small)
Out[31]:
In [44]:
#long
#long
# Long-running cell: TF-IDF over all 1.6M tweet texts.  Keeps terms that
# appear in at least 8 tweets and at most 80% of them, dropping English
# stop words.
from sklearn.feature_extraction.text import TfidfVectorizer
vectorizer = TfidfVectorizer(stop_words='english', min_df=8,max_df=0.8)
M = vectorizer.fit_transform(df.text)
In [48]:
# Fix: Python 2 print statement -> print() function (SyntaxError on Python 3).
print(type(M))
M  # sparse TF-IDF matrix; last expression shows its repr (shape, nnz)
Out[48]:
In [55]:
#doesnt work well
# from scipy import io
# with open('M.mtx','w') as fout:
# io.mmwrite(fout, M)#, comment='', field=None, precision=None, symmetry=None)[source]
# io.mmwrite('M', M)
In [53]:
# LSA: reduce the sparse TF-IDF matrix to 50 dense components.
from sklearn.decomposition import TruncatedSVD
svd = TruncatedSVD(n_components=50, n_iter=10, random_state=42)
X = svd.fit_transform(M)  # (n_tweets, 50) dense embedding
In [139]:
svd.components_
Out[139]:
In [56]:
X.shape
Out[56]:
In [57]:
#Kmeans
# Clustering with some parameters.
from sklearn.cluster import KMeans
ncl = 8   # number of clusters
k = 30    # number of SVD components to cluster on
kmeans = KMeans(n_clusters=ncl, init='k-means++', max_iter=100, n_init=10,random_state=0)
y = kmeans.fit_predict(X[:,:k])
# centroids = kmeans.cluster_centers_
# labels = kmeans.labels_
# error = kmeans.inertia_
y  # cluster label per tweet; last expression displays the array
Out[57]:
In [ ]:
for tweets in df[:1000]
In [59]:
df_ = df.copy()
In [131]:
df_['class'] = y_30
In [138]:
df_[df_['class']==4]
Out[138]:
In [133]:
df_[df_['class']==5]
Out[133]:
In [ ]:
In [103]:
df_[df_['UserID']=='usagiko']
Out[103]:
In [130]:
# Majority class per user: classes[user] is the k-means class that the user's
# tweets land in most often, ignoring class 1 (treated as the noise class).
classes = {}
for user in df_['UserID'].unique():
    vals = {}
    for i in df_[df_['UserID']==user]['class']:
        if i != 1:
            vals[i] = vals.get(i, 0) + 1
    try:
        # Bug fix: the original used key=stats.get, where `stats` is an
        # unrelated demo dict defined in another cell — every lookup returned
        # None, breaking the max.  vals.get picks the most frequent class.
        classes[user] = max(vals, key=vals.get)
    except ValueError:
        # All of this user's tweets were class 1 -> vals is empty.
        classes[user] = 1
In [ ]:
import statistics
from statistics import StatisticsError
import random
import math
# colors = ['#d7191c', '#ffffbf', '#2b83ba', 'green','orange','maroon','black']
# Colour each 7-core node by the modal k-means class of that user's tweets.
user_class = []
for g in core7:
    try:
        try:
            # Bug fixes: filter by the current node `g` (the original reused a
            # stale `user` variable from a previous cell) and use a single
            # consistent name (the original assigned Id_Pred but then read
            # ID_Pred, raising NameError).
            id_pred = list(df_[df_['UserID'] == g]['class'])
            node_colors.append(colors[statistics.mode(id_pred)])
        except StatisticsError:
            # No unique mode: pick one of the user's classes at random.
            node_colors.append(colors[id_pred[random.randint(0, len(id_pred) - 1)]])
    except KeyError:
        # User not present in the tweet frame -> fallback colour.
        node_colors.append(colors[6])
In [112]:
# Worked example of the per-user majority-class computation for one user.
# df_[df_['UserID']=='ForzaRagazza']
vals = {}
for i in df_[df_['UserID']=='usagiko']['class']:
    if i != 1:  # ignore the "noise" class
        vals[i] = vals.get(i, 0) + 1
# Bug fix: was key=stats.get — `stats` is an unrelated demo dict whose .get
# returns None for every class id; vals.get returns the most frequent class.
max(vals, key=vals.get)
Out[112]:
In [100]:
# Sanity check: max with a value-lookup key returns the key with the largest
# value ('b' here).
stats = {'a': 2, 'b': 4, 'c': 1}
max(stats.keys(), key=lambda key: stats[key])
Out[100]:
In [97]:
df_
Out[97]:
In [95]:
dfff = pd.DataFrame({'A': [1, 2, 1, 2, 1, 2, 3],'B': [5, 6, 5, 5, 5, 7, 6]})
# df.mode()
In [73]:
# df.columns#
df[u'UserID'].unique()
Out[73]:
In [66]:
#Kmeans
# Clustering with some parameters.
# Same clustering as before but kept under distinct names (kmeans_30 / y_30)
# so the earlier fit is not clobbered; uses the first 30 SVD components.
from sklearn.cluster import KMeans
ncl = 8   # number of clusters
k = 30    # number of SVD components to cluster on
kmeans_30 = KMeans(n_clusters=ncl, init='k-means++', max_iter=100, n_init=10,random_state=0)
y_30 = kmeans_30.fit_predict(X[:,:30])
# centroids = kmeans.cluster_centers_
# labels = kmeans.labels_
# error = kmeans.inertia_
y_30  # cluster label per tweet; last expression displays the array
Out[66]:
In [ ]:
# # GG=nx.Graph() # so that we dont destroy G if we start running this cell
# m=0
# n=0
# for index, row in df_.iterrows():
# G.add_node(row[4])
# if '@' in row[5]:
# m+=1
# for t in re.split('[^a-zA-Z\_\@]', row[5]):
# if t!='' and t[0]=='@':
# G.add_edge(row[4],t[1:])
# n+=1
In [127]:
colors = ['b','w','r','g','c','m','y','k']
# g : green.
# r : red.
# c : cyan.
# m : magenta.
# y : yellow.
# k : black.
# w : white.
In [125]:
# Pair each 7-core node with its predicted class; users that never appear in
# `classes` default to class 1.
labeled_nodes = [(node, classes.get(node, 1)) for node in core7.nodes()]
In [ ]:
labeled
In [129]:
# Draw the 7-core coloured by each node's predicted class.
fig = plt.figure(figsize=(12,6))
ax = plt.subplot(111)
# labeled_nodes = [(x,classes[x]) for x in core7.nodes()]
# which = np.random.choice(range(len(labeled_nodes)),500)
which = range(len(core7))
# mini_g = core7.subgraph([labeled_nodes[i][0] for i in which])
mini_g = core7
# One colour per node, looked up from its (node, class) pair.
node_colors = [labeled_nodes[i][1] for i in which]
node_colors = [colors[c] for c in node_colors]
# Fix: with_labels was the *string* 'False', which is truthy, so labels were
# being drawn (invisibly, at font_size=0).  Pass the boolean instead.
nx.draw(mini_g, node_color=node_colors, node_size=100, ax=ax, with_labels=False,
        alpha=0.2, font_size=0, width=0.1)
# nx.draw(Gc_core,nodelist=Gc_core.nodes()[:100], node_color=node_colors,node_size=100, ax=ax, with_labels='False',
#         alpha =0.2, font_size=0,width=0.1)
# nx.draw(Gc_core, node_color=node_colors,node_size=10, ax=ax, with_labels='True', font_size=16)
In [74]:
type(G.nodes()[0])
Out[74]:
In [79]:
G.nodes()[1]
Out[79]:
In [107]:
list(y_30).count(0)
Out[107]:
In [108]:
list(y_30).count(1)
Out[108]:
In [110]:
# Size of each of the ncl clusters.
# Fix: xrange and the print statement are Python 2 only.
for i in range(ncl):
    print(list(y_30).count(i))
In [111]:
# #Kmeans
# # Clustering with some parameters.
# from sklearn.cluster import KMeans
# ncl = 20
# k = 10
# kmeans_10_100 = KMeans(n_clusters=ncl, init='k-means++', max_iter=100, n_init=10,random_state=0)
# y_10_100 = kmeans_10_100.fit_predict(X[:,:10])
# # centroids = kmeans.cluster_centers_
# # labels = kmeans.labels_
# # error = kmeans.inertia_
# y_10_100
In [ ]:
# Show the tweets among the first 100 that were predicted into cluster 1.
# Fix: xrange and the print statement are Python 2 only.
for i in range(100):
    if pred[i] == 1:
        print(TextList[i])