notebook.community

Edit and run



In [2]:

    
%pylab









    



Using matplotlib backend: TkAgg
Populating the interactive namespace from numpy and matplotlib



In [3]:

    
import matplotlib.pyplot as plt



In [4]:

    
import pandas as pd



In [5]:

    
from sentiment_demo import regularize









    



['conservative', 'labour', 'libdem', 'snp', 'ukip']
[0.41583143086816693, 0.4480580568720382, 0.5156653710247345, 0.4822474402730382, 0.48965395526459465]
[-0.42693501683501484, -0.4119061813186813, -0.3948863636363632, -0.43810431965442703, -0.43806816693944417]



In [6]:

    
import csv



In [7]:

    
infile = open('labeled_tweets.csv', encoding='utf8')
reader = csv.reader(infile)



In [8]:

    
regular = regularize(reader)



In [9]:

    
regular[0]









    Out[9]:





['conservative', 0.1779]



In [10]:

    
df = pd.DataFrame(regular, columns=['party', 'score'])



In [11]:

    
df









    Out[11]:






  
    
      
      party
      score
    
  
  
    
      0
      conservative
      0.1779
    
    
      1
      ukip
      0.4588
    
    
      2
      ukip
      0.3802
    
    
      3
      labour
      -0.4389
    
    
      4
      conservative
      -0.4389
    
    
      5
      conservative
      0.0258
    
    
      6
      labour
      0.2960
    
    
      7
      conservative
      0.2960
    
    
      8
      snp
      -0.4019
    
    
      9
      labour
      -0.4019
    
    
      10
      snp
      0.2732
    
    
      11
      labour
      0.2732
    
    
      12
      conservative
      0.2732
    
    
      13
      conservative
      0.8126
    
    
      14
      snp
      0.6956
    
    
      15
      labour
      0.6956
    
    
      16
      snp
      0.6996
    
    
      17
      labour
      0.4019
    
    
      18
      conservative
      0.0258
    
    
      19
      snp
      0.2003
    
    
      20
      conservative
      0.2003
    
    
      21
      libdem
      -0.2263
    
    
      22
      conservative
      -0.2263
    
    
      23
      snp
      0.3182
    
    
      24
      labour
      0.5267
    
    
      25
      snp
      0.0772
    
    
      26
      labour
      0.0772
    
    
      27
      snp
      -0.8225
    
    
      28
      labour
      -0.8225
    
    
      29
      conservative
      -0.8225
    
    
      ...
      ...
      ...
    
    
      21894
      labour
      -0.2960
    
    
      21895
      snp
      0.3818
    
    
      21896
      ukip
      0.0972
    
    
      21897
      snp
      0.0516
    
    
      21898
      libdem
      0.0516
    
    
      21899
      ukip
      0.0516
    
    
      21900
      labour
      0.0516
    
    
      21901
      conservative
      0.0516
    
    
      21902
      conservative
      -0.4404
    
    
      21903
      labour
      0.8519
    
    
      21904
      libdem
      0.8885
    
    
      21905
      snp
      -0.4404
    
    
      21906
      labour
      -0.4404
    
    
      21907
      labour
      0.4019
    
    
      21908
      conservative
      0.4019
    
    
      21909
      ukip
      0.8889
    
    
      21910
      ukip
      0.0772
    
    
      21911
      ukip
      0.3612
    
    
      21912
      labour
      0.4019
    
    
      21913
      conservative
      0.4019
    
    
      21914
      conservative
      -0.5574
    
    
      21915
      conservative
      -0.2960
    
    
      21916
      ukip
      0.3182
    
    
      21917
      conservative
      0.5574
    
    
      21918
      labour
      -0.7500
    
    
      21919
      conservative
      -0.7500
    
    
      21920
      snp
      -0.2960
    
    
      21921
      labour
      -0.2960
    
    
      21922
      ukip
      0.2732
    
    
      21923
      snp
      -0.1027
    
  

21924 rows × 2 columns



In [11]:

    
grouped = df['score'].groupby(df['party'])



In [12]:

    
grouped.describe()









    Out[12]:





party              
conservative  count    2135.000000
              mean        0.064120
              std         0.481144
              min        -0.982100
              25%        -0.340000
              50%         0.128000
              75%         0.473900
              max         0.957100
labour        count    1572.000000
              mean        0.049805
              std         0.477151
              min        -0.927400
              25%        -0.421500
              50%         0.140600
              75%         0.458800
              max         0.928600
libdem        count     503.000000
              mean        0.117412
              std         0.497660
              min        -0.881900
              25%        -0.361200
              50%         0.202300
              75%         0.624900
              max         0.932500
snp           count    1049.000000
              mean        0.076029
              std         0.502835
              min        -0.953800
              25%        -0.416600
              50%         0.153100
              75%         0.510600
              max         0.911700
ukip          count    3055.000000
              mean        0.118565
              std         0.505380
              min        -0.943500
              25%        -0.340000
              50%         0.275500
              75%         0.525500
              max         0.941300
dtype: float64



In [18]:

    
grouped.mad()









    Out[18]:





party
conservative    0.414290
labour          0.428085
libdem          0.452359
snp             0.455146
ukip            0.447803
dtype: float64



In [17]:

    
grouped.median()









    Out[17]:





party
conservative    0.1280
labour          0.1406
libdem          0.2023
snp             0.1531
ukip            0.2755
Name: score, dtype: float64



In [21]:

    
grouped.nlargest(20)









    Out[21]:





party             
conservative  6148    0.9571
              6243    0.9571
              6310    0.9571
              6402    0.9571
              6752    0.9571
              7744    0.9571
              7776    0.9571
              8076    0.9571
              1708    0.9413
              2342    0.9410
              2716    0.9410
              7091    0.9410
              5525    0.9319
              1811    0.9186
              1513    0.9133
              1203    0.9100
              2758    0.9100
              1004    0.9062
              1488    0.9062
              1672    0.9062
labour        1296    0.9286
              2026    0.9286
              4058    0.9286
              2128    0.8910
              1171    0.8777
              805     0.8674
              939     0.8674
              4254    0.8674
              4158    0.8519
              4536    0.8519
                       ...  
snp           124     0.8814
              374     0.8814
              6849    0.8807
              2220    0.8730
              6768    0.8442
              8132    0.8360
              1929    0.8225
              1630    0.8168
              2240    0.8151
              2372    0.8151
ukip          6400    0.9413
              1766    0.9348
              6301    0.9303
              7317    0.9303
              2582    0.9222
              7327    0.9200
              4541    0.9140
              6538    0.9076
              6982    0.9076
              7089    0.9076
              7494    0.9076
              7554    0.9076
              7694    0.9076
              8141    0.9076
              8146    0.9076
              8183    0.9076
              8188    0.9076
              8276    0.9076
              81      0.9005
              8144    0.9005
dtype: float64



In [22]:

    
fig = plt.figure()



In [23]:

    
ax = fig.add_subplot(111)



In [24]:

    
df.plot()









    Out[24]:





<matplotlib.axes._subplots.AxesSubplot at 0x11020e208>



In [38]:

    
for name, group in grouped:
    plt.plot(sorted(group), label=name)
plt.legend(loc='best')









    Out[38]:





<matplotlib.legend.Legend at 0x119688b70>



In [42]:

    
grouped.quantile(0.5)









    Out[42]:





party
conservative    0.1280
labour          0.1406
libdem          0.2023
snp             0.1531
ukip            0.2755
dtype: float64



In [62]:

    
for name, group in grouped:
    group.plot(kind='hist')



In [65]:

    
l = [group for name, group in grouped]



In [71]:

    
s = sorted(l[0])



In [78]:

    
counts = pd.Series(s)



In [83]:

    
plt.hist(counts.value_counts(), bins=20)









    



---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
<ipython-input-83-52f972626bef> in <module>()
----> 1 plt.hist(counts.value_counts(), bins=20)

/opt/local/Library/Frameworks/Python.framework/Versions/3.4/lib/python3.4/site-packages/matplotlib/pyplot.py in hist(x, bins, range, normed, weights, cumulative, bottom, histtype, align, orientation, rwidth, log, color, label, stacked, hold, **kwargs)
   2894                       histtype=histtype, align=align, orientation=orientation,
   2895                       rwidth=rwidth, log=log, color=color, label=label,
-> 2896                       stacked=stacked, **kwargs)
   2897         draw_if_interactive()
   2898     finally:

/opt/local/Library/Frameworks/Python.framework/Versions/3.4/lib/python3.4/site-packages/matplotlib/axes/_axes.py in hist(self, x, bins, range, normed, weights, cumulative, bottom, histtype, align, orientation, rwidth, log, color, label, stacked, **kwargs)
   5602         # Massage 'x' for processing.
   5603         # NOTE: Be sure any changes here is also done below to 'weights'
-> 5604         if isinstance(x, np.ndarray) or not iterable(x[0]):
   5605             # TODO: support masked arrays;
   5606             x = np.asarray(x)

/opt/local/Library/Frameworks/Python.framework/Versions/3.4/lib/python3.4/site-packages/pandas/core/series.py in __getitem__(self, key)
    512     def __getitem__(self, key):
    513         try:
--> 514             result = self.index.get_value(self, key)
    515 
    516             if not np.isscalar(result):

/opt/local/Library/Frameworks/Python.framework/Versions/3.4/lib/python3.4/site-packages/pandas/core/index.py in get_value(self, series, key)
   2761 
   2762         k = _values_from_object(key)
-> 2763         loc = self.get_loc(k)
   2764         new_values = _values_from_object(series)[loc]
   2765 

/opt/local/Library/Frameworks/Python.framework/Versions/3.4/lib/python3.4/site-packages/pandas/core/index.py in get_loc(self, key, method)
   2818         except (TypeError, NotImplementedError):
   2819             pass
-> 2820         return super(Float64Index, self).get_loc(key, method=method)
   2821 
   2822     @property

/opt/local/Library/Frameworks/Python.framework/Versions/3.4/lib/python3.4/site-packages/pandas/core/index.py in get_loc(self, key, method)
   1435         """
   1436         if method is None:
-> 1437             return self._engine.get_loc(_values_from_object(key))
   1438 
   1439         indexer = self.get_indexer([key], method=method)

pandas/index.pyx in pandas.index.IndexEngine.get_loc (pandas/index.c:3824)()

pandas/index.pyx in pandas.index.IndexEngine.get_loc (pandas/index.c:3704)()

pandas/hashtable.pyx in pandas.hashtable.Float64HashTable.get_item (pandas/hashtable.c:9964)()

pandas/hashtable.pyx in pandas.hashtable.Float64HashTable.get_item (pandas/hashtable.c:9902)()

KeyError: 0.0



In [ ]:

	party	score
0	conservative	0.1779
1	ukip	0.4588
2	ukip	0.3802
3	labour	-0.4389
4	conservative	-0.4389
5	conservative	0.0258
6	labour	0.2960
7	conservative	0.2960
8	snp	-0.4019
9	labour	-0.4019
10	snp	0.2732
11	labour	0.2732
12	conservative	0.2732
13	conservative	0.8126
14	snp	0.6956
15	labour	0.6956
16	snp	0.6996
17	labour	0.4019
18	conservative	0.0258
19	snp	0.2003
20	conservative	0.2003
21	libdem	-0.2263
22	conservative	-0.2263
23	snp	0.3182
24	labour	0.5267
25	snp	0.0772
26	labour	0.0772
27	snp	-0.8225
28	labour	-0.8225
29	conservative	-0.8225
...	...	...
21894	labour	-0.2960
21895	snp	0.3818
21896	ukip	0.0972
21897	snp	0.0516
21898	libdem	0.0516
21899	ukip	0.0516
21900	labour	0.0516
21901	conservative	0.0516
21902	conservative	-0.4404
21903	labour	0.8519
21904	libdem	0.8885
21905	snp	-0.4404
21906	labour	-0.4404
21907	labour	0.4019
21908	conservative	0.4019
21909	ukip	0.8889
21910	ukip	0.0772
21911	ukip	0.3612
21912	labour	0.4019
21913	conservative	0.4019
21914	conservative	-0.5574
21915	conservative	-0.2960
21916	ukip	0.3182
21917	conservative	0.5574
21918	labour	-0.7500
21919	conservative	-0.7500
21920	snp	-0.2960
21921	labour	-0.2960
21922	ukip	0.2732
21923	snp	-0.1027