Libs


In [64]:
import re
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import datetime
from datetime import date, timedelta,datetime
import scipy
import os
import math
import matplotlib
import matplotlib.pyplot as plt
%matplotlib inline

In [88]:
def calc_score(self):
        """
        Invented in 1927 by Edwin B. Wilson
        (http://www.evanmiller.org/how-not-to-sort-by-average-rating.html)

        ****
        Deprecated, as this calculation is moved into SQL
        ****
        """
        n = self.allvotes()
        if n == 0:
            return 0
        pos = self.upvotes_count()
        z = 1.96
        p = 1.0 * pos / n
        score = (p + z * z / (2 * n) - z * sqrt((p * (1 - p) + z * z / (4 * n)) / n)) / (1 + z * z / n)
        return score

In [105]:
def calc_score(num_sents,positive):
        """
        Invented in 1927 by Edwin B. Wilson
        (http://www.evanmiller.org/how-not-to-sort-by-average-rating.html)

        ****
        Deprecated, as this calculation is moved into SQL
        ****
        """
        n = num_sents
        if n == 0:
            return 0
        pos = positive
        z = 1.96
        p = 1.0 * pos / n
        score = (p + z * z / (2 * n) - z * sqrt((p * (1 - p) + z * z / (4 * n)) / n)) / (1 + z * z / n)
        return score

In [106]:
#wilson=calc_score()
calc_score(10,3)


---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-106-ec889281165a> in <module>()
      1 #wilson=calc_score()
----> 2 calc_score(10,3)

<ipython-input-105-cced03bc4506> in calc_score(num_sents, positive)
     14         z = 1.96
     15         p = 1.0 * pos / n
---> 16         score = (p + z * z / (2 * n) - z * sqrt((p * (1 - p) + z * z / (4 * n)) / n)) / (1 + z * z / n)
     17         return score

NameError: global name 'sqrt' is not defined

In [94]:
# location of data
sent_results=pd.read_csv('../../output/data.csv')

Plot polarity (no normalization)


In [82]:
plt.plot(sent_results['paragraph'],sent_results[' polarity'])
plt.xlabel('paragraph')
plt.ylabel('polarity')


Out[82]:
<matplotlib.text.Text at 0xeffc0f0>

Plot polarity (normalized by 'count')


In [83]:
plt.plot(sent_results['paragraph'],sent_results[' polarity']/sent_results[' count'])
plt.xlabel('paragraph')
plt.ylabel('polarity (normalised)')


Out[83]:
<matplotlib.text.Text at 0x1041aac8>

Plot positive and negative word counts


In [86]:
plt.plot(sent_results['paragraph'],sent_results[' negative'],label='negative')
plt.plot(sent_results['paragraph'],sent_results[' positive'],label='positive')
plt.legend()
plt.xlabel('paragraph')


Out[86]:
<matplotlib.text.Text at 0x10a365f8>

Plot positive and negative word counts (normalized by 'count')


In [85]:
plt.plot(sent_results['paragraph'],sent_results[' negative']/sent_results[' count'],label='negative')
plt.plot(sent_results['paragraph'],sent_results[' positive']/sent_results[' count'],label='positive')
plt.legend()
plt.xlabel('paragraph')


Out[85]:
<matplotlib.text.Text at 0x1093a358>

In [87]:
sent_results


Out[87]:
paragraph count polarity positive negative
0 0 607 0.125000 9 7
1 1 1743 -0.176471 7 10
2 2 1625 0.043478 12 11
3 3 394 0.250000 5 3
4 4 984 0.157895 11 8
5 5 547 0.600000 8 2
6 6 799 0.230769 8 5
7 7 1276 0.750000 14 2
8 8 961 0.428571 10 4
9 9 554 -0.333333 2 4
10 10 1010 0.647059 14 3
11 11 419 0.000000 4 4
12 12 1245 -0.125000 7 9
13 13 595 0.333333 10 5
14 14 3472 0.155556 26 19
15 15 433 0.333333 4 2
16 16 362 -1.000000 0 4
17 17 243 1.000000 3 0
18 18 123 -0.999999 0 1
19 19 1144 0.000000 7 7
20 20 878 0.363636 15 7
21 21 506 0.142857 4 3
22 22 762 -0.272727 4 7
23 23 669 0.111111 5 4
24 24 316 -0.250000 3 5
25 25 287 -0.999999 0 1
26 26 243 -0.333333 1 2
27 27 326 0.166667 7 5
28 28 415 -0.571429 3 11
29 29 700 0.066667 8 7
30 30 277 1.000000 3 0
31 31 143 -0.333333 1 2
32 32 240 -0.200000 2 3
33 33 312 0.000000 3 3
34 34 308 -0.428571 2 5
35 35 507 0.200000 6 4
36 36 647 0.000000 5 5
37 37 675 -0.384615 4 9
38 38 414 0.636364 9 2
39 39 204 0.500000 3 1
40 40 241 -0.400000 3 7
41 41 332 0.400000 7 3
42 42 482 0.166667 7 5
43 43 168 0.333333 2 1
44 44 447 -0.200000 2 3
45 45 405 0.428571 5 2
46 46 124 1.000000 2 0

In [ ]: