In [2]:
import pandas as pd

# pd.DataFrame.from_csv was removed from pandas; read_csv with index_col and
# parse_dates reproduces its behaviour (the Date column becomes the index).
old_data = pd.read_csv('./Documents/Cornell/Courses/MPS Project/polarity_specific.csv',
                       index_col=0, parse_dates=True)
list_pos = []
list_neg = []
list_neu = []
list_com = []

# The sentiment columns repeat in groups of four, so assign each positional
# column to its group via i % 4; .iloc replaces the deprecated .ix indexer.
for i in range(1, old_data.shape[1]):
    if i % 4 == 1:
        list_pos.append(old_data.iloc[:, i])
    elif i % 4 == 2:
        list_neg.append(old_data.iloc[:, i])
    elif i % 4 == 3:
        list_neu.append(old_data.iloc[:, i])
    elif i % 4 == 0:
        list_com.append(old_data.iloc[:, i])

positive = pd.DataFrame(list_pos).T
# del positive['Date']
# del positive[0]

negative = pd.DataFrame(list_neg).T
neutral = pd.DataFrame(list_neu).T
compound = pd.DataFrame(list_com).T

# Daily mean of each sentiment group across its 25 columns.
output = pd.DataFrame([positive.mean(axis=1), negative.mean(axis=1),
                       neutral.mean(axis=1), compound.mean(axis=1)],
                      index=['pos', 'neg', 'neutral', 'compound']).T
# positive.to_csv("./Documents/Cornell/Courses/MPS Project/Matlab/positive.csv")
# negative.to_csv("./Documents/Cornell/Courses/MPS Project/Matlab/negative.csv")
# neutral.to_csv("./Documents/Cornell/Courses/MPS Project/Matlab/neutral.csv")
# compound.to_csv("./Documents/Cornell/Courses/MPS Project/Matlab/compound.csv")
output


Out[2]:
pos neg neutral compound
Date
2008-08-08 0.21636 0.74724 -0.365216 0.037875
2008-08-11 0.18980 0.73488 -0.161536 0.065083
2008-08-12 0.17504 0.76584 -0.241532 0.054625
2008-08-13 0.15856 0.78348 -0.155804 0.060417
2008-08-14 0.19188 0.71576 -0.176548 0.088500
2008-08-15 0.17156 0.73848 -0.137756 0.093833
2008-08-18 0.16352 0.79444 -0.214200 0.041958
2008-08-19 0.18964 0.76348 -0.280764 0.048792
2008-08-20 0.17536 0.79080 -0.248228 0.035250
2008-08-21 0.24132 0.71196 -0.309208 0.039917
2008-08-22 0.13768 0.76808 -0.082176 0.094333
2008-08-25 0.27320 0.71380 -0.424844 0.013458
2008-08-26 0.29960 0.62504 -0.311940 0.078500
2008-08-27 0.16816 0.77480 -0.214812 0.059417
2008-08-28 0.16172 0.71732 -0.092788 0.118333
2008-08-29 0.12712 0.79644 -0.106840 0.079625
2008-09-02 0.25508 0.69404 -0.310864 0.053042
2008-09-03 0.15836 0.78608 -0.154856 0.057875
2008-09-04 0.21512 0.69448 -0.185832 0.088708
2008-09-05 0.18676 0.73996 -0.255892 0.067083
2008-09-08 0.23064 0.73088 -0.339516 0.040042
2008-09-09 0.19412 0.75328 -0.281364 0.049792
2008-09-10 0.21476 0.74068 -0.304908 0.046417
2008-09-11 0.17324 0.78456 -0.166008 0.039042
2008-09-12 0.17020 0.78308 -0.185520 0.048667
2008-09-15 0.19844 0.71964 -0.218304 0.085292
2008-09-16 0.23060 0.71696 -0.356356 0.050667
2008-09-17 0.19036 0.77364 -0.230880 0.037500
2008-09-18 0.23840 0.74244 -0.335052 0.019958
2008-09-19 0.22080 0.74600 -0.293256 0.034500
... ... ... ... ...
2016-05-20 0.12736 0.81020 -0.175188 0.065042
2016-05-23 0.21744 0.72756 -0.319948 0.054417
2016-05-24 0.10420 0.82012 -0.141816 0.078875
2016-05-25 0.22692 0.74448 -0.343788 0.029750
2016-05-26 0.14748 0.81412 -0.237692 0.040042
2016-05-27 0.14516 0.79352 -0.249088 0.053292
2016-05-31 0.15692 0.77024 -0.164092 0.068917
2016-06-01 0.17928 0.76004 -0.358628 0.063167
2016-06-02 0.22424 0.70460 -0.274064 0.074083
2016-06-03 0.10036 0.85216 -0.122316 0.046625
2016-06-06 0.16564 0.75676 -0.216096 0.080750
2016-06-07 0.12168 0.82016 -0.125652 0.060667
2016-06-08 0.14196 0.74400 -0.097836 0.115417
2016-06-09 0.10656 0.81352 -0.112252 0.083208
2016-06-10 0.13060 0.80348 -0.168208 0.063500
2016-06-13 0.18340 0.70224 -0.155468 0.117708
2016-06-14 0.12180 0.80804 -0.173084 0.073042
2016-06-15 0.19164 0.76648 -0.304884 0.043583
2016-06-16 0.15096 0.81296 -0.294940 0.035458
2016-06-17 0.21112 0.71960 -0.295452 0.072250
2016-06-20 0.21044 0.72260 -0.307364 0.069750
2016-06-21 0.18456 0.75740 -0.357180 0.060417
2016-06-22 0.10736 0.80848 -0.084284 0.080458
2016-06-23 0.15228 0.77068 -0.182704 0.073000
2016-06-24 0.08844 0.86088 -0.060040 0.052708
2016-06-27 0.17112 0.74460 -0.142552 0.081375
2016-06-28 0.14528 0.75460 -0.052576 0.084458
2016-06-29 0.19696 0.68536 -0.151972 0.122708
2016-06-30 0.19316 0.73576 -0.292040 0.074000
2016-07-01 0.20772 0.71320 -0.362280 0.082375

1989 rows × 4 columns
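
A more compact way to build the same four frames is to slice every fourth
column with .iloc. This is a minimal sketch that assumes the column layout
implied by the loop above (pos, neg, neutral, compound repeating from
positional column 1 onward):

import pandas as pd

old_data = pd.read_csv('./Documents/Cornell/Courses/MPS Project/polarity_specific.csv',
                       index_col=0, parse_dates=True)

# Every fourth column, starting at positions 1, 2, 3 and 4 respectively.
positive = old_data.iloc[:, 1::4]
negative = old_data.iloc[:, 2::4]
neutral = old_data.iloc[:, 3::4]
compound = old_data.iloc[:, 4::4]

# Daily means per sentiment group, matching the `output` frame above.
output = pd.DataFrame({'pos': positive.mean(axis=1),
                       'neg': negative.mean(axis=1),
                       'neutral': neutral.mean(axis=1),
                       'compound': compound.mean(axis=1)})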


In [12]:
# ori_price = pd.read_csv('./Documents/Cornell/Courses/MPS Project/DJIA_table.csv', index_col=0)
# ori_price

# Only the last expression in a cell is echoed, so the first three .describe()
# calls produce no visible output; Out[12] is the summary of `compound` alone.
positive.describe()
negative.describe()
neutral.describe()
compound.describe()


Out[12]:
comTop1 comTop2 comTop3 comTop4 comTop5 comTop6 comTop7 comTop8 comTop9 comTop10 ... comTop16 comTop17 comTop18 comTop19 comTop20 comTop21 comTop22 comTop23 comTop24 comTop25
count 1989.000000 1989.000000 1989.000000 1989.000000 1989.00000 1989.000000 1989.000000 1989.000000 1989.000000 1989.000000 ... 1989.000000 1989.000000 1989.000000 1989.000000 1989.000000 1989.000000 1989.000000 1989.000000 1989.000000 1989.000000
mean -0.195257 -0.244356 -0.206016 -0.218765 -0.23567 -0.209543 -0.232516 -0.227572 -0.229615 -0.237790 ... -0.216250 -0.227294 -0.216468 -0.207204 -0.217184 -0.242517 -0.225665 -0.231830 -0.208458 -0.229798
std 0.464838 0.449513 0.453901 0.457649 0.45464 0.449819 0.449859 0.445318 0.447302 0.448638 ... 0.435431 0.439118 0.438239 0.444243 0.441309 0.433941 0.436844 0.435298 0.426768 0.435107
min -0.973200 -0.989800 -0.978100 -0.980800 -0.98250 -0.976400 -0.989600 -0.982600 -0.966600 -0.975200 ... -0.975800 -0.981400 -0.982500 -0.966300 -0.968200 -0.978600 -0.978800 -0.979400 -0.975000 -0.974100
25% -0.599400 -0.636900 -0.599400 -0.624900 -0.62490 -0.585900 -0.624900 -0.599400 -0.599400 -0.624900 ... -0.599400 -0.599400 -0.599400 -0.585900 -0.585900 -0.612400 -0.599400 -0.612400 -0.571900 -0.599400
50% -0.153100 -0.296000 -0.202300 -0.226300 -0.27320 -0.226300 -0.265600 -0.250000 -0.250000 -0.273200 ... -0.226300 -0.226300 -0.226300 -0.202300 -0.226300 -0.250000 -0.250000 -0.250000 -0.128000 -0.250000
75% 0.000000 0.000000 0.000000 0.000000 0.00000 0.000000 0.000000 0.000000 0.000000 0.000000 ... 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000
max 0.951700 0.962300 0.950900 0.952400 0.93310 0.950900 0.923100 0.949300 0.944200 0.921700 ... 0.911800 0.921700 0.941300 0.933700 0.915300 0.960800 0.926000 0.932400 0.945100 0.920100

8 rows × 25 columns
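
To inspect all four summary tables in one cell, a small sketch (reusing the
positive, negative, neutral and compound frames built above) concatenates the
describe() results side by side, with the group labels as the outer column level:

summaries = pd.concat([positive.describe(), negative.describe(),
                       neutral.describe(), compound.describe()],
                      axis=1,
                      keys=['pos', 'neg', 'neutral', 'compound'])
summaries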

