In [2]:
import pandas as pd
old_data = pd.DataFrame.from_csv('./Documents/Cornell/Courses/MPS Project/polarity_specific.csv')
list_pos = []
list_neg = []
list_neu = []
list_com = []
for i in range(1,old_data.shape[1]):
if i % 4 == 1:
list_pos.append(old_data.ix[:,i])
elif i % 4 == 2:
list_neg.append(old_data.ix[:,i])
elif i % 4 == 3:
list_neu.append(old_data.ix[:,i])
elif i % 4 == 0:
list_com.append(old_data.ix[:,i])
positive = pd.DataFrame(list_pos).T
# del positive['Date']
# del positive[0]
negative = pd.DataFrame(list_neg).T
neutral = pd.DataFrame(list_neu).T
compound = pd.DataFrame(list_com).T
output = pd.DataFrame([positive.mean(axis=1),negative.mean(axis=1),neutral.mean(axis=1),compound.mean(axis=1)],
['pos','neg','neutral','compound']).T
# positive.to_csv("./Documents/Cornell/Courses/MPS Project/Matlab/positive.csv")
# negative.to_csv("./Documents/Cornell/Courses/MPS Project/Matlab/negative.csv")
# neutral.to_csv("./Documents/Cornell/Courses/MPS Project/Matlab/neutral.csv")
# compound.to_csv("./Documents/Cornell/Courses/MPS Project/Matlab/compound.csv")
output
Out[2]:
pos
neg
neutral
compound
Date
2008-08-08
0.21636
0.74724
-0.365216
0.037875
2008-08-11
0.18980
0.73488
-0.161536
0.065083
2008-08-12
0.17504
0.76584
-0.241532
0.054625
2008-08-13
0.15856
0.78348
-0.155804
0.060417
2008-08-14
0.19188
0.71576
-0.176548
0.088500
2008-08-15
0.17156
0.73848
-0.137756
0.093833
2008-08-18
0.16352
0.79444
-0.214200
0.041958
2008-08-19
0.18964
0.76348
-0.280764
0.048792
2008-08-20
0.17536
0.79080
-0.248228
0.035250
2008-08-21
0.24132
0.71196
-0.309208
0.039917
2008-08-22
0.13768
0.76808
-0.082176
0.094333
2008-08-25
0.27320
0.71380
-0.424844
0.013458
2008-08-26
0.29960
0.62504
-0.311940
0.078500
2008-08-27
0.16816
0.77480
-0.214812
0.059417
2008-08-28
0.16172
0.71732
-0.092788
0.118333
2008-08-29
0.12712
0.79644
-0.106840
0.079625
2008-09-02
0.25508
0.69404
-0.310864
0.053042
2008-09-03
0.15836
0.78608
-0.154856
0.057875
2008-09-04
0.21512
0.69448
-0.185832
0.088708
2008-09-05
0.18676
0.73996
-0.255892
0.067083
2008-09-08
0.23064
0.73088
-0.339516
0.040042
2008-09-09
0.19412
0.75328
-0.281364
0.049792
2008-09-10
0.21476
0.74068
-0.304908
0.046417
2008-09-11
0.17324
0.78456
-0.166008
0.039042
2008-09-12
0.17020
0.78308
-0.185520
0.048667
2008-09-15
0.19844
0.71964
-0.218304
0.085292
2008-09-16
0.23060
0.71696
-0.356356
0.050667
2008-09-17
0.19036
0.77364
-0.230880
0.037500
2008-09-18
0.23840
0.74244
-0.335052
0.019958
2008-09-19
0.22080
0.74600
-0.293256
0.034500
...
...
...
...
...
2016-05-20
0.12736
0.81020
-0.175188
0.065042
2016-05-23
0.21744
0.72756
-0.319948
0.054417
2016-05-24
0.10420
0.82012
-0.141816
0.078875
2016-05-25
0.22692
0.74448
-0.343788
0.029750
2016-05-26
0.14748
0.81412
-0.237692
0.040042
2016-05-27
0.14516
0.79352
-0.249088
0.053292
2016-05-31
0.15692
0.77024
-0.164092
0.068917
2016-06-01
0.17928
0.76004
-0.358628
0.063167
2016-06-02
0.22424
0.70460
-0.274064
0.074083
2016-06-03
0.10036
0.85216
-0.122316
0.046625
2016-06-06
0.16564
0.75676
-0.216096
0.080750
2016-06-07
0.12168
0.82016
-0.125652
0.060667
2016-06-08
0.14196
0.74400
-0.097836
0.115417
2016-06-09
0.10656
0.81352
-0.112252
0.083208
2016-06-10
0.13060
0.80348
-0.168208
0.063500
2016-06-13
0.18340
0.70224
-0.155468
0.117708
2016-06-14
0.12180
0.80804
-0.173084
0.073042
2016-06-15
0.19164
0.76648
-0.304884
0.043583
2016-06-16
0.15096
0.81296
-0.294940
0.035458
2016-06-17
0.21112
0.71960
-0.295452
0.072250
2016-06-20
0.21044
0.72260
-0.307364
0.069750
2016-06-21
0.18456
0.75740
-0.357180
0.060417
2016-06-22
0.10736
0.80848
-0.084284
0.080458
2016-06-23
0.15228
0.77068
-0.182704
0.073000
2016-06-24
0.08844
0.86088
-0.060040
0.052708
2016-06-27
0.17112
0.74460
-0.142552
0.081375
2016-06-28
0.14528
0.75460
-0.052576
0.084458
2016-06-29
0.19696
0.68536
-0.151972
0.122708
2016-06-30
0.19316
0.73576
-0.292040
0.074000
2016-07-01
0.20772
0.71320
-0.362280
0.082375
1989 rows × 4 columns
In [12]:
# ori_price = pd.DataFrame.from_csv('./Documents/Cornell/Courses/MPS Project/DJIA_table.csv',index_col=0)
# ori_price
positive.describe()
negative.describe()
neutral.describe()
compound.describe()
Out[12]:
comTop1
comTop2
comTop3
comTop4
comTop5
comTop6
comTop7
comTop8
comTop9
comTop10
...
comTop16
comTop17
comTop18
comTop19
comTop20
comTop21
comTop22
comTop23
comTop24
comTop25
count
1989.000000
1989.000000
1989.000000
1989.000000
1989.00000
1989.000000
1989.000000
1989.000000
1989.000000
1989.000000
...
1989.000000
1989.000000
1989.000000
1989.000000
1989.000000
1989.000000
1989.000000
1989.000000
1989.000000
1989.000000
mean
-0.195257
-0.244356
-0.206016
-0.218765
-0.23567
-0.209543
-0.232516
-0.227572
-0.229615
-0.237790
...
-0.216250
-0.227294
-0.216468
-0.207204
-0.217184
-0.242517
-0.225665
-0.231830
-0.208458
-0.229798
std
0.464838
0.449513
0.453901
0.457649
0.45464
0.449819
0.449859
0.445318
0.447302
0.448638
...
0.435431
0.439118
0.438239
0.444243
0.441309
0.433941
0.436844
0.435298
0.426768
0.435107
min
-0.973200
-0.989800
-0.978100
-0.980800
-0.98250
-0.976400
-0.989600
-0.982600
-0.966600
-0.975200
...
-0.975800
-0.981400
-0.982500
-0.966300
-0.968200
-0.978600
-0.978800
-0.979400
-0.975000
-0.974100
25%
-0.599400
-0.636900
-0.599400
-0.624900
-0.62490
-0.585900
-0.624900
-0.599400
-0.599400
-0.624900
...
-0.599400
-0.599400
-0.599400
-0.585900
-0.585900
-0.612400
-0.599400
-0.612400
-0.571900
-0.599400
50%
-0.153100
-0.296000
-0.202300
-0.226300
-0.27320
-0.226300
-0.265600
-0.250000
-0.250000
-0.273200
...
-0.226300
-0.226300
-0.226300
-0.202300
-0.226300
-0.250000
-0.250000
-0.250000
-0.128000
-0.250000
75%
0.000000
0.000000
0.000000
0.000000
0.00000
0.000000
0.000000
0.000000
0.000000
0.000000
...
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
max
0.951700
0.962300
0.950900
0.952400
0.93310
0.950900
0.923100
0.949300
0.944200
0.921700
...
0.911800
0.921700
0.941300
0.933700
0.915300
0.960800
0.926000
0.932400
0.945100
0.920100
8 rows × 25 columns
In [ ]:
Content source: info5900groupG/dataishumantool
Similar notebooks: