In [1]:
# Calculates the average channel viewcount growth per day, saves csv
import pandas as pa
import numpy as np
import json
import os
import networkx as nx
import pygraphviz as gz
from networkx.drawing.nx_pydot import write_dot
import matplotlib.pyplot as plt
#%matplotlib notebook
import itertools
import csv
from sqlalchemy import exists, func
from database import *
from matplotlib import pylab, pyplot
import seaborn as sns
sns.set(color_codes=True)
current_palette = sns.color_palette()
first = current_palette[0]
second = current_palette[1]
sns.set_palette(
[second, first] + current_palette[2:]
)
from scipy import stats, integrate
In [2]:
DIR = '../../data/data_evaluation_2'
db = YTDatabase()
/home/mlode/intel/intelpython27/lib/python2.7/site-packages/sqlalchemy/engine/default.py:470: Warning: Can't create database 'mlode'; database exists
cursor.execute(statement, parameters)
In [3]:
with db._session_scope(False) as session:
df_channel_history = pa.read_sql(session.query(ChannelHistory).statement, db.engine)
In [4]:
df_channel_history[df_channel_history.channelID == 'UC-lHJZR3Gqxm24_Vd_AJ5Yw']
Out[4]:
id
channelID
viewCount
subscriberCount
commentCount
videoCount
crawlTimestamp
58
59
UC-lHJZR3Gqxm24_Vd_AJ5Yw
14132206795
51598211
289181
3057
2016-12-28 02:57:16
8000
8011
UC-lHJZR3Gqxm24_Vd_AJ5Yw
14145979497
51645606
289181
3058
2016-12-29 00:00:06
15942
15963
UC-lHJZR3Gqxm24_Vd_AJ5Yw
14158774806
51692274
289181
3059
2016-12-30 00:00:05
23883
23914
UC-lHJZR3Gqxm24_Vd_AJ5Yw
14171475075
51731668
289181
3060
2016-12-31 00:00:07
31824
31865
UC-lHJZR3Gqxm24_Vd_AJ5Yw
14182683968
51774245
289181
3061
2017-01-01 00:00:04
39765
39816
UC-lHJZR3Gqxm24_Vd_AJ5Yw
14193935233
51807431
289181
3062
2017-01-02 00:00:10
47706
47767
UC-lHJZR3Gqxm24_Vd_AJ5Yw
14203784589
51840406
289181
3063
2017-01-03 00:00:05
55647
55718
UC-lHJZR3Gqxm24_Vd_AJ5Yw
14215372493
51896204
289181
3064
2017-01-04 00:00:06
63588
63669
UC-lHJZR3Gqxm24_Vd_AJ5Yw
14227166293
51946586
289181
3065
2017-01-05 00:00:05
71529
71620
UC-lHJZR3Gqxm24_Vd_AJ5Yw
14241617250
51980954
289181
3066
2017-01-06 00:00:05
79469
79570
UC-lHJZR3Gqxm24_Vd_AJ5Yw
14253269056
52026530
289181
3067
2017-01-07 00:00:05
87409
87520
UC-lHJZR3Gqxm24_Vd_AJ5Yw
14264283557
52083285
289181
3068
2017-01-08 00:00:06
95348
95469
UC-lHJZR3Gqxm24_Vd_AJ5Yw
14289672774
52143613
289181
3069
2017-01-09 00:00:08
103417
103548
UC-lHJZR3Gqxm24_Vd_AJ5Yw
14304695125
52190347
289181
3070
2017-01-10 00:00:14
111357
111488
UC-lHJZR3Gqxm24_Vd_AJ5Yw
14304695125
52192493
289181
3070
2017-01-10 01:38:51
119297
119428
UC-lHJZR3Gqxm24_Vd_AJ5Yw
14319045407
52228058
289181
3071
2017-01-11 00:00:16
127236
127367
UC-lHJZR3Gqxm24_Vd_AJ5Yw
14329657330
52271395
289181
3072
2017-01-12 00:00:14
135174
135305
UC-lHJZR3Gqxm24_Vd_AJ5Yw
14340411572
52316867
289181
3073
2017-01-13 00:00:13
143113
143244
UC-lHJZR3Gqxm24_Vd_AJ5Yw
14352162791
52366118
289181
3074
2017-01-14 00:00:12
151054
151185
UC-lHJZR3Gqxm24_Vd_AJ5Yw
14425610123
52407628
289181
3075
2017-01-15 00:00:12
158990
159121
UC-lHJZR3Gqxm24_Vd_AJ5Yw
14425610123
52440435
289181
3076
2017-01-16 00:00:32
166926
167057
UC-lHJZR3Gqxm24_Vd_AJ5Yw
14560497168
52471855
289181
3077
2017-01-17 00:00:13
174860
174991
UC-lHJZR3Gqxm24_Vd_AJ5Yw
14607679435
52496111
289181
3078
2017-01-18 00:00:13
182795
182926
UC-lHJZR3Gqxm24_Vd_AJ5Yw
14642309986
52529243
289181
3079
2017-01-19 00:00:13
190731
190862
UC-lHJZR3Gqxm24_Vd_AJ5Yw
14642526557
52559346
289181
3080
2017-01-20 00:00:14
198658
198789
UC-lHJZR3Gqxm24_Vd_AJ5Yw
14652762481
52585356
289181
3081
2017-01-21 00:00:13
206588
206719
UC-lHJZR3Gqxm24_Vd_AJ5Yw
14663068832
52610579
289181
3082
2017-01-22 00:00:13
214519
214650
UC-lHJZR3Gqxm24_Vd_AJ5Yw
14672851674
52644148
289181
3083
2017-01-23 00:00:13
222451
222582
UC-lHJZR3Gqxm24_Vd_AJ5Yw
14685572158
52667438
289181
3084
2017-01-24 00:00:18
230373
230504
UC-lHJZR3Gqxm24_Vd_AJ5Yw
14469166045
52687945
289181
3086
2017-01-25 00:00:14
...
...
...
...
...
...
...
...
270009
270140
UC-lHJZR3Gqxm24_Vd_AJ5Yw
14525527005
52804819
289181
3096
2017-01-30 00:00:22
277931
278062
UC-lHJZR3Gqxm24_Vd_AJ5Yw
14540687031
52832083
289181
3097
2017-01-31 00:00:23
285855
285986
UC-lHJZR3Gqxm24_Vd_AJ5Yw
14556966534
52865237
289181
3098
2017-02-01 00:00:39
293779
293910
UC-lHJZR3Gqxm24_Vd_AJ5Yw
14568574240
52916409
289181
3099
2017-02-02 00:00:14
301703
301834
UC-lHJZR3Gqxm24_Vd_AJ5Yw
14568574240
52946657
289181
3100
2017-02-03 00:00:20
309621
309752
UC-lHJZR3Gqxm24_Vd_AJ5Yw
14580396577
52967242
289181
3101
2017-02-04 00:00:12
317543
317674
UC-lHJZR3Gqxm24_Vd_AJ5Yw
14600189072
53001768
289181
3102
2017-02-05 00:00:12
325464
325595
UC-lHJZR3Gqxm24_Vd_AJ5Yw
14611453836
53034878
289181
3103
2017-02-06 00:00:13
333387
333518
UC-lHJZR3Gqxm24_Vd_AJ5Yw
14623571274
53054674
289181
3104
2017-02-07 00:00:23
341307
341438
UC-lHJZR3Gqxm24_Vd_AJ5Yw
14633497298
53063197
289181
3105
2017-02-08 00:00:20
349230
349361
UC-lHJZR3Gqxm24_Vd_AJ5Yw
14642030603
52749933
289181
3107
2017-02-09 00:00:20
357154
357285
UC-lHJZR3Gqxm24_Vd_AJ5Yw
14642030603
53394491
289181
3107
2017-02-10 00:00:18
365076
365207
UC-lHJZR3Gqxm24_Vd_AJ5Yw
14657485933
53312437
289181
3109
2017-02-11 00:00:35
372995
373126
UC-lHJZR3Gqxm24_Vd_AJ5Yw
14666114013
53233889
289181
3110
2017-02-12 00:00:13
380921
381052
UC-lHJZR3Gqxm24_Vd_AJ5Yw
14676534988
53146966
289181
3110
2017-02-13 00:01:29
388841
388972
UC-lHJZR3Gqxm24_Vd_AJ5Yw
14676534988
53167390
289181
3111
2017-02-14 00:00:24
396762
396893
UC-lHJZR3Gqxm24_Vd_AJ5Yw
14685511029
53195457
289181
3112
2017-02-15 00:00:31
404682
404813
UC-lHJZR3Gqxm24_Vd_AJ5Yw
14694323042
53249565
289181
3111
2017-02-16 00:01:27
412603
412734
UC-lHJZR3Gqxm24_Vd_AJ5Yw
14709814807
53314050
289181
3111
2017-02-17 00:00:14
420523
420654
UC-lHJZR3Gqxm24_Vd_AJ5Yw
14709370995
53437836
289181
3112
2017-02-18 00:00:26
428441
428572
UC-lHJZR3Gqxm24_Vd_AJ5Yw
14711456115
53519122
289181
3113
2017-02-19 00:00:26
436359
436490
UC-lHJZR3Gqxm24_Vd_AJ5Yw
14723629799
53582988
289181
3113
2017-02-20 00:02:01
444276
444407
UC-lHJZR3Gqxm24_Vd_AJ5Yw
14735225497
53635032
289181
3113
2017-02-21 00:00:13
452193
452324
UC-lHJZR3Gqxm24_Vd_AJ5Yw
14743493368
53669525
289181
3114
2017-02-22 00:00:14
460108
460239
UC-lHJZR3Gqxm24_Vd_AJ5Yw
14749942951
53694394
289181
3115
2017-02-23 00:00:14
468025
468156
UC-lHJZR3Gqxm24_Vd_AJ5Yw
14757843312
53730804
289181
3116
2017-02-24 00:00:13
475942
476073
UC-lHJZR3Gqxm24_Vd_AJ5Yw
14766470763
53780338
289181
3116
2017-02-25 00:01:30
483857
483988
UC-lHJZR3Gqxm24_Vd_AJ5Yw
14775589755
53826526
289181
3118
2017-02-26 00:00:13
491770
491901
UC-lHJZR3Gqxm24_Vd_AJ5Yw
14782568924
53866011
289181
3119
2017-02-27 00:00:13
499687
499818
UC-lHJZR3Gqxm24_Vd_AJ5Yw
14782568924
53890765
289181
3120
2017-02-28 00:00:13
64 rows × 7 columns
In [5]:
channel_groups = df_channel_history.groupby(['channelID'])
counts = channel_groups.count().sort_values(by='viewCount')
print counts['id'].count()
print counts[counts.id != 64]['id'].count()
7942
30
In [6]:
group = channel_groups.get_group('UCK1i2UviaXLUNrZlAFpw_jA')
group = group.sort_values(by='id')
group['diff'] = group['viewCount'] - group['viewCount'].shift(1)
group
Out[6]:
id
channelID
viewCount
subscriberCount
commentCount
videoCount
crawlTimestamp
diff
3919
3925
UCK1i2UviaXLUNrZlAFpw_jA
6294348732
5017530
101
321
2016-12-28 03:00:14
NaN
11859
11875
UCK1i2UviaXLUNrZlAFpw_jA
6306806646
5025907
101
322
2016-12-29 00:03:08
12457914.0
19802
19828
UCK1i2UviaXLUNrZlAFpw_jA
6319562857
5036506
101
322
2016-12-30 00:03:07
12756211.0
27743
27779
UCK1i2UviaXLUNrZlAFpw_jA
6332654585
5047671
101
322
2016-12-31 00:03:04
13091728.0
35684
35730
UCK1i2UviaXLUNrZlAFpw_jA
6345655230
5058529
101
323
2017-01-01 00:03:07
13000645.0
43624
43680
UCK1i2UviaXLUNrZlAFpw_jA
6358705350
5071510
101
319
2017-01-02 00:03:09
13050120.0
51566
51632
UCK1i2UviaXLUNrZlAFpw_jA
6266832291
5082663
101
316
2017-01-03 00:03:07
-91873059.0
59507
59583
UCK1i2UviaXLUNrZlAFpw_jA
6278092542
5093290
101
316
2017-01-04 00:03:06
11260251.0
67448
67534
UCK1i2UviaXLUNrZlAFpw_jA
6253900053
5103742
101
317
2017-01-05 00:03:04
-24192489.0
75387
75483
UCK1i2UviaXLUNrZlAFpw_jA
6266468980
5114273
101
317
2017-01-06 00:02:50
12568927.0
83329
83435
UCK1i2UviaXLUNrZlAFpw_jA
6279259643
5125283
101
317
2017-01-07 00:02:49
12790663.0
91269
91385
UCK1i2UviaXLUNrZlAFpw_jA
6291982307
5136093
101
318
2017-01-08 00:02:58
12722664.0
99207
99333
UCK1i2UviaXLUNrZlAFpw_jA
6316604422
5148306
101
318
2017-01-09 00:03:43
24622115.0
107276
107407
UCK1i2UviaXLUNrZlAFpw_jA
6329725242
5159316
101
318
2017-01-10 00:03:47
13120820.0
115216
115347
UCK1i2UviaXLUNrZlAFpw_jA
6329725242
5160681
101
318
2017-01-10 01:42:26
0.0
123154
123285
UCK1i2UviaXLUNrZlAFpw_jA
6342209335
5170440
101
318
2017-01-11 00:03:38
12484093.0
131091
131222
UCK1i2UviaXLUNrZlAFpw_jA
6355424348
5180700
101
319
2017-01-12 00:03:39
13215013.0
139034
139165
UCK1i2UviaXLUNrZlAFpw_jA
6368409828
5191569
101
319
2017-01-13 00:03:38
12985480.0
146972
147103
UCK1i2UviaXLUNrZlAFpw_jA
6381602848
5203298
101
319
2017-01-14 00:03:38
13193020.0
154909
155040
UCK1i2UviaXLUNrZlAFpw_jA
6451807974
5214389
101
321
2017-01-15 00:03:37
70205126.0
162847
162978
UCK1i2UviaXLUNrZlAFpw_jA
6451807974
5226920
101
321
2017-01-16 00:04:08
0.0
170785
170916
UCK1i2UviaXLUNrZlAFpw_jA
6737444777
5237705
101
321
2017-01-17 00:03:54
285636803.0
178718
178849
UCK1i2UviaXLUNrZlAFpw_jA
6801836413
5248584
101
322
2017-01-18 00:03:51
64391636.0
186654
186785
UCK1i2UviaXLUNrZlAFpw_jA
6840388774
5259033
101
323
2017-01-19 00:03:45
38552361.0
194588
194719
UCK1i2UviaXLUNrZlAFpw_jA
6843212533
5269945
101
323
2017-01-20 00:03:59
2823759.0
202519
202650
UCK1i2UviaXLUNrZlAFpw_jA
6856931051
5280974
101
323
2017-01-21 00:03:56
13718518.0
210446
210577
UCK1i2UviaXLUNrZlAFpw_jA
6870906289
5291744
101
324
2017-01-22 00:04:07
13975238.0
218375
218506
UCK1i2UviaXLUNrZlAFpw_jA
6884597434
5303505
101
324
2017-01-23 00:04:08
13691145.0
226303
226434
UCK1i2UviaXLUNrZlAFpw_jA
6898367762
5313862
101
324
2017-01-24 00:03:36
13770328.0
234229
234360
UCK1i2UviaXLUNrZlAFpw_jA
6624656213
5324569
101
324
2017-01-25 00:04:07
-273711549.0
...
...
...
...
...
...
...
...
...
273862
273993
UCK1i2UviaXLUNrZlAFpw_jA
6692786929
5378296
101
326
2017-01-30 00:03:59
13833940.0
281785
281916
UCK1i2UviaXLUNrZlAFpw_jA
6706945237
5388828
101
326
2017-01-31 00:04:19
14158308.0
289710
289841
UCK1i2UviaXLUNrZlAFpw_jA
6720073990
5399574
101
327
2017-02-01 00:04:21
13128753.0
297631
297762
UCK1i2UviaXLUNrZlAFpw_jA
6733479120
5410727
101
327
2017-02-02 00:03:54
13405130.0
305551
305682
UCK1i2UviaXLUNrZlAFpw_jA
6733479120
5421484
101
327
2017-02-03 00:04:20
0.0
313467
313598
UCK1i2UviaXLUNrZlAFpw_jA
6790264674
5431790
101
327
2017-02-04 00:03:30
56785554.0
321394
321525
UCK1i2UviaXLUNrZlAFpw_jA
6803763339
5442407
101
328
2017-02-05 00:03:08
13498665.0
329317
329448
UCK1i2UviaXLUNrZlAFpw_jA
6817556756
5454245
101
328
2017-02-06 00:04:08
13793417.0
337237
337368
UCK1i2UviaXLUNrZlAFpw_jA
6831701922
5464491
101
328
2017-02-07 00:03:37
14145166.0
345162
345293
UCK1i2UviaXLUNrZlAFpw_jA
6844550076
5475105
101
328
2017-02-08 00:03:15
12848154.0
353083
353214
UCK1i2UviaXLUNrZlAFpw_jA
6857855691
5484462
101
328
2017-02-09 00:03:30
13305615.0
361004
361135
UCK1i2UviaXLUNrZlAFpw_jA
6857855691
5493898
101
328
2017-02-10 00:04:06
0.0
368928
369059
UCK1i2UviaXLUNrZlAFpw_jA
6884995129
5510239
101
328
2017-02-11 00:04:25
27139438.0
376848
376979
UCK1i2UviaXLUNrZlAFpw_jA
6899615642
5523990
101
329
2017-02-12 00:03:47
14620513.0
384772
384903
UCK1i2UviaXLUNrZlAFpw_jA
6914273359
5538658
101
329
2017-02-13 00:04:57
14657717.0
392694
392825
UCK1i2UviaXLUNrZlAFpw_jA
6914273359
5551816
101
329
2017-02-14 00:03:55
0.0
400613
400744
UCK1i2UviaXLUNrZlAFpw_jA
6942558695
5564732
101
329
2017-02-15 00:04:07
28285336.0
408536
408667
UCK1i2UviaXLUNrZlAFpw_jA
6957278426
5576172
101
330
2017-02-16 00:05:00
14719731.0
416453
416584
UCK1i2UviaXLUNrZlAFpw_jA
6970959469
5587323
101
330
2017-02-17 00:04:01
13681043.0
424372
424503
UCK1i2UviaXLUNrZlAFpw_jA
6984386046
5598240
101
330
2017-02-18 00:04:30
13426577.0
432291
432422
UCK1i2UviaXLUNrZlAFpw_jA
6997541002
5610310
101
331
2017-02-19 00:04:18
13154956.0
440208
440339
UCK1i2UviaXLUNrZlAFpw_jA
7011107614
5622978
101
331
2017-02-20 00:05:45
13566612.0
448123
448254
UCK1i2UviaXLUNrZlAFpw_jA
7024589993
5634352
101
331
2017-02-21 00:03:50
13482379.0
456041
456172
UCK1i2UviaXLUNrZlAFpw_jA
7037167627
5645777
101
331
2017-02-22 00:03:56
12577634.0
463955
464086
UCK1i2UviaXLUNrZlAFpw_jA
7050276769
5659027
101
332
2017-02-23 00:04:01
13109142.0
471873
472004
UCK1i2UviaXLUNrZlAFpw_jA
7063677215
5670555
101
332
2017-02-24 00:03:39
13400446.0
479791
479922
UCK1i2UviaXLUNrZlAFpw_jA
7078362058
5681969
101
332
2017-02-25 00:05:06
14684843.0
487704
487835
UCK1i2UviaXLUNrZlAFpw_jA
7092384134
5693290
101
333
2017-02-26 00:03:42
14022076.0
495620
495751
UCK1i2UviaXLUNrZlAFpw_jA
7106006470
5705418
101
333
2017-02-27 00:03:40
13622336.0
503534
503665
UCK1i2UviaXLUNrZlAFpw_jA
7106006470
5717241
101
333
2017-02-28 00:03:35
0.0
64 rows × 8 columns
In [7]:
fig = plt.figure()
group['viewCount'].plot()
plt.show()
fig1 = plt.figure()
group['diff'].plot()
plt.show()
print group['diff'].mean()
12883456.1587
In [8]:
with db._session_scope(False) as session:
df_channel_avg_viewcounts = pa.read_sql(session.query(Channel.id, Channel.title).statement, db.engine)
df_channel_avg_viewcounts = df_channel_avg_viewcounts.set_index(['id'])
df_channel_avg_viewcounts['mean viewCount'] = np.nan
for name, group in channel_groups:
##print name
group = channel_groups.get_group(name)
group = group.sort_values(by='id')
group['diff'] = group['viewCount'] - group['viewCount'].shift(1)
df_channel_avg_viewcounts.loc[name, 'mean viewCount'] = group['diff'].mean()
df_channel_avg_viewcounts.head()
Out[8]:
title
mean viewCount
id
UC__Pj66OeDibNZNN__L913g
TRACKS - ARTE
3640.666667
UC__PZLSRGtUQiTtvm3hPoEQ
Cripta dos Mistérios
1854.222222
UC__rmdgxs3ZF0zK_he7Tmig
Contoured Living
770.365079
UC_-CxgsxX0tpnm24WO-797Q
Mega Gumelar
4623.412698
UC_1FUFB6TlGeGOyDI4ikkzg
Best Games For Kids TV
50151.746032
In [9]:
df_channel_avg_viewcounts.loc['UC-lHJZR3Gqxm24_Vd_AJ5Yw']
Out[9]:
title PewDiePie
mean viewCount 1.03232e+07
Name: UC-lHJZR3Gqxm24_Vd_AJ5Yw, dtype: object
In [10]:
df_avg_viewcounts_sorted = df_channel_avg_viewcounts.sort_values(by='mean viewCount', ascending=False)
df_avg_viewcounts_sorted.to_csv(DIR+r'/df_channel_avg_viewcounts.txt', sep=str('\t'), encoding='utf-8')
In [ ]:
Content source: lodemo/CATANA
Similar notebooks: