In [1]:
import warnings
from datetime import datetime

import matplotlib.pyplot as plt
import networkx as nx
import numpy as np
from scipy.stats import pearsonr, spearmanr

from custom import custom_funcs as cf
# Silence every warning for the rest of the session.
# NOTE(review): this also hides warnings raised by the plotting cells further
# down -- confirm that a blanket filter is intended.
warnings.filterwarnings('ignore')
# Load IPython's autoreload extension and switch it to mode 2.
%load_ext autoreload
%autoreload 2
# Draw matplotlib figures inline, below the cell that creates them.
%matplotlib inline
In [2]:
import pandas as pd
import numpy as np
In [3]:
# Read the workbook into a DataFrame and preview its first five rows.
df = pd.read_excel("Final.xlsx")
df.head(5)
Out[3]:
In [4]:
# Four independent copies of the loaded frame, one per view-count snapshot;
# each one is sorted by its own column in the following cells.
dfniviews, dfn2views, dfn3views, dfn4views = (df.copy() for _ in range(4))
In [5]:
# Rank the videos by their initial view count, largest first, and show the top ten.
dfniviews = dfniviews.sort_values(by='nviews', ascending=False)
dfniviews.head(n=10)
Out[5]:
In [6]:
# Rank the videos by the 'n2views' column, largest first, and show the top ten.
dfn2views = dfn2views.sort_values(by='n2views', ascending=False)
dfn2views.head(n=10)
Out[6]:
In [7]:
# Rank the videos by the 'n3views' column, largest first, and show the top ten.
dfn3views = dfn3views.sort_values(by='n3views', ascending=False)
dfn3views.head(n=10)
Out[7]:
In [8]:
# Rank the videos by the 'n4views' column, largest first, and show the top ten.
dfn4views = dfn4views.sort_values(by='n4views', ascending=False)
dfn4views.head(n=10)
Out[8]:
In [9]:
# (row label, initial views) pairs in ranked order.
# The column is read directly instead of going through DataFrame.iterrows():
# iterrows() builds a Series for every row and does not preserve the column's
# dtype across rows, whereas Series.items() yields the stored values as-is.
vidsdfniviews = list(dfniviews['nviews'].items())
print(vidsdfniviews)
In [10]:
# (row label, 'n2views' value) pairs in ranked order.
# The column is read directly instead of going through DataFrame.iterrows():
# iterrows() builds a Series for every row and does not preserve the column's
# dtype across rows, whereas Series.items() yields the stored values as-is.
vidsdfn2views = list(dfn2views['n2views'].items())
print(vidsdfn2views)
In [11]:
# (row label, 'n4views' value) pairs in ranked order.
# The column is read directly instead of going through DataFrame.iterrows():
# iterrows() builds a Series for every row and does not preserve the column's
# dtype across rows, whereas Series.items() yields the stored values as-is.
vidsdfn4views = list(dfn4views['n4views'].items())
print(vidsdfn4views)
In [12]:
# (row label, 'n3views' value) pairs in ranked order.
# The column is read directly instead of going through DataFrame.iterrows():
# iterrows() builds a Series for every row and does not preserve the column's
# dtype across rows, whereas Series.items() yields the stored values as-is.
vidsdfn3views = list(dfn3views['n3views'].items())
print(vidsdfn3views)
In [13]:
# Keep only the view counts, dropping the row labels from each pair.
niviewslst = [views for _, views in vidsdfniviews]
print(niviewslst)
In [14]:
# Keep only the view counts, dropping the row labels from each pair.
n2viewslst = [views for _, views in vidsdfn2views]
print(n2viewslst)
In [15]:
# Keep only the view counts, dropping the row labels from each pair.
n3viewslst = [views for _, views in vidsdfn3views]
print(n3viewslst)
In [16]:
# Keep only the view counts, dropping the row labels from each pair.
n4viewslst = [views for _, views in vidsdfn4views]
print(n4viewslst)
In [17]:
# krange: integer positions 0..n-1, one per video.
krange = [position for position in range(len(niviewslst))]
# t: the same positions spread evenly over [0, 1]; used as the x-axis of the plots.
t = np.linspace(0, 1, num=len(krange))
In [18]:
from numpy import *
import math
import matplotlib.pyplot as plt
In [19]:
# Initial view counts in ranked order, drawn as a red line over t.
plt.plot(t, niviewslst, color='r', label='Initial Number of views')
plt.legend(loc='upper right')
plt.show()
In [20]:
# Shallow copies of the four view-count lists; the following cells overwrite
# these copies and leave the originals untouched.
niviewslstcpy = list(niviewslst)
n2viewslstcpy = list(n2viewslst)
n3viewslstcpy = list(n3viewslst)
n4viewslstcpy = list(n4viewslst)
In [21]:
# Running (cumulative) total of the initial view counts, in ranked order.
# The list is rebuilt from niviewslst on every run: accumulating
# niviewslstcpy in place would accumulate it a second time whenever this cell
# is executed again.
niviewslstcpy = []
sumi = 0
for views in niviewslst:
    sumi += views
    niviewslstcpy.append(sumi)
# sumi is left holding the grand total, as before.
In [22]:
# Running total of the initial views, drawn as a blue line over t.
plt.plot(t, niviewslstcpy, color='b', label='Initial Number of views')
plt.legend(loc='upper right')
plt.show()
In [23]:
# Running (cumulative) total of the 'n2views' counts, in ranked order.
# The list is rebuilt from n2viewslst on every run: accumulating
# n2viewslstcpy in place would accumulate it a second time whenever this cell
# is executed again.
n2viewslstcpy = []
sum2 = 0
for views in n2viewslst:
    sum2 += views
    n2viewslstcpy.append(sum2)
# sum2 is left holding the grand total, as before.
In [24]:
# Running (cumulative) total of the 'n3views' counts, in ranked order.
# The list is rebuilt from n3viewslst on every run: accumulating
# n3viewslstcpy in place would accumulate it a second time whenever this cell
# is executed again.
n3viewslstcpy = []
# The accumulator keeps the original name sum2, so after this cell sum2 holds
# the 'n3views' grand total.
sum2 = 0
for views in n3viewslst:
    sum2 += views
    n3viewslstcpy.append(sum2)
In [25]:
# Running (cumulative) total of the 'n4views' counts, in ranked order.
# The list is rebuilt from n4viewslst on every run: accumulating
# n4viewslstcpy in place would accumulate it a second time whenever this cell
# is executed again.
n4viewslstcpy = []
sum3 = 0
for views in n4viewslst:
    sum3 += views
    n4viewslstcpy.append(sum3)
# sum3 is left holding the grand total, as before.
In [26]:
# All four running totals on one set of axes, drawn in the listed order.
for series, colour, caption in (
    (niviewslstcpy, 'k', 'Initial Number of views'),
    (n2viewslstcpy, 'r', 'Number of views after 2 days'),
    (n3viewslstcpy, 'g', 'Number of views after 1 week'),
    (n4viewslstcpy, 'b', 'Number of views after 1 month'),
):
    plt.plot(t, series, colour, label=caption)
plt.legend(loc='upper right')
plt.show()
In [27]:
# Only the 'after 2 days' running total, drawn as a red line over t.
plt.plot(t, n2viewslstcpy, color='r', label='Number of views after 2 days')
plt.legend(loc='upper right')
plt.show()
In [28]:
# Print the whole list of running totals of the initial views for inspection.
print(niviewslstcpy)
In [29]:
# Print the whole list of 'n2views' running totals for inspection.
print(n2viewslstcpy)
In [30]:
# Sum of all running totals; the next cell divides by this value.
sumi = sum(niviewslstcpy)
sumi
Out[30]:
In [31]:
# Fraction of sumi reached after each position of niviewslstcpy.
# NOTE(review): niviewslstcpy already holds running totals, so this adds them
# up a second time -- confirm that is intended.
niviewslstcpyf = []
tsum = 0
for running_views in niviewslstcpy:
    tsum += running_views
    niviewslstcpyf.append(float(tsum) / sumi)
print(niviewslstcpyf)
In [32]:
# Normalised curve from the previous cell, drawn as a blue line over t.
plt.plot(t, niviewslstcpyf, color='b', label='Initial Number of views')
plt.legend(loc='upper right')
plt.show()
In [33]:
# Normalised cumulative rank: (1 + 2 + ... + k) / (1 + 2 + ... + n) for every
# position k of the n videos.
niviewslstcpyf2 = []
length = len(niviewslstcpy)
# sumi is reused here for the sum 1 + 2 + ... + n.
sumi = length * (length + 1) / float(2)
tsum = 0
for rank in range(1, length + 1):
    tsum += rank
    niviewslstcpyf2.append(float(tsum) / sumi)
print(niviewslstcpyf2)
In [34]:
# niviewslstcpyf plotted against niviewslstcpyf2 (both are normalised to end at 1).
plt.plot(niviewslstcpyf2, niviewslstcpyf, color='b', label='Initial Number of views')
plt.legend(loc='upper right')
plt.show()
In [35]:
# The four running totals over t on a logarithmic y-axis.
plt.plot(t, n4viewslstcpy, 'b', label='After 1 month')
plt.plot(t, n3viewslstcpy, 'g', label='After 1 week')
plt.plot(t, n2viewslstcpy, 'r', label='After 2 days')
plt.plot(t, niviewslstcpy, 'k', label='Initially')
plt.xlabel('Normalized video index(ranked by popularity)', fontsize=10)
plt.ylabel('Total Number of views', fontsize=10)
plt.yscale('log')
# A logarithmic axis cannot start at 0, so only the upper limit is fixed and
# the lower limit is left to matplotlib's autoscaling.
plt.ylim(top=10000000)
plt.legend(loc='upper center', bbox_to_anchor=(0.5, 1.15), ncol=1, fancybox=True, shadow=True)
plt.grid(True)
plt.show()
In [36]:
# Running total of the initial views against niviewslstcpyf2 on a
# logarithmic y-axis.
plt.plot(niviewslstcpyf2, niviewslstcpy, 'k', label='3rd March')
plt.xlabel('Normalized video index(ranked by popularity)', fontsize=10)
plt.ylabel('Total Number of views', fontsize=10)
plt.yscale('log')
# A logarithmic axis cannot start at 0, so only the upper limit is fixed and
# the lower limit is left to matplotlib's autoscaling.
plt.ylim(top=100000)
plt.legend(loc='upper center', bbox_to_anchor=(0.5, 0.25), ncol=1, fancybox=True, shadow=True)
plt.grid(True)
plt.show()
In [37]:
# Min-max scaling of the initial-views running totals: (x - min) / (max - min).
min_ = float(min(niviewslstcpy))
max_ = float(max(niviewslstcpy))
span = max_ - min_
niviewslstcpyf3 = [(value - min_) / span for value in niviewslstcpy]
In [38]:
# Min-max scaled initial-views curve over t, with the y-axis fixed to [0, 1].
plt.plot(t, niviewslstcpyf3, color='k', label='Initially')
plt.ylim(0.0, 1.0)
plt.grid(True)
plt.legend(loc='upper center', bbox_to_anchor=(0.5, 1.25), ncol=1, fancybox=True, shadow=True)
In [39]:
# Min-max scaling of the 'n2views' running totals: (x - min) / (max - min).
min_ = float(min(n2viewslstcpy))
max_ = float(max(n2viewslstcpy))
span = max_ - min_
n2viewslstcpyf3 = [(value - min_) / span for value in n2viewslstcpy]
In [40]:
# Min-max scaling of the 'n3views' running totals: (x - min) / (max - min).
min_ = float(min(n3viewslstcpy))
max_ = float(max(n3viewslstcpy))
span = max_ - min_
n3viewslstcpyf3 = [(value - min_) / span for value in n3viewslstcpy]
In [41]:
# Min-max scaling of the 'n4views' running totals: (x - min) / (max - min).
min_ = float(min(n4viewslstcpy))
max_ = float(max(n4viewslstcpy))
span = max_ - min_
n4viewslstcpyf3 = [(value - min_) / span for value in n4viewslstcpy]
In [42]:
# The four min-max scaled curves on one set of axes.
# Scaling reference: https://en.wikipedia.org/wiki/Feature_scaling
for series, colour, caption in (
    (n4viewslstcpyf3, 'b', 'After 1 month'),
    (n3viewslstcpyf3, 'g', 'After 1 week'),
    (n2viewslstcpyf3, 'r', 'After 2 days'),
    (niviewslstcpyf3, 'k', 'Initially'),
):
    plt.plot(t, series, colour, label=caption)
plt.legend(loc='upper center', bbox_to_anchor=(0.5, 1.25), ncol=1, fancybox=True, shadow=True)
plt.grid(True)
In [43]:
# (row label, value) pairs for the 'n4ratings' and 'n4comments' columns, in
# the row order of dfn4views.
# The columns are read directly instead of going through DataFrame.iterrows():
# iterrows() builds a Series for every row and does not preserve the column's
# dtype across rows, whereas Series.items() yields the stored values as-is.
nratingslsttemp = list(dfn4views['n4ratings'].items())
ncommentslsttemp = list(dfn4views['n4comments'].items())
In [44]:
#print(nratingslsttemp)
In [45]:
# Keep only the values, dropping the row labels from each pair.
nratingslst = [rating for _, rating in nratingslsttemp]
ncommentslst = [comments for _, comments in ncommentslsttemp]
In [46]:
# Ratings per video over t, in the row order of dfn4views.
plt.plot(t, nratingslst, color='b', label='Number of ratings')
plt.xlabel('Normalized video index(ranked by popularity)', fontsize=10)
plt.legend(loc='upper right')
plt.show()
In [47]:
# Comments per video over t, in the row order of dfn4views.
plt.plot(t, ncommentslst, color='g', label='Number of comments')
plt.xlabel('Normalized video index(ranked by popularity)', fontsize=10)
plt.legend(loc='upper right')
plt.show()
In [55]:
# Scatter of ratings against comments, one point per video.
# The two correlation coefficients in the annotation are computed here from
# the plotted lists instead of being typed in by hand, so the figure no longer
# depends on numbers produced by cells that run later in the notebook.
pearson_coeff = pearsonr(nratingslst, ncommentslst)[0]
spearman_coeff = spearmanr(nratingslst, ncommentslst)[0]

# Only the 0-1000 window is displayed; the coefficients use the full lists.
plt.xlim(0, 1000)
plt.ylim(0, 1000)
plt.scatter(nratingslst, ncommentslst)
plt.xlabel('Number of ratings', fontsize=10)
plt.ylabel('Number of comments', fontsize=10)
plt.text(600, 800, 'PearsonCoeff = %.4f \nSpearmanCoeff = %.4f' % (pearson_coeff, spearman_coeff))
plt.show()
In [100]:
# Pearson correlation between ratings and comments.
# pearsonr is imported from the public scipy.stats namespace;
# scipy.stats.stats is a deprecated private module.
from scipy.stats import pearsonr
pearsonr(nratingslst, ncommentslst)
Out[100]:
In [101]:
# Spearman rank correlation between ratings and comments.
# spearmanr is imported from the public scipy.stats namespace;
# scipy.stats.stats is a deprecated private module.
from scipy.stats import spearmanr
spearmanr(nratingslst, ncommentslst)
Out[101]:
In [1]:
import numpy as np

# The integers 1..1000 (inclusive); the next cell samples from this array.
a = np.arange(1000) + 1
In [2]:
# Seed NumPy's global generator so that this draw, and the later np.random
# calls that share the same global state, give the same result on every
# Restart & Run All.
np.random.seed(42)

# p1: 100 distinct values drawn from a (sampling without replacement).
p1 = np.random.choice(a, size=100, replace=False)
# p2: the values of a that are not in p1.
p2 = np.setdiff1d(a, p1)
In [4]:
#a
In [5]:
# Display the 100 values sampled into p1.
p1
Out[5]:
In [7]:
#p2
In [8]:
# Random positions into p1 and p2, drawn with replacement:
# 8000 positions into p1 and 2000 positions into p2.
p1_idx = np.random.randint(low=0, high=p1.size, size=8000)
p2_idx = np.random.randint(low=0, high=p2.size, size=2000)
In [9]:
# Display the 8000 positions drawn for p1.
p1_idx
Out[9]:
In [10]:
# Display the 2000 positions drawn for p2.
p2_idx
Out[10]:
In [11]:
# Largest position drawn for p1.
p1_idx.max()
Out[11]:
In [12]:
# Largest position drawn for p2.
p2_idx.max()
Out[12]:
In [13]:
# Look up the drawn positions in p1 and p2, join the two results into one
# array, and shuffle it.
combined = np.concatenate((p1[p1_idx], p2[p2_idx]))
out = np.random.permutation(combined)
In [14]:
# Display the shuffled array.
out
Out[14]:
In [15]:
# Number of entries in the shuffled array.
out.shape[0]
Out[15]:
In [16]:
# Start from an empty list; the next cell replaces it with the values of out.
lst = []
In [17]:
# Copy the entries of out into a plain Python list.
lst = [entry for entry in out]
In [20]:
#lst
In [19]:
# Largest value in lst.
max(lst)
Out[19]:
In [ ]: