In [49]:
# Default font for every figure drawn after this point: 9pt serif.
pylab.rc('font', **{'family': 'serif', 'size': 9})

from __future__ import division
from mysql.connector import connect
from mysql.connector.errors import Error
import math
from functools import reduce
from datetime import datetime

class YouTube(object):
    """Ad-hoc report over YouTube feed snapshots stored in a MySQL database.

    For every feed name in ``feeds`` the report reads one snapshot of the
    feed from the ``FeedVideo`` table, looks each video up in the ``Video``
    table and prints a log-scaled Euclidean norm of its numeric metadata.
    """

    # Feed names used as the ``feed`` value when querying ``FeedVideo``.
    feeds = ["top_rated", "top_favorites", "most_shared", "most_popular", "most_recent", "most_discussed",
             "most_responded", "recently_featured", "on_the_web"]

    def dbConnect(self):
        """Open and return a new connection to the ``OnlineVideo`` database.

        The connection goes through the local MAMP MySQL socket with
        hard-coded credentials; the caller is responsible for closing it.
        """
        return connect(unix_socket="/Applications/MAMP/tmp/mysql/mysql.sock", port=8889, user="nemo", passwd="nemo.", db="OnlineVideo")

    @staticmethod
    def _log_norm(row):
        """Return the Euclidean norm of the natural logs of the values in *row*.

        ``datetime`` values are first converted to local-time epoch seconds.
        ``None`` and non-positive values contribute nothing, which is the
        same as the ``log(1) == 0`` substitution used for non-positive
        numbers.
        """
        # Local import: only the datetime branch needs it, and the file's
        # import block does not provide ``time``.
        import time

        total = 0.0
        for value in row:
            if isinstance(value, datetime):
                # Portable replacement for the platform-specific
                # ``strftime('%s')`` directive.
                value = int(time.mktime(value.timetuple()))
            if value is None or value <= 0:
                continue
            total += math.log(value) ** 2
        return math.sqrt(total)

    def tests(self):
        """Print, for each feed, the log-norm of every video in one snapshot.

        For each feed the first row matching the fixed start date defines
        the snapshot timestamp; following rows are collected while they carry
        that same timestamp. For every collected video the metadata row is
        fetched and reduced with ``_log_norm``; the per-video values and
        their mean are printed. MySQL errors are reported and the loop moves
        on. The cursor and connection are always closed, even when an
        unexpected exception escapes.
        """
        MAX_QUERIES = 1
        db = self.dbConnect()
        cur_db = None
        try:
            cur_db = db.cursor()
            for feed in self.feeds:
                date = "2012-02-20"
                for _ in range(MAX_QUERIES):
                    try:
                        # Values are passed as driver parameters instead of
                        # being formatted into the SQL text. DATE() replaces
                        # DATE_FORMAT(..., '%Y-%m-%d') so the statement holds
                        # no literal percent signs next to the placeholders.
                        cur_db.execute(
                            "SELECT date, video FROM FeedVideo "
                            "WHERE source='YOUTUBE' AND feed=%s AND DATE(date)=%s",
                            (feed, date))
                        # Drain the result set up front so that leaving the
                        # loop early never leaves unread rows on the cursor.
                        rows = cur_db.fetchall()
                        if not rows:
                            break
                        init_date, video = rows[0]
                        if not init_date or not video:
                            break

                        vids = [video]
                        for row_date, row_video in rows[1:]:
                            if row_date != init_date:
                                break
                            vids.append(row_video)
                        # Report the snapshot timestamp itself rather than
                        # whichever row date the loop happened to end on.
                        print(feed, init_date, len(vids))
                        date = init_date.strftime('%Y-%m-%d')

                        # Get individual video metadata
                        eucl_length = []
                        for vid in vids:
                            cur_db.execute(
                                "SELECT duration, rating, ratingCount, likeCount, viewCount, "
                                "favoriteCount, commentCount FROM Video WHERE videoID=%s",
                                (vid,))
                            # presumably videoID is unique; fetchall() keeps
                            # the cursor clean even if it is not.
                            meta_rows = cur_db.fetchall()
                            if not meta_rows:
                                print("No metadata for video {0}".format(vid))
                                continue
                            eucl_length.append(self._log_norm(meta_rows[0]))

                        print(['{0:.2f}'.format(l) for l in eucl_length])
                        if eucl_length:
                            print(sum(eucl_length) / len(eucl_length))
                    except Error as e:
                        print("MySQL error: {}".format(str(e)))
                print()
        finally:
            if cur_db is not None:
                cur_db.close()
            db.close()
# Run the per-feed report once; everything tests() prints becomes the output
# of this cell.
g = YouTube()
g.tests()


top_rated 2012-02-20 01:29:31 24
['33.19', '31.49', '33.79', '33.65', '32.22', '32.27', '34.31', '35.59', '32.86', '34.51', '32.81', '33.07', '32.47', '33.56', '32.24', '34.10', '33.57', '32.70', '33.23', '34.13', '32.88', '32.61', '34.31', '34.09']
33.3190901407

top_favorites 2012-02-20 01:19:00 24
['32.86', '32.80', '33.79', '32.55', '33.65', '31.19', '34.31', '32.24', '34.51', '32.81', '33.07', '33.22', '31.78', '33.56', '32.63', '34.10', '33.57', '32.70', '33.23', '34.13', '32.61', '31.64', '34.31', '34.09']
33.1391877048

most_shared 2012-02-20 01:30:01 24
['16.94', '24.50', '21.65', '14.17', '21.73', '16.84', '23.68', '17.74', '16.86', '14.75', '13.04', '19.25', '14.97', '17.97', '14.20', '15.98', '19.53', '15.47', '15.03', '12.53', '15.97', '16.93', '15.86', '20.15']
17.3225902641

most_popular 2012-02-20 01:30:06 24
['25.63', '25.21', '28.58', '25.74', '26.83', '27.14', '29.76', '26.19', '25.74', '28.18', '28.32', '29.98', '26.14', '26.95', '26.40', '26.55', '30.51', '24.51', '23.80', '26.90', '26.36', '25.41', '28.86', '25.22']
26.8717317431

most_recent 2012-02-20 01:26:30 24
['1.10', '5.57', '3.09', '6.15', '4.32', '3.26', '4.57', '3.71', '5.87', '5.51', '4.56', '4.44', '6.39', '3.66', '2.71', '3.62', '5.48', '3.76', '3.07', '4.26', '4.38', '3.53', '2.89', '4.87']
4.19950037052

most_discussed 2012-02-20 01:30:09 24
['31.33', '28.81', '33.79', '30.07', '25.04', '34.31', '27.23', '35.59', '30.85', '34.51', '32.47', '33.22', '33.50', '33.56', '32.24', '34.10', '31.09', '24.00', '34.31', '32.54', '19.94', '28.70', '34.09', '33.30']
31.1912928174

most_responded 2012-02-20 01:30:12 24
['11.38', '33.19', '17.66', '23.52', '32.08', '32.22', '34.31', '35.59', '34.51', '26.38', '32.47', '25.46', '33.56', '33.57', '34.13', '19.47', '34.31', '12.74', '21.31', '24.83', '25.30', '25.86', '20.01', '19.67']
26.8150960124

recently_featured 2012-02-20 01:23:43 24
['21.91', '20.27', '22.64', '22.21', '16.91', '16.39', '18.84', '17.74', '23.60', '19.84', '17.20', '17.50', '20.13', '21.73', '24.28', '19.20', '23.01', '15.72', '24.35', '20.38', '18.41', '21.42', '17.65', '17.43']
19.9480414895

on_the_web 2012-02-20 01:30:12 24
['16.67', '19.16', '21.73', '14.80', '18.54', '12.66', '16.77', '17.62', '29.76', '17.06', '17.40', '16.57', '13.81', '9.34', '15.12', '14.94', '18.48', '18.28', '12.90', '15.99', '21.06', '11.55', '19.39', '17.27']
16.9527308297


In [ ]:


In [2]:
from urllib.parse import urlencode

# Quick check of urlencode: one key/value pair becomes a query string.
urlencode([('a', 2)])


Out[2]:
'a=2'