In [11]:
import numpy as np
import pandas as pd
import pandas.io.sql as pdsql
from pandas import Series, DataFrame
import os

from mysql import DB
from git_details import g
from github_stars_job import insert_user, add_users_other_stars
import similarity as s

In [12]:
db = DB()
db.connect()


DB >> Opened connection to database.

In [13]:
user = g.get_user('keithblaha')
insert_user(db, user)
add_users_other_stars(db, user)


		Adding user's other repos
		0 starred repos
		1 starred repos
		2 starred repos
		3 starred repos
		Added 3 starred repos
mysql.py:67: Warning: Out of range value for column 'starred_at' at row 1
  cursor.execute(sql)

In [8]:
similar_users = pd.read_sql(s.similar_users_query(user),
                             con=os.environ['CLEARDB_DATABASE_URL'])
similar_users.head(10)


Out[8]:
user_id score count
0 535948 0.364033 3
1 85881 0.364033 3
2 64935 0.364033 3
3 3382565 0.364033 3
4 1792491 0.364033 3
5 371278 0.364033 3
6 2569287 0.364033 3
7 6785435 0.364033 3
8 88295 0.364033 3
9 1441307 0.364033 3

In [69]:
similar_repos = pd.read_sql(s.similar_repos_query(user),
                            con=os.environ['CLEARDB_DATABASE_URL'])
similar_repos.head()


---------------------------------------------------------------------------
OperationalError                          Traceback (most recent call last)
<ipython-input-69-7b4da5315f48> in <module>()
      1 similar_repos = pd.read_sql(s.similar_repos_query(user),
----> 2                             con=os.environ['CLEARDB_DATABASE_URL'])
      3 similar_repos.head()

/usr/local/lib/python2.7/dist-packages/pandas/io/sql.pyc in read_sql(sql, con, index_col, coerce_float, params, parse_dates, columns, chunksize)
    511             sql, index_col=index_col, params=params,
    512             coerce_float=coerce_float, parse_dates=parse_dates,
--> 513             chunksize=chunksize)
    514 
    515 

/usr/local/lib/python2.7/dist-packages/pandas/io/sql.pyc in read_query(self, sql, index_col, coerce_float, parse_dates, params, chunksize)
   1179         args = _convert_params(sql, params)
   1180 
-> 1181         result = self.execute(*args)
   1182         columns = result.keys()
   1183 

/usr/local/lib/python2.7/dist-packages/pandas/io/sql.pyc in execute(self, *args, **kwargs)
   1070     def execute(self, *args, **kwargs):
   1071         """Simple passthrough to SQLAlchemy connectable"""
-> 1072         return self.connectable.execute(*args, **kwargs)
   1073 
   1074     def read_table(self, table_name, index_col=None, coerce_float=True,

/usr/local/lib/python2.7/dist-packages/sqlalchemy/engine/base.pyc in execute(self, statement, *multiparams, **params)
   1989 
   1990         connection = self.contextual_connect(close_with_result=True)
-> 1991         return connection.execute(statement, *multiparams, **params)
   1992 
   1993     def scalar(self, statement, *multiparams, **params):

/usr/local/lib/python2.7/dist-packages/sqlalchemy/engine/base.pyc in execute(self, object, *multiparams, **params)
    904         """
    905         if isinstance(object, util.string_types[0]):
--> 906             return self._execute_text(object, multiparams, params)
    907         try:
    908             meth = object._execute_on_connection

/usr/local/lib/python2.7/dist-packages/sqlalchemy/engine/base.pyc in _execute_text(self, statement, multiparams, params)
   1052             statement,
   1053             parameters,
-> 1054             statement, parameters
   1055         )
   1056         if self._has_events or self.engine._has_events:

/usr/local/lib/python2.7/dist-packages/sqlalchemy/engine/base.pyc in _execute_context(self, dialect, constructor, statement, parameters, *args)
   1144                 parameters,
   1145                 cursor,
-> 1146                 context)
   1147 
   1148         if self._has_events or self.engine._has_events:

/usr/local/lib/python2.7/dist-packages/sqlalchemy/engine/base.pyc in _handle_dbapi_exception(self, e, statement, parameters, cursor, context)
   1339                 util.raise_from_cause(
   1340                     sqlalchemy_exception,
-> 1341                     exc_info
   1342                 )
   1343             else:

/usr/local/lib/python2.7/dist-packages/sqlalchemy/util/compat.pyc in raise_from_cause(exception, exc_info)
    198     exc_type, exc_value, exc_tb = exc_info
    199     cause = exc_value if exc_value is not exception else None
--> 200     reraise(type(exception), exception, tb=exc_tb, cause=cause)
    201 
    202 if py3k:

/usr/local/lib/python2.7/dist-packages/sqlalchemy/engine/base.pyc in _execute_context(self, dialect, constructor, statement, parameters, *args)
   1137                         statement,
   1138                         parameters,
-> 1139                         context)
   1140         except Exception as e:
   1141             self._handle_dbapi_exception(

/usr/local/lib/python2.7/dist-packages/sqlalchemy/engine/default.pyc in do_execute(self, cursor, statement, parameters, context)
    448 
    449     def do_execute(self, cursor, statement, parameters, context=None):
--> 450         cursor.execute(statement, parameters)
    451 
    452     def do_execute_no_params(self, cursor, statement, context=None):

/home/raj/.virtualenvs/gitrec/lib/python2.7/site-packages/MySQLdb/cursors.pyc in execute(self, query, args)
    203             del tb
    204             self.messages.append((exc, value))
--> 205             self.errorhandler(self, exc, value)
    206         self._executed = query
    207         if not self._defer_warnings: self._warning_check()

/home/raj/.virtualenvs/gitrec/lib/python2.7/site-packages/MySQLdb/connections.pyc in defaulterrorhandler(***failed resolving arguments***)
     34     del cursor
     35     del connection
---> 36     raise errorclass, errorvalue
     37 
     38 re_numeric_part = re.compile(r"^(\d+)")

OperationalError: (_mysql_exceptions.OperationalError) (2013, 'Lost connection to MySQL server during query') [SQL: '\n    select  other_repos.user_id\n            , other_repos.starred_repo_id\n            , repo.repo_name\n            , repo.description\n            , repo.last_modified\n            , repo.language\n            , repo.stargazers_count\n            , repo.forks_count\n            , repo.from_hacker_news\n            , hn.added_at\n            , hn.submission_time\n            , hn.title\n            , hn.url\n    from\n        (select user_id\n                , starred_repo_id\n        from github_user_starred_repos\n        where user_id != 375265) other_repos\n    join\n        (select distinct(others.user_id) `user_id`\n        from\n            (select user_id\n                     , starred_repo_id\n            from github_user_starred_repos\n            where user_id != 375265) others\n        join\n            (select starred_repo_id\n            from github_user_starred_repos\n            where user_id = 375265) usr\n        on others.starred_repo_id = usr.starred_repo_id) others\n    on other_repos.user_id=others.user_id\n    join\n         (select id\n            , repo_name\n            , description\n            , last_modified\n            , language\n            , stargazers_count\n            , forks_count\n            , from_hacker_news\n        from github_repos) repo\n    on other_repos.starred_repo_id=repo.id\n    join\n        (select added_at\n                , submission_time\n                , title\n                , url\n                , github_repo_name\n        from hacker_news) hn\n    on repo.repo_name = hn.github_repo_name\n    ']

In [9]:
s.similar_repos_query(user)


Out[9]:
'\n    select  other_repos.user_id\n            , other_repos.starred_repo_id\n            , repo.repo_name\n            , repo.description\n            , repo.last_modified\n            , repo.language\n            , repo.stargazers_count\n            , repo.forks_count\n            , repo.from_hacker_news\n            , hn.added_at\n            , hn.submission_time\n            , hn.title\n            , hn.url\n    from\n        (select user_id\n                , starred_repo_id\n        from github_user_starred_repos\n        where user_id != 375265) other_repos\n    join\n        (select distinct(others.user_id) `user_id`\n        from\n            (select user_id\n                     , starred_repo_id\n            from github_user_starred_repos\n            where user_id != 375265) others\n        join\n            (select starred_repo_id\n            from github_user_starred_repos\n            where user_id = 375265) usr\n        on others.starred_repo_id = usr.starred_repo_id) others\n    on other_repos.user_id=others.user_id\n    join\n         (select id\n            , repo_name\n            , description\n            , last_modified\n            , language\n            , stargazers_count\n            , forks_count\n            , from_hacker_news\n        from github_repos) repo\n    on other_repos.starred_repo_id=repo.id\n    join\n        (select added_at\n                , submission_time\n                , title\n                , url\n                , github_repo_name\n        from hacker_news) hn\n    on repo.repo_name = hn.github_repo_name\n    '

In [10]:
user.id


Out[10]:
375265