In [1]:
import numpy as np
import pandas as pd
import pandas.io.sql as pdsql
from pandas import Series, DataFrame
import os
from mysql import DB
from git_details import g
from github_stars_job import insert_user, add_users_other_stars
import similarity as s
In [2]:
db = DB()
db.connect()
DB >> Opened connection to database.
In [7]:
user = g.get_user('yankov')
# insert_user(db, user)
# add_users_other_stars(db, user)
In [8]:
similar_users = pd.read_sql(s.similar_users_query(user),
con=os.environ['CLEARDB_DATABASE_URL'])
similar_users.head(10)
Out[8]:
user_id
score
count
0
535948
0.364033
3
1
85881
0.364033
3
2
64935
0.364033
3
3
3382565
0.364033
3
4
1792491
0.364033
3
5
371278
0.364033
3
6
2569287
0.364033
3
7
6785435
0.364033
3
8
88295
0.364033
3
9
1441307
0.364033
3
In [69]:
similar_repos = pd.read_sql(s.similar_repos_query(user),
con=os.environ['CLEARDB_DATABASE_URL'])
similar_repos.head()
---------------------------------------------------------------------------
OperationalError Traceback (most recent call last)
<ipython-input-69-7b4da5315f48> in <module>()
1 similar_repos = pd.read_sql(s.similar_repos_query(user),
----> 2 con=os.environ['CLEARDB_DATABASE_URL'])
3 similar_repos.head()
/usr/local/lib/python2.7/dist-packages/pandas/io/sql.pyc in read_sql(sql, con, index_col, coerce_float, params, parse_dates, columns, chunksize)
511 sql, index_col=index_col, params=params,
512 coerce_float=coerce_float, parse_dates=parse_dates,
--> 513 chunksize=chunksize)
514
515
/usr/local/lib/python2.7/dist-packages/pandas/io/sql.pyc in read_query(self, sql, index_col, coerce_float, parse_dates, params, chunksize)
1179 args = _convert_params(sql, params)
1180
-> 1181 result = self.execute(*args)
1182 columns = result.keys()
1183
/usr/local/lib/python2.7/dist-packages/pandas/io/sql.pyc in execute(self, *args, **kwargs)
1070 def execute(self, *args, **kwargs):
1071 """Simple passthrough to SQLAlchemy connectable"""
-> 1072 return self.connectable.execute(*args, **kwargs)
1073
1074 def read_table(self, table_name, index_col=None, coerce_float=True,
/usr/local/lib/python2.7/dist-packages/sqlalchemy/engine/base.pyc in execute(self, statement, *multiparams, **params)
1989
1990 connection = self.contextual_connect(close_with_result=True)
-> 1991 return connection.execute(statement, *multiparams, **params)
1992
1993 def scalar(self, statement, *multiparams, **params):
/usr/local/lib/python2.7/dist-packages/sqlalchemy/engine/base.pyc in execute(self, object, *multiparams, **params)
904 """
905 if isinstance(object, util.string_types[0]):
--> 906 return self._execute_text(object, multiparams, params)
907 try:
908 meth = object._execute_on_connection
/usr/local/lib/python2.7/dist-packages/sqlalchemy/engine/base.pyc in _execute_text(self, statement, multiparams, params)
1052 statement,
1053 parameters,
-> 1054 statement, parameters
1055 )
1056 if self._has_events or self.engine._has_events:
/usr/local/lib/python2.7/dist-packages/sqlalchemy/engine/base.pyc in _execute_context(self, dialect, constructor, statement, parameters, *args)
1144 parameters,
1145 cursor,
-> 1146 context)
1147
1148 if self._has_events or self.engine._has_events:
/usr/local/lib/python2.7/dist-packages/sqlalchemy/engine/base.pyc in _handle_dbapi_exception(self, e, statement, parameters, cursor, context)
1339 util.raise_from_cause(
1340 sqlalchemy_exception,
-> 1341 exc_info
1342 )
1343 else:
/usr/local/lib/python2.7/dist-packages/sqlalchemy/util/compat.pyc in raise_from_cause(exception, exc_info)
198 exc_type, exc_value, exc_tb = exc_info
199 cause = exc_value if exc_value is not exception else None
--> 200 reraise(type(exception), exception, tb=exc_tb, cause=cause)
201
202 if py3k:
/usr/local/lib/python2.7/dist-packages/sqlalchemy/engine/base.pyc in _execute_context(self, dialect, constructor, statement, parameters, *args)
1137 statement,
1138 parameters,
-> 1139 context)
1140 except Exception as e:
1141 self._handle_dbapi_exception(
/usr/local/lib/python2.7/dist-packages/sqlalchemy/engine/default.pyc in do_execute(self, cursor, statement, parameters, context)
448
449 def do_execute(self, cursor, statement, parameters, context=None):
--> 450 cursor.execute(statement, parameters)
451
452 def do_execute_no_params(self, cursor, statement, context=None):
/home/raj/.virtualenvs/gitrec/lib/python2.7/site-packages/MySQLdb/cursors.pyc in execute(self, query, args)
203 del tb
204 self.messages.append((exc, value))
--> 205 self.errorhandler(self, exc, value)
206 self._executed = query
207 if not self._defer_warnings: self._warning_check()
/home/raj/.virtualenvs/gitrec/lib/python2.7/site-packages/MySQLdb/connections.pyc in defaulterrorhandler(***failed resolving arguments***)
34 del cursor
35 del connection
---> 36 raise errorclass, errorvalue
37
38 re_numeric_part = re.compile(r"^(\d+)")
OperationalError: (_mysql_exceptions.OperationalError) (2013, 'Lost connection to MySQL server during query') [SQL: '\n select other_repos.user_id\n , other_repos.starred_repo_id\n , repo.repo_name\n , repo.description\n , repo.last_modified\n , repo.language\n , repo.stargazers_count\n , repo.forks_count\n , repo.from_hacker_news\n , hn.added_at\n , hn.submission_time\n , hn.title\n , hn.url\n from\n (select user_id\n , starred_repo_id\n from github_user_starred_repos\n where user_id != 375265) other_repos\n join\n (select distinct(others.user_id) `user_id`\n from\n (select user_id\n , starred_repo_id\n from github_user_starred_repos\n where user_id != 375265) others\n join\n (select starred_repo_id\n from github_user_starred_repos\n where user_id = 375265) usr\n on others.starred_repo_id = usr.starred_repo_id) others\n on other_repos.user_id=others.user_id\n join\n (select id\n , repo_name\n , description\n , last_modified\n , language\n , stargazers_count\n , forks_count\n , from_hacker_news\n from github_repos) repo\n on other_repos.starred_repo_id=repo.id\n join\n (select added_at\n , submission_time\n , title\n , url\n , github_repo_name\n from hacker_news) hn\n on repo.repo_name = hn.github_repo_name\n ']
In [9]:
s.similar_repos_query(user)
Out[9]:
'\n select other_repos.user_id\n , other_repos.starred_repo_id\n , repo.repo_name\n , repo.description\n , repo.last_modified\n , repo.language\n , repo.stargazers_count\n , repo.forks_count\n , repo.from_hacker_news\n , hn.added_at\n , hn.submission_time\n , hn.title\n , hn.url\n from\n (select user_id\n , starred_repo_id\n from github_user_starred_repos\n where user_id != 375265) other_repos\n join\n (select distinct(others.user_id) `user_id`\n from\n (select user_id\n , starred_repo_id\n from github_user_starred_repos\n where user_id != 375265) others\n join\n (select starred_repo_id\n from github_user_starred_repos\n where user_id = 375265) usr\n on others.starred_repo_id = usr.starred_repo_id) others\n on other_repos.user_id=others.user_id\n join\n (select id\n , repo_name\n , description\n , last_modified\n , language\n , stargazers_count\n , forks_count\n , from_hacker_news\n from github_repos) repo\n on other_repos.starred_repo_id=repo.id\n join\n (select added_at\n , submission_time\n , title\n , url\n , github_repo_name\n from hacker_news) hn\n on repo.repo_name = hn.github_repo_name\n '
In [10]:
user.id
Out[10]:
375265
Content source: webmasterraj/gitrecommender
Similar notebooks: