In [ ]:
import requests
import json
from github import Github
import networkx as nx
from operator import itemgetter
from collections import Counter
from networkx.readwrite import json_graph
import webbrowser
import os
In [ ]:
ACCESS_TOKEN = '1161b718b9555cd76bf7ff9070c8f1ba300ea885'
USER = 'edx'
REPO = 'edx-documentation'
client = Github(ACCESS_TOKEN, per_page=100)
user = client.get_user(USER)
repo = user.get_repo(REPO)
stargazers = [ s for s in repo.get_stargazers() ] #可以先对这些人数进行分类>限制
print "关注人的数目: %d \n" % len(stargazers) #如果人数很多,速度很慢
In [ ]:
for s in repo.get_forks():
print s.get_
In [ ]:
g = nx.DiGraph()
g.add_node(repo.name + '(r)', type='repo', lang=repo.language, owner=user.login)
for sg in stargazers:
g.add_node(sg.login + '(u)', type='user')
g.add_edge(sg.login + '(u)', repo.name + '(r)', type='gazes')
print sg.login + '(u)'
for i, sg in enumerate(stargazers):
try:
for follower in sg.get_followers():
if follower.login + '(u)' in g:
g.add_edge(follower.login + '(u)', sg.login + '(u)', type='follows')
except Exception, e:
print "获取追随者失败,跳过", sg.login, e
print "正在处理第", i+1, " 个关注者。"
c = Counter([e[1] for e in g.edges_iter(data=True) if e[2]['type'] == 'follows'])
popular_users = [(u, f) for (u, f) in c.most_common() if f > 1]
print "受欢迎的用户数目:", len(popular_users)
print "最受欢迎的10个用户:", popular_users[:10]
In [ ]:
MAX_REPOS = 500
for i, sg in enumerate(stargazers):
print sg.login
try:
for starred in sg.get_starred()[:MAX_REPOS]: # Slice to avoid supernodes
g.add_node(starred.name + '(r)', type='repo', lang=starred.language, owner=starred.owner.login)
g.add_edge(sg.login + '(u)', starred.name + '(r)', type='gazes')
except Exception, e: #ssl.SSLError:
print "获取加星仓库失败 ", sg.login, "跳过."
print "正在处理", i+1, "加星的仓库"
filename = "1.1"
nx.write_gpickle(g, filename)
d = json_graph.node_link_data(g)
filename = "1.json"
json.dump(d, open(filename, 'w'))
In [ ]:
# Reload the graph pickled by the previous cell ("1.1").
g = nx.read_gpickle("1.1")
In [ ]:
print nx.info(g)
data = {}
for n in g.nodes_iter():
if (g.node[n]['type'] == 'repo' and g.node[n]['lang'] not in data and g.node[n]['lang']):
data[ g.node[n]['lang']] = 1
elif (g.node[n]['type'] == 'repo' and g.node[n]['lang'] in data and g.node[n]['lang']) :
data[ g.node[n]['lang']] = data[ g.node[n]['lang']]+ 1
else:
pass
sumRepo = sum([v for k,v in data.iteritems()])
data1 = sorted(data.iteritems(), key=lambda d:d[1], reverse = True )[:9]#排序输出
others = 0
for v in data1:
others = others + v[1]
others = sumRepo - others
dothers = [unicode('others', "UTF-8"),others]
data1.append(dothers)
print sumRepo
print others
print data1
json.dump(data1, open('1.1.pie', 'w'))
In [ ]:
print "Popular repositories"
data2 = sorted([(n,d) for (n,d) in g.in_degree_iter() if g.node[n]['type'] == 'repo'], key=itemgetter(1), reverse=True)[:10]
print data2
json.dump(data2, open('1.1.rect', 'w'))
In [ ]:
USER = 'edx'
REPO = 'edx-documentation'
client = Github("ch710798472","Mm456123")
user = client.get_user(USER)
repo = user.get_repo(REPO)
stargazers = [ s for s in repo.get_stargazers() ] #可以先对这些人数进行分类>限制
print "关注人的数目: %d \n" % len(stargazers)
In [ ]:
i = 1
for sg in stargazers:
print i
print sg.login,"-",sg.email,"-",sg.avatar_url,"-",sg.followers,"-",sg.following,"-",sg.created_at
i = i + 1
In [ ]:
for s in repo.get_forks():
for x in s.get_forks():
print x
In [ ]:
# Search China-located users sorted by followers.
# NOTE(security): access token embedded in the query string — tokens in URLs
# leak via logs/history; send it in the Authorization header and revoke this one.
r=requests.get('https://api.github.com/search/users?access_token=dff8d5a7bbc4f5240eac2fd4114dc5d0e87707ed&q=location:china&sort=followers')
In [ ]:
# Parse the search response. GitHub search may time out and return a partial
# result set, signalled by 'incomplete_results'.
d=r.json()
total_count = d['total_count']
incomplete_results = d['incomplete_results']
In [ ]:
d = {"items":[]}
url = 'https://api.github.com/search/users?q=location:china&sort=followers&per_page=100&page='
for i in range(10):
print url + str(i+1)
newUrl = url + str(i+1)
r = requests.get(newUrl)
temp = r.json()
d['items'].extend(temp['items'])
filename = "user.json"
json.dump(d, open(filename, 'w'))
In [ ]:
t = {}
j=0
for i in d1['items']:
# print i['url']
r = requests.get(i['url'])
temp = r.json()
t[temp['login']] = temp
if (j%50)==0:
print j
j = j + 1
filename = "userinfo.json"
json.dump(t, open(filename, 'w'))
In [ ]:
with open("user.json", "r") as f:
d1 = json.load(f)
print d1['items']
In [ ]:
# Authenticated client for the detail-fetch cells below.
# NOTE(security): hard-coded token committed to source — revoke it and read
# from an environment variable instead.
ACCESS_TOKEN = 'c55b7dfa48b0f4ed3ace41a124b5d35c52368604'
client = Github(ACCESS_TOKEN, per_page=100)
In [ ]:
t = {}
notdone=[]
j=0
for i in d1['items']:
try:
user = client.get_user(i['login'])
t[user._rawData['login']] = user._rawData
except Exception,e:
print "time out"
notdone.append(i['login'])
if (j%11)==0:
print j
j = j + 1
filename = "userinfo.json"
json.dump(t, open(filename, 'w'))
In [ ]:
# Inspect the last fetched PyGithub user object; _rawData is the raw JSON
# payload returned by the API (private attribute, version-fragile).
from pprint import pprint
pprint(vars(user))
user._rawData['avatar_url']
In [ ]:
# Logins that failed during the detail fetch and still need a retry.
print notdone
In [ ]:
t1 = {}
notdone1=[]
j=0
for i in notdone:
try:
user = client.get_user(i)
t1[user._rawData['login']] = user._rawData
except Exception,e:
print "time out"
notdone1.append(i)
filename = "userinfo.json"
json.dump(t1, open(filename, 'w'))
In [ ]:
with open("userinfo-1.json", "r") as f:
d2 = json.load(f)
print len(d2)
In [ ]:
# Re-order the detail records (d2, keyed by login) to match the ranked order
# of d1['items']. A dict membership test replaces the original O(n*m) nested
# scan (and its pointless try/except); output order and contents are unchanged.
d = []
for i in d1['items']:
    login = i['login']
    if login in d2:
        d.append(d2[login])
In [ ]:
# Persist the rank-ordered user details for the visualization layer.
# `with` closes the file handle the original json.dump(open(...)) leaked.
filename = "userMoreInfo.json"
with open(filename, 'w') as f:
    json.dump(d, f)
In [ ]:
# Spot-check: avatar URL of the user at rank index 111.
print d[111]['avatar_url']
In [ ]:
#获取用户头像图片并保存
import os
import urllib2
def getImage(filePath,addr):
try:
splitPath = addr.split('/')
fName = splitPath.pop()
# print fName
open(filePath+fName, "wb").write(urllib2.urlopen(addr).read())
except Exception,e:
print "[Error]Cant't download: %s:%s" %(fName,e)
In [ ]:
# Load the locally cached rank-ordered user data.
with open("userMoreInfo.json", "r") as f:
    d = json.load(f)
In [ ]:
# Download every user's avatar into a local cache directory.
# NOTE(review): 'userImages/' must already exist; getImage only logs a
# failure otherwise.
filePath = 'userImages/'
for i in d:
    getImage(filePath, i['avatar_url'])
In [ ]:
#获取中国区C语言的排名(前100名)
d = {"items":[]}
url = 'https://api.github.com/search/users?sort=followers&q=location:china+language:C&per_page=100&page='
rank_count=1
for i in range(rank_count):
print url + str(i+1)
newUrl = url + str(i+1)
r = requests.get(newUrl)
temp = r.json()
d['items'].extend(temp['items'])
filename = "c_user.json"
json.dump(d, open(filename, 'w'))
In [ ]:
with open("c_user.json", "r") as f:
d1 = json.load(f)
ACCESS_TOKEN = 'e986ae79943cf8735d8906fd4d77182cdb3f6cd7'
client = Github(ACCESS_TOKEN, per_page=100)
t = {}
notdone=[]
j=0
for i in d1['items']:
try:
user = client.get_user(i['login'])
t[user._rawData['login']] = user._rawData
except Exception,e:
print "time out"
notdone.append(i['login'])
if (j%11)==0:
print j
j = j + 1
filename = "c_userinfo.json"
json.dump(t, open(filename, 'w'))
In [ ]:
# Re-order the C user details to match ranked order, then persist.
with open("c_userinfo.json", "r") as f:
    d2 = json.load(f)
with open("c_user.json", "r") as f:
    d1 = json.load(f)
d = []
for i in d1['items']:
    login = i['login']
    if login in d2:  # dict lookup replaces the original O(n*m) nested scan
        d.append(d2[login])
filename = "c_userMoreInfo.json"
with open(filename, 'w') as f:
    json.dump(d, f)
# TODO: cache avatars for this list too; most of these users already appear
# in the overall China ranking.
In [ ]:
#获取中国区Python语言的排名(前100名)
d = {"items":[]}
url = 'https://api.github.com/search/users?sort=followers&q=location:china+language:python&per_page=100&page='
rank_count=1
filename = "py_user.json"
filename1 = "py_userinfo.json"
filename2 = "py_userMoreInfo.json"
#获取排名
for i in range(rank_count):
print url + str(i+1)
newUrl = url + str(i+1)
r = requests.get(newUrl)
temp = r.json()
d['items'].extend(temp['items'])
json.dump(d, open(filename, 'w'))
print 'finish rank user'
#获取用户详细信息
with open(filename, "r") as f:
d1 = json.load(f)
ACCESS_TOKEN = '0d1c6d6da836bc28b691f87dd34a1fbdc604c895'
client = Github(ACCESS_TOKEN, per_page=100)
t = {}
notdone=[]
j=0
for i in d1['items']:
try:
user = client.get_user(i['login'])
t[user._rawData['login']] = user._rawData
except Exception,e:
print "time out"
notdone.append(i['login'])
if (j%11)==0:
print j
j = j + 1
json.dump(t, open(filename1, 'w'))
print 'finish user info'
#重新排序
with open(filename1, "r") as f:
d2 = json.load(f)
with open(filename, "r") as f:
d1 = json.load(f)
d = []
for i in d1['items']:
for k in d2:
try:
if i['login'] == k:
# print k
d.append(d2[k])
# apendx.append(k)
except Exception,e:
pass
json.dump(d, open(filename2, 'w'))
print 'DONE'
In [ ]:
#search
d = {"items":[]}
url = 'https://api.github.com/search/users?sort=followers&q=location:china+language:python&per_page=100&page='
rank_count=1
#获取排名
for i in range(rank_count):
print url + str(i+1)
newUrl = url + str(i+1)
r = requests.get(newUrl)
temp = r.json()
d['items'].extend(temp['items'])
print 'finish rank user'
#获取用户详细信息
username = 'ch710798472'
password = 'Mm456123'
ACCESS_TOKEN = '0d1c6d6da836bc28b691f87dd34a1fbdc604c895'
client = Github(username,password=password, per_page=100)
t = {}
notdone=[]
j=0
for i in d['items']:
try:
user = client.get_user(i['login'])
t[user._rawData['login']] = user._rawData
except Exception,e:
print "time out"
notdone.append(i['login'])
if (j%10)==0:
print j
j = j + 1
print 'finish user info'
#重新排序
d1 = []#最后的结果存储
for i in d['items']:
for k in t:
try:
if i['login'] == k:
# print k
d1.append(t[k])
# apendx.append(k)
except Exception,e:
pass
print 'DONE'
In [ ]:
username = 'ch710798472'
password = 'Mm456123'
client = Github(login_or_token=username,password=password, per_page=100)
USER = 'ch710798472'
REPO = 'python_aiml_rebot'
user = client.get_user(USER)
repo = user.get_repo(REPO)
stargazers = [ s for s in repo.get_stargazers() ] #获得关注者,通常这个人数比较多
contributors = [ s for s in repo.get_contributors() ] #获得贡献者
g = nx.DiGraph()
g.add_node(repo.name + '(r)', type='repo', lang=repo.language, owner=user.login)
for sg in stargazers:
g.add_node(sg.login + '(u)', type='user')
g.add_edge(sg.login + '(u)', repo.name + '(r)', type='gazes')
print 'finish add stargazers'
for sg in contributors:
g.add_node(sg.login + '(u)', type='user')
g.add_edge(sg.login + '(u)', repo.name + '(r)', type='conbs')
print 'finish add contributors'
d = json_graph.node_link_data(g)
filename = "connect.json"
json.dump(d, open(filename, 'w'))
In [ ]:
w = [ s for s in user.get_watched() ]
for sg in w:
print sg.name
In [ ]:
username = 'ch710798472'
password = 'Mm456123'
client = Github(login_or_token=username,password=password, per_page=100)
USER = 'ch710798472'
user = client.get_user(USER)
repos = [s for s in user.get_repos()]
g = nx.DiGraph()
g.add_node(USER + '(u)', type='user')
try:
for r in repos:
stargazers = [ s for s in r.get_stargazers() ]
if(len(stargazers)):
g.add_edge(USER + '(u)',r.name + '(r)', type='have')
for sg in stargazers:
g.add_node(sg.login + '(u)', type='user')
g.add_edge(sg.login + '(u)', r.name + '(r)', type='gazes')
contributors = [ s for s in r.get_contributors() ]
for sg in contributors:
g.add_node(sg.login + '(u)', type='user')
g.add_edge(sg.login + '(u)', r.name + '(r)', type='conbs')
except Exception,e:
print "time out"
In [ ]:
# Dump every edge of the user/repo graph built above.
print g.edges()
In [ ]:
#获取仓库使用语言排名
d = {"items":[]}
url = 'https://api.github.com/search/repositories?q=language:&sort=stars&order=desc&per_page=100&page='
rank_count=10
for i in range(rank_count):
print url + str(i+1)
newUrl = url + str(i+1)
r = requests.get(newUrl)
temp = r.json()
d['items'].extend(temp['items'])
filename = "repo_lang.json"
json.dump(d, open(filename, 'w'))
repo_lang = {}
for i in range(1000):
temp = d['items'][i-1]['language']
if temp not in repo_lang.keys():
repo_lang[temp]=1
else:
repo_lang[temp]= repo_lang[temp]+1
print repo_lang
filename = "../data/repo_lang.json"
json.dump(repo_lang, open(filename, 'w'))
In [ ]:
In [17]:
In [ ]:
In [ ]:
In [ ]: