In [13]:
import time
import requests
In [14]:
# HTTP headers sent with the WeRead API requests (passed as ``headers=`` to
# the ``requests`` calls in get_from_api).
# NOTE(review): the access token and vid are hard-coded credentials; consider
# loading them from the environment rather than keeping them in the notebook.
headers = {
    'User-Agent': 'WeRead/1.5.8 WRBrand/Meizu Dalvik/2.1.0 (Linux; U; Android 5.1; M3s Build/LMY47I)',
    'accessToken': '_vJgP0bU',
    'vid': '7649467',
}

# Not referenced by the code visible here; presumably a delay in seconds
# between requests -- TODO confirm.
second = 1
In [15]:
from pymongo import MongoClient
# MongoDB connection with the driver's default host/port (MongoClient() is
# called with no arguments).
client = MongoClient()

# The 'weread' database holds the cached raw API payloads.
db = client['weread']

# One collection per entity type: books and users.
books = db['books']
users = db['users']
In [16]:
from py2neo.ogm import GraphObject, Property, Label, RelatedObjects, RelatedTo, RelatedFrom
from py2neo import Graph, NodeSelector
# Graph database handle; Graph() is called with no arguments, so py2neo's
# default connection settings apply.
graph = Graph()
# Node selector bound to that graph. In the code visible here it is only
# mentioned in commented-out snippets.
selector = NodeSelector(graph)
In [17]:
# Root of the WeRead API. Every template below that contains '{}' takes a
# single id through str.format().
url_base = 'https://i.weread.qq.com/'

# user
# Profile endpoint; '{}' is the user's vid. The query string switches
# individual profile fields on (=1) or off (=0).
# Two parameter names had been corrupted by HTML-entity decoding
# ('&curren' -> U+00A4 and '&not' -> U+00AC); they are restored here as
# '&currentReadingTime' and '&noteBookCount'.
# The literal is built from adjacent strings instead of a triple-quoted
# block so that no trailing newline ends up inside the URL.
url_user_profile = (
    'https://i.weread.qq.com/user/profile?vid={}&gender=1&signature=1'
    '&vDesc=1&location=1&totalReadingTime=1&currentReadingTime=1&finishedBookCount=1'
    '&followerCount=1&followingCount=1&buyCount=1&reviewCount=1&reviewLikedCount=1'
    '&reviewCommentedCount=1&likeBookCount=1&isFollowing=1&isFollower=1&isBlackMe=1'
    '&isInBlackList=1&bookReviewCount=1&noteBookCount=1&exchangedMoney=0&recentReadingBooks=0'
    '&booklistCount=1&booklistCollectCount=1&articleBookId=1&articleCount=1&articleDraftCount=1'
    '&articleReadCount=1&articleSubscribeCount=1&articleWordCount=1&lectureCount=1&'
    'lectureListenCount=1&lectureLikedCount=1&lectureCommentedCount=1'
)
# Books a user has finished reading; '{}' is the user's vid.
url_book_finishreading = url_base + 'book/finishreading?userVid={}'
# A user's shelf as exposed to friends; '{}' is the user's vid.
url_shelf_friendsshelf = url_base + 'shelf/friendsshelf?userVid={}'

# book
# Book metadata; '{}' is the bookId.
url_book_info = url_base + 'book/info?bookId={}'
# Chapter listing; has no placeholder because get_from_api POSTs the id to it.
url_book_chapterInfos = url_base + 'book/chapterInfos'
# Reviews of a book; '{}' is the bookId.
url_book_review_list = url_base + 'review/list?listType=3&bookId={}&maxIdx=1000000&tips=1&listMode=1'
In [18]:
def get_from_api(api,id_):
if api == url_book_chapterInfos:
data = {"bookIds":[id_],"synckeys":[0]}
r = requests.post(api,headers=headers,data=data)
else:
r = requests.get(api.format(id_),headers=headers)
j = r.json()
try:
if j['errcode'] == -2014:
print u'请求频率过高 ',
time.sleep(15)
return get_from_api(api,id_)
if j['errcode'] == -2012:
print u'登录超时 ',
raise Exception(u'登录超时')
except:
return j
In [19]:
class Book(GraphObject):
    """Graph node for one book, keyed by ``bookId``.

    Constructing a Book loads its raw payloads (book info, chapter infos,
    review list) from the ``books`` Mongo collection when a document for the
    id exists there; otherwise it fetches them through get_from_api and
    stores them. It then copies selected ``book_info`` fields onto the node
    properties and pushes the node to the graph.
    """

    __primarykey__ = 'bookId'

    bookId = Property()
    author = Property()
    title = Property()
    category = Property()
    tags = Property()
    lastChapterIdx = Property()
    price = Property()
    publishPrice = Property()
    publishTime = Property()
    publisher = Property()
    star = Property()
    recommended = Property()
    totalWords = Property()
    version = Property()

    def __init__(self, bookId):
        """Load (or fetch and cache) the payloads for ``bookId`` and push the node."""
        self.bookId = bookId
        if not self.exists_in_mongo():
            self.get_api_data()
            self.save_mongo_data()
        else:
            self.get_mongo_data()
        self.init_Property()
        self.push()

    def get_api_data(self):
        """Fetch book info, chapter infos and the review list from the API."""
        book_id = self.bookId
        self.book_info = get_from_api(url_book_info, book_id)
        self.book_chapterInfos = get_from_api(url_book_chapterInfos, book_id)
        self.book_review_list = get_from_api(url_book_review_list, book_id)

    def init_Property(self):
        """Copy fields from ``book_info`` onto the node properties.

        publishPrice, publishTime and star are declared on the class but are
        not populated here. A field missing from ``book_info`` raises
        KeyError.
        """
        info = self.book_info
        copied_fields = (
            'author',
            'title',
            'category',
            'tags',
            'lastChapterIdx',
            'price',
            'publisher',
            'recommended',
            'totalWords',
            'version',
        )
        for field in copied_fields:
            setattr(self, field, info[field])

    def exists_in_mongo(self):
        """Return True when the ``books`` collection has a document for this bookId."""
        cached = books.find_one({'bookId': self.bookId})
        return cached is not None

    def save_mongo_data(self):
        """Insert the raw payloads into ``books``, using bookId as ``_id``."""
        document = {
            '_id': self.bookId,
            'bookId': self.bookId,
            'book_info': self.book_info,
            'book_chapterInfos': self.book_chapterInfos,
            'book_review_list': self.book_review_list,
        }
        self.mongo_data = document
        books.insert_one(document)

    def get_mongo_data(self):
        """Restore the raw payloads from the cached ``books`` document."""
        document = books.find_one({'bookId': self.bookId})
        self.mongo_data = document
        self.book_info = document['book_info']
        self.book_chapterInfos = document['book_chapterInfos']
        self.book_review_list = document['book_review_list']

    def push(self):
        """Write this node to the graph."""
        graph.push(self)
In [20]:
class User(GraphObject):
    """Graph node for one user, keyed by ``userVid``.

    Constructing a User loads its raw payloads (profile, finished books,
    friends-shelf) from the ``users`` Mongo collection when a document for
    the vid exists there; otherwise it fetches them through get_from_api and
    stores them. It then copies selected profile fields onto the node
    properties, links the user to finished and recently read books, and
    pushes the node to the graph.
    """

    __primarykey__ = 'userVid'

    userVid = Property()
    bookReviewCount = Property()
    booklistCollectCount = Property()
    booklistCount = Property()
    buyCount = Property()
    followerCount = Property()
    followingCount = Property()
    gender = Property()
    likeBookCount = Property()
    location = Property()
    noteBookCount = Property()
    reviewCommentedCount = Property()
    reviewCount = Property()
    totalReadingTime = Property()
    vDesc = Property()

    review = RelatedTo('Book', 'REVIEW')
    comment = RelatedTo('User', 'COMMENT')
    like_review = RelatedTo('User', 'LIKE_REVIEW')
    # NOTE(review): 'REDING' looks like a typo for 'READING'. It is left
    # unchanged because it is the relationship type written to the graph and
    # existing data may already use it.
    reading = RelatedTo('Book', 'REDING')
    finish = RelatedTo('Book', 'FINISH')

    def __init__(self, userVid):
        """Load (or fetch and cache) the payloads for ``userVid``, link books, push."""
        self.userVid = userVid
        if self.exists_in_mongo():
            self.get_mongo_data()
        else:
            self.get_api_data()
            self.save_mongo_data()
        self.init_Property()
        self.book_shelf_link()
        self.push()

    def get_api_data(self):
        """Fetch the profile, finished-books list and friends-shelf from the API."""
        self.user_profile = get_from_api(url_user_profile, self.userVid)
        self.book_finishreading = get_from_api(url_book_finishreading, self.userVid)
        self.book_shelf_friendsshelf = get_from_api(url_shelf_friendsshelf, self.userVid)

    def init_Property(self):
        """Copy fields from ``user_profile`` onto the node properties.

        Only the fields listed below are populated; the remaining declared
        properties are left unset. A field missing from the profile raises
        KeyError.
        """
        profile = self.user_profile
        self.followerCount = profile['followerCount']
        self.followingCount = profile['followingCount']
        self.gender = profile['gender']
        self.location = profile['location']
        self.totalReadingTime = profile['totalReadingTime']
        self.vDesc = profile['vDesc']

    def exists_in_mongo(self):
        """Return True when the ``users`` collection has a document for this vid."""
        return users.find_one({'userVid': self.userVid}) is not None

    def save_mongo_data(self):
        """Insert the raw payloads into ``users``, using userVid as ``_id``."""
        self.mongo_data = {
            '_id': self.userVid,
            'userVid': self.userVid,
            'user_profile': self.user_profile,
            'book_finishreading': self.book_finishreading,
            'book_shelf_friendsshelf': self.book_shelf_friendsshelf,
        }
        users.insert_one(self.mongo_data)

    def get_mongo_data(self):
        """Restore the raw payloads from the cached ``users`` document."""
        self.mongo_data = users.find_one({'userVid': self.userVid})
        self.user_profile = self.mongo_data['user_profile']
        self.book_finishreading = self.mongo_data['book_finishreading']
        self.book_shelf_friendsshelf = self.mongo_data['book_shelf_friendsshelf']

    def push(self):
        """Write this node and its relationships to the graph."""
        graph.push(self)

    @staticmethod
    def _book_node(book_id):
        """Return the Book already in the graph for ``book_id``, or create it."""
        return Book.select(graph, book_id).first() or Book(book_id)

    def book_shelf_link(self):
        """Attach FINISH and reading relationships to Book nodes.

        Every book in ``book_finishreading['books']`` is linked through
        ``finish``. Every entry in ``book_shelf_friendsshelf['recent']`` is
        linked through ``reading``; that list is optional and malformed
        entries are skipped. The ``books`` list of the friends-shelf payload
        is not used.
        """
        finished_ids = [int(book['bookId']) for book in self.book_finishreading['books']]
        for book_id in finished_ids:
            self.finish.add(self._book_node(book_id))

        # The 'recent' list is treated as optional, as before.
        try:
            recent_entries = self.book_shelf_friendsshelf['recent']
        except (KeyError, TypeError):
            recent_entries = []

        for entry in recent_entries or []:
            try:
                recent_id = int(entry['book']['bookId'])
                # Previously this created Book(br) with an undefined name; the
                # NameError was swallowed, so books not yet in the graph were
                # never linked and the loop stopped at the first of them.
                self.reading.add(self._book_node(recent_id))
            except (KeyError, TypeError, ValueError):
                # Best effort: skip entries (or book payloads) that lack the
                # expected fields. Other errors propagate, as they do for
                # the finished-books loop.
                continue
In [21]:
# Seed book ids handed to link_for_booklist.
booklist = [
    635938,
    728774,
]
In [22]:
def link_for_booklist(booklist):
for book in booklist:
print '\n',book
b = Book.select(graph,book).first()
if not b:
b = Book(book)
else:
b.get_mongo_data()
for r in b.book_review_list['reviews']:
print 'review',
try:
ruid = r['review']['userVid']
print ruid,
#r['commentsCount']
#r['likesCount']
ru = User.select(graph,ruid).first()
if not ru:
ru = User(ruid)
ru.review.add(b)
ru.push()
try:
for c in r['commentsForList']:
cuid = c['author']['userVid']
cu = User.select(graph,cuid).first()
if not cu:
cu = User(cuid)
cu.comment.add(ru)
cu.push()
except:
pass
try:
for l in r['likesForList']:
luid = l['author']['userVid']
lu = User.select(graph,luid).first()
if not lu:
lu = User(luid)
lu.like_review.add(ru)
lu.push()
except:
pass
except:
pass
print '\nfinish'
In [23]:
# Run the crawl for the ids in `booklist`.
link_for_booklist(booklist)
In [24]:
# Smoke-test cell: prints a fixed string.
print('hello')
In [ ]: