API: WeRead endpoints and request helper


In [13]:
import os
import time
import requests

In [14]:
# Request headers sent with every WeRead API call.
# The credentials are taken from the environment when WEREAD_ACCESS_TOKEN /
# WEREAD_VID are set, so they do not have to live in the notebook; the literal
# values remain only as backward-compatible fallbacks.
# NOTE(review): a token committed to a shared notebook should be rotated, and
# the fallbacks removed once the environment variables are in place.
headers = {
    'User-Agent': 'WeRead/1.5.8 WRBrand/Meizu Dalvik/2.1.0 (Linux; U; Android 5.1; M3s Build/LMY47I)',
    'accessToken': os.environ.get('WEREAD_ACCESS_TOKEN', '_vJgP0bU'),
    'vid': os.environ.get('WEREAD_VID', '7649467'),
}
# Delay in seconds read by the rate-limit handling of the draft get_from_api helper.
second = 1

In [15]:
from pymongo import MongoClient
# Connect to MongoDB (no arguments, so the driver defaults apply) and bind the
# two collections of the `weread` database that the Book and User classes use
# to store raw API payloads.
client = MongoClient()
db = client['weread']
books = db['books']
users = db['users']

In [16]:
from py2neo.ogm import GraphObject, Property, Label, RelatedObjects, RelatedTo, RelatedFrom
from py2neo import Graph, NodeSelector
# Open the py2neo graph handle; no arguments are passed, so the library defaults apply.
graph = Graph()
# Node selector bound to that graph. Within this notebook it is referenced
# only from commented-out queries.
selector = NodeSelector(graph)

In [17]:
url_base = 'https://i.weread.qq.com/'

#user
# Each `{}` placeholder is filled with a user vid through str.format().
# The profile query is assembled from adjacent string literals: the earlier
# triple-quoted, backslash-continued literal carried the indentation of every
# continuation line (plus a trailing space and newline) into the URL, which
# corrupted the parameter names and values that followed each line break.
url_user_profile = (
    url_base + 'user/profile?vid={}'
    '&gender=1&signature=1'
    '&vDesc=1&location=1&totalReadingTime=1&currentReadingTime=1&finishedBookCount=1'
    '&followerCount=1&followingCount=1&buyCount=1&reviewCount=1&reviewLikedCount=1'
    '&reviewCommentedCount=1&likeBookCount=1&isFollowing=1&isFollower=1&isBlackMe=1'
    '&isInBlackList=1&bookReviewCount=1&noteBookCount=1&exchangedMoney=0&recentReadingBooks=0'
    '&booklistCount=1&booklistCollectCount=1&articleBookId=1&articleCount=1&articleDraftCount=1'
    '&articleReadCount=1&articleSubscribeCount=1&articleWordCount=1&lectureCount=1'
    '&lectureListenCount=1&lectureLikedCount=1&lectureCommentedCount=1'
)
url_book_finishreading = url_base+'book/finishreading?userVid={}'
url_shelf_friendsshelf = url_base+'shelf/friendsshelf?userVid={}'

#book
# `{}` is filled with a bookId, except for chapterInfos, which takes no
# placeholder because get_from_api sends the id in a POST body instead.
url_book_info = url_base+'book/info?bookId={}'
url_book_chapterInfos = url_base+'book/chapterInfos'
url_book_review_list = url_base+'review/list?listType=3&bookId={}&maxIdx=1000000&tips=1&listMode=1'
def get_from_api(api,id_): if api == url_book_chapterInfos: data = {"bookIds":[id_],"synckeys":[0]} r = requests.post(api,headers=headers,data=data) else: r = requests.get(api.format(id_),headers=headers) j = r.json() try: if j['errcode'] == -2014: print u'请求频率过高 ', if second != 1: second = second*2 time.sleep(second) print second return get_from_api(api,id_) if j['errcode'] == -2012: print u'登录超时 ', raise Exception(u'登录超时') except: sleep = 1 return j

In [18]:
def get_from_api(api,id_):
    if api == url_book_chapterInfos:
        data = {"bookIds":[id_],"synckeys":[0]}
        r = requests.post(api,headers=headers,data=data)
    else:
        r = requests.get(api.format(id_),headers=headers)
    j = r.json()
    try:
        if j['errcode'] == -2014:
            print u'请求频率过高 ',
            time.sleep(15)
            return get_from_api(api,id_)
        if j['errcode'] == -2012:
            print u'登录超时 ',
            raise Exception(u'登录超时')
    except:
        return j
{u'errcode': -2014, u'errmsg': u'\u8bf7\u6c42\u9891\u7387\u8fc7\u9ad8'} 请求频率过高 {u'errcode': -2012, u'errmsg': u'\u767b\u5f55\u8d85\u65f6'} 登录超时
# Manual check: request the book-info payload for bookId 728774.
get_from_api(url_book_info,728774)

OGM: py2neo graph object models


In [19]:
class Book(GraphObject):
    """Graph node for one book, keyed by ``bookId``.

    Constructing an instance loads the three raw payloads (book info, chapter
    infos, review list) from the Mongo ``books`` collection when a document
    for the id exists, otherwise from the API, in which case they are then
    stored in Mongo. It then copies selected ``book_info`` fields onto the
    node properties and pushes the node to the graph.
    """
    __primarykey__ = 'bookId'

    bookId = Property()
    author = Property()
    title = Property()
    category = Property()
    tags = Property()
    lastChapterIdx = Property()
    price = Property()
    publishPrice = Property()
    publishTime = Property()
    publisher = Property()
    star = Property()
    recommended = Property()
    totalWords = Property()
    version = Property()

    #category = Label()

    def __init__(self, bookId):
        """Load the payloads for ``bookId``, fill the properties and push the node."""
        self.bookId = bookId
        if not self.exists_in_mongo():
            self.get_api_data()
            self.save_mongo_data()
        else:
            self.get_mongo_data()
        self.init_Property()
        self.push()

    def get_api_data(self):
        """Fetch the info, chapter-info and review-list payloads from the API."""
        book_id = self.bookId
        self.book_info = get_from_api(url_book_info, book_id)
        self.book_chapterInfos = get_from_api(url_book_chapterInfos, book_id)
        self.book_review_list = get_from_api(url_book_review_list, book_id)

    def init_Property(self):
        """Copy ``book_info`` fields onto the same-named node properties.

        A key missing from ``book_info`` raises KeyError. The declared
        properties publishPrice, publishTime and star are not populated here.
        """
        copied_keys = (
            'author',
            'title',
            'category',
            'tags',
            'lastChapterIdx',
            'price',
            'publisher',
            'recommended',
            'totalWords',
            'version',
        )
        info = self.book_info
        for key in copied_keys:
            setattr(self, key, info[key])

    def exists_in_mongo(self):
        """Return True when the ``books`` collection holds a document for this bookId."""
        return books.find_one({'bookId': self.bookId}) is not None

    def save_mongo_data(self):
        """Insert the raw payloads into ``books``, using bookId as the document ``_id``."""
        self.mongo_data = dict(
            _id=self.bookId,
            bookId=self.bookId,
            book_info=self.book_info,
            book_chapterInfos=self.book_chapterInfos,
            book_review_list=self.book_review_list,
        )
        books.insert_one(self.mongo_data)

    def get_mongo_data(self):
        """Load the raw payloads for this bookId from the ``books`` collection."""
        self.mongo_data = books.find_one({'bookId': self.bookId})
        stored = self.mongo_data
        self.book_info = stored['book_info']
        self.book_chapterInfos = stored['book_chapterInfos']
        self.book_review_list = stored['book_review_list']

    def push(self):
        """Push this node to the module-level graph."""
        graph.push(self)

In [20]:
class User(GraphObject):
    __primarykey__ = 'userVid'
    
    userVid = Property()
    bookReviewCount = Property()
    booklistCollectCount = Property()
    booklistCount = Property()
    buyCount = Property()
    followerCount = Property()
    followingCount = Property()
    gender = Property()
    likeBookCount = Property()
    location = Property()
    noteBookCount = Property()
    reviewCommentedCount = Property()
    reviewCount = Property()
    totalReadingTime = Property()
    vDesc = Property()
    
    review = RelatedTo('Book','REVIEW')
    comment = RelatedTo('User','COMMENT')
    like_review = RelatedTo('User','LIKE_REVIEW')
    reading = RelatedTo('Book','REDING')
    finish = RelatedTo('Book','FINISH')
    
    def __init__(self, userVid):
        self.userVid = userVid
        if self.exists_in_mongo():
            self.get_mongo_data()
        else:
            self.get_api_data()
            self.save_mongo_data()
        self.init_Property()
        self.book_shelf_link()
        self.push()
        
    def get_api_data(self):
        #self.user_profile = get_user_profile(self.userVid)
        self.user_profile = get_from_api(url_user_profile,self.userVid)
        #self.book_finishreading = get_book_finishreading(self.userVid)
        self.book_finishreading = get_from_api(url_book_finishreading,self.userVid)
        #self.book_shelf_friendsshelf = get_shelf_friendsshelf(self.userVid)
        self.book_shelf_friendsshelf = get_from_api(url_shelf_friendsshelf,self.userVid)
        
    def init_Property(self):
        #self.bookReviewCount = self.user_profile['bookReviewCount']
        #self.booklistCollectCount = self.user_profile['booklistCollectCount']
        #self.booklistCount = self.user_profile['booklistCount']
        #self.buyCount = self.user_profile['buyCount']
        self.followerCount = self.user_profile['followerCount']
        self.followingCount = self.user_profile['followingCount']
        self.gender = self.user_profile['gender']
        #self.likeBookCount = self.user_profile['likeBookCount']
        self.location = self.user_profile['location']
        #self.noteBookCount = self.user_profile['noteBookCount']
        #self.reviewCommentedCount = self.user_profile['reviewCommentedCount']
        #self.reviewCount = self.user_profile['reviewCount']
        self.totalReadingTime = self.user_profile['totalReadingTime']
        self.vDesc = self.user_profile['vDesc']
        
    def exists_in_mongo(self):
        return users.find_one({'userVid':self.userVid}) != None
    
    def save_mongo_data(self):
        self.mongo_data = {
            '_id':self.userVid,
            'userVid':self.userVid,
            'user_profile':self.user_profile,
            'book_finishreading':self.book_finishreading,
            'book_shelf_friendsshelf':self.book_shelf_friendsshelf,
        }
        users.insert_one(self.mongo_data)
    
    def get_mongo_data(self):
        self.mongo_data = users.find_one({'userVid':self.userVid})
        self.user_profile = self.mongo_data['user_profile']
        self.book_finishreading = self.mongo_data['book_finishreading']
        self.book_shelf_friendsshelf = self.mongo_data['book_shelf_friendsshelf']
        
    def push(self):
        graph.push(self)
        
    def book_shelf_link(self):
        bookfinish = [int(book['bookId']) for book in self.book_finishreading['books']]
        friendsshelf = [int(book['bookId']) for book in self.book_shelf_friendsshelf['books']]
        bookreading = list(set(friendsshelf)-set(bookfinish))
        for bf in bookfinish:
            bf = int(bf)
            b = Book.select(graph,bf).first()#selector.select('Book').where(bookId=bf).first()
            if not b:
                b = Book(bf)
            self.finish.add(b)
            #self.push()
        '''for br in bookreading:
            br = int(br)
            b = Book.select(graph,br).first()#selector.select('Book').where(bookId=bf).first()
            if not b:
                b = Book(br)
        '''
            #self.reading.add(b)
        try:
            for rec in self.book_shelf_friendsshelf['recent']:
                recent = int(rec['book']['bookId'])
                b = Book.select(graph,recent).first()#selector.select('Book').where(bookId=bf).first()
                if not b:
                    b = Book(br)
                self.reading.add(b)
        except:
            pass
# Build the node for user vid 7649467; the constructor also links that user's
# books and pushes the result to the graph.
u = User(7649467)
# Build and push the node for bookId 852290.
b = Book(852290)

In [21]:
# Seed bookIds whose reviews are walked by link_for_booklist.
booklist = [635938,728774]

In [22]:
def link_for_booklist(booklist):
    for book in booklist:
        print '\n',book
        b = Book.select(graph,book).first()
        if not b:
            b = Book(book)
        else:
            b.get_mongo_data()
        for r in b.book_review_list['reviews']:
            print 'review',
            try:
                ruid = r['review']['userVid']
                print ruid,
                #r['commentsCount']
                #r['likesCount']
                ru = User.select(graph,ruid).first()
                if not ru:
                    ru = User(ruid)
                ru.review.add(b)
                ru.push()
                try:
                    for c in r['commentsForList']:
                        cuid = c['author']['userVid']
                        cu = User.select(graph,cuid).first()
                        if not cu:
                            cu = User(cuid)
                        cu.comment.add(ru)
                        cu.push()
                except:
                    pass
                try:
                    for l in r['likesForList']:
                        luid = l['author']['userVid']
                        lu = User.select(graph,luid).first()
                        if not lu:
                            lu = User(luid)
                        lu.like_review.add(ru)
                        lu.push() 
                except:
                    pass
            except:
                pass
    print '\nfinish'

In [23]:
# Run the review crawl for the seed book ids in `booklist`.
link_for_booklist(booklist)


635938
review 2019272 review 9721970 review 3955865 review 18237752 review 38516878 review 4930357 review 12302816 review 8628810 review 6158452 review 6158452 review 4437055 review 23803756 review 4457483 review 11340730 review 4606117 review 10916752 review 1924692 review 7319046 review 6427257 review 18727500 review 2208478 review 17946220 review 6225137 review 9556438 review 11355339 review 131265 review 15111658 review 18235576 review 12409212 review 39816639 review 2141908 review 19211399 review 19024711 review 8338805 review 4023980 review 9617412 review 4436031 review 12409212 review 12409212 review 12800800 review 10730014 review 12409212 review 15206223 review 20818381 review 25308400 review 9340709 review 23503563 review 34009926 review 16541570 review 38602279 review 21707444 review 18538322 review 38100378 review 13429056 review 16137347 review 3933165 review 12409212 review 11858318 review 7339082 review 17946220 
728774
review 1937323 review 1417410 review 16334715 review 23307477 review 32018094 review 19336951 review 31734173 review 5251136 review 28631924 review 5032748 review 6860177 review 13428705 review 17058706 review 6267014 review 31804634 review 5349956 review 819693 review 1112263 review 16635165 review 4017828 review 3660932 review 17753089 review 342029 review 5426601 review 32917043 review 34409167 review 33821079 review 6361936 review 31018339 review 8520668 review 13114062 review 27702275 review 13949295 review 35608486 review 3349980 review 13259140 review 1642991 review 19941420 review 3354451 review 8604903 review 6436344 review 8210832 review 25308818 review 38009621 review 4720774 review 17121309 review 18332153 review 37609827 review 7214473 review 18402939 review 16630725 review 30100557 review 39827256 review 4236018 review 20502725 review 5555885 review 7643668 review 13216704 review 20426880 review 6809256 review 28509009 review 30523028 review 13932541 review 15837848 review 11433005 review 29412449 review 1635991 review 23403008 review 19941420 review 17531885 review 39803028 review 21001641 review 701163 review 3943740 review 38014186 review 34111324 review 18057715 review 18149411 review 6063495 review 24533947 review 4338825 review 17953038 review 8946465 review 27502183 review 27300835 review 2832845 review 19447280 review 6333037 review 19538648 review 18841067 review 35222096 review 26829748 review 17335938 review 10824246 review 11900694 review 23309496 review 5340975 review 5062954 review 23309496 review 12563924 review 33422345 review 39721868 review 15411139 review 39815767 review 21433615 review 35313215 review 16239291 review 3856923 review 17540157 review 38616642 review 3716562 review 2765444 review 4152096 review 37923803 review 28919027 review 4549724 review 38805654 review 15558741 review 1316581 review 39507195 review 38933182 review 35923692 review 16050914 review 3859608 review 15250632 review 31435161 review 5865073 review 22207357 
review 35829783 review 37422284 review 743930 review 10729739 review 38933805 review 25132120 review 6263956 review 10617526 review 13268353 review 12415207 review 19656682 review 7161131 review 4755590 review 2435571 review 33709464 review 20026640 
finish

In [24]:
# Leftover sanity-check cell: prints a fixed string and touches no other state.
print 'hello'


hello

In [ ]: