In [2]:
from lib.users import UserAPI
import csv
api = UserAPI()
userdata_file = "/home/clemsos/Dev/mitras/lib/cities/usersample.csv"
# number of lines : 14,388,386 users

# Print the province of every user listed in the CSV file.
# The file is opened in 'rb' mode, which is what the Python 2 csv module
# expects for its input file.
with open(userdata_file, 'rb') as csvfile:
    user_data = csv.reader(csvfile)
    # Skip the CSV header through the reader itself: this consumes one full
    # CSV record (not just one physical line) and, thanks to the default
    # value, does not raise StopIteration when the file is empty.
    next(user_data, None)
    for user in user_data:
        # The first column is handed to the API to resolve a province code
        # (presumably it holds the user id -- confirm against the CSV header).
        province_code = api.get_province(user[0])
        # api.provinces is indexed by that code; an unknown code raises KeyError.
        print(api.provinces[province_code])
Load province codes from CSV
In [23]:
import csv
# Build the province lookup table: first CSV column -> second CSV column.
provinces_file = "/home/clemsos/Dev/mitras/lib/cities/provinces.csv"
provinces = {}
with open(provinces_file, 'rb') as csvfile:
    provinces_data = csv.reader(csvfile)
    # Every row of the file is used (no header is skipped); when a key
    # appears more than once, the last row wins.
    provinces.update((row[0], row[1]) for row in provinces_data)

# Debugging aid, left disabled:
# for index in provinces:
#     print provinces[index]
Store users in MongoDB
In [22]:
from models.user import User
# data_sample="/home/clemsos/Dev/mitras/data/datazip/others/userdata.csv"
userdata_file = "/home/clemsos/Dev/mitras/lib/cities/usersample.csv"
# number of lines : 14,388,386 users

# Save one User per CSV row.
# Relies on `csv` and on the `provinces` mapping already being defined in the
# kernel; neither is created in this cell.
with open(userdata_file, 'rb') as csvfile:
    user_data = csv.reader(csvfile)
    # Skip the CSV header through the reader itself: this consumes one full
    # CSV record and, thanks to the default value, does not raise
    # StopIteration when the file is empty.
    next(user_data, None)
    for row in user_data:
        # Create a User object and fill it from the row's first four columns.
        # csv.reader yields strings, so the fields are stored as raw text.
        u = User()
        u.uid = row[0]
        # The second column is translated through the provinces mapping;
        # a value missing from the mapping raises KeyError.
        u.province = provinces[row[1]]
        u.gender = row[2]
        u.verified = row[3]
        u.save()  # store to mongo
In [29]:
from lib.mongo import MongoDB
# Connect to Mongo through the project's MongoDB wrapper and keep a handle on
# its "weibousers" entry (presumably the "weibousers" collection of the
# "tweets" database -- confirm against lib.mongo).
db = MongoDB("tweets").db["weibousers"]

# Report how many entries db.count() sees.
print("Total users in the db : %d" % db.count())
def get_user(_uid):
    """Look up a single user by its "uid" field.

    Args:
        _uid: value matched against the "uid" field of the stored users.

    Returns:
        Whatever ``db.find_one`` returns for the query (presumably the
        matching document, or None when nothing matches -- confirm for the
        wrapper in lib.mongo).
    """
    query = {"uid": _uid}
    return db.find_one(query)
def get_province(_uid):
    """Return the "province" field of the user whose "uid" equals ``_uid``.

    Args:
        _uid: value matched against the "uid" field of the stored users.

    Returns:
        The user's "province" value, or None when the lookup finds no user.

    Raises:
        KeyError: if a user is found but has no "province" field.
    """
    # Reuse the single-user lookup instead of duplicating the query.
    user = get_user(_uid)
    if user is None:
        # An unknown uid used to crash with TypeError when indexing None;
        # report "no such user" the same way the lookup itself does.
        return None
    return user["province"]
# Example lookup: show the province stored for one hard-coded uid.
result = get_province("uHRWEYSX0")
print(result)