In [117]:
import json
In [4]:
# Load the scraped records. The file is read line by line and every non-blank
# line is parsed as one JSON document.
data = []
with open('/home/jerry/taobao_baokuan_data.json') as f:
    for line in f:
        line = line.strip()
        # A blank line (for example a trailing empty line at the end of the
        # file) is not a JSON document; skip it instead of letting json.loads
        # raise ValueError and abort the whole load.
        if line:
            data.append(json.loads(line))
In [6]:
# Sanity check: print the first 'shop_address' entry of the first record,
# encoded as UTF-8.
first_address = data[0]['shop_address'][0]
print(first_address.encode('utf-8'))
In [121]:
# Collect the 'goods_price' field of every record, in record order.
price_list = [record['goods_price'] for record in data]
In [122]:
# Display the collected prices.
price_list
In [123]:
import numpy as np
import matplotlib.pyplot as plt

# Plot the price of every record, with the overall mean as a dashed red line.
# Other cells call float() on these prices, so convert them here as well and
# plot real numbers.
prices = [float(p) for p in price_list]

# The mean is computed in this cell because the cell that assigns
# average_price1 comes later in the notebook; on a fresh top-to-bottom run the
# name would not exist yet when the line below is drawn.
average_price1 = sum(prices) / len(prices)

plt.plot(prices)
plt.ylabel('goods price')
# Horizontal line at the mean, spanning the x-range of the price curve.
plt.plot([0, len(prices)], [average_price1, average_price1], 'r--')
plt.title('pic. the price and average price of all goods')
plt.show()
In [124]:
# Overall average price across all records.
total_price = sum(float(price) for price in price_list)
average_price1 = total_price / len(price_list)
In [125]:
# Display the overall average price.
average_price1
Out[125]:
In [126]:
# Average price of the first-page goods (the first 44 records).
first_page_prices = price_list[0:44]
total_price = sum(float(price) for price in first_page_prices)
average_price2 = total_price / len(first_page_prices)
In [127]:
# Display the first-page average price.
average_price2
Out[127]:
In [128]:
# Collect the 'goods_sale_num' field of every record, in record order.
sale_num_list = [record['goods_sale_num'] for record in data]
In [129]:
# Sales volume of all shops, plotted in record order.
import matplotlib.pyplot as plt

axes = plt.gca()
axes.plot(sale_num_list)
axes.set_ylabel('sales volume')
axes.set_title('pic. sales volume')
plt.show()
In [130]:
# Keep the sales figures that are at least 10000. The entries are kept exactly
# as stored; int() is used only for the comparison.
sale_num_10000_list = [sales for sales in sale_num_list if int(sales) >= 10000]
In [131]:
# Display the sales figures that are at least 10000.
sale_num_10000_list
Out[131]:
In [132]:
# Sales volume of the shops that sold at least 10000 items.
import matplotlib.pyplot as plt

axes = plt.gca()
axes.plot(sale_num_10000_list)
axes.set_ylabel('sales volume')
axes.set_title('pic. sales volume larger than 10000')
plt.show()
In [133]:
# Average price of the goods whose sales volume is at least 10000.
# Select the prices by each record's own sales figure. Slicing the first 15
# prices gives the same result only while the records are sorted by sales and
# exactly 15 of them reach the threshold.
prices_10000 = [
    float(price)
    for price, sales in zip(price_list, sale_num_list)
    if int(sales) >= 10000
]
total_price = sum(prices_10000)
# With no qualifying record there is no meaningful average; use NaN rather
# than dividing by zero.
average_price3 = total_price / len(prices_10000) if prices_10000 else float('nan')
In [134]:
# Display the average price of the goods with sales of at least 10000.
average_price3
Out[134]:
In [165]:
# Price trend of the goods whose sales volume is at least 10000.
import matplotlib.pyplot as plt

# Pick the prices by each record's own sales figure. The first 15 prices are
# the right ones only while the records are sorted by sales and exactly 15 of
# them reach the threshold.
prices_10000 = [
    float(price)
    for price, sales in zip(price_list, sale_num_list)
    if int(sales) >= 10000
]
plt.plot(prices_10000)
plt.ylabel('goods price')
plt.title('pic. price of the goods whitch sals volume is larger than 10000')
plt.show()
In [136]:
# Pie chart: share of TMall vs. non-TMall shops over all records.
# The counts are computed in this cell because the cell that builds
# istmall_list comes later in the notebook; on a fresh top-to-bottom run the
# name would be undefined here.
istmall = sum(1 for d in data if d['shop_istmall'] == 'is_tmall')
nottmall = len(data) - istmall
istmall_list = [istmall, nottmall]

plt.pie(istmall_list, labels=['is tmall', 'not tmall'], colors=['m', 'c'])
plt.title("pic. wether the shop is TMall")
# show must actually be called; the bare attribute `plt.show` only evaluates
# to the function object.
plt.show()
Out[136]:
In [137]:
# Count TMall and non-TMall shops over all records.
# A record counts as TMall when its 'shop_istmall' field equals 'is_tmall';
# every other record counts as non-TMall.
istmall = len([record for record in data if record['shop_istmall'] == 'is_tmall'])
nottmall = len(data) - istmall
istmall_list = [istmall, nottmall]
In [138]:
# Display the [TMall, non-TMall] counts for all records.
istmall_list
Out[138]:
In [142]:
# TMall vs. non-TMall counts among the first-page shops (the first 44 records).
first_page_records = data[0:44]
istmall_first_page = len(
    [record for record in first_page_records if record['shop_istmall'] == 'is_tmall']
)
nottmall_first_page = len(first_page_records) - istmall_first_page
istmall_first_page_list = [istmall_first_page, nottmall_first_page]
In [143]:
# Pie chart: share of TMall vs. non-TMall shops on the first page.
axes = plt.gca()
axes.pie(
    istmall_first_page_list,
    labels=['is tmall', 'not tmall'],
    colors=['m', 'c'],
)
axes.set_title("pic. wether the shop is TMall in first page")
plt.show()
In [144]:
# Display the [TMall, non-TMall] counts for the first page.
istmall_first_page_list
Out[144]:
In [163]:
# Prices of the first-page goods (the first 44 records).
first_page_prices = price_list[0:44]
axes = plt.gca()
axes.plot(first_page_prices)
axes.set_ylabel('goods price')
axes.set_title('pic. price of the goods in the first page')
plt.show()
In [158]:
# Count shops per province. A record's province is read from the first two
# characters of its first 'shop_address' entry.
#
# The prefix is compared as text rather than as a 6-byte slice of the UTF-8
# encoding. For these two-character names the two tests select the same
# records, but the text comparison needs only one dictionary lookup per record
# and does not depend on how many bytes each character encodes to.
province_by_prefix = {
    u'广东': 'guangdong',
    u'上海': 'shanghai',
    u'北京': 'beijing',
    u'山东': 'shandong',
    u'江苏': 'jiangsu',
    u'江西': 'jiangxi',
    u'辽宁': 'liaoning',
    u'浙江': 'zhejiang',
    u'河南': 'henan',
    u'湖北': 'hubei',
}
province_counts = dict.fromkeys(province_by_prefix.values(), 0)

for d in data:
    addresses = d['shop_address']
    # A record with no address entries cannot be classified; handle it like an
    # unknown province instead of failing with IndexError.
    address = addresses[0] if addresses else u''
    province = province_by_prefix.get(address[:2])
    if province is None:
        # Not one of the ten tracked provinces: print the address so it can be
        # inspected. It is not counted anywhere.
        print(address.encode('utf-8'))
    else:
        province_counts[province] += 1

# Keep the per-province names available at notebook level.
guangdong = province_counts['guangdong']
shanghai = province_counts['shanghai']
beijing = province_counts['beijing']
shandong = province_counts['shandong']
jiangsu = province_counts['jiangsu']
jiangxi = province_counts['jiangxi']
liaoning = province_counts['liaoning']
zhejiang = province_counts['zhejiang']
henan = province_counts['henan']
hubei = province_counts['hubei']

# Print the counts, one per line.
for count in (guangdong, shanghai, beijing, shandong, jiangsu,
              jiangxi, liaoning, zhejiang, henan, hubei):
    print(count)

# Labels and counts for the location pie chart; the two lists are index-aligned.
shop_address_list = ['guangdong','zhejiang','beijing','shanghai','jiangsu','shandong','hubei','henan','liaoning','jiangxi']
address_num_list = [province_counts[name] for name in shop_address_list]
In [166]:
# Pie chart of shop locations, one wedge per entry of shop_address_list.
# Every wedge gets its own colour: the previous palette contained 'w' (white,
# which disappears on a white figure background) and used 'y' and 'r' twice,
# so different provinces were drawn in the same colour.
colors_list = ['m', 'c', 'b', 'g', 'r', 'y', 'k', 'orange', 'purple', 'brown']
plt.pie(address_num_list, labels=shop_address_list, colors=colors_list)
plt.title("pic. shop location")
plt.show()
In [162]:
# Sales volume of the first-page shops (the first 44 records).
first_page_sales = sale_num_list[0:44]
axes = plt.gca()
axes.plot(first_page_sales)
axes.set_ylabel('sales volume')
axes.set_title('pic. sales volume in first page')
plt.show()
In [ ]: