In [117]:
import json

In [4]:
data = []
with open('/home/jerry/taobao_baokuan_data.json') as f:
    for line in f:
        data.append(json.loads(line))

In [6]:
print data[0]['shop_address'][0].encode('utf-8')

In [121]:
price_list = []
for d in data:
    price_list.append(d['goods_price'])

In [122]:
price_list

In [123]:
import numpy as np
import matplotlib.pyplot as plt
t = np.arange(0, 250, 1)
plt.plot(price_list)
plt.ylabel('goods price')

plt.plot([0,len(price_list)], [average_price1,average_price1], 'r--')
plt.title('pic. the price and average price of all goods')
plt.show()



In [124]:
#总平均价
total_price = 0
for p in price_list:
    total_price += float(p)
average_price1 = total_price/len(price_list)

In [125]:
average_price1


Out[125]:
46.053818181818144

In [126]:
#首页平均价
total_price = 0
for p in price_list[0:44]:
    total_price += float(p)
average_price2 = total_price/len(price_list[0:44])

In [127]:
average_price2


Out[127]:
42.84068181818182

In [128]:
sale_num_list = []
for d in data:
    sale_num_list.append(d['goods_sale_num'])

In [129]:
#所有商家销量
import matplotlib.pyplot as plt
plt.plot(sale_num_list)
plt.ylabel('sales volume')
plt.title('pic. sales volume')
plt.show()



In [130]:
#销量大于10000的
sale_num_10000_list = []
for s in sale_num_list:
    if int(s) >= 10000:
        sale_num_10000_list.append(s)
    else:
        pass

In [131]:
sale_num_10000_list


Out[131]:
[u'30829',
 u'15387',
 u'13397',
 u'12718',
 u'12195',
 u'11646',
 u'11580',
 u'11249',
 u'11162',
 u'11015',
 u'10733',
 u'10711',
 u'10686',
 u'10257',
 u'10009']

In [132]:
#大于10000的商家销量
import matplotlib.pyplot as plt
plt.plot(sale_num_10000_list)
plt.ylabel('sales volume')
plt.title('pic. sales volume larger than 10000')
plt.show()



In [133]:
#销量大于10000平均价
total_price = 0
for p in price_list[0:15]:
    total_price += float(p)
average_price3 = total_price/len(price_list[0:15])

In [134]:
average_price3


Out[134]:
31.786666666666665

In [165]:
#销量大于10000价格走向
import matplotlib.pyplot as plt
plt.plot(price_list[0:15])
plt.ylabel('goods price')
plt.title('pic. price of the goods whitch sals volume is larger than 10000')
plt.show()



In [136]:
#天猫和非天猫比例图
plt.pie(istmall_list,labels=['is tmall','not tmall'],colors=['m','c'])
plt.title("pic. wether the shop is TMall")
plt.show


Out[136]:
<function matplotlib.pyplot.show>

In [137]:
#统计所有店铺天猫和非天猫数量
istmall_list = []
istmall = 0
nottmall = 0
for d in data:
    if d['shop_istmall'] == 'is_tmall':
        istmall +=1
    else:
        nottmall +=1
istmall_list.append(istmall)
istmall_list.append(nottmall)

In [138]:
istmall_list


Out[138]:
[72, 148]

In [142]:
#首页天猫和非天猫
istmall_first_page_list = []
istmall_first_page = 0
nottmall_first_page = 0
for d in data[0:44]:
    if d['shop_istmall'] == 'is_tmall':
        istmall_first_page +=1
    else:
        nottmall_first_page +=1
istmall_first_page_list.append(istmall_first_page)
istmall_first_page_list.append(nottmall_first_page)

In [143]:
#首页天猫和非天猫比例图
plt.pie(istmall_first_page_list,labels=['is tmall','not tmall'],colors=['m','c'])
plt.title("pic. wether the shop is TMall in first page")
plt.show()



In [144]:
istmall_first_page_list


Out[144]:
[16, 28]

In [163]:
#首页价格
plt.plot(price_list[0:44])
plt.ylabel('goods price')
plt.title('pic. price of the goods in the first page')
plt.show()



In [158]:
guangdong = 0
shanghai = 0
beijing = 0
shandong = 0
jiangsu = 0
jiangxi = 0
liaoning = 0
zhejiang = 0
henan = 0
hubei = 0

for d in data:
    #print d['shop_address'][0].encode('utf-8')
    if d['shop_address'][0].encode('utf-8')[0:6] == '广东':
        guangdong +=1
    elif d['shop_address'][0].encode('utf-8')[0:6] == '上海':
        shanghai +=1
    elif d['shop_address'][0].encode('utf-8')[0:6] == '北京':
        beijing +=1
    elif d['shop_address'][0].encode('utf-8')[0:6] == '山东':
        shandong +=1
    elif d['shop_address'][0].encode('utf-8')[0:6] == '江苏':
        jiangsu +=1
    elif d['shop_address'][0].encode('utf-8')[0:6] == '江西':
        jiangxi +=1
    elif d['shop_address'][0].encode('utf-8')[0:6] == '辽宁':
        liaoning +=1
    elif d['shop_address'][0].encode('utf-8')[0:6] == '浙江':
        zhejiang +=1
    elif d['shop_address'][0].encode('utf-8')[0:6] == '河南':
        henan +=1
    elif d['shop_address'][0].encode('utf-8')[0:6] == '湖北':
        hubei +=1
    else:
        print d['shop_address'][0].encode('utf-8') 
        
print guangdong 
print shanghai 
print beijing 
print shandong 
print jiangsu 
print jiangxi 
print liaoning 
print zhejiang 
print henan 
print hubei 
shop_address_list = ['guangdong','zhejiang','beijing','shanghai','jiangsu','shandong','hubei','henan','liaoning','jiangxi']
address_num_list = []
address_num_list.append(guangdong)
address_num_list.append(zhejiang)
address_num_list.append(beijing)
address_num_list.append(shanghai)
address_num_list.append(jiangsu)
address_num_list.append(shandong)
address_num_list.append(hubei)
address_num_list.append(henan)
address_num_list.append(liaoning)
address_num_list.append(jiangxi)


102
9
16
4
6
2
2
75
2
2

In [166]:
#店铺位置饼图
colors_list = ['m','c','b','g','r','y','k','w','y','r']
plt.pie(address_num_list,labels=shop_address_list,colors=colors_list)
plt.title("pic. shop location")
plt.show()



In [162]:
#首页商家销量
plt.plot(sale_num_list[0:44])
plt.ylabel('sales volume')
plt.title('pic. sales volume in first page')
plt.show()



In [ ]: