In [17]:
import pandas as pd
import numpy as np
import xlwt
import matplotlib.pyplot as plt
plt.rcParams['font.sans-serif']=['SimHei'] # 用来正常显示中文标签
plt.rcParams['axes.unicode_minus']=False # 用来正常显示负号
Toolss = ['walk', 'subway', 'bus', 'bus', 'bus', 'bus', 'train', 'ferry', 'ferry', 'train', 'paratransit', 'car', 'car', 'car',
        'motocycle', 'taxi', 'taxi', 'bus', 'bike', 'car', 'car', 'bike', 'bike', 'bike']
Tools = ['walk', 'subway', 'bus', 'train', 'ferry', 'paratransit', 'car',
        'motocycle', 'taxi', 'bike']
df = pd.read_csv("D:\\BaiduNetdiskDownload\\ICM\\2018第七届数学中国数学建模国际赛赛题\\Citywide_Mobility_Survey_-_Main_Survey.csv", low_memory=False)

In [18]:
def change(n):
    for idi, i in enumerate(Tools):
        if Toolss[n] == i:
            return idi

In [19]:
#df

In [20]:
#df.to_excel("D:\\BaiduNetdiskDownload\\ICM\\2018第七届数学中国数学建模国际赛赛题\\1.xlsx",na_rep="NULL")

In [21]:
A = pd.read_excel("D:\\BaiduNetdiskDownload\\ICM\\2018第七届数学中国数学建模国际赛赛题\\test.xlsx", sheet_name='Sheet1', na_values='NaN')

In [22]:
B = np.array(A.dropna(), dtype=int)

In [23]:
B[:,0].size


Out[23]:
419

In [24]:
print(B)


[[2 0 0 ... 0 0 1]
 [1 0 0 ... 0 0 1]
 [2 1 0 ... 0 0 1]
 ...
 [2 1 1 ... 0 0 1]
 [1 0 1 ... 0 0 1]
 [1 1 1 ... 1 1 1]]

In [25]:
man = np.zeros(10, dtype=int)
wom = np.zeros(10, dtype=int)
num_man = 0
num_wom = 0
for i in range(419):
    for j in range(1,24):
        if B[i][j] == 1.0:
            if B[i][0] == 1.0:
                man[change(j-1)] += 1
                num_man += 1
            else:
                wom[change(j-1)] += 1
                num_wom += 1

In [26]:
man


Out[26]:
array([ 66,  98, 155,  24,  20,  17, 122,  33,  49,  28])

In [27]:
wom


Out[27]:
array([ 68, 111, 171,  11,  17,  15, 127,  37,  52,  34])

In [28]:
plt.figure(figsize=(25, 15))
plt.tick_params(labelsize=20)   #坐标轴字体大小
#plt.xlim(0,15)
plt.xticks(np.arange(0, 10, 1), ('walk', 'subway', 'bus', 'train', 'ferry', 'paratransit', 'car',
        'motocycle', 'taxi', 'bike'))   #横坐标刻度
plt.yticks(np.arange(0, 400, 20))   #纵坐标刻度
plt.xlabel('style', size = 20)   #横坐标标签
plt.ylabel('people', size = 20)   #纵坐标标签
plt.title('sex', size = 20)   #标题
plt.plot(range(0, 10), man, 'blue',label = '男性 %d'%num_man)
plt.plot(range(0, 10), wom, 'orange',label = '女性 %d'%num_wom)
plt.legend(loc = 'upper right', fontsize = 20)   #图例位置和字体大小
plt.plot(man)
plt.plot(wom)
plt.show()



In [29]:
A = pd.read_excel("D:\\BaiduNetdiskDownload\\ICM\\2018第七届数学中国数学建模国际赛赛题\\test.xlsx", sheet_name='Sheet2', na_values='NaN')

In [30]:
B = np.array(A.dropna(), dtype=int)

In [31]:
print(B)


[[35  0  0 ...  0  0  0]
 [37  0  0 ...  0  0  0]
 [91  1  0 ...  0  0  0]
 ...
 [29  1  1 ...  0  0  0]
 [37  0  1 ...  0  0  0]
 [35  1  1 ...  1  1  1]]

In [32]:
B[:,1].size


Out[32]:
416

In [33]:
age = np.zeros((10,10), dtype=int)
num_age = np.zeros(10, dtype=int)
for i in range(416):
    num_age[B[i][0]//10] += 1
    for j in range(1,24):
        if B[i][j] == 1:
            if B[i][0] > 100:
                continue
            age[B[i][0]//10][change(j-1)] += 1

In [34]:
age


Out[34]:
array([[  0,   0,   0,   0,   0,   0,   0,   0,   0,   0],
       [  3,   3,   4,   0,   1,   1,   5,   0,   0,   0],
       [ 53,  70, 112,   9,   8,  14,  72,  14,  27,  13],
       [ 49,  81, 132,  16,  16,  10,  94,  28,  42,  26],
       [ 11,  27,  33,   6,   5,   1,  37,  14,  18,   9],
       [  8,  14,  26,   4,   5,   5,  20,   9,   7,   7],
       [  6,  11,  15,   0,   1,   1,  18,   4,   6,   5],
       [  3,   2,   2,   0,   1,   0,   2,   1,   1,   2],
       [  0,   0,   0,   0,   0,   0,   0,   0,   0,   0],
       [  1,   0,   0,   0,   0,   0,   0,   0,   0,   0]])

In [35]:
num_age


Out[35]:
array([  0,   8, 117, 150,  59,  41,  35,   5,   0,   1])

In [36]:
plt.figure(figsize=(25, 15))
plt.tick_params(labelsize=20)   #坐标轴字体大小
#plt.xlim(0,15)
plt.xticks(np.arange(0, 10, 1), ('walk', 'subway', 'bus', 'train', 'ferry', 'paratransit', 'car',
        'motocycle', 'taxi', 'bike'))   #横坐标刻度
plt.yticks(np.arange(0, 150, 10))   #纵坐标刻度
plt.xlabel('style', size = 20)   #横坐标标签
plt.ylabel('people', size = 20)   #纵坐标标签
plt.title('age', size = 20)   #标题
plt.plot(range(0, 10), age[1], 'black', label = '10~19 %d'%num_age[1])
plt.plot(range(0, 10),age[2], 'green', label = '20~29 %d'%num_age[2])
plt.plot(range(0, 10),age[3], 'red', label = '30~39 %d'%num_age[3])
plt.plot(range(0, 10),age[4], 'blue', label = '40~49 %d'%num_age[4])
plt.plot(range(0, 10),age[5], 'orange', label = '50~59 %d'%num_age[5])
plt.plot(range(0, 10),age[6], 'pink', label = '60~69 %d'%num_age[6])
plt.plot(range(0, 10),age[7], 'yellow', label = '70~79 %d'%num_age[7])
plt.plot(range(0, 10),age[9], 'purple', label = '90~99 %d'%num_age[9])
plt.legend(loc = 'upper right', fontsize = 20)   #图例位置和字体大小
plt.show()



In [37]:
A = pd.read_excel("D:\\BaiduNetdiskDownload\\ICM\\2018第七届数学中国数学建模国际赛赛题\\test.xlsx", sheet_name='Sheet3', na_values='NaN')

In [38]:
B = np.array(A.dropna(), dtype=int)

In [39]:
B


Out[39]:
array([[6, 0, 0, ..., 0, 0, 0],
       [6, 0, 0, ..., 0, 0, 0],
       [6, 0, 1, ..., 0, 0, 0],
       ...,
       [4, 1, 1, ..., 0, 0, 0],
       [6, 0, 1, ..., 0, 0, 0],
       [5, 1, 1, ..., 1, 1, 1]])

In [40]:
B[:,0].size


Out[40]:
410

In [41]:
income = np.zeros((9,10), dtype=int)
num_income = np.zeros(9, dtype=int)
for i in range(410):
    num_income[B[i][0]-1] += 1
    for j in range(1,24):
        if B[i][j] == 1:
            income[B[i][0]-1][change(j-1)] += 1

In [42]:
income


Out[42]:
array([[ 7,  7, 10,  0,  0,  0, 11,  1,  2,  1],
       [19, 17, 24,  2,  2,  4,  9,  3,  6,  3],
       [ 8, 17, 26,  2,  2,  4, 12,  8, 11,  7],
       [19, 31, 54,  7,  4,  5, 26,  8, 11,  7],
       [17, 40, 64,  7, 11,  6, 52,  9, 14, 14],
       [20, 36, 62,  9,  3,  7, 59, 13, 24, 15],
       [22, 35, 44,  4,  9,  4, 48, 13, 14, 10],
       [15, 16, 31,  3,  4,  2, 14, 12, 17,  3],
       [ 6,  7, 10,  1,  2,  0, 16,  3,  1,  2]])

In [43]:
num_income


Out[43]:
array([19, 32, 25, 42, 76, 84, 74, 33, 25])

In [44]:
plt.figure(figsize=(25, 15))
plt.tick_params(labelsize=20)   #坐标轴字体大小
#plt.xlim(0,15)
plt.xticks(np.arange(0, 10, 1), ('walk', 'subway', 'bus', 'train', 'ferry', 'paratransit', 'car',
        'motocycle', 'taxi', 'bike'))   #横坐标刻度
plt.yticks(np.arange(0, 100, 10))   #纵坐标刻度
plt.xlabel('style', size = 20)   #横坐标标签
plt.ylabel('people', size = 20)   #纵坐标标签
plt.title('income', size = 20)   #标题
plt.plot(range(0, 10), income[0], 'black', label = 'Less than \$14,999 %d'%num_income[0])
plt.plot(range(0, 10),income[1], 'green', label = '\$15,000 - \$24,999 %d'%num_income[1])
plt.plot(range(0, 10),income[2], 'red', label = '\$25,000 - \$34,999 %d'%num_income[2])
plt.plot(range(0, 10),income[3], 'blue', label = '\$35,000 - \$49,999 %d'%num_income[3])
plt.plot(range(0, 10),income[4], 'orange', label = '\$50,000 - \$74,999 %d'%num_income[4])
plt.plot(range(0, 10),income[5], 'pink', label = '\$75,000 - \$99,999 %d'%num_income[5])
plt.plot(range(0, 10),income[6], 'yellow', label = '\$100,000 - \$149,999 %d'%num_income[6])
plt.plot(range(0, 10),income[7], 'purple', label = '\$150,000 - \$199,999 %d'%num_income[7])
plt.plot(range(0, 10),income[8], 'brown', label = '\$200,000 and above %d'%num_income[8])
plt.legend(loc = 'upper right', fontsize = 20)   #图例位置和字体大小
plt.show()



In [45]:
A = pd.read_excel("D:\\BaiduNetdiskDownload\\ICM\\2018第七届数学中国数学建模国际赛赛题\\test.xlsx", sheet_name='Sheet4', na_values='NaN')

In [46]:
B = np.array(A.dropna(), dtype=int)

In [47]:
B


Out[47]:
array([[7, 0, 0, ..., 0, 0, 0],
       [3, 0, 0, ..., 0, 0, 0],
       [3, 1, 0, ..., 0, 0, 0],
       ...,
       [6, 1, 1, ..., 0, 0, 0],
       [3, 0, 1, ..., 0, 0, 0],
       [6, 1, 1, ..., 1, 1, 1]])

In [48]:
B[:,0].size


Out[48]:
419

In [49]:
edu = np.zeros((7,10), dtype=int)
num_edu = np.zeros(7, dtype=int)
for i in range(419):
    num_edu[B[i][0]-1] += 1
    for j in range(1,24):
        if B[i][j] == 1:
            edu[B[i][0]-1][change(j-1)] += 1

In [50]:
edu


Out[50]:
array([[  0,   1,   1,   0,   0,   0,   0,   0,   0,   0],
       [  1,   1,   3,   0,   2,   1,   5,   0,   0,   0],
       [ 14,  22,  23,   2,   2,   7,  15,   3,   5,   5],
       [ 33,  34,  57,   9,   9,   7,  42,  10,  22,   8],
       [ 11,  18,  29,   4,   3,   3,  21,   3,   5,   6],
       [ 38,  80, 127,  15,  14,   5,  94,  30,  36,  28],
       [ 37,  53,  86,   5,   7,   9,  72,  24,  33,  15]])

In [51]:
num_edu


Out[51]:
array([  1,   7,  41,  67,  28, 152, 123])

In [52]:
plt.figure(figsize=(25, 15))
plt.tick_params(labelsize=20)   #坐标轴字体大小
#plt.xlim(0,15)
plt.xticks(np.arange(0, 10, 1), ('walk', 'subway', 'bus', 'train', 'ferry', 'paratransit', 'car',
        'motocycle', 'taxi', 'bike'))   #横坐标刻度
plt.yticks(np.arange(0, 150, 10))   #纵坐标刻度
plt.xlabel('style', size = 20)   #横坐标标签
plt.ylabel('people', size = 20)   #纵坐标标签
plt.title('edu', size = 20)   #标题
plt.plot(range(0, 10), edu[0], 'black', label = 'No high school %d'%num_edu[0])
plt.plot(range(0, 10),edu[1], 'green', label = 'Some high school %d'%num_edu[1])
plt.plot(range(0, 10),edu[2], 'red', label = 'High school graduate or equivalent %d'%num_edu[2])
plt.plot(range(0, 10),edu[3], 'blue', label = 'Some college but degree not received or in progress %d'%num_edu[3])
plt.plot(range(0, 10),edu[4], 'orange', label = 'Associate degree %d'%num_edu[4])
plt.plot(range(0, 10),edu[5], 'pink', label = 'Bachelor’s degree %d'%num_edu[5])
plt.plot(range(0, 10),edu[6], 'yellow', label = 'Graduate degree %d'%num_edu[6])
plt.legend(loc = 'upper right', fontsize = 20)   #图例位置和字体大小
plt.show()



In [53]:
A = pd.read_excel("D:\\BaiduNetdiskDownload\\ICM\\2018第七届数学中国数学建模国际赛赛题\\test.xlsx", sheet_name='Sheet5', na_values='NaN')

In [54]:
B = np.array(A.dropna(), dtype=int)

In [55]:
B


Out[55]:
array([[1, 1, 0, ..., 0, 0, 0],
       [2, 2, 0, ..., 0, 0, 0],
       [1, 5, 0, ..., 0, 0, 0],
       ...,
       [1, 3, 1, ..., 0, 0, 0],
       [1, 3, 0, ..., 0, 0, 0],
       [2, 1, 1, ..., 1, 1, 1]])

In [56]:
B[:,1].size


Out[56]:
406

In [57]:
B[0].size


Out[57]:
25

In [58]:
safe = np.zeros((5,10), dtype=int)
num_safe = np.zeros(5, dtype=int)
for i in range(406):
    num_safe[(B[i][0]+B[i][1])//2-1] += 1
    for j in range(2,25):
        if B[i][j] == 1:
            safe[(B[i][0]+B[i][1])//2-1][change(j-2)] += 1

In [59]:
safe


Out[59]:
array([[ 59,  98, 166,  22,  15,  19, 131,  36,  60,  38],
       [ 46,  67, 110,   9,  14,   4,  76,  24,  26,  15],
       [ 20,  29,  37,   4,   7,   5,  27,   8,  11,   7],
       [  3,   6,   4,   0,   0,   3,   9,   2,   3,   2],
       [  1,   1,   3,   0,   0,   0,   1,   0,   1,   0]])

In [60]:
num_safe


Out[60]:
array([176, 138,  74,  16,   2])

In [61]:
plt.figure(figsize=(25, 15))
plt.tick_params(labelsize=20)   #坐标轴字体大小
#plt.xlim(0,15)
plt.xticks(np.arange(0, 10, 1), ('walk', 'subway', 'bus', 'train', 'ferry', 'paratransit', 'car',
        'motocycle', 'taxi', 'bike'))   #横坐标刻度
plt.yticks(np.arange(0, 200, 10))   #纵坐标刻度
plt.xlabel('style', size = 20)   #横坐标标签
plt.ylabel('people', size = 20)   #纵坐标标签
plt.title('safe', size = 20)   #标题
plt.plot(range(0, 10), safe[0], 'black', label = 'Extremely important %d'%num_safe[0])
plt.plot(range(0, 10),safe[1], 'green', label = 'Very important %d'%num_safe[1])
plt.plot(range(0, 10),safe[2], 'red', label = 'Somewhat important %d'%num_safe[2])
plt.plot(range(0, 10),safe[3], 'blue', label = 'Not very important %d'%num_safe[3])
plt.plot(range(0, 10),safe[4], 'orange', label = 'Not at all important %d'%num_safe[4])
plt.legend(loc = 'upper right', fontsize = 20)   #图例位置和字体大小
plt.show()



In [62]:
A = pd.read_excel("D:\\BaiduNetdiskDownload\\ICM\\2018第七届数学中国数学建模国际赛赛题\\test.xlsx", sheet_name='Sheet6', na_values='NaN')

In [63]:
B = np.array(A.dropna(), dtype=int)

In [64]:
B


Out[64]:
array([[0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 1, 0, ..., 0, 0, 0],
       ...,
       [0, 1, 1, ..., 0, 0, 0],
       [0, 0, 1, ..., 0, 0, 0],
       [0, 1, 1, ..., 1, 1, 1]])

In [65]:
B[1].size


Out[65]:
24

In [66]:
B[:,1].size


Out[66]:
419

In [67]:
rain = np.zeros((3,10), dtype=int)
num_rain = np.zeros(3, dtype=int)
for i in range(419):
    num_rain[B[i][0]] += 1
    for j in range(1,24):
        if B[i][j] == 1:
            rain[B[i][0]][change(j-1)] += 1

In [68]:
rain


Out[68]:
array([[116, 183, 280,  34,  32,  26, 226,  61,  81,  50],
       [  0,   1,   1,   0,   0,   0,   0,   0,   0,   0],
       [ 18,  25,  45,   1,   5,   6,  23,   9,  20,  12]])

In [69]:
num_rain


Out[69]:
array([376,   1,  42])

In [70]:
plt.figure(figsize=(25, 15))
plt.tick_params(labelsize=20)   #坐标轴字体大小
#plt.xlim(0,15)
plt.xticks(np.arange(0, 10, 1), ('walk', 'subway', 'bus', 'train', 'ferry', 'paratransit', 'car',
        'motocycle', 'taxi', 'bike'))   #横坐标刻度
plt.yticks(np.arange(0, 300, 30))   #纵坐标刻度
plt.xlabel('style', size = 20)   #横坐标标签
plt.ylabel('people', size = 20)   #纵坐标标签
plt.title('rain', size = 20)   #标题
plt.plot(range(0, 10), rain[0], 'black', label = 'Light rain %d'%num_rain[0])
plt.plot(range(0, 10),rain[1], 'green', label = 'Moderate rain %d'%num_rain[1])
plt.plot(range(0, 10),rain[2], 'red', label = 'Heavy rain %d'%num_rain[2])
plt.legend(loc = 'upper right', fontsize = 20)   #图例位置和字体大小
plt.show()



In [71]:
A = pd.read_excel("D:\\BaiduNetdiskDownload\\ICM\\2018第七届数学中国数学建模国际赛赛题\\test.xlsx", sheet_name='Sheet7', na_values='NaN')

In [72]:
B = np.array(A.dropna(), dtype=int)

In [73]:
B


Out[73]:
array([[0, 0, 0, ..., 0, 0, 0],
       [5, 0, 0, ..., 0, 0, 0],
       [1, 1, 0, ..., 0, 0, 0],
       ...,
       [5, 1, 1, ..., 0, 0, 0],
       [5, 0, 1, ..., 0, 0, 0],
       [5, 1, 1, ..., 1, 1, 1]])

In [74]:
B[1].size


Out[74]:
24

In [75]:
B[:,1].size


Out[75]:
419

In [76]:
templow = np.zeros((7,10), dtype=int)
num_templow = np.zeros(7, dtype=int)
for i in range(419):
    num_templow[B[i][0]] += 1
    for j in range(1,24):
        if B[i][j] == 1:
            templow[B[i][0]][change(j-1)] += 1

In [77]:
templow


Out[77]:
array([[ 0,  0,  0,  0,  0,  0,  1,  0,  0,  0],
       [20, 34, 50,  4,  8,  4, 33,  9, 11,  9],
       [19, 28, 44,  5,  5,  5, 39, 13, 15, 11],
       [ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0],
       [18, 41, 56,  5,  9,  4, 48, 14, 19,  6],
       [35, 48, 79, 12,  6,  7, 59, 17, 26, 16],
       [42, 58, 97,  9,  9, 12, 69, 17, 30, 20]])

In [78]:
num_templow


Out[78]:
array([  1,  71,  61,   0,  72,  98, 116])

In [79]:
plt.figure(figsize=(25, 15))
plt.tick_params(labelsize=20)   #坐标轴字体大小
#plt.xlim(0,15)
plt.xticks(np.arange(0, 10, 1), ('walk', 'subway', 'bus', 'train', 'ferry', 'paratransit', 'car',
        'motocycle', 'taxi', 'bike'))   #横坐标刻度
plt.yticks(np.arange(0, 300, 30))   #纵坐标刻度
plt.xlabel('style', size = 20)   #横坐标标签
plt.ylabel('people', size = 20)   #纵坐标标签
plt.title('templow', size = 20)   #标题
plt.plot(range(0, 10), templow[0], 'black', label = '45-49℉ %d'%num_templow[0])
plt.plot(range(0, 10),templow[1], 'green', label = '50~54℉ %d'%num_templow[1])
plt.plot(range(0, 10),templow[2], 'red', label = '55~59℉ %d'%num_templow[2])
plt.plot(range(0, 10),templow[4], 'yellow', label = '60~64℉ %d'%num_templow[4])
plt.plot(range(0, 10),templow[5], 'blue', label = '65~69℉ %d'%num_templow[5])
plt.plot(range(0, 10),templow[6], 'pink', label = '70℉ and above %d'%num_templow[6])
plt.legend(loc = 'upper right', fontsize = 20)   #图例位置和字体大小
plt.show()



In [ ]: