notebook.community

Edit and run



In [ ]:

    
# ndarray对象
import numpy as np
a = np.array([1,2,3])
print(type(a))

print("ndim:" + str(a.ndim) +"\nsize:" + str(a.size) + "\nshape:" + str(a.shape))

b = np.array([[1.3, 2.4], [0.3, 4.1]])
b.dtype
print("ndim:" + str(b.ndim) +"\nsize:" + str(b.size) + "\nshape:" + str(b.shape))

e = np.array([[1,2,3],(4,5,6),[7,8,9]])
e



In [ ]:

    
import numpy as np
f = np.array([[1,2,3], [4,5,6]], dtype=complex)
f



In [ ]:

    
import numpy as np
np.zeros((3,3))
np.arange(0,10)

#help(np.arange)
np.arange(0, 10, 2)

np.arange(0,12).reshape(3,4)

np.linspace(0,10,5)

np.random.random(3)
np.random.random((3,3))



In [ ]:

    
# 
import numpy as np
import pandas as pd

data = pd.read_csv('monthly_csv_2.csv') # 读取csv文件
print(data)
print(type(data))
print("=============================================")
data = pd.read_csv('monthly_csv.csv', usecols=[0, 1, 2])
ret = data.sort_values(by='Value', ascending=False) # 根据value值降序
print(ret)
print("=============================================")
ret = data[(data['Value'] > 0.889)] # value值大于0.889
print(ret)
print("=============================================")
data_of_china = ret[ret['Country'] == 'China'] # 过滤Country为China的数据
print(data_of_china['Value'])
print("=============================================")
data_of_china = data[(data['Value'] > 0.889) & (data['Country'] == 'China')] # 过滤Country为China的数据
print(data_of_china)



In [ ]:

    
import numpy as np
import pandas as pd
import matplotlib as mpl
# mpl.use('TkAgg')
import matplotlib.pyplot as plt
%matplotlib inline

# 获取所有sum
data = pd.read_csv('res/monthly_csv.csv')

def get_value_from_data(data):
    """从dataframe中获取value列表"""
    list_v1 = []
    for value in data['Value']:
        list_v1.append(value)
        
    return list_v1

print("==================获取所有国家==================")
# 获取所有国家
ser_all_country = data.drop_duplicates(['Country']).reset_index(drop=True)['Country']
print("国家总数：" + str(len(ser_all_country)) + "个")
# print(ser_all_country['Country'])


print("==================获取Australia总和和平均值==================")
# 获取对应Australia国家的总和
print(data[(data['Country'] == ser_all_country[0])]['Value'].sum())
print(data[(data['Country'] == ser_all_country[0])]['Value'].mean())

print("==================获取所有国家的总和数据==================")
dict_sum = {}
for country in ser_all_country:
    dict_sum[country] = (data[(data['Country'] == country)]['Value'])
    
variables = pd.DataFrame(dict_sum)
# variables.plot()


print("==================台湾,中国,澳大利亚 对比走势图==================")

#  长度必须是一致的
ret = data[(data['Country'] == 'Taiwan')][['Date','Value']]
list_tw = get_value_from_data(ret)[:400]
ret = data[(data['Country'] == 'China')][['Date','Value']]
list_ch = get_value_from_data(ret)[:400]
ret = data[(data['Country'] == 'Australia')][['Date','Value']]
list_aus = get_value_from_data(ret)[:400]

variables = pd.DataFrame({'China': np.array(list_ch),
                         'Taiwan': np.array(list_tw),
                         'Australia': np.array(list_aus)})
variables.plot()



In [ ]:

    
# print("==================画Australia的走势图==================")
# ret = data[(data['Country'] == 'Australia')]
# print(ret)
# rest_a = pd.Series(ret['Value'], index=ret['Date'])
# rest_a.plot(kind='bar', x=list_country)



In [ ]:

    
plt.plot([1,2,3,4])



In [ ]:

    
import datetime
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
months = mdates.MonthLocator()
days = mdates.DayLocator()
timeFmt = mdates.DateFormatter('%Y-%m')
events = [datetime.date(2015,1,23),
         datetime.date(2015,1,28),
         datetime.date(2015,2,3),
         datetime.date(2015,2,21),
         datetime.date(2015,3,15),
         datetime.date(2015,3,24),
         datetime.date(2015,4,8),
         datetime.date(2015,4,24)]
readings = [12,22,25,20,18,15,17,14]
fig, ax = plt.subplots()
plt.plot(events, readings)
ax.xaxis.set_major_locator(months)
ax.xaxis.set_major_formatter(timeFmt)
ax.xaxis.set_minor_locator(days)



In [ ]:

    
import datetime as dt
import numpy as np
import pandas as pd
import matplotlib as mpl
# mpl.use('TkAgg')
import matplotlib.pyplot as plt
%matplotlib inline

def get_value_from_data(data, key):
    """从dataframe中获取value列表"""
    list_v = []
    for value in data[key]:
        list_v.append(value)
        
    return list_v


def get_age_from_birth(data):
    import datetime as dt
    now_year =dt.datetime.today().year  #当前的年份
    data['age']=now_year-data.Birth.dt.year
    return data

# 读取csv文件
data = pd.read_csv('res/worldcup_team_csv.csv')
# data = pd.read_table('res/worldcup_team_csv.csv', sep=',')


data['Birth'] = pd.to_datetime(data['Birth'])
data = get_age_from_birth(data)

frame_data_russia = data[data['Country'] == 'Russia'];
frame_data_egypt = data[data['Country'] == 'Egypt'];

list_age = get_value_from_data(data[data['Country'] == 'Russia'][['age']], 'age')
list_age_egypt = get_value_from_data(frame_data_egypt, 'age')

variables = pd.DataFrame({'Russia': np.array(list_age),
                         'Egypt': np.array(list_age_egypt)})
variables.plot(kind='line')



In [ ]:

    
# 读取csv文件
# 没有表头，使用header选项，将其值置为None,pandas会为其添加默认表头
data = pd.read_csv('res/worldcup_team_csv.csv', header=None) 
# 可以指定表头
data = pd.read_csv('res/worldcup_team_csv.csv', names=['white','red','blue','green','animal']) 
print(data)



In [ ]:

    
# 创建等级结构的DataFrame对象
# 把所有想转换为索引的列名称赋给index_col
pd.read_csv('res/myCSV_03.csv', index_col=['color','status'])



In [ ]:

    
# pd.read_table('res/ch05_05.txt', sep='\D*', names=['white','red','blue'])
pd.read_table('res/ch05_05.txt', sep='\D*', names=['white','red','blue'], skiprows=[3])



In [ ]:

    
pd.read_table('res/ch05_06.txt', sep=',', skiprows=[0,1,3,6])



In [ ]:

    
# 从txt文件读取部分数据
# 过滤第2行，从起始行往后读3行
pd.read_csv('res/ch05_02.csv',sep=',', skiprows=[2], nrows=3, header=None)



In [ ]:

    
out = pd.Series() # 每隔3行进行累加，并且把和插入到series对象中
i = 0
pieces = pd.read_csv('res/ch05_01.csv', chunksize=3)
for piece in pieces:
    print(piece['white'])
    out.at[i] = piece['white'].sum()
    i = i + 1

out



In [ ]:

    
my_frame = pd.DataFrame({
    'ball':[0,4,8,12],
    'pen':[1,5,9,13],
    'pencil':[2,6,10,14],
    'paper':[3,7,11,15]
})
# 写入文件的时候，取消index和header选项
my_frame.to_csv('res/ch05_07.csv', index=False, header=False)

# 同理对DataFrame对象
frame1 = pd.DataFrame(np.arange(16).reshape((4,4)),
                     index=['red', 'blue', 'yellow', 'white'],
                     columns=['ball', 'pen', 'pencil', 'paper'])
print(frame1)
frame2 = pd.DataFrame(np.arange(12).reshape((4,3)),
                     index=['blue', 'green', 'white', 'yellow'],
                     columns=['mug', 'pen', 'ball'])
print(frame2)
frame3 = frame1+frame2

frame3.to_csv('res/ch05_08.csv', index=False, header=False, na_rep='NaN')



In [94]:

    
# 天然气数据分析
import numpy as np
import pandas as pd

#  获取原始数据
data = pd.read_csv('res/natural-gas-daily.csv')
frame = pd.DataFrame(data[data['Price']>10.0])

from datetime import datetime
arr_date = frame['Date'].values
print(arr_date)
# print(datetime.strptime(arr_date[0], '%Y-%m-%d'))

import numpy as np
import matplotlib.pyplot as plt
import matplotlib.dates as mdates

months = mdates.MonthLocator()
days = mdates.DayLocator()
timeFmt = mdates.DateFormatter('%Y-%m')

prices = np.array(frame['Price'])
# dates = [datetime.strptime(date, '%Y-%m') for date in arr_date]
# print(dates)

plt.plot(prices)
plt.show()

# fig, ax = plt.subplots()
# plt.plot(prices)
# ax.xaxis.set_major_locator(months)
# ax.xaxis.set_major_formatter(timeFmt)
# ax.xaxis.set_minor_locator(days)









    



['2000-12-21' '2000-12-22' '2000-12-26' '2000-12-29' '2001-01-05'
 '2001-01-08' '2003-02-24' '2003-02-25' '2003-02-26' '2003-02-28'
 '2005-08-24' '2005-08-30' '2005-08-31' '2005-09-01' '2005-09-02'
 '2005-09-06' '2005-09-07' '2005-09-08' '2005-09-09' '2005-09-12'
 '2005-09-13' '2005-09-14' '2005-09-15' '2005-09-16' '2005-09-19'
 '2005-09-20' '2005-09-21' '2005-09-22' '2005-10-07' '2005-10-10'
 '2005-10-11' '2005-10-12' '2005-10-13' '2005-10-14' '2005-10-17'
 '2005-10-18' '2005-10-19' '2005-10-20' '2005-10-21' '2005-10-24'
 '2005-10-25' '2005-10-26' '2005-10-27' '2005-10-28' '2005-10-31'
 '2005-11-01' '2005-11-02' '2005-11-03' '2005-11-16' '2005-11-17'
 '2005-11-18' '2005-11-21' '2005-11-22' '2005-11-23' '2005-11-28'
 '2005-11-29' '2005-11-30' '2005-12-01' '2005-12-02' '2005-12-05'
 '2005-12-06' '2005-12-07' '2005-12-08' '2005-12-09' '2005-12-12'
 '2005-12-13' '2005-12-14' '2005-12-15' '2005-12-16' '2005-12-19'
 '2005-12-20' '2005-12-21' '2005-12-22' '2005-12-23' '2005-12-27'
 '2005-12-29' '2008-04-10' '2008-04-11' '2008-04-14' '2008-04-15'
 '2008-04-16' '2008-04-17' '2008-04-18' '2008-04-21' '2008-04-22'
 '2008-04-23' '2008-04-24' '2008-04-25' '2008-04-28' '2008-04-29'
 '2008-04-30' '2008-05-01' '2008-05-02' '2008-05-05' '2008-05-06'
 '2008-05-07' '2008-05-08' '2008-05-09' '2008-05-12' '2008-05-13'
 '2008-05-14' '2008-05-15' '2008-05-16' '2008-05-19' '2008-05-20'
 '2008-05-21' '2008-05-22' '2008-05-23' '2008-05-27' '2008-05-28'
 '2008-05-29' '2008-05-30' '2008-06-02' '2008-06-03' '2008-06-04'
 '2008-06-05' '2008-06-06' '2008-06-09' '2008-06-10' '2008-06-11'
 '2008-06-12' '2008-06-13' '2008-06-16' '2008-06-17' '2008-06-18'
 '2008-06-19' '2008-06-20' '2008-06-23' '2008-06-24' '2008-06-25'
 '2008-06-26' '2008-06-27' '2008-06-30' '2008-07-01' '2008-07-02'
 '2008-07-03' '2008-07-07' '2008-07-08' '2008-07-09' '2008-07-10'
 '2008-07-11' '2008-07-14' '2008-07-15' '2008-07-16' '2008-07-17'
 '2008-07-18' '2008-07-21' '2008-07-22']






    



---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-94-7e4cfbeab379> in <module>()
     21 
     22 prices = np.array(frame['Price'])
---> 23 dates = [datetime.strptime(date, '%Y-%m') for date in arr_date]
     24 print(dates)
     25 

<ipython-input-94-7e4cfbeab379> in <listcomp>(.0)
     21 
     22 prices = np.array(frame['Price'])
---> 23 dates = [datetime.strptime(date, '%Y-%m') for date in arr_date]
     24 print(dates)
     25 

/usr/local/Cellar/python3/3.6.3/Frameworks/Python.framework/Versions/3.6/lib/python3.6/_strptime.py in _strptime_datetime(cls, data_string, format)
    563     """Return a class cls instance based on the input string and the
    564     format string."""
--> 565     tt, fraction = _strptime(data_string, format)
    566     tzname, gmtoff = tt[-2:]
    567     args = tt[:6] + (fraction,)

/usr/local/Cellar/python3/3.6.3/Frameworks/Python.framework/Versions/3.6/lib/python3.6/_strptime.py in _strptime(data_string, format)
    363     if len(data_string) != found.end():
    364         raise ValueError("unconverted data remains: %s" %
--> 365                           data_string[found.end():])
    366 
    367     iso_year = year = None

ValueError: unconverted data remains: -21



In [ ]: