In [90]:
import pandas as pd
import numpy as np
# Small demo frame: five rows and five integer columns.
data = dict(
    BoolCol=[1, 2, 3, 3, 4],
    attr=[22, 33, 22, 44, 66],
    BoolC=[1, 2, 3, 3, 4],
    att=[22, 33, 22, 44, 66],
    Bool=[1, 2, 3, 3, 4],
)
# Explicit row labels 1..5; the index itself is named "index_new".
index = pd.Index([1, 2, 3, 4, 5], name="index_new")
df = pd.DataFrame(data, index=index)
# To inspect the index and its name:
#df.index, df.index.name
In [91]:
import random

# Fixed seed so the synthetic data is identical on every Restart & Run All.
TEST_DATA_SEED = 666
# Number of synthetic rows to generate.
N_TEST_ROWS = 3000
# A dedicated generator keeps the module-level `random` state untouched.
test_rng = random.Random(TEST_DATA_SEED)

# Random test ids: the prefix "123456" followed by 6 distinct digits
# (sample() draws 6 of the ten digits 0-9 without replacement).
test_list = [
    "123456" + "".join(str(digit) for digit in test_rng.sample(range(0, 10), 6))
    for _ in range(N_TEST_ROWS)
]

# Random floats drawn from the range 1..200, rounded to two decimals.
test_list2 = [round(test_rng.uniform(1, 200), 2) for _ in range(N_TEST_ROWS)]

data = {
    'date': pd.date_range("2000", freq='D', periods=N_TEST_ROWS),
    'aa': test_list,
    'test2': test_list2,
    # Random binary label, 0 or 1.
    'label': [test_rng.randint(0, 1) for _ in range(N_TEST_ROWS)],
}
df_test = pd.DataFrame(data)
#date_1= pd.date_range("2000",freq= 'D',periods=3000).year
In [92]:
# Display summary statistics for df_test via DataFrame.describe().
df_test.describe()
Out[92]:
In [93]:
# Detach the 'date' column from df_test (pop removes it in place) ...
date = df_test.pop('date')
# ... and put it back as the first column.
df_test.insert(loc=0, column='date', value=date)
# Preview the first five rows (head() defaults to 5).
df_test.head()
Out[93]:
In [94]:
# First two rows, selected by position.
df_test.iloc[:2]
Out[94]:
In [95]:
import numpy as np
import pandas as pd
from pandas import Series, DataFrame
# Seed NumPy's global generator so the random frame below is reproducible.
np.random.seed(666)
df = pd.DataFrame(
    np.random.rand(5, 5),
    index=list('ABDEF'),
    columns=['c1', 'c2', 'c3', 'c4', 'c5'],
)

# Other ways to inspect the frame (left disabled):
#print(df.shape)  # (5, 5)
#df.head()        # first five rows
#df.tail()        # last five rows

# Access several columns at once:
#print(df[['c1', 'c4']])
#           c1        c4
# A  0.700437  0.727858
# B  0.012703  0.099929
# D  0.200248  0.700845
# E  0.774479  0.110954
# F  0.023236  0.197503

# Assign a column subset to a new DataFrame.
sub_df = df.loc[:, ['c1', 'c3', 'c5']]
#           c1        c3        c5
# A  0.700437  0.676514  0.951458
# B  0.012703  0.048813  0.508066
# D  0.200248  0.192892  0.293228
# E  0.774479  0.112858  0.247668
# F  0.023236  0.340035  0.909180

# Look at the first five rows (same table as above):
#print(sub_df.head(5))

# Rows from the middle of the frame with iloc ("index location"):
# positional slicing, start included and stop excluded.
print(sub_df.iloc[1:3])
#           c1        c3        c5
# B  0.012703  0.048813  0.508066
# D  0.200248  0.192892  0.293228

# Filter columns as well. The slices work like plain Python slices,
# but they are based on position, not on labels.
print(sub_df.iloc[1:2, :2])
#           c1        c3
# B  0.012703  0.048813

# loc filters by label name; label slices include both endpoints.
print(sub_df.loc['A':'B', 'c1':'c5'])
In [96]:
# Pick rows by an explicit list of integer positions, keeping every column.
df.iloc[[1, 2, 3], :]
Out[96]:
In [97]:
# Pick rows by an explicit list of index labels, keeping every column.
df.loc[["A", "B", "D"], :]
Out[97]:
In [98]:
# Three ways to read a single cell, printed one per line:
# label-based .loc, label-based scalar accessor .at, position-based scalar accessor .iat.
print(
    df.loc["A", "c1"],
    df.at["A", "c1"],
    df.iat[1, 1],
    sep="\n",
)
In [ ]: