In [90]:
import pandas as pd
import numpy as np
# Column data for a small demo frame: five columns of five integers each.
# Every column gets its own list literal, so no two columns share a list object.
data = dict(
    BoolCol=[1, 2, 3, 3, 4],
    attr=[22, 33, 22, 44, 66],
    BoolC=[1, 2, 3, 3, 4],
    att=[22, 33, 22, 44, 66],
    Bool=[1, 2, 3, 3, 4],
)

# Row labels 1..5, carried by an index that is itself named "index_new".
index = pd.Index([1, 2, 3, 4, 5], name="index_new")
df = pd.DataFrame(data=data, index=index)

# To inspect the index and its name: df.index, df.index.name

In [91]:
import random
# Build a synthetic demo frame with one row per calendar day.
N_ROWS = 3000  # number of synthetic rows (and of consecutive days)
SEED = 666     # fixed seed so a fresh "Restart & Run All" rebuilds the same frame
# Private generator: reproducible draws without touching the global `random` state.
rng = random.Random(SEED)

# 12-character ids: the fixed prefix "123456" followed by 6 digits.
# sample() draws without replacement from 0-9, so the 6 digits are distinct.
test_list = [
    "123456" + "".join(str(digit) for digit in rng.sample(range(0, 10), 6))
    for _ in range(N_ROWS)
]
# Random floats drawn from uniform(1, 200), rounded to two decimal places.
test_list2 = [round(rng.uniform(1, 200), 2) for _ in range(N_ROWS)]
data = {
    'date': pd.date_range("2000", freq='D', periods=N_ROWS),
    'aa': test_list,
    'test2': test_list2,
    # Binary label: randint(0, 1) includes both endpoints.
    'label': [rng.randint(0, 1) for _ in range(N_ROWS)],
}
df_test = pd.DataFrame(data)

# To get only the year of each date:
#date_1= pd.date_range("2000",freq= 'D',periods=3000).year

In [92]:
# Summary statistics (count, mean, std, min, quartiles, max) for df_test;
# with the default arguments only the numeric columns are summarised.
df_test.describe()


Out[92]:
test2 label
count 3000.000000 3000.000000
mean 100.821863 0.512000
std 56.557216 0.499939
min 1.140000 0.000000
25% 52.022500 0.000000
50% 101.300000 1.000000
75% 150.087500 1.000000
max 199.900000 1.000000

In [93]:
# Move the 'date' column to the front of df_test:
# pop() removes the column from the frame and hands it back as a Series,
# insert() then puts it back at column position 0 (both modify df_test in place).
date = df_test.pop('date')
df_test.insert(loc=0, column='date', value=date)
# Show the first five rows to confirm the column order.
df_test.head(n=5)


Out[93]:
date aa test2 label
0 2000-01-01 123456253640 51.40 1
1 2000-01-02 123456652438 65.38 1
2 2000-01-03 123456549183 35.39 0
3 2000-01-04 123456792183 165.18 1
4 2000-01-05 123456785963 36.03 0

In [94]:
# A plain slice applied directly to a DataFrame selects rows: here the first two.
df_test[:2]


Out[94]:
date aa test2 label
0 2000-01-01 123456253640 51.40 1
1 2000-01-02 123456652438 65.38 1

In [95]:
import numpy as np 
import pandas as pd 
from pandas import Series, DataFrame 
# Fix the global NumPy seed so the random values below repeat on every run.
np.random.seed(666)
df = pd.DataFrame(
    data=np.random.rand(25).reshape(5, 5),
    index=list('ABDEF'),
    columns='c1 c2 c3 c4 c5'.split(),
)
# Quick inspection calls (left disabled):
#   print(df.shape)  -> (5, 5)
#   df.head()        -> first five rows
#   df.tail()        -> last five rows

# Access several columns at once by passing a list of column labels.
# print(df[['c1', 'c4']])
#          c1        c4
# A  0.700437  0.727858
# B  0.012703  0.099929
# D  0.200248  0.700845
# E  0.774479  0.110954
# F  0.023236  0.197503

# Assign a column subset to a new DataFrame.
sub_df = df[['c1', 'c3', 'c5']]
#          c1        c3        c5
# A  0.700437  0.676514  0.951458
# B  0.012703  0.048813  0.508066
# D  0.200248  0.192892  0.293228
# E  0.774479  0.112858  0.247668
# F  0.023236  0.340035  0.909180

# Look at the first five rows (the frame has exactly five, so this is all of it).
# print(sub_df.head(5))

# Rows from the middle of the frame with iloc ("index location"):
# purely positional, start included, stop excluded -> rows B and D.
print(sub_df.iloc[1:3, :])

# iloc can restrict columns too. The slices behave like ordinary Python slices,
# but they address positions in the index, not labels -> row B, columns c1 and c3.
print(sub_df.iloc[1:2, 0:2])

# loc filters by label name instead; both ends of a label slice are included
# -> rows A and B, columns c1 through c5.
print(sub_df.loc['A':'B', 'c1':'c5'])


         c1        c3        c5
B  0.012703  0.048813  0.508066
D  0.200248  0.192892  0.293228
         c1        c3
B  0.012703  0.048813
         c1        c3        c5
A  0.700437  0.676514  0.951458
B  0.012703  0.048813  0.508066

In [96]:
# Select rows by integer position with a list: positions 1, 2, 3 are rows B, D, E.
df.iloc[[1,2,3]]


Out[96]:
c1 c2 c3 c4 c5
B 0.012703 0.413588 0.048813 0.099929 0.508066
D 0.200248 0.744154 0.192892 0.700845 0.293228
E 0.774479 0.005109 0.112858 0.110954 0.247668

In [97]:
# Select rows by index label with a list of labels: rows A, B and D.
df.loc[["A","B","D"]]


Out[97]:
c1 c2 c3 c4 c5
A 0.700437 0.844187 0.676514 0.727858 0.951458
B 0.012703 0.413588 0.048813 0.099929 0.508066
D 0.200248 0.744154 0.192892 0.700845 0.293228

In [98]:
# Three ways to read a single cell of df.
# loc with a row label and a column label: row "A", column "c1".
print(df.loc["A","c1"])

# at: label-based access to one scalar; prints the same value as the loc call above.
print(df.at["A","c1"])

# iat: position-based access to one scalar; [1, 1] is row "B", column "c2".
print(df.iat[1,1])


0.7004371218578347
0.7004371218578347
0.41358769878652346

In [ ]: