In [1]:

    
import pandas as pd
import numpy as np
from datetime import datetime
from sqlalchemy import create_engine
import matplotlib.pyplot as plt
import re
import reprlib
from collections import abc

np.random.seed(1445)



In [2]:

    
# ========================================diff 函数
df = pd.DataFrame({'AAA' : [1,2,3,4,5,6,7,9,9,11,11,11], 'BBB' : [1,0,1,1,0,1,0,1,1,1,1,1], 'CCC' : [3,3,3,3,3,3,3,4,4,4,4,4]})
# df.query('BBB ==0 and AAA in (2,5)')
ss = df.groupby(['CCC'])#.AAA.apply(set).apply(len)
x = df.apply(max)
for i in x:
    print(i)
# isinstance(ss, abc.Iterable)
# ss.sort_values(ascending=False)
# ss_a = ss['AAA']

# print(ss_a.unique())
# source_cols = df.columns
# str_col = [str(x) + '_col' for x in source_cols]
# categories = {1 : 'Alpha', 2 : 'Beta', 3 : 'Charlie' }
# df[str_col] = df[source_cols].applymap(categories.get)
# df.groupby(['CCC'])['AAA'].mean()
# df.sort_values('CCC')#.reset_index(drop=True)#.groupby(['CCC'])#.diff()
# data = pd.DataFrame(np.random.rand(20,4), columns = ['A','B','C','D'])
# data.query('A > B')

取出有变化的行操作



In [3]:

    
type(df.groupby(['CCC']).BBB)









    Out[3]:





pandas.core.groupby.SeriesGroupBy



In [7]:

    
df_ll = df.loc[(df.sort_values('AAA').groupby(['CCC']).BBB.diff() !=0),:]
df_ll.reset_index(drop=True)#,df.sort_values('AAA').groupby(['CCC']).BBB.diff() !=0
df



In [58]:

    
df.loc[df.sort_values('AAA').groupby(['CCC']).BBB.diff() !=0, :]



In [2]:

    
mean = df.groupby(['CCC', 'BBB'])['AAA'].apply(lambda x: '%.2f' % x).apply(set).apply(list)
mean
# df.loc[:, ['AAA', 'BBB']]









    



---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-2-c5d1f872956a> in <module>()
----> 1 mean = df.groupby(['CCC', 'BBB'])['AAA'].apply(lambda x: '%.2f' % x).apply(set).apply(list)
      2 mean
      3 # df.loc[:, ['AAA', 'BBB']]

NameError: name 'df' is not defined



In [104]:

    
np.random.seed(1445)
df_np = pd.DataFrame(np.random.randn(10, 3), columns=['A', 'B', 'C'],index=pd.date_range('1/1/2000', periods=10));df_np



In [7]:

    
df_np.iloc[0:6].agg({'A' : ['sum', 'min'], 'B' : ['min', 'max']}).apply(lambda x: x)



In [38]:

    
df_w = pd.DataFrame(np.arange(10).reshape(-1, 2), columns=['A', 'B'])
# df_w.A = df_w.A.mask(df_w.A == 0, df.B)
df_w



In [56]:

    
a = np.arange(10).reshape(-1,2)
b = np.arange(10, 20)
a,b









    Out[56]:





(array([[0, 1],
        [2, 3],
        [4, 5],
        [6, 7],
        [8, 9]]), array([10, 11, 12, 13, 14, 15, 16, 17, 18, 19]))



In [8]:

    
df_w.loc[df_w.A>0, 'B'] = 1;df_w



In [159]:

    
df1 = pd.DataFrame(np.random.randn(10,1),columns=['A'],index = np.arange(0,10,1))
df2 = pd.DataFrame(np.random.randn(10,1),columns=['B'],index = np.arange(0,10,1))
df_concat = pd.concat([df1, df2], axis=1);df_concat
df_concat.plot.bar()
plt.show()



In [299]:

    
import matplotlib.pyplot as plt
from matplotlib.dates import date2num
import datetime

x = [datetime.datetime(2011, 1, 4, 0, 0),
     datetime.datetime(2011, 1, 5, 0, 0),
     datetime.datetime(2011, 1, 6, 0, 0)]
x = date2num(x)
y = [4, 9, 2]
z=[1,2,3]
k=[11,12,13]

ax = plt.subplot(111)
ax.bar(x-0.2, y,width=0.2,align='center')
ax.bar(x, z,width=0.2,align='center')
ax.bar(x+0.2, k,width=0.2,align='center')
ax.xaxis_date()
plt.show()

ax.axis()









    












    Out[299]:





(734140.57000000007, 734143.42999999993, 0.0, 13.65)



In [51]:

    
df4 = pd.DataFrame({'E': ['E2', 'E3', 'B6', 'B7'],
                    'F': ['F2', 'F3', 'D6', 'D7'],
                    'G': ['G2', 'G3', 'F6', 'F7']},
                   index=[0, 1, 2, 3])
df1 = pd.DataFrame({'A': ['A0', 'A1', 'A2', 'A3'],
                    'B': ['B0', 'B1', 'B2', 'B3'],
                    'C': ['C0', 'C1', 'C2', 'C3'],
                    'D': ['D0', 'D1', 'D2', 'D3']},
                   index=[0, 1, 2, 3])
result = pd.concat([df1, df4], axis=1, join_axes=[df1.index])
# result.rename(columns={"A": "a", "B": "b"})
result.loc[:,['F', 'A']] = result[['A', 'F']].values
result



In [112]:

    
# evenly sampled time at 200ms intervals
t = np.arange(0., 5., 0.2)

# red dashes, blue squares and green triangles
plt.plot(t, t, 'r--', t, t**2, 'cs', t, t**3, 'g^')
plt.show()



In [116]:

    
x = np.linspace(0, 2, 100)

plt.plot(x, x, label='linear')
plt.plot(x, x**2, label='quadratic')
plt.plot(x, x**3, label='cubic')

plt.xlabel('x label')
plt.ylabel('y label')

plt.title("Simple Plot")

plt.legend()

plt.show()



In [157]:

    
a = pd.DataFrame(np.random.rand(4,5), columns = list('abcde'))
a_a = a.values
# plt.plot(a_a, label='a')
plt.plot(a_a, marker='o')
# plt.legend()
plt.show()
# a_asndarray



In [4]:

    
pd.Series(['man_q3', 'man_q2', 'man_q1']).str.replace(r'^man_', '')
pd.Series({'a':1 , 'b': 2})









    Out[4]:





a    1
b    2
dtype: int64



In [16]:

    
df_test = pd.DataFrame({'E': ['E2', 'E3', 'B6', 'B7'],
                    'F': ['F2', 'F3', 'D6', 'D7'],
                    'G': ['G2', 'G3', 'F6', 'F7']},
                   index=[0, 1, 2, 3])
df_test.applymap(lambda x, y : x+y , df_test.E, df_test.F)









    



---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-16-3c9034aee6c7> in <module>()
      3                     'G': ['G2', 'G3', 'F6', 'F7']},
      4                    index=[0, 1, 2, 3])
----> 5 df_test.applymap(lambda x, y : x+y , df_test.E, df_test.F)

TypeError: applymap() takes 2 positional arguments but 4 were given



In [2]:

    
df = pd.DataFrame(np.arange(10).reshape(-1, 2), columns=['A', 'B'])
m = df % 3 == 0
df



In [7]:

    
df2 = df.mask(m, -df)
df2



In [8]:

    
df.append(df2)

	A	B	C
2000-01-01	-1.060171	-0.036052	-1.492735
2000-01-02	0.433475	-1.367344	0.465158
2000-01-03	-1.690195	-0.838451	0.323225
2000-01-04	-0.397144	3.080562	2.463100
2000-01-05	0.981047	1.074076	0.231936
2000-01-06	1.100334	1.171540	0.245949
2000-01-07	1.693705	-0.379755	-1.581115
2000-01-08	1.836124	0.700274	-1.215588
2000-01-09	1.123545	-1.470417	-2.324812
2000-01-10	2.456664	1.744990	1.139921

	A	B
max	NaN	3.080562
min	-1.690195	-1.367344
sum	-0.632654	NaN

	A	B	C	D	E	F	G
0	F2	B0	C0	D0	E2	A0	G2
1	F3	B1	C1	D1	E3	A1	G3
2	D6	B2	C2	D2	B6	A2	F6
3	D7	B3	C3	D3	B7	A3	F7

	A	B	C	D	E	F	G
0	F2	B0	C0	D0	E2	A0	G2
1	F3	B1	C1	D1	E3	A1	G3
2	D6	B2	C2	D2	B6	A2	F6
3	D7	B3	C3	D3	B7	A3	F7

	A	B	C	D	E	F	G
0	F2	B0	C0	D0	E2	A0	G2
1	F3	B1	C1	D1	E3	A1	G3
2	D6	B2	C2	D2	B6	A2	F6
3	D7	B3	C3	D3	B7	A3	F7