In [1]:
import re
import reprlib
from collections import abc
from datetime import datetime

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from matplotlib.dates import date2num
from sqlalchemy import create_engine
np.random.seed(1445)
In [2]:
# ======================================== diff function demo
# Small sample frame: AAA holds values, BBB is a 0/1 flag, CCC is the group key.
df = pd.DataFrame({
    'AAA': [1, 2, 3, 4, 5, 6, 7, 9, 9, 11, 11, 11],
    'BBB': [1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1],
    'CCC': [3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4],
})
# df.query('BBB ==0 and AAA in (2,5)')

# GroupBy object keyed on CCC (kept for the experiments noted below).
ss = df.groupby(['CCC'])  # .AAA.apply(set).apply(len)

# Column-wise maxima; the built-in reduction replaces apply(max).
x = df.max()
for i in x:
    print(i)

# Scratch experiments kept for reference:
# isinstance(ss, abc.Iterable)
# ss.sort_values(ascending=False)
# ss_a = ss['AAA']
# print(ss_a.unique())
# source_cols = df.columns
# str_col = [str(x) + '_col' for x in source_cols]
# categories = {1 : 'Alpha', 2 : 'Beta', 3 : 'Charlie' }
# df[str_col] = df[source_cols].applymap(categories.get)
# df.groupby(['CCC'])['AAA'].mean()
# df.sort_values('CCC')#.reset_index(drop=True)#.groupby(['CCC'])#.diff()
# data = pd.DataFrame(np.random.rand(20,4), columns = ['A','B','C','D'])
# data.query('A > B')
In [3]:
# Inspect the class pandas returns when a single column is selected from a GroupBy.
type(df.groupby(['CCC'])['BBB'])
Out[3]:
In [7]:
# Within each CCC group (rows ordered by AAA), difference of BBB against the
# previous row. The first row of every group has no predecessor, so its diff
# is NaN; NaN != 0 evaluates to True and that row is therefore kept.
group_diff = df.sort_values('AAA').groupby(['CCC']).BBB.diff()

# Rows where the flag changed (or that start a group), with the original index.
df_ll = df.loc[group_diff != 0, :]

# Previously this call sat mid-cell and its result was silently discarded;
# make it the cell's displayed value. df_ll itself keeps its original index.
df_ll.reset_index(drop=True)  # ,df.sort_values('AAA').groupby(['CCC']).BBB.diff() !=0
Out[7]:
In [58]:
# Keep the rows whose BBB differs from the previous row of the same CCC group
# (ordered by AAA); group-leading rows have a NaN diff and are kept as well.
df.loc[df.sort_values('AAA').groupby(['CCC'])['BBB'].diff().ne(0), :]
Out[58]:
In [2]:
# Mean of AAA for every (CCC, BBB) combination, rendered with two decimals.
# GroupBy.apply hands the lambda a whole group (a Series), so formatting it
# directly with '%.2f' raised TypeError. Reduce each group to a scalar first,
# then format value by value.
# NOTE(review): the trailing .apply(set).apply(list) was dropped because on a
# formatted string it would only split the text into characters - confirm that
# a per-group mean is what was intended.
mean = df.groupby(['CCC', 'BBB'])['AAA'].mean().map(lambda v: '%.2f' % v)
mean
# df.loc[:, ['AAA', 'BBB']]
In [104]:
# Re-seed so this cell yields the same random frame every time it is run.
np.random.seed(1445)

# 10 daily rows starting 2000-01-01, three standard-normal columns.
df_np = pd.DataFrame(
    data=np.random.randn(10, 3),
    index=pd.date_range('1/1/2000', periods=10),
    columns=['A', 'B', 'C'],
)
df_np
Out[104]:
In [7]:
# Per-column aggregates over the first six rows: sum/min for A, min/max for B.
# The former trailing .apply(lambda x: x) was an identity transform and has
# been removed; the resulting frame is unchanged.
df_np.iloc[0:6].agg({'A': ['sum', 'min'], 'B': ['min', 'max']})
Out[7]:
In [38]:
# Integers 0..9 laid out row-wise into two columns, A and B.
df_w = pd.DataFrame(np.arange(10).reshape(5, 2), columns=list('AB'))
# df_w.A = df_w.A.mask(df_w.A == 0, df.B)
df_w
Out[38]:
In [56]:
# a: 0..9 as a 5x2 array; b: the ten integers 10..19 as a flat array.
a = np.arange(10).reshape(5, 2)
b = np.arange(10, 20)
(a, b)
Out[56]:
In [8]:
# Overwrite B with 1 on every row whose A is positive (mutates df_w in place).
df_w.loc[df_w['A'].gt(0), 'B'] = 1
df_w
Out[8]:
In [159]:
# Two single-column frames of standard-normal draws sharing the index 0..9.
df1 = pd.DataFrame(np.random.randn(10, 1), index=np.arange(10), columns=['A'])
df2 = pd.DataFrame(np.random.randn(10, 1), index=np.arange(10), columns=['B'])

# Place them side by side (column-wise) and draw a grouped bar chart.
df_concat = pd.concat([df1, df2], axis=1)
df_concat.plot.bar()
plt.show()
In [299]:
# Grouped bar chart: three series drawn side by side for each of three dates.
# plt, date2num and the datetime class all come from the notebook's import
# cell. The cell used to run `import datetime`, which rebound the notebook-wide
# name `datetime` from the class to the module.
x = date2num([datetime(2011, 1, day) for day in (4, 5, 6)])  # dates as Matplotlib date numbers
y = [4, 9, 2]
z = [1, 2, 3]
k = [11, 12, 13]

bar_width = 0.2  # in x-axis units; also used as the offset between the series

ax = plt.subplot(111)
ax.bar(x - bar_width, y, width=bar_width, align='center')
ax.bar(x, z, width=bar_width, align='center')
ax.bar(x + bar_width, k, width=bar_width, align='center')
ax.xaxis_date()  # render the numeric x positions as dates
plt.show()

# Cell output: the axes limits returned by ax.axis().
ax.axis()
Out[299]:
In [51]:
# Two string frames that share the index 0..3.
df4 = pd.DataFrame(
    {
        'E': ['E2', 'E3', 'B6', 'B7'],
        'F': ['F2', 'F3', 'D6', 'D7'],
        'G': ['G2', 'G3', 'F6', 'F7'],
    },
    index=[0, 1, 2, 3],
)
df1 = pd.DataFrame(
    {
        'A': ['A0', 'A1', 'A2', 'A3'],
        'B': ['B0', 'B1', 'B2', 'B3'],
        'C': ['C0', 'C1', 'C2', 'C3'],
        'D': ['D0', 'D1', 'D2', 'D3'],
    },
    index=[0, 1, 2, 3],
)

# Column-wise concat aligned to df1's index. The `join_axes` keyword was
# removed in pandas 1.0; reindexing the result on df1.index is the replacement.
result = pd.concat([df1, df4], axis=1).reindex(df1.index)
# result.rename(columns={"A": "a", "B": "b"})

# Swap the contents of columns A and F. `.values` strips the labels, so the
# assignment is positional: F receives A's data and A receives F's data.
result.loc[:, ['F', 'A']] = result[['A', 'F']].values
result
Out[51]:
In [112]:
# Sample points from 0 up to (but excluding) 5 in steps of 0.2.
t = np.arange(0., 5., 0.2)

# One curve per call: red dashes (t), cyan squares (t^2), green triangles (t^3).
plt.plot(t, t, 'r--')
plt.plot(t, t**2, 'cs')
plt.plot(t, t**3, 'g^')
plt.show()
In [116]:
# 100 evenly spaced points on [0, 2].
x = np.linspace(0, 2, 100)

# Plot x, x^2 and x^3 with a legend entry for each curve.
for power, curve_label in enumerate(('linear', 'quadratic', 'cubic'), start=1):
    plt.plot(x, x**power, label=curve_label)

plt.xlabel('x label')
plt.ylabel('y label')
plt.title("Simple Plot")
plt.legend()
plt.show()
In [157]:
# 4x5 frame of uniform random numbers with columns a..e.
a = pd.DataFrame(np.random.rand(4, 5), columns=['a', 'b', 'c', 'd', 'e'])

# Underlying ndarray; plotting a 2-D array draws one line per column.
a_a = a.values
# plt.plot(a_a, label='a')
plt.plot(a_a, marker='o')
# plt.legend()
plt.show()
# a_a is an ndarray
In [4]:
# Strip the leading "man_" from each label. The pattern is anchored with ^,
# so it must be applied as a regular expression: since pandas 2.0
# Series.str.replace defaults to regex=False, which would search for the
# literal text "^man_" and change nothing. The result is bound to a name
# because a bare mid-cell expression is neither stored nor displayed.
stripped = pd.Series(['man_q3', 'man_q2', 'man_q1']).str.replace(r'^man_', '', regex=True)

# A dict builds a Series whose index is the dict's keys.
pd.Series({'a': 1, 'b': 2})
Out[4]:
In [16]:
# Small frame of string labels used to demonstrate combining two columns.
df_test = pd.DataFrame(
    {
        'E': ['E2', 'E3', 'B6', 'B7'],
        'F': ['F2', 'F3', 'D6', 'D7'],
        'G': ['G2', 'G3', 'F6', 'F7'],
    },
    index=[0, 1, 2, 3],
)

# applymap calls a one-argument function on each cell and does not accept extra
# positional arguments, so the previous two-argument lambda raised TypeError.
# Adding the two columns directly concatenates E and F row by row.
# NOTE(review): assumes the intent was element-wise E + F - confirm.
e_plus_f = df_test['E'] + df_test['F']
e_plus_f
In [2]:
# Integers 0..9 in two columns (rebinds the notebook-level name df).
df = pd.DataFrame(np.arange(10).reshape(5, 2), columns=list('AB'))

# Boolean frame: True where the value is divisible by 3.
m = df.mod(3).eq(0)
df
Out[2]:
In [7]:
# Negate the entries flagged in m and leave the rest untouched
# (where(~m, other) is the complement form of mask(m, other)).
df2 = df.where(~m, -df)
df2
Out[7]:
In [8]:
# Stack df2 underneath df, keeping each frame's original index labels.
# DataFrame.append was removed in pandas 2.0; pd.concat is the replacement.
pd.concat([df, df2])
Out[8]: