In [ ]:
import pandas as pd
from datetime import datetime as dt
# Three dates paired with three integer values.
a = [dt(2017, 3, day) for day in (1, 2, 8)]
b = list(range(1, 4))
df = pd.DataFrame({'A': a, 'B': b})
# Element-wise comparison of the datetime column against a plain datetime.
df['A'] >= dt(2017, 3, 5)
# Convert the first element of the column back to a standard datetime.
df['A'][0].to_pydatetime()
In [ ]:
from datetime import datetime
import time
# Naive datetime -> seconds since the epoch (interpreted as local time).
dt = datetime(year=2011, month=10, day=21, hour=15, minute=35, second=26)
s = time.mktime(dt.timetuple())
print(s)
# Timestamp -> datetime again; the round trip should reproduce `dt`.
dt_new = datetime.fromtimestamp(s)
print(dt_new)
In [ ]:
import pandas as pd
a1 = [1, 2, 3, 4]
a2 = [11, 12, 13, 14]
a3 = [21, 22, 23, 24]
a4 = [31, 32, 33, 34]

# Variant 1: build the frame column-wise from a dict of lists.
df1 = pd.DataFrame({'A1': a1, 'A2': a2, 'A3': a3, 'A4': a4})
print(df1)

# Variant 2: start from an empty frame and add one row per index label.
# The column list must name four distinct columns; the last one is 'A4'
# (a duplicated 'A3' would create two columns with the same label).
df2 = pd.DataFrame(columns=['A1', 'A2', 'A3', 'A4'])
for i, row in enumerate(zip(a1, a2, a3, a4)):
    df2.loc[i] = list(row)
print(df2)

# Variant 3: add a further column to an existing frame.
a5 = [41, 42, 43, 44]
df4 = pd.DataFrame({'A1': a1, 'A2': a2, 'A3': a3, 'A4': a4})
df4['A5'] = a5
print(df4)
In [ ]:
import pandas as pd
import numpy as np
# 18 evenly spaced values from 0 to 17, arranged in rows of three.
data = np.linspace(0, 17, num=18).reshape((-1, 3))
print(data)
# Pick the first and last column by integer position ...
print(data[:, [0, 2]])
# ... and the same two columns with a boolean mask.
print(data[:, [True, False, True]])
# Wrap the array in a DataFrame with named columns.
df = pd.DataFrame(data=data, columns=['A', 'B', 'C'])
print(df)
In [ ]:
import pandas as pd
a1 = [1, 2, 3, 4]
a2 = [11, 12, 13, 14]
a3 = [21, 22, 23, 24]
a4 = [31, 32, 33, 34]
df = pd.DataFrame(dict(A1=a1, A2=a2, A3=a3, A4=a4))
print(df)
# Show the basic structural attributes of the frame, one after another:
# row labels, column labels, (rows, columns) shape and the raw values.
for attribute in (df.index, df.columns, df.shape, df.values):
    print(attribute)
In [ ]:
import pandas as pd
a1 = [1, 2, 3, 4]
a2 = [11, 12, 13, 14]
a3 = [21, 22, 23, 24]
a4 = [31, 32, 33, 34]
df = pd.DataFrame(dict(A1=a1, A2=a2, A3=a3, A4=a4))
print(df)

# The examples below are disabled; uncomment one at a time to try it.

# --- Single value ---
# print(df['A1'][0])
# print(df.loc[0, 'A1'])
# print(df.loc[0].A1)
# print(df.loc[0]['A1'])

# --- Whole column ---
# print(df['A1'])
# print(df.A1)

# --- Row selection ---
# print(df[1:3])
# print(df.loc[1:3])
# print(df.loc[1])

# --- Rows and columns together ---
# print(df[1:3]['A1'])
# print(df.loc[1:3]['A1'])
# print(df.loc[1:3, 'A1'])
# print(df.loc[1:3, ['A1']])
# print(df.loc[1:3, ['A1', 'A2']])
# print(df.loc[:, ['A1', 'A2']])

# --- Positional access with iloc ---
# print(df.iloc[1])
# print(df.iloc[1, 0])
# print(df.iloc[1][0])
# print(df.iloc[1]['A1'])
# print(df.iloc[1:3, 0:2])

# --- Boolean masks ---
# print(df[df > 12])
# print(df['A2'] > 12)
# print(df[df['A2'] > 12])
# print(df[(df.A2 > 12) & (df.A3 < 24)])

# --- Filtered copy: lookup by label (loc) vs. by position (iloc) ---
# df1 = df[df['A1'] > 2].copy()
# print(df1)
# print(df1.loc[3])
# print(df1.iloc[1])

# --- Counting the values that satisfy a condition ---
# print(df[['A1', 'A2']][df[['A1', 'A2']] > 12].count())
# print(df[['A1', 'A2']][(df[['A1', 'A2']] > 12) | (df[['A1', 'A2']] < 2)].count())
In [ ]:
import pandas as pd
a1 = [1, 2, 3, 4]
a2 = [11, 12, 13, 14]
a3 = [21, 22, 23, 24]
a4 = [31, 32, 33, 34]
df = pd.DataFrame(dict(A1=a1, A2=a2, A3=a3, A4=a4))
print(df)

# Disabled experiments. Each one derives a second frame from `df`, writes
# 100 through chained indexing (`.loc[1]['A2'] = ...`) and then prints both
# the derived frame and `df` so the two can be compared.

# --- Plain assignment (second name for the same object) ---
# df1 = df
# print(df1)
# df1.loc[1]['A2'] = 100
# print(df1)
# print(df)

# --- Slice with [] ---
# df2 = df[0:2]
# print(df2)
# df2.loc[1]['A2'] = 100
# print(df2)
# print(df)

# --- Row slice with .loc ---
# df3 = df.loc[0:2]
# print(df3)
# df3.loc[1]['A2'] = 100
# print(df3)
# print(df)

# --- Row slice plus column list with .loc ---
# df4 = df.loc[0:2, ['A2', 'A3']]
# print(df4)
# df4.loc[1]['A2'] = 100
# print(df4)
# print(df)

# --- All rows plus column list with .loc ---
# df5 = df.loc[:, ['A2', 'A3']]
# print(df5)
# df5.loc[1]['A2'] = 100
# print(df5)
# print(df)

# --- Boolean mask plus column list with .loc ---
# df6 = df.loc[(df['A1'] > 1) & (df['A1'] < 4), ['A2', 'A3']]
# print(df6)
# df6.loc[1]['A2'] = 100
# print(df6)
# print(df)
In [ ]:
import pandas as pd
a1 = [1, 2, 3, 4]
a2 = [11, 12, 13, 14]
a3 = [21, 22, 23, 24]
a4 = [31, 32, 33, 34]
df = pd.DataFrame(dict(A1=a1, A2=a2, A3=a3, A4=a4))

# Disabled iteration examples; uncomment a group to run it.

# --- Rows as (index label, Series) pairs ---
# for index, row in df.iterrows():
#     print(index)
#     print(row)

# --- Rows as tuples ---
# for row in df.itertuples():
#     print(row)

# --- iterrows() on a filtered copy ---
# df1 = df[df['A1'] > 2].copy()
# print(df1)
# for index, row in df1.iterrows():
#     print(index)
#     print(row)
In [ ]:
import pandas as pd
a1 = [1, 2, 3, 4]
a2 = [11, 12, 13, 14]
a3 = [21, 22, 23, 24]
a4 = [31, 32, 33, 34]
df = pd.DataFrame({'A1': a1, 'A2': a2, 'A3': a3, 'A4': a4})


def func(row):
    """Return the row's index label together with its 'A1' value.

    Args:
        row: One row of the frame, as handed over by
            ``DataFrame.apply(..., axis=1)``.

    Returns:
        A ``(label, value)`` tuple.
    """
    return row.name, row['A1']


# The function already takes exactly one row, so it is passed directly
# instead of being wrapped in a lambda.
df.apply(func, axis=1)
In [ ]:
import pandas as pd
a1 = [1, 2, 3, 4]
a2 = [11, 12, 13, 14]
a3 = [21, 22, 23, 24]
a4 = [31, 32, 33, 34]
df = pd.DataFrame({'A1': a1, 'A2': a2, 'A3': a3, 'A4': a4})


def valuation_formula(x, y):
    """Return half of the product of ``x`` and ``y``.

    Args:
        x: First factor.
        y: Second factor.

    Returns:
        ``x * y * 0.5``.
    """
    return x * y * 0.5


# Row-wise apply: feed two columns of each row into the formula.
df.apply(lambda row: valuation_formula(row['A1'], row['A2']), axis=1)
In [ ]:
import pandas as pd
a1 = [1, 2, 3, 4]
a2 = [11, 12, 13, 14]
a3 = [21, 22, 23, 24]
a4 = [31, 32, 33, 34]
df = pd.DataFrame(dict(A1=a1, A2=a2, A3=a3, A4=a4))
# Element-wise apply on a single column: scale every A1 value by ten
# and store the result back into the same column.
df['A1'] = df['A1'].apply(lambda value: value * 10)
df
In [ ]:
import numpy as np
import pandas as pd
close = np.array([1, 2, 5, 5, 5, 4, 3, 4, 5, 5, 5, 5, 4, 3, 2, 4, 5, 5], dtype=np.float64)
df = pd.DataFrame({'close': close})
# Flag rows whose close equals 5 (1) versus all others (0); a vectorized
# comparison replaces the row-by-row apply.
df['binary'] = (df['close'] == 5).astype('int64')
# Change of the flag relative to the previous row; the first row has no
# predecessor and gets 0.
df['binary_diff'] = [0] + np.diff(df['binary']).tolist()


def get_concave_num(index):
    """Count rows up to and including ``index`` whose ``binary_diff`` is -1.

    Reads the module-level ``df``.
    """
    return ((df.index <= index) & (df['binary_diff'] == -1)).sum()


def get_break_count(index):
    """Count rows up to and including ``index`` with ``binary == 0`` and
    ``concave_num > 0``.

    Reads the module-level ``df``; the ``concave_num`` column must already
    exist when this is called.
    """
    return ((df.index <= index) & (df['binary'] == 0) & (df['concave_num'] > 0)).sum()


# Evaluate the helpers once per index label (the public index is used
# rather than the private ``row._name`` attribute).
df['concave_num'] = [get_concave_num(label) for label in df.index]
df['break_count'] = [get_break_count(label) for label in df.index]
print(df)
In [ ]:
import pandas as pd
from sklearn import preprocessing
a1 = [1, 2, 3, 4]
a2 = [11, 12, 150, 17]
a3 = [21, 22, 23, 24]
a4 = [31, 32, 33, 34]
df1 = pd.DataFrame({'A1': a1, 'A2': a2, 'A3': a3, 'A4': a4})
print(df1)
min_max_scaler = preprocessing.MinMaxScaler()
# scikit-learn transformers expect 2-D input of shape (n_samples,
# n_features); selecting with a list of column names keeps the data 2-D,
# whereas a single Series would be 1-D.
a11 = min_max_scaler.fit_transform(df1[['A1']])
print(type(df1['A1']))
print(type(a11))
# The scaler returns an (n, 1) array; flatten it before storing it as a
# single column.
df1['A1'] = a11.ravel()
print(type(df1['A1']))
# Scale several columns at once. Assigning through df1[[...]] replaces the
# columns, so the float results are not written into integer columns.
df1[['A1', 'A2']] = min_max_scaler.fit_transform(df1[['A1', 'A2']])
print(df1)
In [ ]:
import pandas as pd
a1 = [1, 2, 3, 4]
a2 = [11, 12, 13, 14]
a3 = [21, 22, 23, 24]
a4 = [31, 32, 33, 34]
df = pd.DataFrame({'A1': a1, 'A2': a2, 'A3': a3, 'A4': a4})
print(df)
# Take an explicit copy of the selection so that adding a column below
# modifies an independent frame (same idiom as the `.copy()` used for the
# filtered frames elsewhere in this notebook).
df1 = df.loc[(df['A1'] > 1) & (df['A1'] < 4), ['A2', 'A3']].copy()
print(df1)
df1['A5'] = [42, 43]
print(df1)
# Add two more columns to the original frame ...
df['A6'] = [51, 52, 53, 54]
df['A7'] = [61, 62, 63, 64]
print(df)
# ... pick the rows that match df1's index labels ...
df2 = df.loc[df1.index, ['A6', 'A7']]
print(df2)
# ... and attach them to df1 as additional columns (aligned on the index).
df1 = pd.concat([df1, df2], axis=1)
print(df1)
In [ ]:
import pandas as pd
# First frame.
a1 = [1, 2, 3, 4]
a2 = [11, 12, 13, 14]
a3 = [21, 22, 23, 24]
a4 = [31, 32, 33, 34]
df = pd.DataFrame(dict(A1=a1, A2=a2, A3=a3, A4=a4))
print(df)
# Second frame with the same column labels.
a11 = [5, 6, 7, 8]
a12 = [15, 16, 17, 18]
a13 = [25, 26, 27, 28]
a14 = [35, 36, 37, 38]
df1 = pd.DataFrame(dict(A1=a11, A2=a12, A3=a13, A4=a14))
print(df1)
# Stack the two frames vertically and renumber the rows 0..n-1.
df2 = pd.concat((df, df1), ignore_index=True, axis=0)
print(df2)
In [ ]:
import pandas as pd
a1 = [1, 2, 3, 4]
a2 = [11, 12, 13, 14]
a3 = [21, 22, 23, 24]
a4 = [31, 32, 33, 34]
df = pd.DataFrame(dict(A1=a1, A2=a2, A3=a3, A4=a4))
print(df)
# Assigning to an index label that does not exist yet appends a new row.
df.loc[4] = [5, 15, 25, 35]
print(df)
In [ ]:
import pandas as pd
from datetime import datetime as dt
# Imported here because only this cell needs it.
from io import StringIO

a0 = [dt(2017, 6, 1, 13, 5), dt(2017, 6, 1, 13, 6), dt(2017, 6, 1, 13, 7), dt(2017, 6, 1, 13, 8)]
a1 = [1, 2, 3, 4]
a2 = [11, 12, 13, 14]
a3 = [21, 22, 23, 24]
a4 = [31, 32, 33, 34]

# Round trip 1: datetime column named 'A0', serialized as ISO strings.
df = pd.DataFrame({'A0': a0, 'A1': a1, 'A2': a2, 'A3': a3, 'A4': a4})
print(df)
df_json = df.to_json(orient='columns', date_format='iso')
# read_json expects a path or file-like object; passing the JSON text
# directly is deprecated, so the string is wrapped in StringIO.
df_back = pd.read_json(StringIO(df_json), orient='columns', convert_dates=True)
print(df_json)
print(df_back)
print(type(df_back.loc[0]['A0']))
print(type(df_back.loc[0]['A1']))

# Round trip 2: same data, but the datetime column is renamed from 'A0' to
# 'date' and serialized as epoch values.
# NOTE(review): pandas documents that read_json only auto-parses columns
# with date-like names by default - presumably the reason for the rename.
df = pd.DataFrame({'date': a0, 'A1': a1, 'A2': a2, 'A3': a3, 'A4': a4})
print(df)
df_json = df.to_json(orient='columns', date_format='epoch')
df_back = pd.read_json(StringIO(df_json), orient='columns', convert_dates=True)
print(df_json)
print(df_back)
print(type(df_back.loc[0]['date']))
print(type(df_back.loc[0]['A1']))
In [ ]:
import random
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn import svm
# Synthetic data set: 100 rows, each with a random label (-1 or 1) and
# three random feature values. The rows are collected first and the frame
# is built once, instead of growing it with one `.loc` insert per row.
rows = [
    [random.choice([-1, 1]), random.randint(0, 10), random.uniform(0, 10), random.uniform(0, 10)]
    for _ in range(100)
]
df = pd.DataFrame(rows, columns=['y', 'close', 'sma5', 'sma30'])

# Split into features and label, then into train and test subsets.
x = df.loc[:, ['close', 'sma5', 'sma30']]
y = df.loc[:, ['y']]
x_train, x_test, y_train, y_test = train_test_split(x, y, random_state=1)

# Fit a support vector classifier; the label frame is flattened to 1-D.
clf = svm.SVC()
clf.fit(x_train, y_train.values.ravel())
test_prediction = clf.predict(x_test)
train_prediction = clf.predict(x_train)

# 1 where the prediction matches the true label, 0 otherwise.
test_prediction_result = [
    int(actual == predicted)
    for actual, predicted in zip(y_test['y'], test_prediction)
]
train_prediction_result = [
    int(actual == predicted)
    for actual, predicted in zip(y_train['y'], train_prediction)
]

# Fraction of correct predictions on the test and training subsets.
print(sum(test_prediction_result) / len(test_prediction_result))
print(sum(train_prediction_result) / len(train_prediction_result))
In [ ]:
import pandas as pd
from datetime import datetime
dt_str = ['2017-03-01', '2017-03-02', '2017-03-03', '2017-03-04', '2017-03-05']
# Parse the date strings into standard datetime objects.
dt = [datetime.strptime(text, '%Y-%m-%d') for text in dt_str]
# datetime -> pandas Timestamp.
print(pd.Timestamp(dt[0]))
# Build an index of Timestamps from the parsed datetimes.
dt_01 = pd.Index([pd.Timestamp(moment) for moment in dt])
# Timestamp -> datetime again, and confirm the resulting type.
print(dt_01[0].to_pydatetime())
print(type(dt_01[0].to_pydatetime()))
In [ ]:
import pandas as pd
from sklearn import preprocessing
a1 = [1, 2, 3, 4]
a2 = [11, 12, 13, 14]
a3 = [21, 22, 23, 24]
a4 = [31, 32, 33, 34]
df1 = pd.DataFrame(dict(A1=a1, A2=a2, A3=a3, A4=a4))
# Element-wise product using the * operator.
print(df1['A1'] * df1['A2'])
# Dot product of the two columns (a single number).
print(df1['A1'].dot(df1['A2']))
# Element-wise product again, this time through the method form.
print(df1['A1'].multiply(df1['A2']))
In [ ]:
import pandas as pd
import numpy as np
# Build the frame and immediately promote 'jim' and 'joe' to a two-level
# row index; 'jolie' (four random numbers) stays as the only data column.
dfm = pd.DataFrame(
    {
        'jim': [0, 0, 1, 1],
        'joe': ['x', 'x', 'z', 'y'],
        'jolie': np.random.rand(4),
    }
).set_index(['jim', 'joe'])
dfm.index
In [ ]:
import pandas as pd
a1 = [1, 2, 3, 4, 5, 6, 7, 8]
a2 = [11, 12, 13, 14, 15, 16, 17, 18]
a3 = [21, 22, 23, 24, 25, 26, 27, 28]
a4 = [31, 32, 33, 34, 35, 36, 37, 38]
df1 = pd.DataFrame(dict(A1=a1, A2=a2, A3=a3, A4=a4))
# Per-column sum over a sliding window of four rows; a result is only
# produced once the window holds four observations.
df1.rolling(min_periods=4, window=4).sum()
In [ ]:
import pandas as pd
def func(s):
    """Return the sum of the values in one rolling window.

    Args:
        s: The window contents handed over by ``Rolling.apply``; anything
            with a ``sum()`` method works.

    Returns:
        The result of ``s.sum()``.
    """
    return s.sum()


a1 = [1, 2, 3, 4, 5, 6, 7, 8]
a2 = [11, 12, 13, 14, 15, 16, 17, 18]
a3 = [21, 22, 23, 24, 25, 26, 27, 28]
a4 = [31, 32, 33, 34, 35, 36, 37, 38]
df1 = pd.DataFrame({'A1': a1, 'A2': a2, 'A3': a3, 'A4': a4})
# Apply the custom aggregation to every full window of four A1 values.
df1['A1'].rolling(window=4, min_periods=4).apply(func=func)