In [ ]:
import pandas as pd
import numpy as np
from pandas import DataFrame, Series
import glob
In [ ]:
# Collect every gzip-compressed CSV under the lab's stock_prices directory.
# glob returns matches in arbitrary, filesystem-dependent order; sorting makes
# the file order (and anything derived from it, such as row order after the
# files are stacked) reproducible from run to run.
stock_prices = sorted(glob.glob('../labs/stock_prices/*.csv.gz'))
stock_prices
In [ ]:
# Load every price file and stack the rows into a single DataFrame.
# parse_dates must be a bool, list or dict -- a bare int is rejected -- so the
# column position is wrapped in a list. Position 2 is presumably the 'date'
# column; confirm against the CSV header.
price_frames = [
    pd.read_csv(file, parse_dates=[2], compression='gzip')
    for file in stock_prices
]
# DataFrame.append was removed in pandas 2.0 and re-copied the accumulated
# frame on each iteration; one concat stacks the same rows in a single pass.
# concat raises on an empty list, so keep the original empty-frame result
# when no files were found.
stock_data = pd.concat(price_frames) if price_frames else DataFrame()
stock_data
In [ ]:
# Show the 'stock_price_high' column on its own.
stock_data.loc[:, 'stock_price_high']
In [ ]:
# Narrow the frame to the symbol, date and daily-high columns.
stock_data.loc[:, ['stock_symbol', 'date', 'stock_price_high']]
In [ ]:
# Keep only the rows whose volume is at least ten million.
high_volume_mask = stock_data['stock_volume'] >= 10000000
stock_data[high_volume_mask]
In [ ]:
# Last 20 rows among those whose volume is at least ten million.
high_volume_mask = stock_data['stock_volume'] >= 10000000
high_volume_rows = stock_data[high_volume_mask]
high_volume_rows.tail(20)
In [ ]:
# Number of rows recorded for each stock symbol.
by_symbol = stock_data.groupby('stock_symbol')
by_symbol.size()
In [ ]:
# Largest 'stock_price_high' value for each stock symbol.
highs_by_symbol = stock_data.groupby('stock_symbol')['stock_price_high']
highs_by_symbol.max()
In [ ]:
# Smallest 'stock_price_low' value for each stock symbol.
lows_by_symbol = stock_data.groupby('stock_symbol')['stock_price_low']
lows_by_symbol.min()
In [ ]:
# Average 'stock_volume' for each stock symbol.
volume_by_symbol = stock_data.groupby('stock_symbol')['stock_volume']
volume_by_symbol.mean()
In [ ]:
# Max, min and mean of 'stock_price_high' for each stock symbol.
# The aggregations are named by string rather than passed as numpy callables:
# handing np.max / np.min / np.mean to agg is deprecated in recent pandas, and
# the resulting column labels depended on the numpy function's __name__
# (e.g. 'amax' vs 'max'). String names give stable 'max' / 'min' / 'mean'
# labels.
stock_data.groupby('stock_symbol').agg(
    {'stock_price_high': ['max', 'min', 'mean']}
)
In [ ]:
# Highest closing price among the rows whose symbol is 'AYI'.
is_ayi = stock_data['stock_symbol'] == 'AYI'
stock_data[is_ayi]['stock_price_close'].max()
In [ ]:
# Make sure 'date' holds datetimes, then plot AYI's closing price over date.
stock_data['date'] = pd.to_datetime(stock_data['date'])
ayi_rows = stock_data[stock_data['stock_symbol'] == 'AYI']
ayi_rows.plot(x="date", y="stock_price_close")
In [ ]:
# Make sure 'date' holds datetimes, then plot ABA's daily high over date.
stock_data['date'] = pd.to_datetime(stock_data['date'])
aba_rows = stock_data[stock_data['stock_symbol'] == 'ABA']
aba_rows.plot(x='date', y='stock_price_high')
In [ ]:
# Reload the price files, this time using column positions 1 and 2 as a
# two-level row index (presumably stock symbol and date -- confirm against the
# CSV header).
indexed_frames = [
    pd.read_csv(file, index_col=[1, 2], compression='gzip')
    for file in stock_prices
]
# DataFrame.append was removed in pandas 2.0; a single concat stacks all files
# at once and makes the "first file vs. later files" special case unnecessary.
# concat raises on an empty list, so keep the original empty-frame result
# when no files were found.
stock_data2 = pd.concat(indexed_frames) if indexed_frames else DataFrame()
stock_data2
In [ ]:
# Largest 'stock_price_high' value across the whole indexed frame.
stock_data2['stock_price_high'].max()
In [ ]:
# Largest 'stock_price_high' per value of the first index level.
# The level= argument of Series.max was removed in pandas 2.0; grouping on the
# index level is the supported equivalent.
stock_data2['stock_price_high'].groupby(level=0).max()
In [ ]:
# Smallest 'stock_price_low' per value of the first index level.
# The level= argument of Series.min was removed in pandas 2.0; grouping on the
# index level is the supported equivalent.
stock_data2['stock_price_low'].groupby(level=0).min()
In [ ]:
# Total 'stock_volume' per value of the first index level.
# The level= argument of Series.sum was removed in pandas 2.0; grouping on the
# index level is the supported equivalent.
stock_data2['stock_volume'].groupby(level=0).sum()
In [ ]:
# Total 'stock_volume' across the whole indexed frame.
stock_data2['stock_volume'].sum()
In [ ]:
# All rows whose first index level is 'CLI'. Passing a list keeps that level
# in the result's index.
# .ix was removed in pandas 1.0; .loc is the label-based replacement.
stock_data2.loc[['CLI']]
In [ ]:
# Largest 'stock_price_high' among the rows whose first index level is 'CLI'.
# .ix was removed in pandas 1.0; .loc is the label-based replacement.
stock_data2.loc[['CLI']]['stock_price_high'].max()
In [ ]:
# Plot 'stock_price_high' for 'CLI'. Selecting with a scalar label drops the
# first index level, so the remaining index level becomes the x-axis.
# .ix was removed in pandas 1.0; .loc is the label-based replacement.
stock_data2.loc['CLI']['stock_price_high'].plot()
In [ ]: