In [15]:
# Core numerical / tabular stack used by the cells below.
import pandas as pd
import numpy as np
# NOTE(review): the star import can shadow builtins such as sum/min/max with
# their NumPy counterparts; kept because other cells may rely on the bare names.
from numpy import *
# The original line ended with a dangling comma, which is a SyntaxError for an
# unparenthesised import list.
from pandas import Series, DataFrame

# Panel was removed in pandas 1.0. Import it only where it still exists so this
# cell runs on both old and current pandas; Panel is None when unavailable.
try:
    from pandas import Panel
except ImportError:
    Panel = None

In [8]:
# Load the spot-price CSV. header=None tells pandas the file has no header
# row, so the column labels are supplied explicitly via `names`.
file_location = 'D:/GitHub/Spot-Price-Analysis/clean-data/Price.csv'
col_names = [
    'region', 'az', 'time', 'instance', 'category', 'spotprice',
    'demandprice', 'compute', 'memory', 'hdd', 'ssd', 'storage',
]
prices_df = pd.read_csv(
    file_location,
    header=None,
    names=col_names,
)

In [12]:
# Preview the first five rows (5 is the default for head(), spelled out here).
prices_df.head(n=5)


Out[12]:
region az time instance category spotprice demandprice compute memory hdd ssd storage
0 us-east-1 us-east-1c 2014-08-11 17:23:20.000 r3.8xlarge High Memory 0.78 2.8 104 244 0 640 640
1 us-east-1 us-east-1c 2014-08-11 17:51:49.000 r3.8xlarge High Memory 0.78 2.8 104 244 0 640 640
2 us-east-1 us-east-1c 2014-08-11 18:20:38.000 r3.8xlarge High Memory 0.78 2.8 104 244 0 640 640
3 us-east-1 us-east-1c 2014-08-12 13:50:58.000 r3.8xlarge High Memory 0.78 2.8 104 244 0 640 640
4 us-east-1 us-east-1c 2014-08-27 18:47:22.000 r3.8xlarge High Memory 0.78 2.8 104 244 0 640 640

5 rows × 12 columns


In [18]:
# Do datatype conversions: parse the `time` column into pandas datetimes.
# Re-running this cell is safe because to_datetime accepts already-parsed values.
prices_df['time'] = pd.to_datetime(prices_df['time'])

# Keep only the us-east-1 rows. Series.isin() accepts list-likes only and raises
# TypeError when given a bare string, so a plain equality mask is used for the
# single region.
# Selection reference: http://pandas.pydata.org/pandas-docs/stable/10min.html#selection
prices_df_useast = prices_df[prices_df['region'] == 'us-east-1']

# Convert to a time series.
# NOTE(review): the original `Series(` call was left unfinished; a spot-price
# series indexed by timestamp is assumed here -- confirm the intended
# values/index before building analysis on it.
prices_s = pd.Series(
    prices_df_useast['spotprice'].values,
    index=pd.DatetimeIndex(prices_df_useast['time'], name='time'),
    name='spotprice',
)

# Time series analysis references:
#   http://earthpy.org/pandas-basics.html
#   http://earthpy.org/time_series_analysis_with_pandas_part_2.html
#   http://stackoverflow.com/questions/4809577/correlation-of-two-variables-in-a-time-series-in-python
#   http://stackoverflow.com/questions/25320773/time-series-correlation-and-lag-time

In [ ]: