Spot resources Analytics

Here we perform some initial process and analysis on the dataset.

With static dataset, e.g. load the grabbed data.



In [197]:

    
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn
%matplotlib inline
import time
import dateutil.parser



In [3]:

    
# The filtered file make sure to use the fi
filename = 'data/aws-recent-filtered.csv'
#filename = './data/subset.csv'



In [4]:

    
start = time.time()
# parse the data file and extra the results
df = pd.read_csv(filename)
print(time.time()-start)
df.head(3)









    



149.24496603012085






    Out[4]:






  
    
      
      TimeStamp
      AvailabilityZone
      InstanceType
      SpotPrice
    
  
  
    
      0
      2016-12-12 10:44:30
      ap-northeast-1a
      t1.micro
      0.0135
    
    
      1
      2016-12-12 10:44:30
      ap-northeast-1a
      t1.micro
      0.0035
    
    
      2
      2016-12-12 10:43:50
      ap-northeast-1a
      t1.micro
      0.0134



In [5]:

    
df = df.dropna()
df = df[df.AvailabilityZone != "AvailabilityZone"]
df['TimeStamp'] = pd.to_datetime(df.TimeStamp)
df.index = df.TimeStamp
df = df.drop('TimeStamp', axis=1)
df= df.sort_index()
df.head(3)









    Out[5]:






  
    
      
      AvailabilityZone
      InstanceType
      SpotPrice
    
    
      TimeStamp
      
      
      
    
  
  
    
      2016-10-06 10:44:39
      eu-west-1b
      p2.16xlarge
      184.96
    
    
      2016-10-06 10:44:39
      eu-west-1c
      p2.16xlarge
      184.96
    
    
      2016-10-06 10:44:39
      eu-west-1a
      p2.16xlarge
      184.96



In [6]:

    
print (len(df))
print (df['InstanceType'].unique())
print (df['AvailabilityZone'].unique())
print (df['SpotPrice'].unique())









    



94577818
['p2.16xlarge' 'c3.large' 'i2.4xlarge' 'i2.2xlarge' 'm2.4xlarge' 'm3.large'
 'm4.4xlarge' 'm4.xlarge' 't1.micro' 'g2.8xlarge' 'd2.2xlarge' 'c1.xlarge'
 'm1.small' 'i2.xlarge' 'm1.xlarge' 'c4.2xlarge' 'm1.large' 'c4.large'
 'c4.xlarge' 'm3.medium' 'm4.16xlarge' 'p2.8xlarge' 'm1.medium'
 'x1.16xlarge' 'x1.32xlarge' 'c3.4xlarge' 'r3.4xlarge' 'c1.medium'
 'c4.4xlarge' 'i2.8xlarge' 'hi1.4xlarge' 'd2.8xlarge' 'r3.2xlarge'
 'd2.4xlarge' 'm4.10xlarge' 'c3.8xlarge' 'r3.8xlarge' 'c3.2xlarge'
 'c3.xlarge' 'r3.large' 'c4.8xlarge' 'm3.xlarge' 'm4.large' 'd2.xlarge'
 'm2.2xlarge' 'm2.xlarge' 'r3.xlarge' 'm3.2xlarge' 'm4.2xlarge'
 'cr1.8xlarge' 'p2.xlarge' 'cc2.8xlarge' 'cg1.4xlarge' 'cc1.4xlarge'
 'g2.2xlarge' 'r4.large' 'r4.xlarge' 'r4.4xlarge' 'r4.16xlarge'
 'r4.8xlarge' 'r4.2xlarge']
['eu-west-1b' 'eu-west-1c' 'eu-west-1a' 'us-west-2c' 'ap-northeast-2c'
 'ap-northeast-2a' 'us-west-1b' 'us-west-1c' 'us-west-2b' 'us-west-2a'
 'ap-southeast-2c' 'eu-central-1b' 'ap-southeast-2b' 'ap-southeast-2a'
 'eu-central-1a' 'us-east-1a' 'us-east-1e' 'sa-east-1c' 'sa-east-1a'
 'ap-southeast-1a' 'ap-southeast-1b' 'us-east-1d' 'ap-northeast-1c'
 'ap-northeast-1a' 'us-east-1c']
[  1.84960000e+02   1.04100000e-01   1.13600000e+00 ...,   3.05060000e+00
   1.83800000e-02   1.18380000e-01]

Hypothesis #2

For each machine type there exists a region that is more favorable to use, as the market volatility is very low and the prices tend to stay cheaper than the other regions.

With in proving this hypothesis users will be able to find the best region they should be bidding in, as long as latency is not an issue for them.

Data Science tools & Techniques: We can use clustering and classification methods.



In [7]:

    
def corrGraph(title, df):
    corr_df = df.corr()
    mask = np.zeros_like(corr_df)
    mask[np.triu_indices_from(mask)] = True
    seaborn.heatmap(corr_df, cmap='RdYlGn_r', vmax=1.0, vmin=-1.0 , mask = mask, linewidths=2.5)
    plt.yticks(rotation=0) 
    plt.title(title)
    plt.xticks(rotation=90) 
    plt.show()



In [13]:

    
# Some info about the data
df = df.truncate(before='2016-10-13 00:00:00', after='2016-12-11 00:00:00')
print (df.index.min())
print (df.index.max())
print(df.index.max()- df.index.min()) 

df.head(3)









    



2016-10-13 00:00:00
2016-12-11 00:00:00
59 days 00:00:00






    Out[13]:






  
    
      
      AvailabilityZone
      InstanceType
      SpotPrice
    
    
      TimeStamp
      
      
      
    
  
  
    
      2016-10-13
      us-east-1e
      m3.xlarge
      0.1395
    
    
      2016-10-13
      ap-northeast-1a
      r3.8xlarge
      0.4891
    
    
      2016-10-13
      ap-northeast-1a
      r3.8xlarge
      0.5891



In [68]:

    
depa = df.groupby(['AvailabilityZone', 'InstanceType'])



In [75]:

    
#depa = awsResampler(df)
# Initialize dictionary of all combos of  empty dfs we want to graph and corr
zonedfs={}
typedfs={}
for item in df['InstanceType'].unique():
    typedfs.update({item: pd.DataFrame()})
for item in df['AvailabilityZone'].unique():
    zonedfs.update({item: pd.DataFrame()})

#Fill zonedfs with dataframes of all machines in that zone pricing
for name, group in depa:
    #We have to create the data frame by merging the rows and the first run we need our first row
    if zonedfs[name[0]].empty:
        #RESAMPLE DATA HOURLY getting the avg of the hour
        zonedfs[name[0]] = group
        zonedfs[name[0]] = zonedfs[name[0]].resample('H').mean()
        zonedfs[name[0]] = zonedfs[name[0]].fillna(method="ffill")
        #zonedfs[name[0]] = zonedfs[name[0]].drop('InstanceType', axis=1).drop(['AvailabilityZone'],axis=1)
        #Remove index so that you can merge
        zonedfs[name[0]] = zonedfs[name[0]].reset_index()
        #print(zonedfs[name[0]].head(40) )
        zonedfs[name[0]].rename(columns = {'SpotPrice':name[1]}, inplace = True)

    else:
        #RESAMPLE DATA HOURLY getting the avg of the hour
        group1 = group.resample('H').mean()
        group1 = group1.fillna(method="ffill")
        #print(zonedfs[name[0]].head(20) )
        #group1 = group.drop('InstanceType', axis=1).drop(['AvailabilityZone'],axis=1)
        group1.rename(columns = {'SpotPrice':name[1]}, inplace = True)    
        #Remove index so that you can merge
        group1 = group1.reset_index()
        #print(group1.head(20))
        zonedfs[name[0]] = zonedfs[name[0]].merge(group1,how='right')


#Fill typedfs with dataframes of all machines in that zone pricing
for name, group in depa:
    if typedfs[name[1]].empty:
        typedfs[name[1]] = group
        typedfs[name[1]] = typedfs[name[1]].resample('H').mean()
        typedfs[name[1]] = typedfs[name[1]].fillna(method="ffill")
        #typedfs[name[1]] = typedfs[name[1]].drop('InstanceType', axis=1).drop(['AvailabilityZone'],axis=1)
        typedfs[name[1]].rename(columns = {'SpotPrice':name[0]}, inplace = True)
        typedfs[name[1]] = typedfs[name[1]].reset_index()
    else:
        group1 = group.resample('H').mean()
        group1 = group1.fillna(method="ffill")
        #group1 = group.drop('InstanceType', axis=1).drop(['AvailabilityZone'],axis=1)
        group1.rename(columns = {'SpotPrice':name[0]}, inplace = True)
        group1 = group1.reset_index()
        typedfs[name[1]] = typedfs[name[1]].merge(group1,how='right')



In [76]:

    
# Reindex time by timestamp
for key in typedfs:
    typedfs[key].index = typedfs[key].TimeStamp
    typedfs[key]       = typedfs[key].drop(['TimeStamp'],axis=1)

for key in zonedfs:
    zonedfs[key].index = zonedfs[key].TimeStamp
    zonedfs[key]       = zonedfs[key].drop(['TimeStamp'],axis=1)



In [96]:

    
typedfs['r4.large'].head(3)









    Out[96]:






  
    
      
      ap-southeast-2b
      ap-southeast-2c
      eu-central-1a
      eu-central-1b
      eu-west-1a
      eu-west-1b
      eu-west-1c
      us-east-1a
      us-east-1c
      us-east-1d
      us-east-1e
      us-west-1b
      us-west-1c
      us-west-2a
      us-west-2b
      us-west-2c
    
    
      TimeStamp
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
    
  
  
    
      2016-12-05 18:00:00
      0.08
      0.08
      0.08
      0.08
      0.078
      0.078
      0.078
      0.063000
      0.063000
      0.063000
      0.063000
      0.078
      0.078
      0.077
      0.077
      0.077
    
    
      2016-12-05 19:00:00
      0.08
      0.08
      0.08
      0.08
      0.078
      0.078
      0.078
      0.077067
      0.077067
      0.081667
      0.077267
      0.078
      0.078
      0.077
      0.077
      0.077
    
    
      2016-12-05 20:00:00
      0.08
      0.08
      0.08
      0.08
      0.078
      0.078
      0.078
      0.077033
      0.077033
      0.097500
      0.077133
      0.078
      0.078
      0.077
      0.077
      0.077

Correlations of timeseries



In [82]:

    
#NORMALIZE THE DATA HERE


tN=typedfs.copy()
zN=zonedfs.copy()

for key in tN:
    #typedfs[key].index = typedfs[key].TimeStamp
    #typedfs[key]       = typedfs[key].drop(['TimeStamp'],axis=1)
    #Normalize data
        #typedfs[key] = typedfs[key].apply(lambda row: np.log(row).diff(), axis=0 )
    tN[key] = tN[key].diff(axis=0)
    corrGraph(key, tN[key])
    
for key in zonedfsN:
    #zonedfs[key].index = zonedfs[key].TimeStamp
    #zonedfs[key]       = zonedfs[key].drop(['TimeStamp'],axis=1)
    #Normalize data
        #zonedfs[key] = zonedfs[key].apply(lambda row: np.log(row).diff(), axis=0 )
    zN[key] = zN[key].diff(axis=0)
    corrGraph(key, zN[key])

AVGS, MINS, MAXs vs REAL Prices



In [87]:

    
#NORMALIZE THE DATA HERE

tavg = typedfs
dfAVG= []
dfA = pd.DataFrame()
#["InstanceType","AvailabilityZone","Mean", "Median", "Max", "Min", "STD"]

for key in tavg:
    for column in tavg[key]:
        dfAVG.append({'InstanceType':key, 'AvailabilityZone':column,'Mean':tavg[key][column].mean(),'Median':tavg[key][column].median(),
             'Max':tavg[key][column].max(), 'Min':tavg[key][column].min(), 'STD':tavg[key][column].std()})

dfA = pd.DataFrame(dfAVG)
print(dfA.head(3))
print(len(dfA))









    



  AvailabilityZone InstanceType    Max      Mean  Median    Min       STD
0  ap-southeast-2b     r4.large  0.108  0.072216  0.0661  0.066  0.013839
1  ap-southeast-2c     r4.large  0.108  0.072179  0.0661  0.066  0.013854
2    eu-central-1a     r4.large  0.108  0.068157  0.0662  0.066  0.007208
1156

Get the actual pricing data for comparison



In [89]:

    
dfPricing = pd.read_csv("data/pricing-data.csv")



In [95]:

    
dfPricing.drop('Unnamed: 0', axis=1,inplace=True)
dfPricing.head(3)









    Out[95]:






  
    
      
      AvailabilityZone
      InstanceType
      OnDemand
      yrTerm1Standard.allUpfront
      yrTerm1Standard.noUpfront
      yrTerm1Standard.partialUpfront
      yrTerm3Convertible.allUpfront
      yrTerm3Convertible.noUpfront
      yrTerm3Convertible.partialUpfront
      yrTerm3Standard.allUpfront
      yrTerm3Standard.partialUpfront
    
  
  
    
      1
      us-west-2
      t1.micro
      0.020
      0.012
      0.014
      0.012
      NaN
      NaN
      NaN
      0.008
      0.009
    
    
      2
      us-west-1
      t1.micro
      0.025
      0.015
      0.017
      0.015
      NaN
      NaN
      NaN
      0.011
      0.012
    
    
      3
      eu-west-1
      t1.micro
      0.020
      0.015
      0.016
      0.015
      NaN
      NaN
      NaN
      0.011
      0.012



In [107]:

    
az = ['eu-west-1b', 'eu-west-1c', 'eu-west-1a', 'us-west-2c', 'ap-northeast-2c',
 'ap-northeast-2a', 'us-west-1b', 'us-west-1c', 'us-west-2b', 'us-west-2a',
 'ap-southeast-2c' ,'eu-central-1b','ap-southeast-2b','ap-southeast-2a',
 'eu-central-1a','us-east-1a','us-east-1e' 'sa-east-1c' 'sa-east-1a',
 'ap-southeast-1a','ap-southeast-1b', 'us-east-1d', 'ap-northeast-1c',
 'ap-northeast-1a', 'us-east-1c']

dfPricingAdj = pd.DataFrame()



In [126]:

    
#modify dfPricing Availability Zone to contain a, b ...etc
count =0
for item in az:
    #print(item)
    azM = item[:-1]
    if count == 0:
        dfPricingAdj= dfPricing[dfPricing.AvailabilityZone == azM].copy()
        dfPricingAdj.replace(to_replace=azM, value=item, inplace=True) 
        count = count+1
    else:
        dfTemp = dfPricing[dfPricing.AvailabilityZone == azM].copy()
        dfTemp.replace(to_replace=azM, value=item, inplace=True) 
        #print(dfTemp.head(3))
        dfPricingAdj = dfPricingAdj.append(dfTemp)
        
dfPricingAdj = dfPricingAdj.reset_index().drop('index', axis=1)
#dfPricingAdj.head(1000)
#for item in dfPricing['AvailabilityZone']:
#    print(item)
#    break

#Merge the prices with the stats
#dfA = dfA(dfPricing,how='right')



In [128]:

    
#Merge the prices with the stats
dfA = dfA.merge(dfPricingAdj,how='right')

dfA.head(3)









    Out[128]:






  
    
      
      AvailabilityZone
      InstanceType
      Max
      Mean
      Median
      Min
      STD
      OnDemand
      yrTerm1Standard.allUpfront
      yrTerm1Standard.noUpfront
      yrTerm1Standard.partialUpfront
      yrTerm3Convertible.allUpfront
      yrTerm3Convertible.noUpfront
      yrTerm3Convertible.partialUpfront
      yrTerm3Standard.allUpfront
      yrTerm3Standard.partialUpfront
    
  
  
    
      0
      ap-southeast-2b
      r4.large
      0.108
      0.072216
      0.0661
      0.066
      0.013839
      0.16
      0.094
      0.112
      0.096
      0.084
      0.101
      0.086
      0.06
      0.064
    
    
      1
      ap-southeast-2c
      r4.large
      0.108
      0.072179
      0.0661
      0.066
      0.013854
      0.16
      0.094
      0.112
      0.096
      0.084
      0.101
      0.086
      0.06
      0.064
    
    
      2
      eu-central-1a
      r4.large
      0.108
      0.068157
      0.0662
      0.066
      0.007208
      0.16
      0.094
      0.112
      0.096
      0.085
      0.101
      0.086
      0.06
      0.064



In [130]:

    
dfA.to_csv("data/instance_price_stats.csv")

Analysis Using the MAX MEAN, and ondemand + cheapest prices



In [221]:

    
dfA = pd.read_csv("data/instance_price_stats.csv")
dfA.drop('Unnamed: 0', axis=1,inplace=True)



In [222]:

    
### DO analysis by subtracting the Mean & Max with the prices
#lets only look at the cheapest prices
dfA = dfA.drop('yrTerm1Standard.partialUpfront', axis=1).drop('yrTerm1Standard.noUpfront', axis=1).drop('yrTerm3Standard.partialUpfront',axis=1)
dfA = dfA.drop('yrTerm3Convertible.noUpfront',axis=1).drop('yrTerm3Convertible.partialUpfront',axis=1).drop('yrTerm3Convertible.allUpfront',axis=1)
#dfA.head(2)



In [223]:

    
dfMean = dfA.copy()
dfMax =  dfA.copy()

dfMean = dfMean.drop('Max', axis=1).drop('Min', axis=1).drop('STD', axis=1).drop('Median', axis=1)
dfMax = dfMax.drop('Mean', axis=1).drop('Min', axis=1).drop('STD', axis=1).drop('Median', axis=1)



In [224]:

    
dfMean.head(1)









    Out[224]:






  
    
      
      AvailabilityZone
      InstanceType
      Mean
      OnDemand
      yrTerm1Standard.allUpfront
      yrTerm3Standard.allUpfront
    
  
  
    
      0
      ap-southeast-2b
      r4.large
      0.072216
      0.16
      0.094
      0.06



In [225]:

    
dfMax.head(1)









    Out[225]:






  
    
      
      AvailabilityZone
      InstanceType
      Max
      OnDemand
      yrTerm1Standard.allUpfront
      yrTerm3Standard.allUpfront
    
  
  
    
      0
      ap-southeast-2b
      r4.large
      0.108
      0.16
      0.094
      0.06



In [226]:

    
def npCount(dfin):
    NPcounter={'odp':0, 'odn':0, '1p':0, '1n':0, '3p':0, '3n':0}
    for row in dfin['OnDemand']:
        if(row > 0):
            NPcounter['odp'] = NPcounter['odp'] +1
        else:
            NPcounter['odn'] = NPcounter['odn'] +1
    for row in dfin['yrTerm1Standard.allUpfront']:
        if(row > 0):
            NPcounter['1p'] = NPcounter['1p'] +1
        else:
            NPcounter['1n'] = NPcounter['1n'] +1
    for row in dfin['OnDemand']:
        if(row > 0):
            NPcounter['3p'] = NPcounter['3p'] +1
        else:
            NPcounter['3n'] = NPcounter['3n'] +1
    return NPcounter

How much money would it cost to only use spot instance?

Negative Values means it is cheaper to use that method, Positive Value indicates it is cheaper to use Spot Instance



In [227]:

    
dfMean['OnDemand'] = dfMean['OnDemand'].sub(dfMean['Mean'])
dfMean['yrTerm1Standard.allUpfront'] = dfMean['yrTerm1Standard.allUpfront'].sub(dfMean['Mean'])
dfMean['yrTerm3Standard.allUpfront'] = dfMean['yrTerm3Standard.allUpfront'].sub(dfMean['Mean'])
dfMean.head(1)









    Out[227]:






  
    
      
      AvailabilityZone
      InstanceType
      Mean
      OnDemand
      yrTerm1Standard.allUpfront
      yrTerm3Standard.allUpfront
    
  
  
    
      0
      ap-southeast-2b
      r4.large
      0.072216
      0.087784
      0.021784
      -0.012216



In [228]:

    
print("Median OnDemand "+ str(dfMean['OnDemand'].median()))
print("Median 1 Yr Upfront "+ str(dfMean['yrTerm1Standard.allUpfront'].median()))
print("Median 3 Yr Upfront "+ str(dfMean['yrTerm3Standard.allUpfront'].median()))
print("AVG OnDemand "+ str(dfMean['OnDemand'].mean()))
print("AVG 1 Yr Upfront "+ str(dfMean['yrTerm1Standard.allUpfront'].mean()))
print("AVG 3 Yr Upfront "+ str(dfMean['yrTerm3Standard.allUpfront'].mean()))
print("Min OnDemand "+ str(dfMean['OnDemand'].min()))
print("Min 1 Yr Upfront "+ str(dfMean['yrTerm1Standard.allUpfront'].min()))
print("Min 3 Yr Upfront "+ str(dfMean['yrTerm3Standard.allUpfront'].min()))
print("Max OnDemand "+ str(dfMean['OnDemand'].max()))
print("Max 1 Yr Upfront "+ str(dfMean['yrTerm1Standard.allUpfront'].max()))
print("Max 3 Yr Upfront "+ str(dfMean['yrTerm3Standard.allUpfront'].max()))
countRes = npCount(dfMean)
print("Positive OnDemand "+ str(countRes['odp']))
print("Negative OnDemand "+ str(countRes['odn']))
print("Positive 1 yr "+ str(countRes['1p']))
print("Negative 1 yr "+ str(countRes['1n']))
print("Positive 3 yr "+ str(countRes['3p']))
print("Negative 3 yr "+ str(countRes['3n']))









    



Median OnDemand 0.3969030627221103
Median 1 Yr Upfront 0.124961276125377
Median 3 Yr Upfront 0.007324014730859932
AVG OnDemand -0.4227119827733383
AVG 1 Yr Upfront -1.2808091320602544
AVG 3 Yr Upfront -1.7002691127738512
Min OnDemand -141.164719653
Min 1 Yr Upfront -146.775719653
Min 3 Yr Upfront -149.798719653
Max OnDemand 15.8683402304
Max 1 Yr Upfront 7.65034023045
Max 3 Yr Upfront 2.02622203225
Positive OnDemand 986
Negative OnDemand 311
Positive 1 yr 841
Negative 1 yr 456
Positive 3 yr 986
Negative 3 yr 311

Do Spot Instance ever go over the original price?

Negative values means its cheaper to use that option (money lost), Positve Values means its cheaper to use Spot Instances (Money gained)



In [229]:

    
dfMax['OnDemand'] = dfMax['OnDemand'].sub(dfMax['Max'])
dfMax['yrTerm1Standard.allUpfront'] = dfMax['yrTerm1Standard.allUpfront'].sub(dfMax['Max'])
dfMax['yrTerm3Standard.allUpfront'] = dfMax['yrTerm3Standard.allUpfront'].sub(dfMax['Max'])
dfMax.head(1)









    Out[229]:






  
    
      
      AvailabilityZone
      InstanceType
      Max
      OnDemand
      yrTerm1Standard.allUpfront
      yrTerm3Standard.allUpfront
    
  
  
    
      0
      ap-southeast-2b
      r4.large
      0.108
      0.052
      -0.014
      -0.048



In [230]:

    
print("MEDIAN OnDemand "+ str(dfMax['OnDemand'].median()))
print("MEDIAN 1 Yr Upfront "+ str(dfMax['yrTerm1Standard.allUpfront'].median()))
print("MEDIAN 3 Yr Upfront "+ str(dfMax['yrTerm3Standard.allUpfront'].median()))
print("AVG OnDemand "+ str(dfMax['OnDemand'].mean()))
print("AVG 1 Yr Upfront "+ str(dfMax['yrTerm1Standard.allUpfront'].mean()))
print("AVG 3 Yr Upfront "+ str(dfMax['yrTerm3Standard.allUpfront'].mean()))
print("MIN OnDemand "+ str(dfMax['OnDemand'].min()))
print("MIN 1 Yr Upfront "+ str(dfMax['yrTerm1Standard.allUpfront'].min()))
print("MIN 3 Yr Upfront "+ str(dfMax['yrTerm3Standard.allUpfront'].min()))
print("MAX OnDemand "+ str(dfMax['OnDemand'].max()))
print("MAX 1 Yr Upfront "+ str(dfMax['yrTerm1Standard.allUpfront'].max()))
print("MAX 3 Yr Upfront "+ str(dfMax['yrTerm3Standard.allUpfront'].max()))
countRes = npCount(dfMax)
print("Positive OnDemand "+ str(countRes['odp']))
print("Negative OnDemand "+ str(countRes['odn']))
print("Positive 1 yr "+ str(countRes['1p']))
print("Negative 1 yr "+ str(countRes['1n']))
print("Positive 3 yr "+ str(countRes['3p']))
print("Negative 3 yr "+ str(countRes['3n']))









    



MEDIAN OnDemand 0.0018422222222222243
MEDIAN 1 Yr Upfront -0.22749999999999998
MEDIAN 3 Yr Upfront -0.4948297872340438
AVG OnDemand -5.7695350078298615
AVG 1 Yr Upfront -6.625474195952593
AVG 3 Yr Upfront -7.044934176666191
MIN OnDemand -202.934
MIN 1 Yr Upfront -209.735
MIN 3 Yr Upfront -214.462
MAX OnDemand 10.9348
MAX 1 Yr Upfront 2.7168
MAX 3 Yr Upfront 1.05127068966
Positive OnDemand 531
Negative OnDemand 766
Positive 1 yr 221
Negative 1 yr 1076
Positive 3 yr 531
Negative 3 yr 766

DAILY TRENDS Mon, Tue, Wed, Thu, Fri, Sat, Sun



In [ ]:

	TimeStamp	AvailabilityZone	InstanceType	SpotPrice
0	2016-12-12 10:44:30	ap-northeast-1a	t1.micro	0.0135
1	2016-12-12 10:44:30	ap-northeast-1a	t1.micro	0.0035
2	2016-12-12 10:43:50	ap-northeast-1a	t1.micro	0.0134

	AvailabilityZone	InstanceType	SpotPrice
TimeStamp
2016-10-06 10:44:39	eu-west-1b	p2.16xlarge	184.96
2016-10-06 10:44:39	eu-west-1c	p2.16xlarge	184.96
2016-10-06 10:44:39	eu-west-1a	p2.16xlarge	184.96

	AvailabilityZone	InstanceType	SpotPrice
TimeStamp
2016-10-13	us-east-1e	m3.xlarge	0.1395
2016-10-13	ap-northeast-1a	r3.8xlarge	0.4891
2016-10-13	ap-northeast-1a	r3.8xlarge	0.5891

	ap-southeast-2b	ap-southeast-2c	eu-central-1a	eu-central-1b	eu-west-1a	eu-west-1b	eu-west-1c	us-east-1a	us-east-1c	us-east-1d	us-east-1e	us-west-1b	us-west-1c	us-west-2a	us-west-2b	us-west-2c
TimeStamp
2016-12-05 18:00:00	0.08	0.08	0.08	0.08	0.078	0.078	0.078	0.063000	0.063000	0.063000	0.063000	0.078	0.078	0.077	0.077	0.077
2016-12-05 19:00:00	0.08	0.08	0.08	0.08	0.078	0.078	0.078	0.077067	0.077067	0.081667	0.077267	0.078	0.078	0.077	0.077	0.077
2016-12-05 20:00:00	0.08	0.08	0.08	0.08	0.078	0.078	0.078	0.077033	0.077033	0.097500	0.077133	0.078	0.078	0.077	0.077	0.077

	AvailabilityZone	InstanceType	OnDemand	yrTerm1Standard.allUpfront	yrTerm1Standard.noUpfront	yrTerm1Standard.partialUpfront	yrTerm3Convertible.allUpfront	yrTerm3Convertible.noUpfront	yrTerm3Convertible.partialUpfront	yrTerm3Standard.allUpfront	yrTerm3Standard.partialUpfront
1	us-west-2	t1.micro	0.020	0.012	0.014	0.012	NaN	NaN	NaN	0.008	0.009
2	us-west-1	t1.micro	0.025	0.015	0.017	0.015	NaN	NaN	NaN	0.011	0.012
3	eu-west-1	t1.micro	0.020	0.015	0.016	0.015	NaN	NaN	NaN	0.011	0.012

	AvailabilityZone	InstanceType	Max	Mean	Median	Min	STD	OnDemand	yrTerm1Standard.allUpfront	yrTerm1Standard.noUpfront	yrTerm1Standard.partialUpfront	yrTerm3Convertible.allUpfront	yrTerm3Convertible.noUpfront	yrTerm3Convertible.partialUpfront	yrTerm3Standard.allUpfront	yrTerm3Standard.partialUpfront
0	ap-southeast-2b	r4.large	0.108	0.072216	0.0661	0.066	0.013839	0.16	0.094	0.112	0.096	0.084	0.101	0.086	0.06	0.064
1	ap-southeast-2c	r4.large	0.108	0.072179	0.0661	0.066	0.013854	0.16	0.094	0.112	0.096	0.084	0.101	0.086	0.06	0.064
2	eu-central-1a	r4.large	0.108	0.068157	0.0662	0.066	0.007208	0.16	0.094	0.112	0.096	0.085	0.101	0.086	0.06	0.064