Residential Primary Energy Consumption, by Year and Fuel Type 2009

Following The Data Analysis in Python with Pandas Video https://www.youtube.com/watch?v=w26x-z-BdWQ And other resources



In [1]:

    
from pandas import *
#turn off html
#pandas.set_option(notebook_repr_html=False)
#pandas.set_printoptions(...) is deprecated. Instead, use pandas.set_option.
#figure this out later

#still broken
#pandas.set_option(max_rows=5000)
#also broken.

import pandas
pandas.__version__

#NnOTpandas.set_printoptions(notebook_repr_html=False)  # turn off html, i.e., without table.
#NOTpandas.set_printoptions(max_rows=500)   # getting the summary of the view versus getting everything. Terminal takes time to buffer.

import matplotlib.pyplot as plt
%matplotlib inline



In [2]:

    
import pandas
pandas.__version__









    Out[2]:





'0.14.1'



In [3]:

    
Res_Primary_E_Consumption = read_csv('ResPrimaryEnergyConsumptionBtu2.csv')
#Res_Primary_E_Consumption = read_csv('Res Primary Energy Consumption Btu.csv')



In [5]:

    
# Drop a column-- for previous file
#http://stackoverflow.com/questions/13411544/delete-column-from-pandas-dataframe
# DF = DF.drop('column_name', 1)
#Res_Primary_E_Consumption = Res_Primary_E_Consumption.drop('Unnamed: 11', 1)
# how to do this permanently? put it with the open file command?



In [6]:

    
#Res_Primary_E_Consumption.head(10)
#locking up. can't scroll.
# I hate technology.
# remove the html?



In [7]:

    
#Res_Primary_E_Consumption.tail(10)



In [8]:

    
# names
#why not showing the summary info as his first example did?
#names.head()
#names[names.year == 1880].tail()
#boolean array in numpy-- scalar operation.
#why not showing the summary info as his first example did?
#boys = names[names.sex == 'boy']
#girls = names[names.sex == 'girl']



In [4]:

    
#names.groupby(['sex','year']).size()
#names.groupby(['year','sex']).size().ix[2000]
Res_Primary_E_Consumption.groupby(['Year','NaturalGas']).size().ix[2000]
#a hierarchical index
# a series.









    Out[4]:





NaturalGas
5.07          1
dtype: int64



In [5]:

    
#names.groupby(['sex','year']).size()
#names.groupby(['year','sex']).size().ix[2000]
Res_Primary_E_Consumption.groupby(['Year','NaturalGas']).size().ix[1980]
#a hierarchical index
# a series.









    Out[5]:





NaturalGas
4.79          1
dtype: int64



In [6]:

    
#names.groupby(['sex','year']).size()
#names.groupby(['year','sex']).size().ix[2000]
Res_Primary_E_Consumption.groupby(['Year','NaturalGas_perc']).size().ix[1980]
#a hierarchical index
# a series. 

#this variable name works. and it seems to accept the highest percent 30%.
# but it seems to have a problem with the datatype later.









    Out[6]:





NaturalGas_perc
30%                1
dtype: int64



In [109]:

    
#boys[boys.year== 2000].prop.idxmax()
# gives the index of the maximum value: idmax
#boys.ix[boys[boys.year== 2000].prop.idxmax()]
# for the whole row
Res_Primary_E_Consumption.ix[Res_Primary_E_Consumption[Res_Primary_E_Consumption.Year== 2000].TOTAL.idxmax()]









    Out[109]:





Year                       2000
NaturalGas                 5.07
NaturalGas_perc             25%
Petroleum                  1.52
Petroleum_perc               7%
Coal                       0.01
Coal_perc                    0%
Renewable                  0.49
Renewable_perc               2%
Electricity_Sales          4.07
Electricity_Losses          9.2
Electricity_Total         13.27
Electricity_Total_perc      65%
TOTAL                     20.36
TOTAL_perc                 100%
Change_since2010              -
Name: 20, dtype: object



In [7]:

    
def get_max_record(group):
    return group.ix[group.TOTAL.idxmax()]
get_max_record(Res_Primary_E_Consumption)









    Out[7]:





Year                       2035
NaturalGas                 4.76
NaturalGas_perc             20%
Petroleum                  0.87
Petroleum_perc               4%
Coal                       0.01
Coal_perc                    0%
Renewable                  0.55
Renewable_perc               2%
Electricity_Sales          5.94
Electricity_Losses        11.56
Electricity_Total          17.5
Electricity_Total_perc      74%
TOTAL                     23.69
TOTAL_perc                 100%
Change_since2010           0.3%
Name: 55, dtype: object



In [13]:



In [14]:

    
#Rename a column
# http://stackoverflow.com/questions/11346283/renaming-columns-in-pandas 

# df.columns.values[0]='XX' 
#Res_Primary_E_Consumption.columns.values[1]='NaturalGas'



In [15]:

    
#Rename a column
# http://stackoverflow.com/questions/11346283/renaming-columns-in-pandas 

# df.columns.values[0]='XX' 
#Res_Primary_E_Consumption.columns.values[-3]='MYTOTAL'



In [16]:

    
#Rename a column
# http://stackoverflow.com/questions/11346283/renaming-columns-in-pandas 

# df.columns.values[0]='XX' 
#Res_Primary_E_Consumption.columns.values[2]='NaturalGas_perc'



In [17]:

    
#Rename a column
# http://stackoverflow.com/questions/11346283/renaming-columns-in-pandas 

# df.columns.values[0]='XX' 
#Res_Primary_E_Consumption.columns.values[15]='Growth Rate(2010-Year)'



In [17]:



In [1]:

    
# split by year-- proportion in each year and stack them.
# 
#def get_max_record(group):
 #   return group.ix[group.prop.idxmax()]
#result =boys.groupby('year').apply(get_max_record)     
# my full data is not showing due to html, which I cannot remove.
# diversity of data going up.

def get_max_record(group):
    return group.ix[group.TOTAL.idxmax()]
result =Res_Primary_E_Consumption.groupby('Year').apply(get_max_record)   

#why am I getting this error? The TOTAL column has been renamed to YESTOTAL.
#AttributeError: 'DataFrame' object has no attribute 'YESTOTAL'
#YESTOTAL works beow with result.

#no error when returned to TOTAL.









    



---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-1-dcd6aec99d2e> in <module>()
      9 def get_max_record(group):
     10     return group.ix[group.TOTAL.idxmax()]
---> 11 result =Res_Primary_E_Consumption.groupby('Year').apply(get_max_record)
     12 
     13 #why am I getting this error? The TOTAL column has been renamed to YESTOTAL.

NameError: name 'Res_Primary_E_Consumption' is not defined



In [9]:

    
# split by year-- proportion in each year and stack them.
# 
#def get_max_record(group):
 #   return group.ix[group.prop.idxmax()]
#result =boys.groupby('year').apply(get_max_record)     
# my full data is not showing due to html, which I cannot remove.
# diversity of data going up.

def get_max_record(group):
    return group.ix[group.NaturalGas.idxmax()]
NG_result =Res_Primary_E_Consumption.groupby('Year').apply(get_max_record)



In [21]:

    
# split by year-- proportion in each year and stack them.
# 
#def get_max_record(group):
 #   return group.ix[group.prop.idxmax()]
#result =boys.groupby('year').apply(get_max_record)     
# my full data is not showing due to html, which I cannot remove.
# diversity of data going up.

# NaturalGas
##def get_max_record(group):
  #  return group.ix[group.NaturalGas_perc.idxmax()]
##    return group.ix[group.Renewable_perc.idxmax()]
##NG_result =Res_Primary_E_Consumption.groupby('Year').apply(get_max_record)     

#Renewable_perc
#    return group.ix[group.NaturalGas.idxmax()]
# ??? AttributeError: 'DataFrame' object has no attribute 'NaturalGas'
# Why Not? It has TOTAL

#    return group.ix[group.NaturalGas_perc.idxmax()]
#ValueError: invalid literal for float(): 30%
# does it have this. but is it yeilding an error on this?

Total change over time



In [18]:

    
result.TOTAL









    Out[18]:





Year
1980    15.72
1981    15.23
1982    15.48
1983    15.38
1984    15.90
1985    16.02
1986    15.94
1987    16.21
1988    17.12
1989    17.76
1990    16.91
1991    17.37
1992    17.31
1993    18.18
1994    18.07
1995    18.49
1996    19.47
1997    18.93
1998    18.92
1999    19.52
2000    20.36
2001    20.00
2002    20.74
2003    21.06
2004    21.05
2005    21.58
2006    20.64
2007    21.49
2008    21.53
2009    20.99
2010    22.07
2011    22.10
2012    21.77
2013    21.26
2014    21.15
2015    21.06
2016    21.13
2017    21.28
2018    21.41
2019    21.56
2020    21.66
2021    21.77
2022    21.89
2023    22.04
2024    22.19
2025    22.39
2026    22.51
2027    22.65
2028    22.78
2029    22.91
2030    23.06
2031    23.19
2032    23.30
2033    23.43
2034    23.56
2035    23.69
Name: TOTAL, Length: 56, dtype: float64



In [19]:

    
NG_result.NaturalGas









    Out[19]:





Year
1980    4.79
1981    4.57
1982    4.68
1983    4.45
1984    4.64
1985    4.51
1986    4.38
1987    4.40
1988    4.72
1989    4.88
1990    4.47
1991    4.64
1992    4.78
1993    5.05
1994    4.94
1995    4.94
1996    5.35
1997    5.09
1998    4.64
1999    4.82
2000    5.07
2001    4.87
2002    4.98
2003    5.20
2004    4.97
2005    4.94
2006    4.46
2007    4.83
2008    5.00
2009    4.88
2010    5.06
2011    5.05
2012    5.17
2013    4.99
2014    5.00
2015    4.99
2016    4.98
2017    4.98
2018    4.96
2019    4.95
2020    4.95
2021    4.93
2022    4.92
2023    4.90
2024    4.89
2025    4.88
2026    4.87
2027    4.86
2028    4.85
2029    4.85
2030    4.84
2031    4.83
2032    4.81
2033    4.79
2034    4.78
2035    4.76
Name: NaturalGas, Length: 56, dtype: float64



In [13]:

    
# Regression Test-- Should be R^2= 1.00 since the independent variables add to TOTAL.
#df name: Res_Primary_E_Consumption
#http://allendowney.blogspot.com/
# from http://allendowney.blogspot.com/
#NaturalGas Petroleum TOTAL
import statsmodels.formula.api as smf
model = smf.ols('TOTAL ~ NaturalGas + Petroleum + Renewable + Electricity_Total', data=Res_Primary_E_Consumption)
results = model.fit()
results.summary()









    Out[13]:





OLS Regression Results

  Dep. Variable:           TOTAL         R-squared:             1.000 


  Model:                    OLS          Adj. R-squared:        1.000 


  Method:              Least Squares     F-statistic:        1.510e+06


  Date:              Tue, 28 Oct 2014    Prob (F-statistic):  1.11e-128


  Time:                  13:21:09        Log-Likelihood:       196.34 


  No. Observations:           56         AIC:                  -382.7 


  Df Residuals:               51         BIC:                  -372.6 


  Df Model:                    4                                      




                       coef      std err       t       P>|t|  [95.0% Conf. Int.] 


  Intercept              0.0365      0.041      0.897   0.374     -0.045     0.118


  NaturalGas             0.9912      0.007    148.044   0.000      0.978     1.005


  Petroleum              1.0144      0.010    106.295   0.000      0.995     1.034


  Renewable              1.0358      0.012     88.663   0.000      1.012     1.059


  Electricity_Total      0.9986      0.001    875.085   0.000      0.996     1.001




  Omnibus:         1.606    Durbin-Watson:         1.869


  Prob(Omnibus):   0.448    Jarque-Bera (JB):      1.333


  Skew:            0.199    Prob(JB):              0.513


  Kurtosis:        2.357    Cond. No.               607.



In [14]:

    
result.TOTAL.plot()
# linear increase since modeled after 2010.
# but hits later









    Out[14]:





<matplotlib.axes.AxesSubplot at 0x7fdccd9ee710>



In [15]:

    
result.NaturalGas.plot()
# linear increase since modeled after 2010.
# but hits later









    Out[15]:





<matplotlib.axes.AxesSubplot at 0x7fdcf0b46390>



In [16]:

    
result[:50]









    Out[16]:






  
    
      
      Year
      NaturalGas
      NaturalGas_perc
      Petroleum
      Petroleum_perc
      Coal
      Coal_perc
      Renewable
      Renewable_perc
      Electricity_Sales
      Electricity_Losses
      Electricity_Total
      Electricity_Total_perc
      TOTAL
      TOTAL_perc
      Change_since2010
    
    
      Year
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
    
  
  
    
      1980
       1980
       4.79
       30%
       1.72
       11%
       0.03
       0%
       0.85
       5%
       2.45
        5.89
        8.33
       53%
       15.72
       100%
           -
    
    
      1981
       1981
       4.57
       30%
       1.52
       10%
       0.03
       0%
       0.87
       6%
       2.46
        5.77
        8.24
       54%
       15.23
       100%
           -
    
    
      1982
       1982
       4.68
       30%
       1.42
        9%
       0.03
       0%
       0.97
       6%
       2.49
        5.89
        8.38
       54%
       15.48
       100%
           -
    
    
      1983
       1983
       4.45
       29%
       1.33
        9%
       0.03
       0%
       0.97
       6%
       2.56
        6.03
        8.59
       56%
       15.38
       100%
           -
    
    
      1984
       1984
       4.64
       29%
       1.51
       10%
       0.04
       0%
       0.98
       6%
       2.66
        6.07
        8.73
       55%
       15.90
       100%
           -
    
    
      1985
       1985
       4.51
       28%
       1.55
       10%
       0.04
       0%
       1.01
       6%
       2.71
        6.21
        8.92
       56%
       16.02
       100%
           -
    
    
      1986
       1986
       4.38
       28%
       1.52
       10%
       0.04
       0%
       0.92
       6%
       2.79
        6.27
        9.07
       57%
       15.94
       100%
           -
    
    
      1987
       1987
       4.40
       27%
       1.60
       10%
       0.04
       0%
       0.85
       5%
       2.90
        6.42
        9.32
       58%
       16.21
       100%
           -
    
    
      1988
       1988
       4.72
       28%
       1.66
       10%
       0.04
       0%
       0.91
       5%
       3.05
        6.75
        9.79
       57%
       17.12
       100%
           -
    
    
      1989
       1989
       4.88
       28%
       1.64
        9%
       0.03
       0%
       0.97
       5%
       3.09
        7.13
       10.22
       58%
       17.76
       100%
           -
    
    
      1990
       1990
       4.47
       26%
       1.37
        8%
       0.03
       0%
       0.64
       4%
       3.15
        7.24
       10.39
       61%
       16.91
       100%
           -
    
    
      1991
       1991
       4.64
       27%
       1.36
        8%
       0.03
       0%
       0.67
       4%
       3.26
        7.42
       10.68
       61%
       17.37
       100%
           -
    
    
      1992
       1992
       4.78
       28%
       1.39
        8%
       0.03
       0%
       0.70
       4%
       3.19
        7.22
       10.41
       60%
       17.31
       100%
           -
    
    
      1993
       1993
       5.05
       28%
       1.42
        8%
       0.03
       0%
       0.61
       3%
       3.39
        7.68
       11.08
       61%
       18.18
       100%
           -
    
    
      1994
       1994
       4.94
       27%
       1.39
        8%
       0.02
       0%
       0.58
       3%
       3.44
        7.70
       11.14
       62%
       18.07
       100%
           -
    
    
      1995
       1995
       4.94
       27%
       1.35
        7%
       0.02
       0%
       0.58
       3%
       3.56
        8.03
       11.59
       63%
       18.49
       100%
           -
    
    
      1996
       1996
       5.35
       27%
       1.46
        7%
       0.02
       0%
       0.60
       3%
       3.69
        8.35
       12.04
       62%
       19.47
       100%
           -
    
    
      1997
       1997
       5.09
       27%
       1.40
        7%
       0.02
       0%
       0.49
       3%
       3.67
        8.27
       11.94
       63%
       18.93
       100%
           -
    
    
      1998
       1998
       4.64
       25%
       1.28
        7%
       0.01
       0%
       0.44
       2%
       3.86
        8.69
       12.55
       66%
       18.92
       100%
           -
    
    
      1999
       1999
       4.82
       25%
       1.43
        7%
       0.01
       0%
       0.46
       2%
       3.91
        8.88
       12.79
       66%
       19.52
       100%
           -
    
    
      2000
       2000
       5.07
       25%
       1.52
        7%
       0.01
       0%
       0.49
       2%
       4.07
        9.20
       13.27
       65%
       20.36
       100%
           -
    
    
      2001
       2001
       4.87
       24%
       1.50
        7%
       0.01
       0%
       0.43
       2%
       4.10
        9.08
       13.18
       66%
       20.00
       100%
           -
    
    
      2002
       2002
       4.98
       24%
       1.43
        7%
       0.01
       0%
       0.44
       2%
       4.32
        9.57
       13.89
       67%
       20.74
       100%
           -
    
    
      2003
       2003
       5.20
       25%
       1.49
        7%
       0.01
       0%
       0.46
       2%
       4.35
        9.55
       13.90
       66%
       21.06
       100%
           -
    
    
      2004
       2004
       4.97
       24%
       1.49
        7%
       0.01
       0%
       0.47
       2%
       4.41
        9.70
       14.10
       67%
       21.05
       100%
           -
    
    
      2005
       2005
       4.94
       23%
       1.42
        7%
       0.01
       0%
       0.49
       2%
       4.64
       10.08
       14.72
       68%
       21.58
       100%
           -
    
    
      2006
       2006
       4.46
       22%
       1.20
        6%
       0.01
       0%
       0.45
       2%
       4.61
        9.91
       14.53
       70%
       20.64
       100%
           -
    
    
      2007
       2007
       4.83
       22%
       1.22
        6%
       0.01
       0%
       0.50
       2%
       4.75
       10.19
       14.94
       69%
       21.49
       100%
           -
    
    
      2008
       2008
       5.00
       23%
       1.21
        6%
       0.01
       0%
       0.53
       2%
       4.71
       10.08
       14.79
       69%
       21.53
       100%
           -
    
    
      2009
       2009
       4.88
       23%
       1.14
        5%
       0.01
       0%
       0.52
       2%
       4.66
        9.80
       14.45
       69%
       20.99
       100%
           -
    
    
      2010
       2010
       5.06
       23%
       1.22
        6%
       0.01
       0%
       0.45
       2%
       4.95
       10.39
       15.34
       69%
       22.07
       100%
           -
    
    
      2011
       2011
       5.05
       23%
       1.19
        5%
       0.01
       0%
       0.47
       2%
       4.94
       10.45
       15.39
       70%
       22.10
       100%
        0.1%
    
    
      2012
       2012
       5.17
       24%
       1.19
        5%
       0.01
       0%
       0.50
       2%
       4.84
       10.07
       14.91
       68%
       21.77
       100%
       -0.7%
    
    
      2013
       2013
       4.99
       23%
       1.13
        5%
       0.01
       0%
       0.48
       2%
       4.79
        9.86
       14.66
       69%
       21.26
       100%
       -1.2%
    
    
      2014
       2014
       5.00
       24%
       1.11
        5%
       0.01
       0%
       0.49
       2%
       4.79
        9.75
       14.54
       69%
       21.15
       100%
       -1.1%
    
    
      2015
       2015
       4.99
       24%
       1.08
        5%
       0.01
       0%
       0.51
       2%
       4.79
        9.68
       14.47
       69%
       21.06
       100%
       -0.9%
    
    
      2016
       2016
       4.98
       24%
       1.06
        5%
       0.01
       0%
       0.53
       2%
       4.82
        9.72
       14.54
       69%
       21.13
       100%
       -0.7%
    
    
      2017
       2017
       4.98
       23%
       1.05
        5%
       0.01
       0%
       0.53
       2%
       4.87
        9.85
       14.72
       69%
       21.28
       100%
       -0.5%
    
    
      2018
       2018
       4.96
       23%
       1.03
        5%
       0.01
       0%
       0.53
       2%
       4.93
        9.95
       14.88
       69%
       21.41
       100%
       -0.4%
    
    
      2019
       2019
       4.95
       23%
       1.02
        5%
       0.01
       0%
       0.53
       2%
       4.98
       10.06
       15.05
       70%
       21.56
       100%
       -0.3%
    
    
      2020
       2020
       4.95
       23%
       1.01
        5%
       0.01
       0%
       0.54
       2%
       5.02
       10.15
       15.17
       70%
       21.66
       100%
       -0.2%
    
    
      2021
       2021
       4.93
       23%
       1.00
        5%
       0.01
       0%
       0.54
       2%
       5.07
       10.22
       15.29
       70%
       21.77
       100%
       -0.1%
    
    
      2022
       2022
       4.92
       22%
       0.99
        5%
       0.01
       0%
       0.54
       2%
       5.13
       10.31
       15.44
       71%
       21.89
       100%
       -0.1%
    
    
      2023
       2023
       4.90
       22%
       0.98
        4%
       0.01
       0%
       0.54
       2%
       5.18
       10.44
       15.62
       71%
       22.04
       100%
        0.0%
    
    
      2024
       2024
       4.89
       22%
       0.97
        4%
       0.01
       0%
       0.54
       2%
       5.24
       10.55
       15.79
       71%
       22.19
       100%
        0.0%
    
    
      2025
       2025
       4.88
       22%
       0.95
        4%
       0.01
       0%
       0.54
       2%
       5.30
       10.70
       16.00
       71%
       22.39
       100%
        0.1%
    
    
      2026
       2026
       4.87
       22%
       0.95
        4%
       0.01
       0%
       0.54
       2%
       5.37
       10.78
       16.14
       72%
       22.51
       100%
        0.1%
    
    
      2027
       2027
       4.86
       21%
       0.94
        4%
       0.01
       0%
       0.54
       2%
       5.43
       10.88
       16.31
       72%
       22.65
       100%
        0.2%
    
    
      2028
       2028
       4.85
       21%
       0.93
        4%
       0.01
       0%
       0.54
       2%
       5.49
       10.96
       16.45
       72%
       22.78
       100%
        0.2%
    
    
      2029
       2029
       4.85
       21%
       0.92
        4%
       0.01
       0%
       0.55
       2%
       5.56
       11.03
       16.59
       72%
       22.91
       100%
        0.2%



In [17]:

    
# df.[variablename].cumsum()

Res_Primary_E_Consumption.TOTAL.cumsum()[:130]
#first 130









    Out[17]:





0       15.72
1       30.95
2       46.43
3       61.81
4       77.71
5       93.73
6      109.67
7      125.88
8      143.00
9      160.76
10     177.67
11     195.04
12     212.35
13     230.53
14     248.60
15     267.09
16     286.56
17     305.49
18     324.41
19     343.93
20     364.29
21     384.29
22     405.03
23     426.09
24     447.14
25     468.72
26     489.36
27     510.85
28     532.38
29     553.37
30     575.44
31     597.54
32     619.31
33     640.57
34     661.72
35     682.78
36     703.91
37     725.19
38     746.60
39     768.16
40     789.82
41     811.59
42     833.48
43     855.52
44     877.71
45     900.10
46     922.61
47     945.26
48     968.04
49     990.95
50    1014.01
51    1037.20
52    1060.50
53    1083.93
54    1107.49
55    1131.18
Name: TOTAL, Length: 56, dtype: float64



In [ ]:



In [20]:

    
TestData2 = read_csv('TestData2.csv')



In [21]:

    
# pandas join function is merge
#merge?
merge(Res_Primary_E_Consumption, TestData2, on= [ 'Year'])
#simple join on year 
#inner join is default









    Out[21]:






  
    
      
      Year
      NaturalGas
      NaturalGas_perc
      Petroleum
      Petroleum_perc
      Coal
      Coal_perc
      Renewable
      Renewable_perc
      Electricity_Sales
      Electricity_Losses
      Electricity_Total
      Electricity_Total_perc
      TOTAL
      TOTAL_perc
      Change_since2010
      Col1
      Col2
      Col3
    
  
  
    
      0 
       1980
       4.79
       30%
       1.72
       11%
       0.03
       0%
       0.85
       5%
       2.45
        5.89
        8.33
       53%
       15.72
       100%
           -
        1
       0.229849
        1
    
    
      1 
       1981
       4.57
       30%
       1.52
       10%
       0.03
       0%
       0.87
       6%
       2.46
        5.77
        8.24
       54%
       15.23
       100%
           -
        2
       0.003063
        2
    
    
      2 
       1982
       4.68
       30%
       1.42
        9%
       0.03
       0%
       0.97
       6%
       2.49
        5.89
        8.38
       54%
       15.48
       100%
           -
        3
       0.418250
        1
    
    
      3 
       1983
       4.45
       29%
       1.33
        9%
       0.03
       0%
       0.97
       6%
       2.56
        6.03
        8.59
       56%
       15.38
       100%
           -
        4
       0.689310
        2
    
    
      4 
       1984
       4.64
       29%
       1.51
       10%
       0.04
       0%
       0.98
       6%
       2.66
        6.07
        8.73
       55%
       15.90
       100%
           -
        5
       0.432489
        3
    
    
      5 
       1985
       4.51
       28%
       1.55
       10%
       0.04
       0%
       1.01
       6%
       2.71
        6.21
        8.92
       56%
       16.02
       100%
           -
        6
       0.987474
        4
    
    
      6 
       1986
       4.38
       28%
       1.52
       10%
       0.04
       0%
       0.92
       6%
       2.79
        6.27
        9.07
       57%
       15.94
       100%
           -
        7
       0.209367
        5
    
    
      7 
       1987
       4.40
       27%
       1.60
       10%
       0.04
       0%
       0.85
       5%
       2.90
        6.42
        9.32
       58%
       16.21
       100%
           -
        8
       0.582228
        6
    
    
      8 
       1988
       4.72
       28%
       1.66
       10%
       0.04
       0%
       0.91
       5%
       3.05
        6.75
        9.79
       57%
       17.12
       100%
           -
        9
       0.638592
        7
    
    
      9 
       1989
       4.88
       28%
       1.64
        9%
       0.03
       0%
       0.97
       5%
       3.09
        7.13
       10.22
       58%
       17.76
       100%
           -
       10
       0.286306
        8
    
    
      10
       1990
       4.47
       26%
       1.37
        8%
       0.03
       0%
       0.64
       4%
       3.15
        7.24
       10.39
       61%
       16.91
       100%
           -
       11
       0.359775
        9
    
    
      11
       1991
       4.64
       27%
       1.36
        8%
       0.03
       0%
       0.67
       4%
       3.26
        7.42
       10.68
       61%
       17.37
       100%
           -
       12
       0.130956
       10
    
    
      12
       1992
       4.78
       28%
       1.39
        8%
       0.03
       0%
       0.70
       4%
       3.19
        7.22
       10.41
       60%
       17.31
       100%
           -
       13
       0.323788
       11
    
    
      13
       1993
       5.05
       28%
       1.42
        8%
       0.03
       0%
       0.61
       3%
       3.39
        7.68
       11.08
       61%
       18.18
       100%
           -
       14
       0.053721
       12
    
    
      14
       1994
       4.94
       27%
       1.39
        8%
       0.02
       0%
       0.58
       3%
       3.44
        7.70
       11.14
       62%
       18.07
       100%
           -
       15
       0.663227
       13
    
    
      15
       1995
       4.94
       27%
       1.35
        7%
       0.02
       0%
       0.58
       3%
       3.56
        8.03
       11.59
       63%
       18.49
       100%
           -
       16
       0.633252
       14
    
    
      16
       1996
       5.35
       27%
       1.46
        7%
       0.02
       0%
       0.60
       3%
       3.69
        8.35
       12.04
       62%
       19.47
       100%
           -
       17
       0.891918
       15
    
    
      17
       1997
       5.09
       27%
       1.40
        7%
       0.02
       0%
       0.49
       3%
       3.67
        8.27
       11.94
       63%
       18.93
       100%
           -
       18
       0.495942
       16
    
    
      18
       1998
       4.64
       25%
       1.28
        7%
       0.01
       0%
       0.44
       2%
       3.86
        8.69
       12.55
       66%
       18.92
       100%
           -
       19
       0.811847
       17
    
    
      19
       1999
       4.82
       25%
       1.43
        7%
       0.01
       0%
       0.46
       2%
       3.91
        8.88
       12.79
       66%
       19.52
       100%
           -
       20
       0.609916
       18
    
    
      20
       2000
       5.07
       25%
       1.52
        7%
       0.01
       0%
       0.49
       2%
       4.07
        9.20
       13.27
       65%
       20.36
       100%
           -
       21
       0.643762
       19
    
    
      21
       2001
       4.87
       24%
       1.50
        7%
       0.01
       0%
       0.43
       2%
       4.10
        9.08
       13.18
       66%
       20.00
       100%
           -
       22
       0.505611
       20
    
    
      22
       2002
       4.98
       24%
       1.43
        7%
       0.01
       0%
       0.44
       2%
       4.32
        9.57
       13.89
       67%
       20.74
       100%
           -
       23
       0.007572
       21
    
    
      23
       2003
       5.20
       25%
       1.49
        7%
       0.01
       0%
       0.46
       2%
       4.35
        9.55
       13.90
       66%
       21.06
       100%
           -
       24
       0.336711
       22
    
    
      24
       2004
       4.97
       24%
       1.49
        7%
       0.01
       0%
       0.47
       2%
       4.41
        9.70
       14.10
       67%
       21.05
       100%
           -
       25
       0.560270
       23
    
    
      25
       2005
       4.94
       23%
       1.42
        7%
       0.01
       0%
       0.49
       2%
       4.64
       10.08
       14.72
       68%
       21.58
       100%
           -
       26
       0.925150
       24
    
    
      26
       2006
       4.46
       22%
       1.20
        6%
       0.01
       0%
       0.45
       2%
       4.61
        9.91
       14.53
       70%
       20.64
       100%
           -
       27
       0.843730
       25
    
    
      27
       2007
       4.83
       22%
       1.22
        6%
       0.01
       0%
       0.50
       2%
       4.75
       10.19
       14.94
       69%
       21.49
       100%
           -
       28
       0.308464
       26
    
    
      28
       2008
       5.00
       23%
       1.21
        6%
       0.01
       0%
       0.53
       2%
       4.71
       10.08
       14.79
       69%
       21.53
       100%
           -
       29
       0.446437
       27
    
    
      29
       2009
       4.88
       23%
       1.14
        5%
       0.01
       0%
       0.52
       2%
       4.66
        9.80
       14.45
       69%
       20.99
       100%
           -
       30
       0.866504
       28
    
    
      30
       2010
       5.06
       23%
       1.22
        6%
       0.01
       0%
       0.45
       2%
       4.95
       10.39
       15.34
       69%
       22.07
       100%
           -
       31
       0.881132
       29
    
    
      31
       2011
       5.05
       23%
       1.19
        5%
       0.01
       0%
       0.47
       2%
       4.94
       10.45
       15.39
       70%
       22.10
       100%
        0.1%
       32
       0.347649
       30
    
    
      32
       2012
       5.17
       24%
       1.19
        5%
       0.01
       0%
       0.50
       2%
       4.84
       10.07
       14.91
       68%
       21.77
       100%
       -0.7%
       33
       0.285232
       31
    
    
      33
       2013
       4.99
       23%
       1.13
        5%
       0.01
       0%
       0.48
       2%
       4.79
        9.86
       14.66
       69%
       21.26
       100%
       -1.2%
       34
       0.429977
       32
    
    
      34
       2014
       5.00
       24%
       1.11
        5%
       0.01
       0%
       0.49
       2%
       4.79
        9.75
       14.54
       69%
       21.15
       100%
       -1.1%
       35
       0.834912
       33
    
    
      35
       2015
       4.99
       24%
       1.08
        5%
       0.01
       0%
       0.51
       2%
       4.79
        9.68
       14.47
       69%
       21.06
       100%
       -0.9%
       36
       0.851379
       34
    
    
      36
       2016
       4.98
       24%
       1.06
        5%
       0.01
       0%
       0.53
       2%
       4.82
        9.72
       14.54
       69%
       21.13
       100%
       -0.7%
       37
       0.573889
       35
    
    
      37
       2017
       4.98
       23%
       1.05
        5%
       0.01
       0%
       0.53
       2%
       4.87
        9.85
       14.72
       69%
       21.28
       100%
       -0.5%
       38
       0.035307
       36
    
    
      38
       2018
       4.96
       23%
       1.03
        5%
       0.01
       0%
       0.53
       2%
       4.93
        9.95
       14.88
       69%
       21.41
       100%
       -0.4%
       39
       0.759066
       37
    
    
      39
       2019
       4.95
       23%
       1.02
        5%
       0.01
       0%
       0.53
       2%
       4.98
       10.06
       15.05
       70%
       21.56
       100%
       -0.3%
       40
       0.211777
       38
    
    
      40
       2020
       4.95
       23%
       1.01
        5%
       0.01
       0%
       0.54
       2%
       5.02
       10.15
       15.17
       70%
       21.66
       100%
       -0.2%
       41
       0.641565
       39
    
    
      41
       2021
       4.93
       23%
       1.00
        5%
       0.01
       0%
       0.54
       2%
       5.07
       10.22
       15.29
       70%
       21.77
       100%
       -0.1%
       42
       0.537387
       40
    
    
      42
       2022
       4.92
       22%
       0.99
        5%
       0.01
       0%
       0.54
       2%
       5.13
       10.31
       15.44
       71%
       21.89
       100%
       -0.1%
       43
       0.947203
       41
    
    
      43
       2023
       4.90
       22%
       0.98
        4%
       0.01
       0%
       0.54
       2%
       5.18
       10.44
       15.62
       71%
       22.04
       100%
        0.0%
       44
       0.878931
       42
    
    
      44
       2024
       4.89
       22%
       0.97
        4%
       0.01
       0%
       0.54
       2%
       5.24
       10.55
       15.79
       71%
       22.19
       100%
        0.0%
       45
       0.344067
       43
    
    
      45
       2025
       4.88
       22%
       0.95
        4%
       0.01
       0%
       0.54
       2%
       5.30
       10.70
       16.00
       71%
       22.39
       100%
        0.1%
       46
       0.559808
       44
    
    
      46
       2026
       4.87
       22%
       0.95
        4%
       0.01
       0%
       0.54
       2%
       5.37
       10.78
       16.14
       72%
       22.51
       100%
        0.1%
       47
       0.088891
       45
    
    
      47
       2027
       4.86
       21%
       0.94
        4%
       0.01
       0%
       0.54
       2%
       5.43
       10.88
       16.31
       72%
       22.65
       100%
        0.2%
       48
       0.551840
       46
    
    
      48
       2028
       4.85
       21%
       0.93
        4%
       0.01
       0%
       0.54
       2%
       5.49
       10.96
       16.45
       72%
       22.78
       100%
        0.2%
       49
       0.249141
       47
    
    
      49
       2029
       4.85
       21%
       0.92
        4%
       0.01
       0%
       0.55
       2%
       5.56
       11.03
       16.59
       72%
       22.91
       100%
        0.2%
       50
       0.014454
       48
    
    
      50
       2030
       4.84
       21%
       0.91
        4%
       0.01
       0%
       0.55
       2%
       5.63
       11.12
       16.76
       73%
       23.06
       100%
        0.2%
       51
       0.084236
       49
    
    
      51
       2031
       4.83
       21%
       0.90
        4%
       0.01
       0%
       0.55
       2%
       5.70
       11.20
       16.90
       73%
       23.19
       100%
        0.2%
       52
       0.301971
       50
    
    
      52
       2032
       4.81
       21%
       0.90
        4%
       0.01
       0%
       0.55
       2%
       5.76
       11.28
       17.04
       73%
       23.30
       100%
        0.2%
       53
       0.683238
       51
    
    
      53
       2033
       4.79
       20%
       0.89
        4%
       0.01
       0%
       0.55
       2%
       5.82
       11.37
       17.19
       73%
       23.43
       100%
        0.3%
       54
       0.841487
       52
    
    
      54
       2034
       4.78
       20%
       0.88
        4%
       0.01
       0%
       0.55
       2%
       5.88
       11.47
       17.35
       74%
       23.56
       100%
        0.3%
       55
       0.103193
       53
    
    
      55
       2035
       4.76
       20%
       0.87
        4%
       0.01
       0%
       0.55
       2%
       5.94
       11.56
       17.50
       74%
       23.69
       100%
        0.3%
       56
       0.127020
       54

# note that many to many joins create the cartesian product of the two tables.



In [51]:

    
merged = merge(Res_Primary_E_Consumption, TestData2, on= ['Year'])



In [72]:

    
from __future__ import division



In [93]:

    
merged['NG'] = np.float32(merged.NaturalGas)



In [95]:

    
merged['TTL']  = np.float32(merged.TOTAL)



In [101]:

    
merged['RNW']  = np.float32(merged.Renewable)



In [101]:

    
merged['RNW']  = np.float32(merged.Renewable)



In [98]:

    
# Create a new proportion variable
merged['NG_percent'] = (merged.NG / merged.TTL)



In [106]:

    
#Create a new variable
merged['RNW_percent'] = (merged.RNW / merged.TTL)



In [23]:

    
TestData3 = read_csv('TestData3_missingsomeyears.csv')



In [24]:

    
# pandas join function is merge
#merge?
merge(Res_Primary_E_Consumption, TestData3, on= [ 'Year'])
#simple join on year 
#inner join is default
# check for merge of missing data-- should have all and accept some blank data

### But no-- it just takes those columns that match.

# need to find the pandas command for those that









    Out[24]:






  
    
      
      Year
      NaturalGas
      NaturalGas_perc
      Petroleum
      Petroleum_perc
      Coal
      Coal_perc
      Renewable
      Renewable_perc
      Electricity_Sales
      Electricity_Losses
      Electricity_Total
      Electricity_Total_perc
      TOTAL
      TOTAL_perc
      Change_since2010
      Col1
      Col2
      Col3
    
  
  
    
      0 
       1980
       4.79
       30%
       1.72
       11%
       0.03
       0%
       0.85
       5%
       2.45
        5.89
        8.33
       53%
       15.72
       100%
       -
        1
       0.229849
        1
    
    
      1 
       1981
       4.57
       30%
       1.52
       10%
       0.03
       0%
       0.87
       6%
       2.46
        5.77
        8.24
       54%
       15.23
       100%
       -
        2
       0.003063
        2
    
    
      2 
       1982
       4.68
       30%
       1.42
        9%
       0.03
       0%
       0.97
       6%
       2.49
        5.89
        8.38
       54%
       15.48
       100%
       -
        3
       0.418250
        1
    
    
      3 
       1983
       4.45
       29%
       1.33
        9%
       0.03
       0%
       0.97
       6%
       2.56
        6.03
        8.59
       56%
       15.38
       100%
       -
        4
       0.689310
        2
    
    
      4 
       1984
       4.64
       29%
       1.51
       10%
       0.04
       0%
       0.98
       6%
       2.66
        6.07
        8.73
       55%
       15.90
       100%
       -
        5
       0.432489
        3
    
    
      5 
       1985
       4.51
       28%
       1.55
       10%
       0.04
       0%
       1.01
       6%
       2.71
        6.21
        8.92
       56%
       16.02
       100%
       -
        6
       0.987474
        4
    
    
      6 
       1986
       4.38
       28%
       1.52
       10%
       0.04
       0%
       0.92
       6%
       2.79
        6.27
        9.07
       57%
       15.94
       100%
       -
        7
       0.209367
        5
    
    
      7 
       1987
       4.40
       27%
       1.60
       10%
       0.04
       0%
       0.85
       5%
       2.90
        6.42
        9.32
       58%
       16.21
       100%
       -
        8
       0.582228
        6
    
    
      8 
       1988
       4.72
       28%
       1.66
       10%
       0.04
       0%
       0.91
       5%
       3.05
        6.75
        9.79
       57%
       17.12
       100%
       -
        9
       0.638592
        7
    
    
      9 
       1989
       4.88
       28%
       1.64
        9%
       0.03
       0%
       0.97
       5%
       3.09
        7.13
       10.22
       58%
       17.76
       100%
       -
       10
       0.286306
        8
    
    
      10
       1990
       4.47
       26%
       1.37
        8%
       0.03
       0%
       0.64
       4%
       3.15
        7.24
       10.39
       61%
       16.91
       100%
       -
       11
       0.359775
        9
    
    
      11
       1991
       4.64
       27%
       1.36
        8%
       0.03
       0%
       0.67
       4%
       3.26
        7.42
       10.68
       61%
       17.37
       100%
       -
       12
       0.130956
       10
    
    
      12
       1992
       4.78
       28%
       1.39
        8%
       0.03
       0%
       0.70
       4%
       3.19
        7.22
       10.41
       60%
       17.31
       100%
       -
       13
       0.323788
       11
    
    
      13
       1993
       5.05
       28%
       1.42
        8%
       0.03
       0%
       0.61
       3%
       3.39
        7.68
       11.08
       61%
       18.18
       100%
       -
       14
       0.053721
       12
    
    
      14
       1994
       4.94
       27%
       1.39
        8%
       0.02
       0%
       0.58
       3%
       3.44
        7.70
       11.14
       62%
       18.07
       100%
       -
       15
       0.663227
       13
    
    
      15
       1995
       4.94
       27%
       1.35
        7%
       0.02
       0%
       0.58
       3%
       3.56
        8.03
       11.59
       63%
       18.49
       100%
       -
       16
       0.633252
       14
    
    
      16
       1996
       5.35
       27%
       1.46
        7%
       0.02
       0%
       0.60
       3%
       3.69
        8.35
       12.04
       62%
       19.47
       100%
       -
       17
       0.891918
       15
    
    
      17
       1997
       5.09
       27%
       1.40
        7%
       0.02
       0%
       0.49
       3%
       3.67
        8.27
       11.94
       63%
       18.93
       100%
       -
       18
       0.495942
       16
    
    
      18
       1998
       4.64
       25%
       1.28
        7%
       0.01
       0%
       0.44
       2%
       3.86
        8.69
       12.55
       66%
       18.92
       100%
       -
       19
       0.811847
       17
    
    
      19
       1999
       4.82
       25%
       1.43
        7%
       0.01
       0%
       0.46
       2%
       3.91
        8.88
       12.79
       66%
       19.52
       100%
       -
       20
       0.609916
       18
    
    
      20
       2000
       5.07
       25%
       1.52
        7%
       0.01
       0%
       0.49
       2%
       4.07
        9.20
       13.27
       65%
       20.36
       100%
       -
       21
       0.643762
       19
    
    
      21
       2001
       4.87
       24%
       1.50
        7%
       0.01
       0%
       0.43
       2%
       4.10
        9.08
       13.18
       66%
       20.00
       100%
       -
       22
       0.505611
       20
    
    
      22
       2002
       4.98
       24%
       1.43
        7%
       0.01
       0%
       0.44
       2%
       4.32
        9.57
       13.89
       67%
       20.74
       100%
       -
       23
       0.007572
       21
    
    
      23
       2003
       5.20
       25%
       1.49
        7%
       0.01
       0%
       0.46
       2%
       4.35
        9.55
       13.90
       66%
       21.06
       100%
       -
       24
       0.336711
       22
    
    
      24
       2004
       4.97
       24%
       1.49
        7%
       0.01
       0%
       0.47
       2%
       4.41
        9.70
       14.10
       67%
       21.05
       100%
       -
       25
       0.560270
       23
    
    
      25
       2005
       4.94
       23%
       1.42
        7%
       0.01
       0%
       0.49
       2%
       4.64
       10.08
       14.72
       68%
       21.58
       100%
       -
       26
       0.925150
       24
    
    
      26
       2006
       4.46
       22%
       1.20
        6%
       0.01
       0%
       0.45
       2%
       4.61
        9.91
       14.53
       70%
       20.64
       100%
       -
       27
       0.843730
       25
    
    
      27
       2007
       4.83
       22%
       1.22
        6%
       0.01
       0%
       0.50
       2%
       4.75
       10.19
       14.94
       69%
       21.49
       100%
       -
       28
       0.308464
       26
    
    
      28
       2008
       5.00
       23%
       1.21
        6%
       0.01
       0%
       0.53
       2%
       4.71
       10.08
       14.79
       69%
       21.53
       100%
       -
       29
       0.446437
       27
    
    
      29
       2009
       4.88
       23%
       1.14
        5%
       0.01
       0%
       0.52
       2%
       4.66
        9.80
       14.45
       69%
       20.99
       100%
       -
       30
       0.866504
       28



In [25]:

    
merged_data3 = merge(Res_Primary_E_Consumption, TestData3, on= ['Year'])



In [26]:

    
merged_data3.tail(10)









    Out[26]:






  
    
      
      Year
      NaturalGas
      NaturalGas_perc
      Petroleum
      Petroleum_perc
      Coal
      Coal_perc
      Renewable
      Renewable_perc
      Electricity_Sales
      Electricity_Losses
      Electricity_Total
      Electricity_Total_perc
      TOTAL
      TOTAL_perc
      Change_since2010
      Col1
      Col2
      Col3
    
  
  
    
      20
       2000
       5.07
       25%
       1.52
       7%
       0.01
       0%
       0.49
       2%
       4.07
        9.20
       13.27
       65%
       20.36
       100%
       -
       21
       0.643762
       19
    
    
      21
       2001
       4.87
       24%
       1.50
       7%
       0.01
       0%
       0.43
       2%
       4.10
        9.08
       13.18
       66%
       20.00
       100%
       -
       22
       0.505611
       20
    
    
      22
       2002
       4.98
       24%
       1.43
       7%
       0.01
       0%
       0.44
       2%
       4.32
        9.57
       13.89
       67%
       20.74
       100%
       -
       23
       0.007572
       21
    
    
      23
       2003
       5.20
       25%
       1.49
       7%
       0.01
       0%
       0.46
       2%
       4.35
        9.55
       13.90
       66%
       21.06
       100%
       -
       24
       0.336711
       22
    
    
      24
       2004
       4.97
       24%
       1.49
       7%
       0.01
       0%
       0.47
       2%
       4.41
        9.70
       14.10
       67%
       21.05
       100%
       -
       25
       0.560270
       23
    
    
      25
       2005
       4.94
       23%
       1.42
       7%
       0.01
       0%
       0.49
       2%
       4.64
       10.08
       14.72
       68%
       21.58
       100%
       -
       26
       0.925150
       24
    
    
      26
       2006
       4.46
       22%
       1.20
       6%
       0.01
       0%
       0.45
       2%
       4.61
        9.91
       14.53
       70%
       20.64
       100%
       -
       27
       0.843730
       25
    
    
      27
       2007
       4.83
       22%
       1.22
       6%
       0.01
       0%
       0.50
       2%
       4.75
       10.19
       14.94
       69%
       21.49
       100%
       -
       28
       0.308464
       26
    
    
      28
       2008
       5.00
       23%
       1.21
       6%
       0.01
       0%
       0.53
       2%
       4.71
       10.08
       14.79
       69%
       21.53
       100%
       -
       29
       0.446437
       27
    
    
      29
       2009
       4.88
       23%
       1.14
       5%
       0.01
       0%
       0.52
       2%
       4.66
        9.80
       14.45
       69%
       20.99
       100%
       -
       30
       0.866504
       28



In [53]:

    
# total test.. Create a new column called TOTALtest,
# need to add the number of persons
merged['TOTALtest'] = np.floor(merged.TOTAL * merged.Col2)

#decimal numbers so use npl.floor



In [ ]:

    
#TRY other merge options.
# pandas join function is merge
#merge?

# DOes the order matter? NO
#merge(TestData3, Res_Primary_E_Consumption, on= [ 'Year'])
#simple join on year 
#inner join is default
# it just takes those columns that match.
# inner is union?

# need to find the pandas command for those that 

#how to right outer and lerft outer joins in pandas?



In [99]:

    
merged.head()









    Out[99]:






  
    
      
      Year
      NaturalGas
      NaturalGas_perc
      Petroleum
      Petroleum_perc
      Coal
      Coal_perc
      Renewable
      Renewable_perc
      Electricity_Sales
      ...
      TOTAL
      TOTAL_perc
      Change_since2010
      Col1
      Col2
      Col3
      TOTALtest
      NG_percent
      NG
      TTL
    
  
  
    
      0
       1980
       4.79
       30%
       1.72
       11%
       0.03
       0%
       0.85
       5%
       2.45
      ...
       15.72
       100%
       -
       1
       0.229849
       1
       75
       0.304707
       4.79
       15.72
    
    
      1
       1981
       4.57
       30%
       1.52
       10%
       0.03
       0%
       0.87
       6%
       2.46
      ...
       15.23
       100%
       -
       2
       0.003063
       2
       69
       0.300066
       4.57
       15.23
    
    
      2
       1982
       4.68
       30%
       1.42
        9%
       0.03
       0%
       0.97
       6%
       2.49
      ...
       15.48
       100%
       -
       3
       0.418250
       1
       72
       0.302326
       4.68
       15.48
    
    
      3
       1983
       4.45
       29%
       1.33
        9%
       0.03
       0%
       0.97
       6%
       2.56
      ...
       15.38
       100%
       -
       4
       0.689310
       2
       68
       0.289337
       4.45
       15.38
    
    
      4
       1984
       4.64
       29%
       1.51
       10%
       0.04
       0%
       0.98
       6%
       2.66
      ...
       15.90
       100%
       -
       5
       0.432489
       3
       73
       0.291824
       4.64
       15.90
    
  

5 rows × 23 columns



In [ ]:

    
# total test.. Create a new column called TOTALtest,
# need to add the number of persons
merged['TOTALtest'] = np.floor(merged.TOTAL * merged.Col2)

#decimal numbers so use npl.floor



In [57]:

    
merged.groupby(['Year', 'TOTALtest'])['Col3'].sum()
#for each year TOTALtest combo-- the total number of Col3.
#hierarchical labeling.









    Out[57]:





Year  TOTALtest
1980  3             1
1981  0             2
1982  6             1
1983  10            2
1984  6             3
1985  15            4
1986  3             5
1987  9             6
1988  10            7
1989  5             8
1990  6             9
1991  2            10
1992  5            11
1993  0            12
1994  11           13
1995  11           14
1996  17           15
1997  9            16
1998  15           17
1999  11           18
2000  13           19
2001  10           20
2002  0            21
2003  7            22
2004  11           23
2005  19           24
2006  17           25
2007  6            26
2008  9            27
2009  18           28
2010  19           29
2011  7            30
2012  6            31
2013  9            32
2014  17           33
2015  17           34
2016  12           35
2017  0            36
2018  16           37
2019  4            38
2020  13           39
2021  11           40
2022  20           41
2023  19           42
2024  7            43
2025  12           44
2026  2            45
2027  12           46
2028  5            47
2029  0            48
2030  1            49
2031  7            50
2032  15           51
2033  19           52
2034  2            53
2035  3            54
Name: Col3, Length: 56, dtype: int64



In [142]:

    
# Regression Test-- 
#made up independent variable.
import statsmodels.formula.api as smf
model = smf.ols('merged.TOTAL ~ merged.Col2', data=merged)
results_merged = model.fit()
results_merged.summary()









    Out[142]:





OLS Regression Results

  Dep. Variable:       merged.TOTAL      R-squared:             0.000


  Model:                    OLS          Adj. R-squared:       -0.018


  Method:              Least Squares     F-statistic:        0.008511


  Date:              Sun, 26 Oct 2014    Prob (F-statistic):    0.927 


  Time:                  15:35:14        Log-Likelihood:      -130.76


  No. Observations:           56         AIC:                   265.5


  Df Residuals:               54         BIC:                   269.6


  Df Model:                    1                                     




                 coef      std err       t       P>|t|  [95.0% Conf. Int.] 


  Intercept       20.1465      0.669     30.101   0.000     18.805    21.488


  merged.Col2      0.1100      1.193      0.092   0.927     -2.282     2.502




  Omnibus:         7.837    Durbin-Watson:         0.032


  Prob(Omnibus):   0.020    Jarque-Bera (JB):      5.402


  Skew:           -0.616    Prob(JB):             0.0671


  Kurtosis:        2.107    Cond. No.               4.38



In [ ]:



In [84]:

    
#result = _   # underscore in ipython: the output of the last statement, because we don't want to compute again the same thing.



In [108]:

    
#result['Change_since2010'].plot()
#result['TTL'].plot()



In [140]:

    
#import numpy as np
#import matplotlib.pyplot as plt

# evenly sampled time at 200ms intervals
#t = np.arange(0., 5., 0.2)

# red dashes, blue squares and green triangles
#plt.plot(merged.Year, merged.TOTAL, 'r--')
#plt.show()

plt.plot(merged.Year, merged.TOTAL, 'rs', merged.Year,merged.NaturalGas, 'b--', merged.Year,merged.Coal, 'k--',merged.Year,merged.Renewable, 'g--',
         merged.Year,merged.Electricity_Total, 'm--')
plt.ylabel('MMBtu')
plt.xlabel('Year')
plt.title('US Residential Energy Use (MMBtu)',fontsize=16)
plt.show()

#result['merged.TOTAL',
#'NaturalGas',
#'Petroleum',
#'Coal',
#'Renewable',
#'Electricity_Total'].plot()



In [96]:

    
mgirls = merge(girls, births)  # inner join by default.



In [97]:

    
mgirls['persons'] = np.floor(mgirls.prop * mgirls.births)



In [98]:

    
gpersons = mgirls.set_index(['year', 'name']).persons   # Select out persons



In [144]:

    
type(merged.TTL)  # One dimensional series.









    Out[144]:





pandas.core.series.Series



In [145]:

    
result = _   # underscore in ipython: the output of the last statement, because we don't want to compute again the same thing.



In [146]:

    
#pandas.DataFrame.mean
#DataFrame.mean(axis=None, skipna=None, level=None, numeric_only=None, **kwargs)
#Return the mean of the values for the requested axis
merged.TTL.mean()









    Out[146]:





20.199642811502731



In [33]:

    
#pandas.DataFrame.mean
#DataFrame.mean(axis=None, skipna=None, level=None, numeric_only=None, **kwargs)
#Return the mean of the values for the requested axis
Res_Primary_E_Consumption.mean()









    Out[33]:





Year                  2007.500000
NaturalGas               4.853214
Petroleum                1.232321
Coal                     0.016607
Renewable                0.599643
Electricity_Sales        4.341429
Electricity_Losses       9.158393
Electricity_Total       13.500179
TOTAL                   20.199643
dtype: float64



In [34]:

    
#pandas.DataFrame.mean
#DataFrame.mean(axis=None, skipna=None, level=None, numeric_only=None, **kwargs)
#Return the mean of the values for the requested axis
Res_Primary_E_Consumption.TOTAL.mean()









    Out[34]:





20.199642857142855

pandas.DataFrame.mean DataFrame.mean(axis=None, skipna=None, level=None, numeric_only=None, **kwargs) Return the mean of the values for the requested axis Parameters : axis : {index (0), columns (1)} skipna : boolean, default True Exclude NA/null values. If an entire row/column is NA, the result will be NA level : int or level name, default None If the axis is a MultiIndex (hierarchical), count along a particular level, collapsing into a Series numeric_only : boolean, default None Include only float, int, boolean data. If None, will attempt to use everything, then use only numeric data Returns : mean : Series or DataFrame (if level specified) Res_Primary_E_Consumption.TOTAL.mean(axis=None, skipna=None, level=None, numeric_only=None)



In [35]:

    
Res_Primary_E_Consumption.TOTAL.mean(axis=None, skipna=None, level=None, numeric_only=None)









    Out[35]:





20.199642857142855



In [ ]:

Dep. Variable:	TOTAL	R-squared:	1.000
Model:	OLS	Adj. R-squared:	1.000
Method:	Least Squares	F-statistic:	1.510e+06
Date:	Tue, 28 Oct 2014	Prob (F-statistic):	1.11e-128
Time:	13:21:09	Log-Likelihood:	196.34
No. Observations:	56	AIC:	-382.7
Df Residuals:	51	BIC:	-372.6
Df Model:	4

	coef	std err	t	P>\|t\|	[95.0% Conf. Int.]
Intercept	0.0365	0.041	0.897	0.374	-0.045 0.118
NaturalGas	0.9912	0.007	148.044	0.000	0.978 1.005
Petroleum	1.0144	0.010	106.295	0.000	0.995 1.034
Renewable	1.0358	0.012	88.663	0.000	1.012 1.059
Electricity_Total	0.9986	0.001	875.085	0.000	0.996 1.001

Omnibus:	1.606	Durbin-Watson:	1.869
Prob(Omnibus):	0.448	Jarque-Bera (JB):	1.333
Skew:	0.199	Prob(JB):	0.513
Kurtosis:	2.357	Cond. No.	607.

	Year	NaturalGas	NaturalGas_perc	Petroleum	Petroleum_perc	Coal	Coal_perc	Renewable	Renewable_perc	Electricity_Sales	Electricity_Losses	Electricity_Total	Electricity_Total_perc	TOTAL	TOTAL_perc	Change_since2010
Year
1980	1980	4.79	30%	1.72	11%	0.03	0%	0.85	5%	2.45	5.89	8.33	53%	15.72	100%	-
1981	1981	4.57	30%	1.52	10%	0.03	0%	0.87	6%	2.46	5.77	8.24	54%	15.23	100%	-
1982	1982	4.68	30%	1.42	9%	0.03	0%	0.97	6%	2.49	5.89	8.38	54%	15.48	100%	-
1983	1983	4.45	29%	1.33	9%	0.03	0%	0.97	6%	2.56	6.03	8.59	56%	15.38	100%	-
1984	1984	4.64	29%	1.51	10%	0.04	0%	0.98	6%	2.66	6.07	8.73	55%	15.90	100%	-
1985	1985	4.51	28%	1.55	10%	0.04	0%	1.01	6%	2.71	6.21	8.92	56%	16.02	100%	-
1986	1986	4.38	28%	1.52	10%	0.04	0%	0.92	6%	2.79	6.27	9.07	57%	15.94	100%	-
1987	1987	4.40	27%	1.60	10%	0.04	0%	0.85	5%	2.90	6.42	9.32	58%	16.21	100%	-
1988	1988	4.72	28%	1.66	10%	0.04	0%	0.91	5%	3.05	6.75	9.79	57%	17.12	100%	-
1989	1989	4.88	28%	1.64	9%	0.03	0%	0.97	5%	3.09	7.13	10.22	58%	17.76	100%	-
1990	1990	4.47	26%	1.37	8%	0.03	0%	0.64	4%	3.15	7.24	10.39	61%	16.91	100%	-
1991	1991	4.64	27%	1.36	8%	0.03	0%	0.67	4%	3.26	7.42	10.68	61%	17.37	100%	-
1992	1992	4.78	28%	1.39	8%	0.03	0%	0.70	4%	3.19	7.22	10.41	60%	17.31	100%	-
1993	1993	5.05	28%	1.42	8%	0.03	0%	0.61	3%	3.39	7.68	11.08	61%	18.18	100%	-
1994	1994	4.94	27%	1.39	8%	0.02	0%	0.58	3%	3.44	7.70	11.14	62%	18.07	100%	-
1995	1995	4.94	27%	1.35	7%	0.02	0%	0.58	3%	3.56	8.03	11.59	63%	18.49	100%	-
1996	1996	5.35	27%	1.46	7%	0.02	0%	0.60	3%	3.69	8.35	12.04	62%	19.47	100%	-
1997	1997	5.09	27%	1.40	7%	0.02	0%	0.49	3%	3.67	8.27	11.94	63%	18.93	100%	-
1998	1998	4.64	25%	1.28	7%	0.01	0%	0.44	2%	3.86	8.69	12.55	66%	18.92	100%	-
1999	1999	4.82	25%	1.43	7%	0.01	0%	0.46	2%	3.91	8.88	12.79	66%	19.52	100%	-
2000	2000	5.07	25%	1.52	7%	0.01	0%	0.49	2%	4.07	9.20	13.27	65%	20.36	100%	-
2001	2001	4.87	24%	1.50	7%	0.01	0%	0.43	2%	4.10	9.08	13.18	66%	20.00	100%	-
2002	2002	4.98	24%	1.43	7%	0.01	0%	0.44	2%	4.32	9.57	13.89	67%	20.74	100%	-
2003	2003	5.20	25%	1.49	7%	0.01	0%	0.46	2%	4.35	9.55	13.90	66%	21.06	100%	-
2004	2004	4.97	24%	1.49	7%	0.01	0%	0.47	2%	4.41	9.70	14.10	67%	21.05	100%	-
2005	2005	4.94	23%	1.42	7%	0.01	0%	0.49	2%	4.64	10.08	14.72	68%	21.58	100%	-
2006	2006	4.46	22%	1.20	6%	0.01	0%	0.45	2%	4.61	9.91	14.53	70%	20.64	100%	-
2007	2007	4.83	22%	1.22	6%	0.01	0%	0.50	2%	4.75	10.19	14.94	69%	21.49	100%	-
2008	2008	5.00	23%	1.21	6%	0.01	0%	0.53	2%	4.71	10.08	14.79	69%	21.53	100%	-
2009	2009	4.88	23%	1.14	5%	0.01	0%	0.52	2%	4.66	9.80	14.45	69%	20.99	100%	-
2010	2010	5.06	23%	1.22	6%	0.01	0%	0.45	2%	4.95	10.39	15.34	69%	22.07	100%	-
2011	2011	5.05	23%	1.19	5%	0.01	0%	0.47	2%	4.94	10.45	15.39	70%	22.10	100%	0.1%
2012	2012	5.17	24%	1.19	5%	0.01	0%	0.50	2%	4.84	10.07	14.91	68%	21.77	100%	-0.7%
2013	2013	4.99	23%	1.13	5%	0.01	0%	0.48	2%	4.79	9.86	14.66	69%	21.26	100%	-1.2%
2014	2014	5.00	24%	1.11	5%	0.01	0%	0.49	2%	4.79	9.75	14.54	69%	21.15	100%	-1.1%
2015	2015	4.99	24%	1.08	5%	0.01	0%	0.51	2%	4.79	9.68	14.47	69%	21.06	100%	-0.9%
2016	2016	4.98	24%	1.06	5%	0.01	0%	0.53	2%	4.82	9.72	14.54	69%	21.13	100%	-0.7%
2017	2017	4.98	23%	1.05	5%	0.01	0%	0.53	2%	4.87	9.85	14.72	69%	21.28	100%	-0.5%
2018	2018	4.96	23%	1.03	5%	0.01	0%	0.53	2%	4.93	9.95	14.88	69%	21.41	100%	-0.4%
2019	2019	4.95	23%	1.02	5%	0.01	0%	0.53	2%	4.98	10.06	15.05	70%	21.56	100%	-0.3%
2020	2020	4.95	23%	1.01	5%	0.01	0%	0.54	2%	5.02	10.15	15.17	70%	21.66	100%	-0.2%
2021	2021	4.93	23%	1.00	5%	0.01	0%	0.54	2%	5.07	10.22	15.29	70%	21.77	100%	-0.1%
2022	2022	4.92	22%	0.99	5%	0.01	0%	0.54	2%	5.13	10.31	15.44	71%	21.89	100%	-0.1%
2023	2023	4.90	22%	0.98	4%	0.01	0%	0.54	2%	5.18	10.44	15.62	71%	22.04	100%	0.0%
2024	2024	4.89	22%	0.97	4%	0.01	0%	0.54	2%	5.24	10.55	15.79	71%	22.19	100%	0.0%
2025	2025	4.88	22%	0.95	4%	0.01	0%	0.54	2%	5.30	10.70	16.00	71%	22.39	100%	0.1%
2026	2026	4.87	22%	0.95	4%	0.01	0%	0.54	2%	5.37	10.78	16.14	72%	22.51	100%	0.1%
2027	2027	4.86	21%	0.94	4%	0.01	0%	0.54	2%	5.43	10.88	16.31	72%	22.65	100%	0.2%
2028	2028	4.85	21%	0.93	4%	0.01	0%	0.54	2%	5.49	10.96	16.45	72%	22.78	100%	0.2%
2029	2029	4.85	21%	0.92	4%	0.01	0%	0.55	2%	5.56	11.03	16.59	72%	22.91	100%	0.2%

Dep. Variable:	merged.TOTAL	R-squared:	0.000
Model:	OLS	Adj. R-squared:	-0.018
Method:	Least Squares	F-statistic:	0.008511
Date:	Sun, 26 Oct 2014	Prob (F-statistic):	0.927
Time:	15:35:14	Log-Likelihood:	-130.76
No. Observations:	56	AIC:	265.5
Df Residuals:	54	BIC:	269.6
Df Model:	1

Omnibus:	7.837	Durbin-Watson:	0.032
Prob(Omnibus):	0.020	Jarque-Bera (JB):	5.402
Skew:	-0.616	Prob(JB):	0.0671
Kurtosis:	2.107	Cond. No.	4.38