In [119]:
## import libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import re
#from mpl_toolkits.basemap import Basemap

#stats libraries
import statsmodels.api as sm
import scipy.misc

#allows us to show plots in notebook
%matplotlib inline
#rcParams['figure.figsize'] = (8,6)

In [120]:
# Detect which delimiter the current data set uses, so that the right separator
# can be configured when the data is loaded.

def detectDelimiter(csvFile):
    r"""Guess the field delimiter of a delimited text file from its first line.

    Candidates are checked in priority order: semicolon, comma, then any
    whitespace (spaces or tabs).

    Parameters
    ----------
    csvFile : str
        Path of the file to inspect. Only the first line is read.

    Returns
    -------
    str
        ";" or "," when that character occurs in the header, the regex
        "\s+" when the header contains whitespace between fields, or
        "Other" when none of these is found (including an empty file).
    """
    with open(csvFile, 'r') as myCsvfile:
        # Drop the line terminator (and surrounding blanks) so it is not
        # mistaken for a whitespace separator.
        header = myCsvfile.readline().strip()

    for delimiter in (";", ","):
        if delimiter in header:
            return delimiter

    # str.find() only searches for literal text and returns -1 (truthy) on a
    # miss, so a real regex search is needed to detect whitespace separators.
    if re.search(r"\s", header):
        return r"\s+"

    # Nothing recognisable was found.
    return "Other"

In [121]:
# Check which separator the 2004 earthquake export uses.
detectDelimiter(csvFile='data/Earthquake_Datasets/EQCanOB_2004.csv')


Out[121]:
','

In [ ]:


In [123]:
# Store the data file paths in variables so that they are easy to reference.
# Paths of the yearly earthquake exports and of the fracking production data.
filename_1990 = 'data/Earthquake_Datasets/EQCanOB_1990.csv'
filename_2004 = 'data/Earthquake_Datasets/EQCanOB_2004.csv'
filename_2006 = 'data/Earthquake_Datasets/EQCanOB_2006.csv'
filename_2008 = 'data/Earthquake_Datasets/EQCanOB_2008.csv'
filename_2010 = 'data/Earthquake_Datasets/EQCanOB_2010.csv'
filename_2012 = 'data/Earthquake_Datasets/EQCanOB_2012.csv'
fracking_company_data = 'data/fracking_company_data.csv'
filename_all = ''  # placeholder; not referenced in the cells shown here

# Load every comma-separated earthquake file into its own dataframe.
(earthquakes_1990, earthquakes_2004, earthquakes_2006,
 earthquakes_2008, earthquakes_2010, earthquakes_2012) = [
    pd.read_csv(path, sep=',')
    for path in (filename_1990, filename_2004, filename_2006,
                 filename_2008, filename_2010, filename_2012)
]
fracking_data = pd.read_csv(fracking_company_data, sep=',')

# Per-file dataframes, in the order they were loaded.
all_earthquakes = (earthquakes_1990, earthquakes_2004, earthquakes_2006,
                   earthquakes_2008, earthquakes_2010, earthquakes_2012)

# One dataframe holding every earthquake record, re-indexed from 0.
df_all_earthquakes = pd.concat(list(all_earthquakes), ignore_index=True)

# Tuple (not a list) of every dataframe to process: the six per-file frames
# followed by the combined frame.
all_earthquakes = all_earthquakes + (df_all_earthquakes,)

# Group the combined records by the calendar year parsed from the 'Date' column.
year = pd.DatetimeIndex(df_all_earthquakes['Date']).year
resampled_data = df_all_earthquakes.groupby(year)

In [124]:
def date_time (data):
    """Convert the first column of ``data`` to pandas datetimes, in place.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame whose first column (by position) holds date values that
        ``pd.to_datetime`` can parse.

    Returns
    -------
    pandas.DataFrame
        The same ``data`` object, with its first column replaced by the
        parsed datetime values.
    """
    # The removed ``.ix`` indexer is replaced by positional ``.iloc`` for the
    # read. The write goes through the column label so the whole column is
    # swapped out and really takes the datetime dtype.
    first_column = data.columns[0]
    data[first_column] = pd.to_datetime(data.iloc[:, 0])
    return data

In [125]:
# Parse the date column of every dataframe (the frames are modified in place).
for quake_frame in all_earthquakes:
    date_time(quake_frame)

In [133]:
# Preview the first five rows of the 2004 earthquake data.
earthquakes_2004.head(n=5)


Out[133]:
Date Lat Long Depth Mag
0 2004-01-04 61.740 -127.653 1 2.0
1 2004-01-21 56.512 -122.268 10 2.4
2 2004-01-31 56.456 -122.301 25 2.4
3 2004-02-01 56.513 -122.278 10 1.8
4 2004-02-09 56.482 -122.167 20 2.0

In [127]:
# Reduce the Depth column to its leading numeric part and convert it from
# a string to a float.

def munge_depth (data):
    """Convert the fifth column (position 4) of ``data`` to floats, in place.

    Each value is converted to text, the leading run of digits and dots is
    kept, and the result is parsed as a float. Values with no leading number
    become NaN instead of raising. Because values are converted to text
    first, a column that is already numeric is accepted too.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame with at least five columns.
        NOTE(review): position 4 is presumably the Depth column — confirm
        against the CSV layout.

    Returns
    -------
    pandas.DataFrame
        The same ``data`` object with the column replaced by float values.
    """
    column = data.columns[4]
    # expand=False keeps the result a Series (one capture group).
    leading_number = (
        data.iloc[:, 4].astype(str).str.extract('([0-9.]*)', expand=False)
    )
    # errors='coerce' maps empty or malformed matches (e.g. '' or '.') to NaN.
    data[column] = pd.to_numeric(leading_number, errors='coerce').astype('float')
    return data

In [128]:
# Reduce the Mag column to its leading numeric part and convert it from
# a string to a float.

def munge_mag (data):
    """Convert the sixth column (position 5) of ``data`` to floats, in place.

    Each value is converted to text, the leading run of digits and dots is
    kept, and the result is parsed as a float. Values with no leading number
    become NaN instead of raising. Because values are converted to text
    first, a column that is already numeric is accepted too.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame with at least six columns.
        NOTE(review): position 5 is presumably the Mag column — confirm
        against the CSV layout.

    Returns
    -------
    pandas.DataFrame
        The same ``data`` object with the column replaced by float values.
    """
    column = data.columns[5]
    # expand=False keeps the result a Series (one capture group).
    leading_number = (
        data.iloc[:, 5].astype(str).str.extract('([0-9.]*)', expand=False)
    )
    # errors='coerce' maps empty or malformed matches (e.g. '' or '.') to NaN.
    data[column] = pd.to_numeric(leading_number, errors='coerce').astype('float')
    return data

In [ ]:


In [130]:
# Apply the munging functions to every dataframe, then drop the Time(UT) column.
for quake_frame in all_earthquakes:
    # Clean the depth and magnitude columns while 'Time(UT)' is still present,
    # because both helpers address their column by position.
    munge_depth(quake_frame)
    munge_mag(quake_frame)
    quake_frame.drop('Time(UT)', axis=1, inplace=True)

In [ ]:
# Preview the first five rows of the combined earthquake data.
df_all_earthquakes.head(n=5)

In [131]:
def magnitude_over_time (data):
    """Show a line chart of the ``Mag`` column against the dataframe index.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame providing ``data.index`` (x values) and a ``Mag`` column
        (y values).

    Returns
    -------
    None
        The figure is displayed with ``plt.show()``.
    """
    _, axes = plt.subplots(figsize=(14, 6))
    axes.plot(data.index, data['Mag'])

    axes.set_title('Magnitude of Earthquakes over Time')
    axes.set_xlabel('Index Number over Nine-Months')
    axes.set_ylabel('Magnitude')
    plt.show()

In [132]:
# Draw the magnitude chart for every dataframe.
for quake_frame in all_earthquakes:
    plot = magnitude_over_time(quake_frame)



In [ ]:
# Gross production from the fracking data, plotted per year.
_, production_ax = plt.subplots()
production_ax.plot(fracking_data['Year'], fracking_data['Gross Production'])
production_ax.set_xlabel('Year')
production_ax.set_ylabel('Gross Daily Production')

In [ ]:


In [134]:
def depth_over_time (data):
    """Show a bar chart of the ``Depth`` column against the dataframe index.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame providing ``data.index`` (bar positions) and a ``Depth``
        column (bar heights).

    Returns
    -------
    None
        The figure is displayed with ``plt.show()``.
    """
    _, axes = plt.subplots(figsize=(14, 6))
    axes.bar(data.index, data['Depth'])

    axes.set_title('Depth of Earthquakes over Time')
    axes.set_xlabel('Index Number over Nine-Months')
    axes.set_ylabel('Depth')
    plt.show()

In [135]:
# Draw the depth chart for every dataframe.
for quake_frame in all_earthquakes:
    plot = depth_over_time(quake_frame)



In [136]:
# Scatter-plot function that shows earthquakes on a 'map'. Passing a different
# dataframe to the function lets it be reused for every data set in this
# project, as long as they all have the same layout.

def scatter_plot(dataframe, background_path='data/map_background.jpg',
                 extent=(-128, -120, 53, 59), size_scale=1000):
    """Plot earthquakes as bubbles on top of a background image.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Frame with ``Long`` (x), ``Lat`` (y) and ``Mag`` (bubble size) columns.
    background_path : str, optional
        Image file drawn behind the points.
    extent : tuple of four numbers, optional
        ``(x_min, x_max, y_min, y_max)`` used both to place the background
        image and as the axis limits, so the two always agree.
    size_scale : number, optional
        Factor applied to ``Mag`` to obtain the marker size passed to
        ``scatter`` as ``s``.

    Returns
    -------
    None
        The figure is left open on the current pyplot state for display.
    """
    x_min, x_max, y_min, y_max = extent

    fig = plt.figure(figsize=(30, 20))
    ax = fig.add_subplot(1, 1, 1)

    # Draw the background first so the bubbles are rendered on top of it.
    background = plt.imread(background_path)
    ax.imshow(background, extent=[x_min, x_max, y_min, y_max])

    ax.scatter(dataframe['Long'], dataframe['Lat'],
               s=dataframe['Mag'] * size_scale, c='c')

    # Clip the view to the same window the background image covers.
    ax.set_xlim(x_min, x_max)
    ax.set_ylim(y_min, y_max)

In [137]:
# The standard deviation computed below shows that the magnitudes differ very little, so the bubbles in my plot will
# not differ much in size. Instead, the plot mainly gives an idea of where the earthquakes are located, because each
# one is plotted at its longitude and latitude.

# Spread of the magnitudes across all earthquake records.
np.std(df_all_earthquakes['Mag'])


Out[137]:
0.624272566554045

In [138]:
# Draw the map scatter plot for every dataframe.
for quake_frame in all_earthquakes:
    map_plot = scatter_plot(quake_frame)


---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-138-7f62ef30b4f8> in <module>()
      1 for dataframe in all_earthquakes:
----> 2     map_plot = scatter_plot(dataframe)
      3 

<ipython-input-136-6a8b6725e7eb> in scatter_plot(dataframe)
     21     plt.ylim(53, 59)
     22 
---> 23     plt.savefig()

/Users/Jessi/anaconda/lib/python3.4/site-packages/matplotlib/pyplot.py in savefig(*args, **kwargs)
    686 def savefig(*args, **kwargs):
    687     fig = gcf()
--> 688     res = fig.savefig(*args, **kwargs)
    689     fig.canvas.draw_idle()   # need this if 'transparent=True' to reset colors
    690     return res

/Users/Jessi/anaconda/lib/python3.4/site-packages/matplotlib/figure.py in savefig(self, *args, **kwargs)
   1537             self.set_frameon(frameon)
   1538 
-> 1539         self.canvas.print_figure(*args, **kwargs)
   1540 
   1541         if frameon:

TypeError: print_figure() missing 1 required positional argument: 'filename'

In [ ]:
# Gross production per year from the fracking data (unlabelled quick look).
_, production_ax = plt.subplots()
production_ax.plot(fracking_data.Year, fracking_data['Gross Production'])

In [ ]:
fig = plt.figure(figsize=(30,20))

# add_subplot() expects a grid position (rows, columns, index); passing the
# fracking dataframe here raised an error.
ax = fig.add_subplot(1, 1, 1)

# A GroupBy object cannot be plotted directly, so plot the number of
# earthquake records in each year group instead.
# NOTE(review): assumes the intent was an earthquakes-per-year curve — confirm.
quakes_per_year = resampled_data.size()
ax.plot(quakes_per_year.index, quakes_per_year.values)
ax.set_xlabel('Year')
ax.set_ylabel('Number of earthquakes')
plt.show()

In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]: