notebook.community

Edit and run



In [119]:

    
## import libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import re
#from mpl_toolkits.basemap import Basemap

#stats libraries
import statsmodels.api as sm
import scipy.misc

#allows us to show plots in notebook
%matplotlib inline
#rcParams['figure.figsize'] = (8,6)



In [120]:

    
# Sniff the field delimiter from the header line of a data file so that the
# matching `sep` can be used when the file is loaded.

def detectDelimiter(csvFile):
    r"""Guess the field delimiter of a text file from its first line.

    Candidates are tried in priority order: ';', then ',', then whitespace.

    Parameters
    ----------
    csvFile : str
        Path of the file to inspect. Only the first line is read.

    Returns
    -------
    str
        ';' or ',' when that character occurs in the header, the regex
        string '\s+' when the header fields are separated by whitespace
        (spaces or tabs), and 'Other' when none of these is found.
    """
    with open(csvFile, 'r') as myCsvfile:
        header = myCsvfile.readline()

    if ";" in header:
        return ";"
    if "," in header:
        return ","
    # str.find() returns -1 (truthy) on a miss and does no regex matching, so
    # a real pattern search is required here. The header is stripped first
    # because its line terminator would otherwise always count as whitespace.
    if re.search(r"\s", header.strip()):
        return r"\s+"
    return "Other"



In [121]:

    
# Check which separator the 2004 earthquake file uses.
detectDelimiter('data/Earthquake_Datasets/EQCanOB_2004.csv')









    Out[121]:





','



In [ ]:



In [123]:

    
# Paths of the input files, named once so they are easy to refer to.
filename_1990 = 'data/Earthquake_Datasets/EQCanOB_1990.csv'
filename_2004 = 'data/Earthquake_Datasets/EQCanOB_2004.csv'
filename_2006 = 'data/Earthquake_Datasets/EQCanOB_2006.csv'
filename_2008 = 'data/Earthquake_Datasets/EQCanOB_2008.csv'
filename_2010 = 'data/Earthquake_Datasets/EQCanOB_2010.csv'
filename_2012 = 'data/Earthquake_Datasets/EQCanOB_2012.csv'
fracking_company_data = 'data/fracking_company_data.csv'
filename_all = ''

# Load each per-year earthquake file and the fracking production table.
earthquakes_1990 = pd.read_csv(filename_1990, sep=',')
earthquakes_2004 = pd.read_csv(filename_2004, sep=',')
earthquakes_2006 = pd.read_csv(filename_2006, sep=',')
earthquakes_2008 = pd.read_csv(filename_2008, sep=',')
earthquakes_2010 = pd.read_csv(filename_2010, sep=',')
earthquakes_2012 = pd.read_csv(filename_2012, sep=',')
fracking_data = pd.read_csv(fracking_company_data, sep=',')

# One combined frame holding the rows of every per-year frame, re-indexed 0..n-1.
df_all_earthquakes = pd.concat(
    [
        earthquakes_1990,
        earthquakes_2004,
        earthquakes_2006,
        earthquakes_2008,
        earthquakes_2010,
        earthquakes_2012,
    ],
    ignore_index=True,
)

# Tuple of every per-year frame plus the combined frame, so later cells can
# loop over all of them.
all_earthquakes = (
    earthquakes_1990,
    earthquakes_2004,
    earthquakes_2006,
    earthquakes_2008,
    earthquakes_2010,
    earthquakes_2012,
    df_all_earthquakes,
)

# Group the combined frame by the calendar year of its 'Date' column.
year = pd.DatetimeIndex(df_all_earthquakes['Date']).year
resampled_data = df_all_earthquakes.groupby(year)



In [124]:

    
def date_time(data):
    """Convert the first column of ``data`` to datetime values, in place.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame whose first column (by position) holds values that
        ``pd.to_datetime`` can parse.

    Returns
    -------
    pandas.DataFrame
        The same object that was passed in, with its first column replaced.
    """
    first_column = data.columns[0]
    # ``.ix`` was removed in pandas 1.0. Assigning by label replaces the whole
    # column, so it takes the datetime dtype rather than keeping its old one.
    data[first_column] = pd.to_datetime(data.iloc[:, 0])
    return data



In [125]:

    
# Convert the first column of every frame to datetimes (done in place).
for dataframe in all_earthquakes:
    date_time(dataframe)



In [133]:

    
# Preview the first rows of the 2004 frame.
earthquakes_2004.head()



In [127]:

    
# Reduce the depth column to its leading number and store it as a float.

def munge_depth(data):
    r"""Replace the column at position 4 with its leading numeric value.

    Each entry is read as text and the number at its start (optional sign,
    digits, optional decimal part) is kept; anything after it is dropped.
    Entries that do not start with a number become NaN. Values that are
    already numeric pass through unchanged, so the function can be re-run.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame with at least five columns; the fifth is assumed to be the
        depth column -- TODO confirm against the input files.

    Returns
    -------
    pandas.DataFrame
        The same object that was passed in, modified in place.
    """
    depth_column = data.columns[4]
    # ``.ix`` was removed in pandas 1.0; select by position with ``.iloc``.
    as_text = data.iloc[:, 4].astype(str)
    # expand=False keeps the result a Series (one capture group); the pattern
    # only matches text that float() can parse, so the cast cannot fail.
    leading_number = as_text.str.extract(r'^\s*([-+]?[0-9]*\.?[0-9]+)', expand=False)
    data[depth_column] = leading_number.astype('float')
    return data



In [128]:

    
# Reduce the magnitude column to its leading number and store it as a float.

def munge_mag(data):
    r"""Replace the column at position 5 with its leading numeric value.

    Each entry is read as text and the number at its start (optional sign,
    digits, optional decimal part) is kept; anything after it is dropped.
    Entries that do not start with a number become NaN. Values that are
    already numeric pass through unchanged, so the function can be re-run.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame with at least six columns; the sixth is assumed to be the
        magnitude column -- TODO confirm against the input files.

    Returns
    -------
    pandas.DataFrame
        The same object that was passed in, modified in place.
    """
    mag_column = data.columns[5]
    # ``.ix`` was removed in pandas 1.0; select by position with ``.iloc``.
    as_text = data.iloc[:, 5].astype(str)
    # expand=False keeps the result a Series (one capture group); the pattern
    # only matches text that float() can parse, so the cast cannot fail.
    leading_number = as_text.str.extract(r'^\s*([-+]?[0-9]*\.?[0-9]+)', expand=False)
    data[mag_column] = leading_number.astype('float')
    return data



In [ ]:



In [130]:

    
# Clean every frame in place: run both munge helpers on it, then discard its
# 'Time(UT)' column. The helpers address columns by position, so they run
# before the column is removed.
for dataframe in all_earthquakes:
    munge_depth(dataframe)
    munge_mag(dataframe)
    dataframe.drop(columns='Time(UT)', inplace=True)



In [ ]:

    
# Preview the first rows of the combined frame.
df_all_earthquakes.head()



In [131]:

    
def magnitude_over_time(data):
    """Show a line plot of the ``Mag`` column of ``data`` against its index.

    A new 14x6 inch figure is created and shown for each call; nothing is
    returned.
    """
    _, ax = plt.subplots(figsize=(14, 6))
    ax.plot(data.index, data.Mag)

    ax.set_title('Magnitude of Earthquakes over Time')
    ax.set_xlabel('Index Number over Nine-Months')
    ax.set_ylabel('Magnitude')
    plt.show()



In [132]:

    
# Draw the magnitude plot for every frame in all_earthquakes.
for dataframe in all_earthquakes:
    plot = magnitude_over_time(dataframe)
# magnitude_over_time has no return statement, so `plot` is always None here.



In [ ]:

    
# Line plot of the 'Gross Production' column of the fracking table against 'Year'.
plt.plot(fracking_data.Year, fracking_data['Gross Production'])
plt.xlabel('Year')
plt.ylabel('Gross Daily Production')



In [ ]:



In [134]:

    
def depth_over_time(data):
    """Show a bar chart of the ``Depth`` column of ``data`` against its index.

    A new 14x6 inch figure is created and shown for each call; nothing is
    returned.
    """
    _, ax = plt.subplots(figsize=(14, 6))
    ax.bar(data.index, data.Depth)

    ax.set_title('Depth of Earthquakes over Time')
    ax.set_xlabel('Index Number over Nine-Months')
    ax.set_ylabel('Depth')
    plt.show()



In [135]:

    
# Draw the depth chart for every frame in all_earthquakes.
# depth_over_time has no return statement, so `plot` is always None.
for dataframe in all_earthquakes:
    plot = depth_over_time(dataframe)



In [136]:

    
#create a scatter plot function that shows earthquakes on a 'map'. By entering different
#dataframes into the function line, it should be able to be used for all the data sets 
#I have with this project. As long as they all have the same set up! 

def scatter_plot(dataframe, background='data/map_background.jpg',
                 extent=(-128, -120, 53, 59)):
    """Plot earthquakes as bubbles over a background map image.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Frame with ``Long``, ``Lat`` and ``Mag`` columns.
    background : str, optional
        Path of the image drawn behind the points.
    extent : tuple of four numbers, optional
        ``(x_min, x_max, y_min, y_max)`` in the units of ``Long``/``Lat``.
        The image is stretched over this rectangle and the axes are limited
        to it, so points outside it are not visible.

    Returns
    -------
    None
        The figure is left as the current matplotlib figure.
    """
    x = dataframe.Long
    y = dataframe.Lat
    colors = 'c'
    # Marker area is proportional to the magnitude.
    area = dataframe.Mag * 1000

    x_min, x_max, y_min, y_max = extent

    fig = plt.figure(figsize=(30, 20))
    ax = fig.add_subplot(1, 1, 1)

    # Image and axis limits share one extent so the two cannot drift apart.
    ax.imshow(plt.imread(background), extent=[x_min, x_max, y_min, y_max])

    ax.scatter(x, y, s=area, c=colors)
    ax.set_xlim(x_min, x_max)
    ax.set_ylim(y_min, y_max)



In [137]:

    
# The standard deviation of the magnitudes is computed here. Because the
# differences between magnitudes are fairly negligible, the bubbles in the
# scatter plot will not differ much in size; the plot mainly gives an idea of
# where the earthquakes are located.

np.std(df_all_earthquakes.Mag)









    Out[137]:





0.624272566554045



In [138]:

    
# Draw the map scatter plot for every frame in all_earthquakes.
# NOTE(review): the traceback recorded for this cell points at a
# plt.savefig() call inside scatter_plot that the current definition no longer
# contains; re-run the cell to refresh the output.
for dataframe in all_earthquakes:
    map_plot = scatter_plot(dataframe)









    



---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-138-7f62ef30b4f8> in <module>()
      1 for dataframe in all_earthquakes:
----> 2     map_plot = scatter_plot(dataframe)
      3 

<ipython-input-136-6a8b6725e7eb> in scatter_plot(dataframe)
     21     plt.ylim(53, 59)
     22 
---> 23     plt.savefig()

/Users/Jessi/anaconda/lib/python3.4/site-packages/matplotlib/pyplot.py in savefig(*args, **kwargs)
    686 def savefig(*args, **kwargs):
    687     fig = gcf()
--> 688     res = fig.savefig(*args, **kwargs)
    689     fig.canvas.draw_idle()   # need this if 'transparent=True' to reset colors
    690     return res

/Users/Jessi/anaconda/lib/python3.4/site-packages/matplotlib/figure.py in savefig(self, *args, **kwargs)
   1537             self.set_frameon(frameon)
   1538 
-> 1539         self.canvas.print_figure(*args, **kwargs)
   1540 
   1541         if frameon:

TypeError: print_figure() missing 1 required positional argument: 'filename'



In [ ]:

    
# Same 'Gross Production' versus 'Year' line as the earlier fracking plot, without axis labels.
plt.plot(fracking_data['Year'], fracking_data['Gross Production'])



In [ ]:

    
fig = plt.figure(figsize=(30,20))

# add_subplot takes a grid position, not a DataFrame.
ax = fig.add_subplot(1, 1, 1)

# resampled_data is a groupby object and cannot be plotted directly, so it is
# reduced to one value per year first.
# NOTE(review): plotting the number of earthquakes per year is the assumed
# intent of this cell -- confirm.
quakes_per_year = resampled_data['Date'].count()
ax.plot(quakes_per_year.index, quakes_per_year.values)
ax.set_xlabel('Year')
ax.set_ylabel('Number of Earthquakes')



In [ ]:



In [ ]:



In [ ]:



In [ ]:



In [ ]:



In [ ]:



In [ ]:



In [ ]:



In [ ]:



In [ ]:

	Date	Lat	Long	Depth	Mag
0	2004-01-04	61.740	-127.653	1	2.0
1	2004-01-21	56.512	-122.268	10	2.4
2	2004-01-31	56.456	-122.301	25	2.4
3	2004-02-01	56.513	-122.278	10	1.8
4	2004-02-09	56.482	-122.167	20	2.0