In [119]:
## import libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import re
#from mpl_toolkits.basemap import Basemap
#stats libraries
import statsmodels.api as sm
import scipy.misc
#allows us to show plots in notebook
%matplotlib inline
#rcParams['figure.figsize'] = (8,6)
In [120]:
# Work out which delimiter a data file uses, so that the load settings can be
# chosen to match.
def detectDelimiter(csvFile):
    r"""Guess the column delimiter of a text file from its first line.

    Parameters
    ----------
    csvFile : str
        Path of the file to inspect. Only the first line is read.

    Returns
    -------
    str
        ";" if the first line contains a semicolon; otherwise "," if it
        contains a comma; otherwise the regular-expression string "\s+" if
        whitespace separates at least two fields; otherwise "Other".
    """
    with open(csvFile, 'r') as myCsvfile:
        header = myCsvfile.readline()

    # Explicit delimiters are checked in order of priority.
    for candidate in (";", ","):
        if candidate in header:
            return candidate

    # str.find() does a literal substring search and returns -1 (which is
    # truthy) when nothing is found, so it cannot be used to test for
    # whitespace. Splitting on whitespace ignores the trailing newline and
    # tells us whether the line really holds more than one field.
    if len(header.split()) > 1:
        return r"\s+"

    # No recognisable delimiter on the first line.
    return "Other"
In [121]:
# Detect the delimiter used by the 2004 earthquake CSV.
detectDelimiter('data/Earthquake_Datasets/EQCanOB_2004.csv')
Out[121]:
In [ ]:
In [123]:
# Input file paths, kept in named variables so they are easy to refer to.
filename_1990 = 'data/Earthquake_Datasets/EQCanOB_1990.csv'
filename_2004 = 'data/Earthquake_Datasets/EQCanOB_2004.csv'
filename_2006 = 'data/Earthquake_Datasets/EQCanOB_2006.csv'
filename_2008 = 'data/Earthquake_Datasets/EQCanOB_2008.csv'
filename_2010 = 'data/Earthquake_Datasets/EQCanOB_2010.csv'
filename_2012 = 'data/Earthquake_Datasets/EQCanOB_2012.csv'
fracking_company_data = 'data/fracking_company_data.csv'
filename_all = ''  # not read anywhere in this cell

# Load every file into its own DataFrame (all are comma-separated).
earthquakes_1990 = pd.read_csv(filename_1990, sep=',')
earthquakes_2004 = pd.read_csv(filename_2004, sep=',')
earthquakes_2006 = pd.read_csv(filename_2006, sep=',')
earthquakes_2008 = pd.read_csv(filename_2008, sep=',')
earthquakes_2010 = pd.read_csv(filename_2010, sep=',')
earthquakes_2012 = pd.read_csv(filename_2012, sep=',')
fracking_data = pd.read_csv(fracking_company_data, sep=',')

# The individual earthquake frames, listed once in file order.
yearly_earthquakes = (
    earthquakes_1990,
    earthquakes_2004,
    earthquakes_2006,
    earthquakes_2008,
    earthquakes_2010,
    earthquakes_2012,
)

# One combined frame holding the rows of every individual frame.
df_all_earthquakes = pd.concat(list(yearly_earthquakes), ignore_index=True)

# Tuple of every earthquake frame: the individual ones followed by the
# combined one, so that later loops process all of them.
all_earthquakes = yearly_earthquakes + (df_all_earthquakes,)

# Group the combined frame by the calendar year parsed from its 'Date' column.
# Note that this is a groupby object, not a time-based resample.
year = pd.DatetimeIndex(df_all_earthquakes['Date']).year
resampled_data = df_all_earthquakes.groupby(year)
In [124]:
def date_time(data):
    """Parse the first column of ``data`` as datetimes, modifying it in place.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame whose first column (by position) holds values that
        ``pd.to_datetime`` can parse.

    Returns
    -------
    pandas.DataFrame
        The same ``data`` object, with its first column replaced by the
        parsed datetime values.
    """
    # DataFrame.ix was removed in pandas 1.0, so the column is selected by
    # position with iloc instead.
    first_column = data.columns[0]
    # Assigning by label replaces the whole column, so it takes the datetime
    # dtype instead of keeping the dtype of the original text column.
    data[first_column] = pd.to_datetime(data.iloc[:, 0])
    return data
In [125]:
# Convert the first column of every earthquake frame to datetimes. The helper
# modifies each frame in place, so its return value is not needed here.
for dataframe in all_earthquakes:
    date_time(dataframe)
In [133]:
# Preview the first rows of the 2004 earthquake frame.
earthquakes_2004.head()
Out[133]:
In [127]:
# Turn the depth column into plain numbers: keep only the leading numeric part
# of each text value and convert it to a float.
def munge_depth(data):
    """Convert the column at position 4 from text to float, in place.

    The leading run of digits and dots is extracted from every value and the
    result is cast to float.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame whose column at position 4 holds string values.
        Presumably this is the depth column -- confirm against the CSV layout.

    Returns
    -------
    pandas.DataFrame
        The same ``data`` object with that column replaced by floats.

    Raises
    ------
    ValueError
        If a value does not start with a number, because the extracted empty
        string cannot be cast to float.
    """
    # DataFrame.ix was removed in pandas 1.0, so the column is selected by
    # position with iloc instead.
    depth_label = data.columns[4]
    # expand=False makes extract() return a Series; the default returns a
    # one-column DataFrame, which cannot be assigned back as a single column.
    numeric_text = data.iloc[:, 4].str.extract('([0-9.]*)', expand=False)
    # The extracted text contains only digits and dots, so no further
    # character stripping is needed before the cast.
    data[depth_label] = numeric_text.astype('float')
    return data
In [128]:
# Turn the magnitude column into plain numbers: keep only the leading numeric
# part of each text value and convert it to a float.
def munge_mag(data):
    """Convert the column at position 5 from text to float, in place.

    The leading run of digits and dots is extracted from every value and the
    result is cast to float.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame whose column at position 5 holds string values.
        Presumably this is the magnitude column -- confirm against the CSV
        layout.

    Returns
    -------
    pandas.DataFrame
        The same ``data`` object with that column replaced by floats.

    Raises
    ------
    ValueError
        If a value does not start with a number, because the extracted empty
        string cannot be cast to float.
    """
    # DataFrame.ix was removed in pandas 1.0, so the column is selected by
    # position with iloc instead.
    mag_label = data.columns[5]
    # expand=False makes extract() return a Series; the default returns a
    # one-column DataFrame, which cannot be assigned back as a single column.
    numeric_text = data.iloc[:, 5].str.extract('([0-9.]*)', expand=False)
    # The extracted text contains only digits and dots, so no further
    # character stripping is needed before the cast.
    data[mag_label] = numeric_text.astype('float')
    return data
In [ ]:
In [130]:
# Clean every earthquake frame with the munge helpers, then drop its
# 'Time(UT)' column.
# Caveat: the helpers address columns by position and `del` raises KeyError
# once the column is gone, so this cell is meant to run exactly once per load.
for dataframe in all_earthquakes:
    munge_depth(dataframe)
    munge_mag(dataframe)
    del dataframe['Time(UT)']
In [ ]:
# Preview the first rows of the combined earthquake frame.
df_all_earthquakes.head()
In [131]:
def magnitude_over_time(data):
    """Show a line chart of the ``Mag`` column against the frame's index.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame exposing a ``Mag`` column.

    Returns
    -------
    None
        The chart is displayed with ``plt.show()``; nothing is returned.
    """
    _, axes = plt.subplots(figsize=(14, 6))
    axes.plot(data.index, data.Mag)
    axes.set_title('Magnitude of Earthquakes over Time')
    axes.set_xlabel('Index Number over Nine-Months')
    axes.set_ylabel('Magnitude')
    plt.show()
In [132]:
# Draw the magnitude chart for each earthquake frame in turn. The assignment
# is kept as in the original cell; `plot` holds whatever the helper returns.
for dataframe in all_earthquakes:
    plot = magnitude_over_time(dataframe)
In [ ]:
# Plot the 'Gross Production' column of the fracking data against its 'Year'
# column and label both axes.
plt.plot(fracking_data.Year, fracking_data['Gross Production'])
plt.xlabel('Year')
plt.ylabel('Gross Daily Production')
In [ ]:
In [134]:
def depth_over_time(data):
    """Show a bar chart of the ``Depth`` column against the frame's index.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame exposing a ``Depth`` column.

    Returns
    -------
    None
        The chart is displayed with ``plt.show()``; nothing is returned.
    """
    _, axes = plt.subplots(figsize=(14, 6))
    axes.bar(data.index, data.Depth)
    axes.set_title('Depth of Earthquakes over Time')
    axes.set_xlabel('Index Number over Nine-Months')
    axes.set_ylabel('Depth')
    plt.show()
In [135]:
# Draw the depth chart for each earthquake frame in turn. The assignment is
# kept as in the original cell; `plot` holds whatever the helper returns.
for dataframe in all_earthquakes:
    plot = depth_over_time(dataframe)
In [136]:
# Scatter-plot helper that shows earthquakes on a 'map'. Any of the project's
# frames can be passed in, as long as they share the same column layout.
def scatter_plot(dataframe):
    """Scatter earthquake locations over the map background image.

    Points are placed at (``Long``, ``Lat``), drawn in cyan, and sized by
    ``Mag`` multiplied by 1000. The image 'data/map_background.jpg' is
    stretched over longitudes -128..-120 and latitudes 53..59, and the axes
    are limited to that same window.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Frame exposing ``Long``, ``Lat`` and ``Mag`` columns.

    Returns
    -------
    None
    """
    # Read the columns before any figure is created, as the original did.
    longitudes = dataframe.Long
    latitudes = dataframe.Lat
    marker_sizes = dataframe.Mag * 1000

    figure = plt.figure(figsize=(30, 20))
    axes = figure.add_subplot(1, 1, 1)

    background = plt.imread('data/map_background.jpg')
    plt.imshow(background, extent=[-128, -120, 53, 59])

    axes.scatter(longitudes, latitudes, s=marker_sizes, c='c')
    plt.xlim(-128, -120)
    plt.ylim(53, 59)
In [137]:
# Compute the standard deviation of the magnitudes across all earthquakes.
# The original analysis notes that the magnitudes differ only negligibly, so
# the bubbles in the map plot will all be of similar size; the map is
# therefore mainly useful for showing where the earthquakes occurred.
np.std(df_all_earthquakes.Mag)
Out[137]:
In [138]:
# Draw the map scatter plot for each earthquake frame in turn. The assignment
# is kept as in the original cell; `map_plot` holds whatever the helper returns.
for dataframe in all_earthquakes:
    map_plot = scatter_plot(dataframe)
In [ ]:
# Plot the 'Gross Production' column of the fracking data against its 'Year'
# column (no axis labels are set in this cell).
plt.plot(fracking_data['Year'], fracking_data['Gross Production'])
In [ ]:
# Plot the number of earthquakes recorded in each year.
# NOTE(review): the original passed `fracking_data` to add_subplot(), which
# expects a subplot position, and handed a groupby object to plot(); neither
# call can succeed. Counting rows per year is presumably what was intended --
# confirm, and overlay the fracking production if a comparison is wanted.
fig = plt.figure(figsize=(30,20))
ax = fig.add_subplot(1, 1, 1)
# A groupby object has to be aggregated before it can be drawn.
earthquakes_per_year = resampled_data['Date'].count()
ax.plot(earthquakes_per_year.index, earthquakes_per_year.values)
ax.set_title('Number of Earthquakes per Year')
ax.set_xlabel('Year')
ax.set_ylabel('Number of Earthquakes')
plt.show()
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]: