In [183]:
%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns
import statsmodels.api as sm
import pandas as pd
import numpy as np
import seaborn as sns
import statsmodels.formula.api as smf
import sklearn.linear_model
In [184]:
df = pd.read_csv("../final_projects/assets/ufo_reports.csv", sep='\t')
df=df[(~df.Long.isnull()) & (~df.Lat.isnull())]
df.columns
df.shape
Out[184]:
In [185]:
from datetime import datetime
def days_between(d1, d2):
d1 = datetime.strptime(d1, "%Y-%m-%d")
d2 = datetime.strptime(d2, "%Y-%m-%d")
return abs((d2 - d1).days)
In [186]:
features = ['Date','Lat', 'Long', 'City', 'State']
data = pd.DataFrame(df, columns=features)
data['Diff'] = data['Date'].apply(lambda x: days_between(str(x), '2014-01-01'))
data.head(2)
Out[186]:
In [187]:
df_single = df[df.Date=='2014-01-01']
plt.scatter(df_single['Lat'], df_single['Long'])
Out[187]:
In [195]:
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
fig = plt.figure(figsize=(6,6))
ax = fig.add_subplot(111, projection='3d')
dataz = np.array(data['Diff'])
datax = np.array(data['Lat'])
datay = np.array(data['Long'])
for i in range(len(datax)-1):
ax.quiver3D(datax[i], datay[i], dataz[i],datax[i+1], datay[i+1], dataz[i+1])
plt.xlabel('Latitude')
plt.ylabel('Longitude')
ax.set_zlabel('Date')
Out[195]:
In [196]:
data=[]
for i in range(0,len(datax)):
data.append([datax[i], datay[i], dataz[i]])
data = np.array(data)
# Calculate the mean of the points, i.e. the 'center' of the cloud
datamean = data.mean(axis=0)
datamean[0] = int(datamean[0])
# Do an SVD on the mean-centered data.
uu, dd, vv = np.linalg.svd(data - datamean)
linepts = vv[0] * np.mgrid[-1000:2000:1][:, np.newaxis]
linepts += datamean
In [197]:
import matplotlib.pyplot as plt
import mpl_toolkits.mplot3d as m3d
ax = m3d.Axes3D(plt.figure())
ax.scatter3D(*data.T)
ax.plot3D(*linepts.T, c='red', alpha=0.5)
ax.view_init(elev=10, azim=50)
plt.xlabel('date')
plt.ylabel('Longitude')
ax.set_zlabel('Latitude')
plt.show()
In [198]:
import numpy as np
#data
data=[]
for i in range(0,len(datax)):
data.append([dataz[i], datay[i], datax[i]])
data = np.array(data)
# Calculate the mean of the points, i.e. the 'center' of the cloud
datamean = data.mean(axis=0)
datamean[0] = int(datamean[0])
# Do an SVD on the mean-centered data.
uu, dd, vv = np.linalg.svd(data - datamean)
# Now vv[0] contains the first principal component, i.e. the direction
# vector of the 'best fit' line in the least squares sense.
# generate some points along this best fit line, for plotting.
#we want it to have mean 0 (like the points we did
# the svd on). Also, it's a straight line, so we only need 2 points.
linepts = vv[0] * np.mgrid[-1000:2000:1][:, np.newaxis]
# shift by the mean to get the line in the right place
linepts += datamean
In [199]:
print linepts[:,0]
In [200]:
for i in range(0,len(linepts)):
linepts[i,0]= int(linepts[i,0]+1)
print linepts[:,0]
In [201]:
# Verify that everything looks right.
import matplotlib.pyplot as plt
import mpl_toolkits.mplot3d as m3d
ax = m3d.Axes3D(plt.figure())
ax.scatter3D(*data.T)
ax.plot3D(*linepts.T, c='red', alpha=0.5)
ax.view_init(elev=10, azim=50)
plt.xlabel('date')
plt.ylabel('Longitude')
ax.set_zlabel('Latitude')
plt.show()
In [122]:
print data[10,1]
In [125]:
array = linepts[linepts[:,0]==data[10,0]]
In [126]:
print array
In [128]:
print array[0,1]
In [215]:
import math
def error(data, line_model):
err = 0
l = len(data)
for i in range(0,l):
array = line_model[(line_model[:,0]== data[i,0])]
err += (data[i,1] - array[0,1]) ** 2 + (data[i,2] - array[0,2]) ** 2
err = math.sqrt(err)
return err/(data[:, 2].max()-data[:,2].min())
In [216]:
print days_between('2016-06-11','2014-01-01')
In [217]:
print 'eulicdean distance error', error(data, linepts)
In [84]:
from geopy.geocoders import ArcGIS
from geopy.exc import GeocoderTimedOut
In [85]:
from geopy.geocoders import Nominatim
geolocator = Nominatim()
location = geolocator.reverse("38.95675005, -95.38474512")
print(location.address)
In [ ]:
In [ ]: