In [9]:
import math
import random

import geopandas as gpd
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn import metrics
from sklearn.cluster import DBSCAN

In [12]:
# Input CSV, resolved relative to the notebook's working directory.
# NOTE(review): the recorded run of this cell raised FileNotFoundError, so the
# file has to be placed next to the notebook or this path has to be adjusted.
input_file = "data575update.csv"

# Read through `input_file` so that the path is defined in exactly one place.
# Comma-delimited is the pandas default; row 0 supplies the column names.
df = pd.read_csv(input_file, header=0)

# Keep the original column names in a Python list.
original_headers = list(df.columns.values)

# NOTE(review): an earlier draft of this cell listed the expected columns as
# ['Unnamed: 0', 'id', 'latitude', 'longitude', 'photos count', 'views'];
# confirm against the actual file before relying on them.


---------------------------------------------------------------------------
FileNotFoundError                         Traceback (most recent call last)
<ipython-input-12-fbb94d0ec08c> in <module>
      3 
      4 # comma delimited is the default
----> 5 df = pd.read_csv("data575update.csv", header = 0)
      6 
      7 # put the original column names in a python list

~\Anaconda3\envs\GEOG573\lib\site-packages\pandas\io\parsers.py in parser_f(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, squeeze, prefix, mangle_dupe_cols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, skipfooter, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, dayfirst, cache_dates, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, doublequote, escapechar, comment, encoding, dialect, error_bad_lines, warn_bad_lines, delim_whitespace, low_memory, memory_map, float_precision)
    674         )
    675 
--> 676         return _read(filepath_or_buffer, kwds)
    677 
    678     parser_f.__name__ = name

~\Anaconda3\envs\GEOG573\lib\site-packages\pandas\io\parsers.py in _read(filepath_or_buffer, kwds)
    446 
    447     # Create the parser.
--> 448     parser = TextFileReader(fp_or_buf, **kwds)
    449 
    450     if chunksize or iterator:

~\Anaconda3\envs\GEOG573\lib\site-packages\pandas\io\parsers.py in __init__(self, f, engine, **kwds)
    878             self.options["has_index_names"] = kwds["has_index_names"]
    879 
--> 880         self._make_engine(self.engine)
    881 
    882     def close(self):

~\Anaconda3\envs\GEOG573\lib\site-packages\pandas\io\parsers.py in _make_engine(self, engine)
   1112     def _make_engine(self, engine="c"):
   1113         if engine == "c":
-> 1114             self._engine = CParserWrapper(self.f, **self.options)
   1115         else:
   1116             if engine == "python":

~\Anaconda3\envs\GEOG573\lib\site-packages\pandas\io\parsers.py in __init__(self, src, **kwds)
   1889         kwds["usecols"] = self.usecols
   1890 
-> 1891         self._reader = parsers.TextReader(src, **kwds)
   1892         self.unnamed_cols = self._reader.unnamed_cols
   1893 

pandas\_libs\parsers.pyx in pandas._libs.parsers.TextReader.__cinit__()

pandas\_libs\parsers.pyx in pandas._libs.parsers.TextReader._setup_parser_source()

FileNotFoundError: [Errno 2] File data575update.csv does not exist: 'data575update.csv'

In [ ]:
# Compute DBSCAN on the spatial columns only.
# eps is measured in the units of the features. The features are raw
# longitude/latitude, so eps=1 means one degree (roughly 100 km) under the
# default Euclidean metric. min_samples is the neighbourhood size required for
# a core point. The original author expected about 5 obvious clusters in the US.
# NOTE(review): degrees of longitude shrink with latitude; if true ground
# distance matters, consider metric='haversine' on radians instead.
EPS_DEGREES = 1
MIN_SAMPLES = 25
NOISE_LABEL = -1          # DBSCAN marks noisy samples with the label -1
NOISE_COLOR = '#000000'   # drawn separately so noise is not mistaken for a cluster

model = DBSCAN(eps=EPS_DEGREES, min_samples=MIN_SAMPLES)
attributes = df[['longitude', 'latitude']]  # select spatial columns (2) to be part of features
db = model.fit(attributes)
print(db.labels_)  # cluster label for each point given to fit(); noise is -1

# Visualise the clustering result.
colormap = np.array(['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd',
                     '#8c564b', '#e377c2', '#7f7f7f', '#bcbd22'])  # cluster palette

# Indexing the palette directly with the labels would send -1 (noise) to the
# last colour via negative indexing and raise IndexError once there are more
# clusters than colours. Cycle the palette with modulo and give noise its own
# explicit colour instead.
point_colors = np.where(
    db.labels_ == NOISE_LABEL,
    NOISE_COLOR,
    colormap[db.labels_ % len(colormap)],
)

fig, ax = plt.subplots(figsize=(20, 7))  # set the size of the figure
ax.scatter(attributes.longitude, attributes.latitude, c=point_colors, s=20)
ax.set_title('DBSCAN Clustering')
ax.set_xlabel('longitude')
ax.set_ylabel('latitude')
plt.show()