In [1]:
%matplotlib inline
import sys
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# Record the interpreter and pandas versions this notebook was run with.
# The parenthesised single-argument form prints the same text under both
# Python 2 and Python 3, whereas the bare print statement is a SyntaxError
# on Python 3.
print(sys.version)
print("Pandas: %s" % pd.__version__)
In [2]:
# Load the CSV exports that live in the "probably bad" directory.
# The directory is spelled once so the individual file names are easy to
# compare against the variable names they are loaded into.
PROBABLY_BAD_DIR = '/Users/Peter/Documents/scc/challenge/PSQL/probably bad/'

cover = pd.read_csv(PROBABLY_BAD_DIR + 'cover.csv')
# This frame used to be read from cover.csv as well, which made `transect` an
# exact duplicate of `cover` (and doubled the cover rows once concatenated).
# NOTE(review): assumes transect.csv sits next to the other exports - confirm.
transect = pd.read_csv(PROBABLY_BAD_DIR + 'transect.csv')
photo = pd.read_csv(PROBABLY_BAD_DIR + 'photo.csv')
food = pd.read_csv(PROBABLY_BAD_DIR + 'food_analysis.csv')
infiltration = pd.read_csv(PROBABLY_BAD_DIR + 'infiltration.csv')
In [16]:
# Read analysis.csv from the "probably bad" directory into `analysis`.
analysis = pd.read_csv(
    filepath_or_buffer='/Users/Peter/Documents/scc/challenge/PSQL/probably bad/analysis.csv'
)
In [6]:
# Collect the individual frames and stack them into one frame
# (pd.concat's default axis appends rows).
comb = [
    cover,
    transect,
    photo,
    infiltration,
    food,
]
combined = pd.concat(objs=comb)
In [5]:
# Read the allobsOct23.csv export into `allobs1`; its column layout is what
# `combined` is later matched against.
allobs1 = pd.read_csv(
    filepath_or_buffer='/Users/Peter/Documents/scc/challenge/PSQL/allobsOct23.csv'
)
In [8]:
# Give `combined` a placeholder 'filename' column (a single space in every
# row), then capture the column names of both frames as plain lists so they
# can be compared.
combined['filename'] = ' '
combcols = combined.columns.tolist()
allcols = allobs1.columns.tolist()
In [10]:
# Show both column lists for a side-by-side comparison.
# Parenthesised single-argument print works identically on Python 2 and 3;
# the bare print statement is a SyntaxError on Python 3.
print(allcols)
print(combcols)
In [11]:
# Keep only the columns named in `allcols`, in that order, so `combined`
# ends up with the same column layout as `allobs1`.
combined = combined[allcols]
In [12]:
# Write the combined frame out as CSV (default options, so the index is
# written as the first column).
combined.to_csv(
    path_or_buf='/Users/Peter/Documents/scc/challenge/PSQL/combined.csv'
)
In [30]:
# Distinct observation types in the allobs export.
# The frame loaded from allobsOct23.csv is named `allobs1`; no visible cell
# defines `allobs`, so the old spelling fails with NameError on a fresh kernel.
allobs1['obs_type'].unique()
Out[30]:
In [37]:
# Number of non-missing values in each column of `combined`.
non_null_counts = combined.count()
non_null_counts
Out[37]:
In [51]:
# Write the combined frame to disk.
# This has to be a call: assigning the path to `combined.to_csv` writes
# nothing and replaces the method with a plain string on this object, so any
# later `combined.to_csv(...)` would fail.
combined.to_csv('/Users/Peter/Documents/scc/challenge/PSQL/combined.csv')
In [50]:
# Summary statistics for `combined`, displayed as the cell output.
combined_summary = combined.describe()
combined_summary
Out[50]:
In [ ]:
In [2]:
# Load allobsOct23.csv into `df`, asking pandas to parse the 'date' column
# as datetimes.
df = pd.read_csv(
    filepath_or_buffer='/Users/Peter/Documents/scc/challenge/PSQL/allobsOct23.csv',
    parse_dates=['date']
)
In [3]:
# Summary statistics for the freshly loaded `df`, displayed as the cell output.
df_summary = df.describe()
df_summary
Out[3]:
In [134]:
# Derive each observation's file name from its URL: take the last
# '/'-separated component and replace spaces with hyphens.
#
# .str[-1] returns the component itself, so there is no list repr to build
# and then scrub of '[', ']' and quote characters. That scrubbing also
# removed those characters from names that legitimately contain them, and
# turned a missing url into the literal string 'nan'; here a missing url
# simply stays NaN.
url_parts = df['url'].str.split('/')
df['filename'] = url_parts.str[-1].str.replace(' ', '-')

# A name that is nothing but '-' (the last URL component was a single space)
# carries no information, so mark it as missing. The result is assigned back
# rather than relying on inplace mutation through a column accessor.
df['filename'] = df['filename'].replace('-', np.nan)

# Spot-check the row with index label 7.
df.loc[7, 'filename']
Out[134]:
In [135]:
# Directory of each observation: the second-to-last '/'-separated component
# of its URL.
# Item assignment is used because attribute assignment (`df.dirs = ...`) does
# not create a DataFrame column when no such column exists yet.
# .str[-2] yields the component itself (NaN when the URL has fewer than two
# components) instead of the repr of a one-element list such as "['name']".
df['dirs'] = df['url'].str.split('/').str[-2]
In [137]:
# Dump the per-row directory values to their own CSV file.
dirs_csv_path = '/Users/Peter/Documents/scc/challenge/PSQL/dirs.csv'
df.dirs.to_csv(dirs_csv_path)
In [122]:
# Rows whose derived file name is missing, followed by their summary
# statistics as the cell output.
filename_missing = df.filename.isnull()
empties = df.loc[filename_missing]
empties.describe()
Out[122]:
In [14]:
# Total number of cells in `df` (rows x columns).
# DataFrame.size is a property that evaluates to an int, so it must not be
# called; `df.size()` raises "TypeError: 'int' object is not callable".
df.size
Out[14]:
In [ ]:
In [15]:
# Re-point `df` at analysis.csv (with 'date' parsed as datetimes) and swap
# every NaN for a single space in the same expression.
# Per the original author's note, NaN has to be replaced because NaN is not
# iterable.
df = pd.read_csv(
    '/Users/Peter/Documents/scc/challenge/PSQL/analysis.csv',
    parse_dates=['date']
).replace(np.nan, ' ', regex=True)

# Distinct values found in the label4 column.
df['label4'].unique()
Out[15]:
In [16]:
# Distinct values found in the label3 column.
label3_values = df['label3'].unique()
label3_values
Out[16]:
In [22]:
# Group the rows of `df` by their obs_type value and show how many rows
# fall into each group.
groups = df.groupby(by='obs_type')
groups.size()
Out[22]:
In [45]:
# Display the GroupBy object itself; this shows only its repr and computes
# no aggregation.
groups
Out[45]: