In [1]:
import pandas as pd
import numpy as np
import tempfile
import csv
In [2]:
# Upper-case column labels for the field-data CSV.
# NOTE(review): not referenced by any cell visible here — confirm it is still needed.
CSV_COLUMN_NAMES = [
    'OBSERVER',
    'DATE',
    'SITE',
    'SPECIES',
    'MEASUREMENT_TYPE',
    'COUNTING',
]

# Passed to pd.read_csv as `header=`: the row that holds the column names.
CSV_HEADER_ROW = 2
Remove blank lines from the CSV
In [3]:
# Copy every non-blank row of the fixture CSV into a temporary file.
# A row is treated as blank when all of its fields are empty or whitespace-only.
# NOTE(review): the binary temp-file mode and the 'rU' open mode look like
# Python 2 csv conventions — confirm the target interpreter before porting.
tmp_csv = tempfile.TemporaryFile(mode='w+b')
writer = csv.writer(tmp_csv)
with open('fixtures/example_field_data.csv', 'rU') as csv_file:
    for row in csv.reader(csv_file):
        if not any(cell.strip() for cell in row):
            continue  # nothing but empty/whitespace fields: skip the row
        print('Writing: ' + str(row))
        writer.writerow(row)
In [4]:
# Rewind the temporary CSV so pandas reads it from the beginning.
tmp_csv.seek(0)

# Load the cleaned CSV:
#   * row CSV_HEADER_ROW supplies the column names,
#   * column 1 becomes the index and, with parse_dates=True, pandas tries to
#     parse that index as dates,
#   * empty strings are read as missing values.
# The former `prefix='Measurement'` argument was removed: `prefix` only names
# auto-numbered columns when there is no header row, so it had no effect next
# to an explicit `header=`, and pandas 2.0 dropped the argument entirely.
df = pd.read_csv(
    tmp_csv,
    header=CSV_HEADER_ROW,
    index_col=[1],
    parse_dates=True,
    na_values=[''],
)
In [5]:
# Display the frame's index (built from CSV column 1 via index_col=[1]).
df.index
Out[5]:
In [6]:
# Show only the 'Observer' column, kept as a one-column DataFrame.
df[['Observer']]
Out[6]:
In [7]:
# Distinct index labels, sorted, as a NumPy array.
index_values = np.asarray(df.index)
np.unique(index_values)
Out[7]:
In [8]:
# Print each distinct value of the 'Site' column, one per line, in sorted order.
site_names = np.unique(df['Site'])
for name in site_names:
    print(name)
In [275]:
# Print the number of rows in each (index label, Site) group.
# The first grouper maps every index label to itself; the second looks up the
# 'Site' value stored under that label.
# print() is called as a function to match the other cells in this notebook;
# the previous bare `print len(r)` statement is a SyntaxError on Python 3.
for group_key, group_rows in df.groupby([lambda x: x, lambda y: df.loc[y]['Site']]):
    print(len(group_rows))
In [9]:
# Row count for every (index label, Site) pair.
by_label_and_site = df.groupby([
    lambda label: label,                   # group key 1: the index label itself
    lambda label: df.loc[label]['Site'],   # group key 2: the Site stored under that label
])
by_label_and_site.size()
Out[9]:
In [21]:
# First row, columns from position 5 onward, with missing values dropped.
# NOTE(review): position 5 is presumably where the measurement columns begin —
# confirm against the CSV header before relying on it.
df.iloc[0,5:].dropna()
Out[21]:
In [23]:
# Scratch check: a Series whose three entries all share the index label 'species'.
pd.Series([1, 2, 3], index=['species', 'species', 'species'])
Out[23]:
In [26]:
Out[26]: