In [90]:
import pandas as pd

In [91]:
pd.read_csv('data/jan1_7_09163500.txt', skiprows=26, delimiter='\t')


Out[91]:
5s 15s 20d 6s 14n 10s 14n.1 10s.1
0 USGS 9163500 2016-01-01 00:00 MST 3510 P 3.78 P
1 USGS 9163500 2016-01-01 00:15 MST 3510 P 3.78 P
2 USGS 9163500 2016-01-01 00:30 MST 3490 P 3.77 P
3 USGS 9163500 2016-01-01 00:45 MST 3490 P 3.77 P
4 USGS 9163500 2016-01-01 01:00 MST 3470 P 3.76 P
5 USGS 9163500 2016-01-01 01:15 MST 3460 P 3.75 P
6 USGS 9163500 2016-01-01 01:30 MST 3420 P 3.73 P
7 USGS 9163500 2016-01-01 01:45 MST 3410 P 3.72 P
8 USGS 9163500 2016-01-01 02:00 MST 3390 P 3.71 P
9 USGS 9163500 2016-01-01 02:15 MST 3390 P 3.70 P
10 USGS 9163500 2016-01-01 02:30 MST 3360 P 3.68 P
11 USGS 9163500 2016-01-01 02:45 MST 3340 P 3.67 P
12 USGS 9163500 2016-01-01 03:00 MST 3340 P 3.67 P
13 USGS 9163500 2016-01-01 03:15 MST 3320 P 3.66 P
14 USGS 9163500 2016-01-01 03:30 MST 3310 P 3.65 P
15 USGS 9163500 2016-01-01 03:45 MST 3310 P 3.65 P
16 USGS 9163500 2016-01-01 04:00 MST 3290 P 3.64 P
17 USGS 9163500 2016-01-01 04:15 MST 3270 P 3.63 P
18 USGS 9163500 2016-01-01 04:30 MST 3270 P 3.63 P
19 USGS 9163500 2016-01-01 04:45 MST 3260 P 3.62 P
20 USGS 9163500 2016-01-01 05:00 MST 3260 P 3.62 P
21 USGS 9163500 2016-01-01 05:15 MST 3230 P 3.60 P
22 USGS 9163500 2016-01-01 05:30 MST 3240 P 3.61 P
23 USGS 9163500 2016-01-01 05:45 MST 3230 P 3.60 P
24 USGS 9163500 2016-01-01 06:00 MST 3230 P 3.60 P
25 USGS 9163500 2016-01-01 06:15 MST 3230 P 3.59 P
26 USGS 9163500 2016-01-01 06:30 MST 3210 P 3.58 P
27 USGS 9163500 2016-01-01 06:45 MST 3210 P 3.58 P
28 USGS 9163500 2016-01-01 07:00 MST 3190 P 3.57 P
29 USGS 9163500 2016-01-01 07:15 MST 3160 P 3.55 P
... ... ... ... ... ... ... ... ...
642 USGS 9163500 2016-01-07 16:30 MST 3790 P 3.96 P
643 USGS 9163500 2016-01-07 16:45 MST 3770 P 3.95 P
644 USGS 9163500 2016-01-07 17:00 MST 3790 P 3.96 P
645 USGS 9163500 2016-01-07 17:15 MST 3770 P 3.95 P
646 USGS 9163500 2016-01-07 17:30 MST 3790 P 3.96 P
647 USGS 9163500 2016-01-07 17:45 MST 3770 P 3.95 P
648 USGS 9163500 2016-01-07 18:00 MST 3770 P 3.95 P
649 USGS 9163500 2016-01-07 18:15 MST 3770 P 3.95 P
650 USGS 9163500 2016-01-07 18:30 MST 3790 P 3.96 P
651 USGS 9163500 2016-01-07 18:45 MST 3770 P 3.95 P
652 USGS 9163500 2016-01-07 19:00 MST 3770 P 3.95 P
653 USGS 9163500 2016-01-07 19:15 MST 3790 P 3.96 P
654 USGS 9163500 2016-01-07 19:30 MST 3770 P 3.95 P
655 USGS 9163500 2016-01-07 19:45 MST 3770 P 3.95 P
656 USGS 9163500 2016-01-07 20:00 MST 3770 P 3.95 P
657 USGS 9163500 2016-01-07 20:15 MST 3770 P 3.95 P
658 USGS 9163500 2016-01-07 20:30 MST 3790 P 3.96 P
659 USGS 9163500 2016-01-07 20:45 MST 3790 P 3.96 P
660 USGS 9163500 2016-01-07 21:00 MST 3770 P 3.95 P
661 USGS 9163500 2016-01-07 21:15 MST 3790 P 3.96 P
662 USGS 9163500 2016-01-07 21:30 MST 3770 P 3.95 P
663 USGS 9163500 2016-01-07 21:45 MST 3770 P 3.95 P
664 USGS 9163500 2016-01-07 22:00 MST 3770 P 3.95 P
665 USGS 9163500 2016-01-07 22:15 MST 3790 P 3.96 P
666 USGS 9163500 2016-01-07 22:30 MST 3770 P 3.95 P
667 USGS 9163500 2016-01-07 22:45 MST 3770 P 3.95 P
668 USGS 9163500 2016-01-07 23:00 MST 3770 P 3.95 P
669 USGS 9163500 2016-01-07 23:15 MST 3770 P 3.95 P
670 USGS 9163500 2016-01-07 23:30 MST 3770 P 3.95 P
671 USGS 9163500 2016-01-07 23:45 MST 3770 P 3.95 P

672 rows × 8 columns


In [92]:
data = pd.read_csv('data/jan1_7_09163500.txt', skiprows=26, delimiter='\t')

In [93]:
data.columns


Out[93]:
array(['5s', '15s', '20d', '6s', '14n', '10s', '14n.1', '10s.1'], dtype=object)

In [139]:
# this is a list

column_names = ['agency','station_number','date_time','timezone','discharge','d_status','stage','s_status']

In [95]:
data.columns = column_names

In [96]:
data.head()


Out[96]:
agency station_number date_time timezone discharge d_status stage s_status
0 USGS 9163500 2016-01-01 00:00 MST 3510 P 3.78 P
1 USGS 9163500 2016-01-01 00:15 MST 3510 P 3.78 P
2 USGS 9163500 2016-01-01 00:30 MST 3490 P 3.77 P
3 USGS 9163500 2016-01-01 00:45 MST 3490 P 3.77 P
4 USGS 9163500 2016-01-01 01:00 MST 3470 P 3.76 P

In [97]:
%matplotlib inline
from matplotlib import pyplot as plt

In [98]:
data['discharge'].plot()


Out[98]:
<matplotlib.axes._subplots.AxesSubplot at 0x10722e510>

In [99]:
plt.plot(data['discharge'], data['stage'], 'k.')


Out[99]:
[<matplotlib.lines.Line2D at 0x10781ba50>]

In [100]:
data['stage_meters'] = data['stage'] * 0.3048

In [101]:
data.head()


Out[101]:
agency station_number date_time timezone discharge d_status stage s_status stage_meters
0 USGS 9163500 2016-01-01 00:00 MST 3510 P 3.78 P 1.152144
1 USGS 9163500 2016-01-01 00:15 MST 3510 P 3.78 P 1.152144
2 USGS 9163500 2016-01-01 00:30 MST 3490 P 3.77 P 1.149096
3 USGS 9163500 2016-01-01 00:45 MST 3490 P 3.77 P 1.149096
4 USGS 9163500 2016-01-01 01:00 MST 3470 P 3.76 P 1.146048

In [102]:
column_names =['agency','station_number','date_time','timezone','discharge','d_status','stage','stage_meters','s_status']

In [103]:
data = data[column_names]

In [104]:
data.head()


Out[104]:
agency station_number date_time timezone discharge d_status stage stage_meters s_status
0 USGS 9163500 2016-01-01 00:00 MST 3510 P 3.78 1.152144 P
1 USGS 9163500 2016-01-01 00:15 MST 3510 P 3.78 1.152144 P
2 USGS 9163500 2016-01-01 00:30 MST 3490 P 3.77 1.149096 P
3 USGS 9163500 2016-01-01 00:45 MST 3490 P 3.77 1.149096 P
4 USGS 9163500 2016-01-01 01:00 MST 3470 P 3.76 1.146048 P

In [105]:
del data['s_status']
del data['d_status']
del data['agency']

data.head()


Out[105]:
station_number date_time timezone discharge stage stage_meters
0 9163500 2016-01-01 00:00 MST 3510 3.78 1.152144
1 9163500 2016-01-01 00:15 MST 3510 3.78 1.152144
2 9163500 2016-01-01 00:30 MST 3490 3.77 1.149096
3 9163500 2016-01-01 00:45 MST 3490 3.77 1.149096
4 9163500 2016-01-01 01:00 MST 3470 3.76 1.146048

challenge

  1. Make a column for discharge in cubic meters per second [one cubic foot = 0.3048**3 cubic meters]

  2. Reorder the columns so the last four are discharge, discharge_meters, stage, stage_meters


In [162]:
# TALK HERE ABOUT INTEGER MULTIPLICATION

In [119]:
data['date_time'] = pd.to_datetime(data['date_time'])

In [143]:
data.plot(x = 'date_time', y = 'discharge')


Out[143]:
<matplotlib.axes._subplots.AxesSubplot at 0x10b111ed0>

In [128]:
data.index = data['date_time']

In [138]:
fig, ax1 = plt.subplots()

ax2 = ax1.twinx()
ax1.plot(data['discharge'], 'g-')
ax2.plot(data['stage'], 'b-')

ax1.set_xlabel('date')
ax1.set_ylabel('discharge', color='g')
ax2.set_ylabel('stage', color='b')

plt.show()



In [163]:
# pull more data using web services

In [255]:
url = 'http://nwis.waterservices.usgs.gov/nwis/iv/?format=rdb&sites=06730200&startDT=2013-09-08&endDT=2013-09-14&parameterCd=00060'

data2 = pd.read_csv(url, comment='#', delimiter='\t', header=1)

In [256]:
data2.dtypes


Out[256]:
5s     object
15s     int64
20d    object
6s     object
14n     int64
10s    object
dtype: object

In [257]:
data2.head()


Out[257]:
5s 15s 20d 6s 14n 10s
0 USGS 6730200 2013-09-08 00:00 MDT 37 A
1 USGS 6730200 2013-09-08 00:15 MDT 37 A
2 USGS 6730200 2013-09-08 00:30 MDT 38 A
3 USGS 6730200 2013-09-08 00:45 MDT 38 A
4 USGS 6730200 2013-09-08 01:00 MDT 38 A

In [ ]: