Using the PyCDS package as an interface to the Provincial Climate Data Set database


In [1]:
import datetime

from pycds import *
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker
from sqlalchemy import and_, or_

We need to do a few things to set up our database session, and then the session object is used to make all our queries


In [2]:
connection_string = 'postgresql+psycopg2://hiebert@atlas.pcic/crmp'
engine = create_engine(connection_string)
Session = sessionmaker(bind=engine)
session = Session()

Query and print the names of all networks in the database


In [3]:
for net, in session.query(Network.name).order_by(Network.name):
    print(net)


AGRI
ARD
ARDA
BCH
EC
EC_AHCCD
EC_Buoy
EC_raw
ENV-AQN
ENV-ASP
FLNRO-FERN
FLNRO-WMB
FRBC
MoTI
MoTIe
MoTIm
MVan
RTA

Print the first 10 stations in the EC network when ordered ascencding


In [4]:
for station in session.query(Station).filter(Network.name=='EC').order_by(Station.native_id.asc())[:10]:
    print(station.native_id)


0110031
0260011
0450270
0770703
0770705
0770708
08GA071
08GE003
08GF005
08GF006

Count the observations for station 1010066 in the EC network We must use explicit query joins here or sqlalchemy creates ridculous crossjoins and subqueries


In [5]:
query = session.query(Obs).join(History).join(Station).join(Network).filter(Network.name=='EC').filter(Station.native_id=='1010066')
print(query.count())


15857

Find all history_ids for station 1010066 in the EC network using filter criteria


In [6]:
for hist, in session.query(History.id).filter(History.station_id==Station.id).filter(and_(Station.native_id=='1010066', Network.name=='EC')):
    print(hist)


404

Find all history ids representing EC native_id 1010066 using explicit joins


In [7]:
for hist, in session.query(History.id).join(Station).join(Network).filter(Network.name=='EC').filter(Station.native_id=='1010066'):
    print(hist)


404

Count the number of observations from this year


In [8]:
this_year = datetime.datetime(2015, 1, 1)
q = session.query(Obs).filter(Obs.time >= this_year)
print(q.count())


13226315

You can access related objects through object attributes. For example, each Network object has a list of its stations in the stations attribute and a list of each available network variable in the variables object. These are Python native objects, all with the proper typing.


In [9]:
q = session.query(Network)
net = q.first()
net.name


Out[9]:
u'ENV-ASP'

In [10]:
net.stations


Out[10]:
[<pycds.Station at 0x7f6690bea710>,
 <pycds.Station at 0x7f6690bea8d0>,
 <pycds.Station at 0x7f6690bea790>,
 <pycds.Station at 0x7f6690bea9d0>,
 <pycds.Station at 0x7f6690bea210>,
 <pycds.Station at 0x7f6690bfc1d0>,
 <pycds.Station at 0x7f6690bfc710>,
 <pycds.Station at 0x7f6690bfc7d0>,
 <pycds.Station at 0x7f6690bfc890>,
 <pycds.Station at 0x7f6690bfc950>,
 <pycds.Station at 0x7f6690bfca10>,
 <pycds.Station at 0x7f6690bfcad0>,
 <pycds.Station at 0x7f6690bfcb90>,
 <pycds.Station at 0x7f6690bfcc50>,
 <pycds.Station at 0x7f6690bfcd10>,
 <pycds.Station at 0x7f6690bfcdd0>,
 <pycds.Station at 0x7f6690bfce90>,
 <pycds.Station at 0x7f6690bfcf50>,
 <pycds.Station at 0x7f6690b97050>,
 <pycds.Station at 0x7f6690b97110>,
 <pycds.Station at 0x7f6690b971d0>,
 <pycds.Station at 0x7f6690b97290>,
 <pycds.Station at 0x7f6690b97350>,
 <pycds.Station at 0x7f6690b97410>,
 <pycds.Station at 0x7f66a4070c10>,
 <pycds.Station at 0x7f66a5b97c50>,
 <pycds.Station at 0x7f6690b974d0>,
 <pycds.Station at 0x7f6690b97590>,
 <pycds.Station at 0x7f6690b97650>,
 <pycds.Station at 0x7f6690b97710>,
 <pycds.Station at 0x7f6690b977d0>,
 <pycds.Station at 0x7f6690b97890>,
 <pycds.Station at 0x7f6690b97950>,
 <pycds.Station at 0x7f6690b97a10>,
 <pycds.Station at 0x7f6690b97ad0>,
 <pycds.Station at 0x7f6690b97b90>,
 <pycds.Station at 0x7f6690b97c50>,
 <pycds.Station at 0x7f6690b97d10>,
 <pycds.Station at 0x7f6690b97dd0>,
 <pycds.Station at 0x7f6690b97e90>,
 <pycds.Station at 0x7f6690b97f50>,
 <pycds.Station at 0x7f6690b9b050>,
 <pycds.Station at 0x7f6690b9b110>,
 <pycds.Station at 0x7f6690b9b1d0>,
 <pycds.Station at 0x7f6690b9b290>,
 <pycds.Station at 0x7f6690b9b350>,
 <pycds.Station at 0x7f6690b9b410>,
 <pycds.Station at 0x7f6690b9b4d0>,
 <pycds.Station at 0x7f6690b9b590>,
 <pycds.Station at 0x7f6690b9b650>,
 <pycds.Station at 0x7f6690b9b710>,
 <pycds.Station at 0x7f6690b9b7d0>,
 <pycds.Station at 0x7f6690b9b890>,
 <pycds.Station at 0x7f6690b9b950>,
 <pycds.Station at 0x7f6690b9ba10>,
 <pycds.Station at 0x7f6690b9bad0>,
 <pycds.Station at 0x7f6690b9bb90>,
 <pycds.Station at 0x7f6690b9bc50>,
 <pycds.Station at 0x7f6690b9bd10>,
 <pycds.Station at 0x7f6690b9bdd0>,
 <pycds.Station at 0x7f6690b9be90>,
 <pycds.Station at 0x7f6690b9bf50>,
 <pycds.Station at 0x7f6690ba3050>,
 <pycds.Station at 0x7f6690ba3110>,
 <pycds.Station at 0x7f6690ba31d0>,
 <pycds.Station at 0x7f6690ba3290>,
 <pycds.Station at 0x7f6690ba3350>,
 <pycds.Station at 0x7f6690ba3410>,
 <pycds.Station at 0x7f6690ba34d0>,
 <pycds.Station at 0x7f6690ba3590>,
 <pycds.Station at 0x7f6690ba3650>]

In [11]:
for s in net.stations:
    print(s.id, s.native_id)


(2572, u'4B16P')
(2522, u'1C18P')
(2542, u'2A17P')
(2562, u'3B17P')
(2561, u'3A25P')
(2528, u'1D12P')
(2563, u'3B23P')
(2514, u'1A03P')
(2539, u'1F03P')
(2557, u'3A17P')
(2534, u'1E02P')
(2568, u'4A29P')
(2546, u'2B08P')
(2549, u'2C10P')
(2531, u'1D17P')
(2536, u'1E10P')
(2527, u'1D11P')
(2577, u'4C14P')
(2519, u'1B02P')
(2533, u'1D19P')
(2547, u'2C09P')
(2558, u'3A22P')
(2582, u'4D14P')
(2543, u'2A21P')
(2575, u'4C09P')
(2569, u'4A30P')
(2541, u'2A06P')
(2520, u'1B08P')
(2532, u'1D18P')
(2560, u'3A24P')
(2525, u'1D06P')
(2581, u'4D13P')
(2529, u'1D14P')
(2540, u'1F06P')
(2579, u'4D10P')
(2556, u'2G03P')
(2521, u'1C12P')
(2559, u'3A23P')
(2548, u'2C09Q')
(2545, u'2B06P')
(2513, u'1A02P')
(2518, u'1B01P')
(2517, u'1A19P')
(2516, u'1A17P')
(2576, u'4C11P')
(2538, u'1E14P')
(2512, u'1A01P')
(2566, u'4A09P')
(2565, u'4A02P')
(2552, u'2D14P')
(2515, u'1A14P')
(2574, u'4B18P')
(2550, u'2C14P')
(2554, u'2F05P')
(2573, u'4B17P')
(2526, u'1D09P')
(2535, u'1E08P')
(2570, u'4B12P')
(2551, u'2D08P')
(2555, u'2F18P')
(2567, u'4A27P')
(2523, u'1C20P')
(2578, u'4D08P')
(2537, u'1E11P')
(2524, u'1C41P')
(2571, u'4B15P')
(2544, u'2A30P')
(2580, u'4D11P')
(2553, u'2E07P')
(2564, u'3C08P')
(2530, u'1D15P')

Note that the station id, which is an integer in the database, is automatically an integer when we access it as well. Likewise for the native_id which are free form strings in the database and here in Python.


In [12]:
net.variables


Out[12]:
[<pycds.Variable at 0x7f6690ba7cd0>,
 <pycds.Variable at 0x7f6690ba7d50>,
 <pycds.Variable at 0x7f6690ba7dd0>,
 <pycds.Variable at 0x7f6690ba7e50>,
 <pycds.Variable at 0x7f6690ba7ed0>,
 <pycds.Variable at 0x7f6690ba7f90>,
 <pycds.Variable at 0x7f6690bb7090>,
 <pycds.Variable at 0x7f6690bb7150>,
 <pycds.Variable at 0x7f6690bb7210>,
 <pycds.Variable at 0x7f6690bb72d0>]

In [13]:
for v in net.variables:
    print(v.standard_name, v.unit)


(u'air_temperature', u'celsius')
(u'surface_snow_thickness', u'cm')
(u'thickness_of_snowfall_amount', u'cm')
(u'lwe_thickness_of_precipitation_amount', u'mm')
(u'air_temperature', u'celsius')
(u'thickness_of_rainfall_amount', u'mm')
(u'lwe_thickness_of_precipitation_amount', u'mm')
(u'air_temperature', u'celsius')
(u'air_temperature', u'celsius')
(u'air_temperature', u'celsius')

In [ ]: