In [ ]:
! module use /g/data3/hh5/public/modules
! module load conda/analysis27
In [2]:
from ARCCSSive import CMIP5
from ARCCSSive.CMIP5.Model import Instance
from ARCCSSive.CMIP5.other_functions import unique
db=CMIP5.connect()
Let's start from a simple query and see how we can use different operators to refine it.
In [ ]:
results=db.outputs(ensemble='r1i1p1',experiment='rcp45',mip='day')
results.count()
In [ ]:
miroc5=results.filter(Instance.model == 'MIROC5')
miroc5.count()
unique(miroc5,'model')
In [ ]:
not_miroc5=results.filter(Instance.model != 'MIROC5')
not_miroc5.count()
'MIROC5' in unique(not_miroc5,'model')
In [ ]:
miroc_models=results.filter(Instance.model.like('MIROC%'))
# miroc_models=results.filter(Instance.model.like('miroc%'))
miroc_models.count()
unique(miroc_models,'model')
In [ ]:
tasmin_tasmax=results.filter(Instance.variable.in_(['tasmin','tasmax']))
tasmin_tasmax.count()
unique(tasmin_tasmax,'variable')
In [ ]:
not_tasmin_tasmax=results.filter(~Instance.variable.in_(['tasmin','tasmax']))
not_tasmin_tasmax.count()
print(unique(not_tasmin_tasmax,'variable'))
The AND and OR operators need to be explicitly imported
In [ ]:
from sqlalchemy import and_, or_
In [ ]:
miroc5_tas=results.filter(and_(Instance.model == 'MIROC5',Instance.variable == 'tas'))
print( miroc5_tas.count() )
print( unique(miroc5_tas,'model') )
print( unique(miroc5_tas,'variable') )
Let's try using the same two constraints as consecutive filter calls.
In [ ]:
miroc5_tas=results.filter(Instance.model == 'MIROC5').filter(Instance.variable == 'tas')
print( miroc5_tas.count() )
print( unique(miroc5_tas,'model') )
print( unique(miroc5_tas,'variable') )
and_ returns the same results as using two filters one after the other.
Let's try to pass two constraints directly to the same filter call.
In [ ]:
miroc5_tas=results.filter(Instance.model == 'MIROC5', Instance.variable == 'tas')
print( miroc5_tas.count() )
print( unique(miroc5_tas,'model') )
print( unique(miroc5_tas,'variable') )
Again we're getting the same result as if we used and_.
In [ ]:
miroc5_or_clt=results.filter(or_(Instance.model == 'MIROC5', Instance.variable == 'clt'))
miroc5_or_clt.count()
for o in miroc5_or_clt:
print( o.model, o.variable )
Let's check exactly what the outputs() function returns
In [3]:
results=db.outputs(variable='tas',experiment='historical',mip='Amon',model='MIROC5')
print(type(results))
outputs( ) is a method of an ARCCSSive session which is actually using the SQLalchemy Session.query( ) method. For example in this case is equivalent to
db.query( )
results is a query object. This means that we haven't yet retrieve any actual value from the Instance table.
What SQLalchemy has done up to now is to generate an SQL query statement from our input arguments to pass
to the database.
The SQL statement is executed only when we explicitly retrieve the results to use them.
This why the query is always instantaneous
Try to run
results=db.output()
that effectivily retrieves the entire Istance table and see how long it takes.
In [4]:
results=db.outputs()
results.count()
Out[4]:
We can directly loop through the query object returned or we can use one of the methods that return a value, as: all( ), one( ) and first( ).
In [5]:
results=db.outputs(variable='tas',experiment='historical',mip='Amon',model='MIROC5').all()
print(type(results))
print( results )
If we specified all the 5 constraints we can pass to the outputs function we should always get only one row back, since you cannot have two rows sharing all these values.
In this case we can use the one( ) method to return that row.
In [12]:
result=db.outputs(variable='tas',experiment='historical',mip='Amon',model='MIROC5',ensemble='r1i1p1').one()
print(type(result))
Let'see what happens if you use one( ) with a query that returns multiple rows.
In [11]:
result=db.outputs(variable='tas',experiment='historical',mip='Amon',model='MIROC5').one()
This generates an error, so we should use only when we are expecting one row back or if we want to generate two different responses inc ase a query returns one or many rows.
If we have multiple rows returned by the query we use can use the method first() to get only the first result.
In [10]:
result=db.outputs(variable='tas',experiment='historical',mip='Amon',model='MIROC5').first()
print(type(result))
Another useful method of the query is order_by( ).
In [14]:
results=db.outputs(variable='tas',experiment='historical',mip='Amon',model='MIROC5').order_by(Instance.ensemble)
for o in results:
print(o.ensemble)
In [ ]: