In [2]:
# Import a bunch of python sub packages
import matplotlib.image as mpimg
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import xray
import xray.ufuncs as xu
import dask
import seaborn as sn
from datetime import datetime
from dask.diagnostics import ProgressBar
import warnings
import os

warnings.filterwarnings('ignore')

In [6]:
# Create a file list of all the netCDF files
import glob
fileList = glob.glob('/Users/carina/desktop/WRF_data/*.nc')
fileList


Out[6]:
['/Users/carina/desktop/WRF_data/surfoutd02_CONUSnl_WRF_NARR_Mo1_2013-11.nc',
 '/Users/carina/desktop/WRF_data/surfoutd02_CONUSnl_WRF_NARR_Morr_2013-07.nc',
 '/Users/carina/desktop/WRF_data/surfoutd02_CONUSnl_WRF_NARR_Morr_2013-08.nc',
 '/Users/carina/desktop/WRF_data/surfoutd02_CONUSnl_WRF_NARR_Morr_2013-09.nc',
 '/Users/carina/desktop/WRF_data/surfoutd02_CONUSnl_WRF_NARR_Morr_2013-10.nc']

In [28]:
# This cell converts concatenates the netCDF files and creates new ones that are better organized

# Create a function that will take the way the time is stored as a variable in the 
# WRF netCDF files and decode it
# Build a function to go from numpy.bytes_ to a string - WRF files were compressed
def decode(d):
    decoded = datetime.strptime(d.decode(encoding='UTF-8'),  "%Y-%m-%d_%H:%M:%S")
    return decoded

first = True # create a boolean for the loop below


# Stores the creates a new netCDF files with coordinat
for file in fileList:
    if first:      
        ds = xray.open_dataset(file , engine = 'netcdf4') # may not need enginer = 'netcdf4' others are using engine = 'scipy'
        dates = ds.Times.to_series().apply(decode) # this creates a list of the varaibles Times in the netCDF file ds and then applies the decode function to each element
        dsTotal = xray.Dataset({'prec': (['time','x','y'], ds.prehourly.values),
                      'temp2m':(['time','x','y'], ds.t2.values)},
                       coords = {'longitude': (['x','y'], ds.lon.values),
                              'latitude': (['x','y'], ds.lat.values),
                              'time': xray.DataArray.from_series(dates).values})
                # first the prec and temp2m variables are created
                # then we add coordinates
        
    
        first = False # now that new 'clean' netCDF file is created switch the boolean operator so the below is run and we finally concatenate
    
    # the rest does the same as above but concatenates the end
    else:
        ds = xray.open_dataset(file, engine = 'netcdf4' )
        dates = ds.Times.to_series().apply(decode)
        dsTemp = xray.Dataset({'prec': (['time','x','y'], ds.prehourly.values),
                      'temp2m':(['time','x','y'], ds.t2.values)},
                       coords = {'longitude': (['x','y'], ds.lon.values),
                              'latitude': (['x','y'], ds.lat.values),
                              'time': xray.DataArray.from_series(dates).values})
        
        dsTotal = xray.concat([dsTotal, dsTemp], dim = 'time') # this is concatenating

        
# add attributes to the netCDF
dsTotal.attrs['prec'] = 'Precipitation Hourly [mm]'
dsTotal.attrs['temp2m'] = 'Two Meter Temperature [deg K]'

In [8]:
dsTotal


Out[8]:
<xray.Dataset>
Dimensions:    (time: 3672, x: 180, y: 150)
Coordinates:
    latitude   (x, y) float32 32.5872 32.588 32.5887 32.5894 32.5901 32.5907 ...
    longitude  (x, y) float32 -125.395 -125.331 -125.267 -125.203 -125.139 ...
  * x          (x) int64 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 ...
  * y          (y) int64 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 ...
  * time       (time) datetime64[ns] 2013-11-01 2013-11-01T01:00:00 ...
Data variables:
    temp2m     (time, x, y) float32 290.044 290.06 290.073 290.095 290.122 ...
    prec       (time, x, y) float32 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ...
Attributes:
    prec: Precipitation Hourly [mm]
    temp2m: Two Meter Temperature [deg K]

In [9]:
%matplotlib notebook 
# this is needed before plotting in python3
dsTotal.temp2mC=dsTotal.temp2m-273.15 # convert temperature to deg C
ds_by_month = dsTotal.temp2mC.groupby('time.month').mean('time') # group temperatures by month
ds_by_month[1].plot() # plot mean August temperatures


Out[9]:
<matplotlib.collections.QuadMesh at 0x130798080>

In [29]:
%matplotlib notebook 
xycord = np.where(dsTotal.coords['longitude'].values == -125.07546997) # similar to MATLAB's 'find' function. Insert longitude which is unique due to the warped nature of the WRF grid and find the indices assosciated with it
xcord = xycord[0][0] # find xcord from the tuple
ycord = xycord[1][0]
dsTotal.temp2m.isel(x = xcord, y = ycord).plot() # select the coordiante location and plot a time-series of temperature
# note use isel when grouping by index values and sel when grouping by times or lat and long values


Out[29]:
[<matplotlib.lines.Line2D at 0x118c95208>]

In [12]:
%matplotlib notebook 
DataArray = dsTotal.temp2m.sel(time = '2013-07-02T02:00:00')
DataArray.plot(levels=10)


Out[12]:
<matplotlib.collections.QuadMesh at 0x1317b1860>

In [13]:
%matplotlib notebook 
dsTotal.temp2m[:,0,0].plot() # a simple way to plot a time-series if you know the indices


Out[13]:
[<matplotlib.lines.Line2D at 0x107369390>]

In [14]:
#this is for testing, because we dont have the final list of WRF nodes that are within the watershed - this reads the latof the top 9 nodes
shortLongList = dsTotal.coords['longitude'].values[0,0:9]
shortLongList


Out[14]:
array([-125.39456177, -125.33074951, -125.266922  , -125.20309448,
       -125.13928223, -125.07546997, -125.01165771, -124.9478302 ,
       -124.88400269], dtype=float32)

In [15]:
#this loops through the lon list and back calculates the x and y indices needed to plot or extract data
xcord = []
ycord = []

#this is not working yet because the list of lat - long is old from old WRF runs - need to identify the nodes in my watershed
#need to update the csv file
for Lon in shortLongList:
    xycord = np.where(dsTotal.coords['longitude'].values == Lon) # similar to MATLAB's 'find' function. Insert longitude which is unique due to the warped nature of the WRF grid and find the indices assosciated with it
    xcord.append(xycord[0][0]) # find xcord from the tuple
    ycord.append(xycord[1][0])
    
ycord


Out[15]:
[0, 1, 2, 3, 4, 5, 6, 7, 8]

In [16]:
#this is an example of seleting temperature values at the 9 points
selectTemp = dsTotal.temp2m.sel_points(x = xcord, y = ycord)
selectTemp


Out[16]:
<xray.DataArray 'temp2m' (points: 9, time: 3672)>
array([[ 290.04446411,  289.70251465,  288.94180298, ...,  290.21450806,
         290.23049927,  290.18441772],
       [ 290.05984497,  289.74081421,  289.04653931, ...,  290.20788574,
         290.22393799,  290.18963623],
       [ 290.07324219,  289.78927612,  289.16171265, ...,  290.18011475,
         290.20965576,  290.1882019 ],
       ..., 
       [ 290.1918335 ,  290.02316284,  289.73822021, ...,  290.11224365,
         290.20162964,  290.24057007],
       [ 290.22180176,  290.06787109,  289.85028076, ...,  290.10317993,
         290.20736694,  290.25274658],
       [ 290.20632935,  290.07546997,  289.90557861, ...,  290.05148315,
         290.17553711,  290.23208618]], dtype=float32)
Coordinates:
  * time       (time) datetime64[ns] 2013-11-01 2013-11-01T01:00:00 ...
    x          (points) int64 0 0 0 0 0 0 0 0 0
    longitude  (points) float32 -125.395 -125.331 -125.267 -125.203 -125.139 ...
    latitude   (points) float32 32.5872 32.588 32.5887 32.5894 32.5901 ...
    y          (points) int64 0 1 2 3 4 5 6 7 8
  * points     (points) int64 0 1 2 3 4 5 6 7 8

In [18]:
final_nine_nodes = selectTemp.to_series() #converts xray to pandas timeseries
final_nine_nodes


Out[18]:
points  time               
0       2013-11-01 00:00:00    290.044464
        2013-11-01 01:00:00    289.702515
        2013-11-01 02:00:00    288.941803
        2013-11-01 03:00:00    288.039276
        2013-11-01 04:00:00    287.661987
        2013-11-01 05:00:00    287.530396
        2013-11-01 06:00:00    287.737671
        2013-11-01 07:00:00    288.157562
        2013-11-01 08:00:00    288.202271
        2013-11-01 09:00:00    288.219818
        2013-11-01 10:00:00    288.305359
        2013-11-01 11:00:00    288.548401
        2013-11-01 12:00:00    288.868408
        2013-11-01 13:00:00    289.116241
        2013-11-01 14:00:00    289.265320
        2013-11-01 15:00:00    289.324005
        2013-11-01 16:00:00    289.438904
        2013-11-01 17:00:00    289.312561
        2013-11-01 18:00:00    289.134155
        2013-11-01 19:00:00    289.073181
        2013-11-01 20:00:00    289.074066
        2013-11-01 21:00:00    289.036316
        2013-11-01 22:00:00    288.975067
        2013-11-01 23:00:00    288.915375
        2013-11-02 00:00:00    288.908936
        2013-11-02 01:00:00    288.915924
        2013-11-02 02:00:00    288.911865
        2013-11-02 03:00:00    288.856079
        2013-11-02 04:00:00    288.753479
        2013-11-02 05:00:00    288.736816
                                  ...    
8       2013-10-30 18:00:00    290.413757
        2013-10-30 19:00:00    290.443542
        2013-10-30 20:00:00    290.483337
        2013-10-30 21:00:00    290.479248
        2013-10-30 22:00:00    290.441528
        2013-10-30 23:00:00    290.373566
        2013-10-31 00:00:00    290.320038
        2013-10-31 01:00:00    290.271240
        2013-10-31 02:00:00    290.195831
        2013-10-31 03:00:00    290.088623
        2013-10-31 04:00:00    289.974579
        2013-10-31 05:00:00    289.928070
        2013-10-31 06:00:00    289.935699
        2013-10-31 07:00:00    289.993713
        2013-10-31 08:00:00    290.028931
        2013-10-31 09:00:00    290.028015
        2013-10-31 10:00:00    290.005096
        2013-10-31 11:00:00    289.981262
        2013-10-31 12:00:00    289.969025
        2013-10-31 13:00:00    289.974701
        2013-10-31 14:00:00    290.025177
        2013-10-31 15:00:00    289.966400
        2013-10-31 16:00:00    289.864868
        2013-10-31 17:00:00    289.836761
        2013-10-31 18:00:00    289.837006
        2013-10-31 19:00:00    289.849823
        2013-10-31 20:00:00    289.929779
        2013-10-31 21:00:00    290.051483
        2013-10-31 22:00:00    290.175537
        2013-10-31 23:00:00    290.232086
Name: temp2m, dtype: float32

In [19]:
final_nine_nodes.to_csv('temp_nine_nodes.csv') #this will save data in cvs format

In [20]:
# now we want to read all the files and concatenate by model run
import glob
fileList = glob.glob('/Users/carina/desktop/WRF_data/*.nc')
fileList


Out[20]:
['/Users/carina/desktop/WRF_data/surfoutd02_CONUSnl_WRF_NARR_Mo1_2013-11.nc',
 '/Users/carina/desktop/WRF_data/surfoutd02_CONUSnl_WRF_NARR_Morr_2013-07.nc',
 '/Users/carina/desktop/WRF_data/surfoutd02_CONUSnl_WRF_NARR_Morr_2013-08.nc',
 '/Users/carina/desktop/WRF_data/surfoutd02_CONUSnl_WRF_NARR_Morr_2013-09.nc',
 '/Users/carina/desktop/WRF_data/surfoutd02_CONUSnl_WRF_NARR_Morr_2013-10.nc']

In [31]:
#this will loop through the files and extract the ones for a particular model run, will rearrange the variables and add attributes to variables 
#of inteest, in this case precipitation and temperature

fileFinder = ['NARR', 'Morr']

def decode(d):
    decoded = datetime.strptime(d.decode(encoding='UTF-8'),  "%Y-%m-%d_%H:%M:%S")
    return decoded

first = True # create a boolean for the loop below

files_read = set()

for file in fileList:
    for model in fileFinder:
        if file.find(model) != -1:
                print('Model: {}'.format(model))
                print('File: "{}"'.format(file))
                if file in files_read:
                    print('Ignoring file "{}"'.format(file))
                    break # break out of the model for loop as we already processed the file
                files_read.add(file)
                if first:      
                    ds = xray.open_dataset(file , engine = 'netcdf4') # may not need enginer = 'netcdf4' others are using engine = 'scipy'
                    dates = ds.Times.to_series().apply(decode) # this creates a list of the varaibles Times in the netCDF file ds and then applies the decode function to each element
                    dsTotal = xray.Dataset({'prec': (['time','x','y'], ds.prehourly.values),
                                  'temp2m':(['time','x','y'], ds.t2.values)},
                                   coords = {'longitude': (['x','y'], ds.lon.values),
                                          'latitude': (['x','y'], ds.lat.values),
                                          'time': xray.DataArray.from_series(dates).values})
                            # first the prec and temp2m variables are created
                            # then we add coordinates


                    first = False # now that new 'clean' netCDF file is created switch the boolean operator so the below is run and we finally concatenate

                # the rest does the same as above but concatenates the end
                else:
                    ds = xray.open_dataset(file, engine = 'netcdf4' )
                    dates = ds.Times.to_series().apply(decode)
                    dsTemp = xray.Dataset({'prec': (['time','x','y'], ds.prehourly.values),
                                  'temp2m':(['time','x','y'], ds.t2.values)},
                                   coords = {'longitude': (['x','y'], ds.lon.values),
                                          'latitude': (['x','y'], ds.lat.values),
                                          'time': xray.DataArray.from_series(dates).values})

                    dsTotal = xray.concat([dsTotal, dsTemp], dim = 'time') # this is concatenating


        # add attributes to the netCDF
dsTotal.attrs['prec'] = 'Precipitation Hourly [mm]'        
dsTotal.attrs['temp2m'] = 'Two Meter Temperature [deg K]'


Model: NARR
File: "/Users/carina/desktop/WRF_data/surfoutd02_CONUSnl_WRF_NARR_Mo1_2013-11.nc"
Model: NARR
File: "/Users/carina/desktop/WRF_data/surfoutd02_CONUSnl_WRF_NARR_Morr_2013-07.nc"
Model: Morr
File: "/Users/carina/desktop/WRF_data/surfoutd02_CONUSnl_WRF_NARR_Morr_2013-07.nc"
Ignoring file "/Users/carina/desktop/WRF_data/surfoutd02_CONUSnl_WRF_NARR_Morr_2013-07.nc"
Model: NARR
File: "/Users/carina/desktop/WRF_data/surfoutd02_CONUSnl_WRF_NARR_Morr_2013-08.nc"
Model: Morr
File: "/Users/carina/desktop/WRF_data/surfoutd02_CONUSnl_WRF_NARR_Morr_2013-08.nc"
Ignoring file "/Users/carina/desktop/WRF_data/surfoutd02_CONUSnl_WRF_NARR_Morr_2013-08.nc"
Model: NARR
File: "/Users/carina/desktop/WRF_data/surfoutd02_CONUSnl_WRF_NARR_Morr_2013-09.nc"
Model: Morr
File: "/Users/carina/desktop/WRF_data/surfoutd02_CONUSnl_WRF_NARR_Morr_2013-09.nc"
Ignoring file "/Users/carina/desktop/WRF_data/surfoutd02_CONUSnl_WRF_NARR_Morr_2013-09.nc"
Model: NARR
File: "/Users/carina/desktop/WRF_data/surfoutd02_CONUSnl_WRF_NARR_Morr_2013-10.nc"
Model: Morr
File: "/Users/carina/desktop/WRF_data/surfoutd02_CONUSnl_WRF_NARR_Morr_2013-10.nc"
Ignoring file "/Users/carina/desktop/WRF_data/surfoutd02_CONUSnl_WRF_NARR_Morr_2013-10.nc"

In [32]:
%matplotlib notebook 
dsTotal.temp2m[:,0,0].plot() # a simple way to plot a time-series if you know the indices


Out[32]:
[<matplotlib.lines.Line2D at 0x118c85ac8>]

In [ ]: