In [2]:
# Import the Python packages used throughout this notebook
import matplotlib.image as mpimg
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import xray
import xray.ufuncs as xu
import dask
import seaborn as sn
from datetime import datetime
from dask.diagnostics import ProgressBar
import warnings
import os
# Install a blanket "ignore" filter so no Python warnings are shown for the
# rest of the session.
warnings.simplefilter('ignore')
In [6]:
# Create a file list of all the netCDF files.
# glob.glob returns matches in arbitrary (filesystem-dependent) order; the
# list is sorted so every later step that walks it is deterministic.
import glob
fileList = sorted(glob.glob('/Users/carina/desktop/WRF_data/*.nc'))
fileList
Out[6]:
In [28]:
# This cell concatenates the WRF netCDF files listed in fileList into a single,
# better-organized dataset (dsTotal) holding precipitation and 2 m temperature.


def decode(d):
    """Convert one entry of the WRF ``Times`` variable into a ``datetime``.

    Parameters
    ----------
    d : bytes-like (e.g. ``numpy.bytes_``)
        UTF-8 encoded time stamp formatted as ``YYYY-MM-DD_HH:MM:SS``.

    Returns
    -------
    datetime.datetime
        The parsed time stamp.

    Raises
    ------
    ValueError
        If the decoded text does not match the expected format.
    """
    return datetime.strptime(d.decode(encoding='UTF-8'), "%Y-%m-%d_%H:%M:%S")


def _load_clean_dataset(path):
    """Read one WRF netCDF file and return a reorganized, in-memory Dataset.

    The result has data variables ``prec`` (from ``prehourly``) and ``temp2m``
    (from ``t2``) on dimensions ``(time, x, y)``, 2-D ``longitude`` /
    ``latitude`` coordinates on ``(x, y)``, and a ``time`` coordinate decoded
    from the file's ``Times`` variable.

    Parameters
    ----------
    path : str
        Path of the netCDF file to read.

    Returns
    -------
    xray.Dataset
    """
    # `.values` copies every array into memory, so the file handle can be
    # released as soon as the new Dataset has been built.
    with xray.open_dataset(path, engine='netcdf4') as ds:
        # Decode each element of Times into a datetime
        dates = ds.Times.to_series().apply(decode)
        return xray.Dataset(
            {'prec': (['time', 'x', 'y'], ds.prehourly.values),
             'temp2m': (['time', 'x', 'y'], ds.t2.values)},
            coords={'longitude': (['x', 'y'], ds.lon.values),
                    'latitude': (['x', 'y'], ds.lat.values),
                    'time': xray.DataArray.from_series(dates).values})


if not fileList:
    # Without this guard dsTotal would be undefined (or silently stale from an
    # earlier run of the notebook).
    raise ValueError('fileList is empty - there are no netCDF files to concatenate')

# Files are processed in sorted order so the pieces are joined along time in a
# reproducible sequence.
# NOTE(review): assumes the file names sort chronologically - confirm.
cleaned = [_load_clean_dataset(file) for file in sorted(fileList)]

# One concat over the whole list instead of re-concatenating inside the loop,
# which copied the accumulated data again for every file.
dsTotal = xray.concat(cleaned, dim='time')

# Add descriptive attributes to the combined dataset
dsTotal.attrs['prec'] = 'Precipitation Hourly [mm]'
dsTotal.attrs['temp2m'] = 'Two Meter Temperature [deg K]'
In [8]:
# Display the combined dataset to inspect its variables, coordinates and attributes
dsTotal
Out[8]:
In [9]:
%matplotlib notebook
# the %matplotlib notebook magic above is needed before plotting in python3
# Convert 2 m temperature from kelvin to degrees Celsius.
# Item assignment is required to add a variable to the Dataset: attribute
# assignment (dsTotal.temp2mC = ...) never creates a data variable and is
# rejected outright by current xarray releases. dsTotal.temp2mC still works
# for reading afterwards.
dsTotal['temp2mC'] = dsTotal.temp2m - 273.15
# Mean temperature field for each calendar month present in the record
ds_by_month = dsTotal.temp2mC.groupby('time.month').mean('time')
# Select August by its month label; positional ds_by_month[1] is merely the
# second month found in the data, whichever month that happens to be.
ds_by_month.sel(month=8).plot()  # plot mean August temperatures
Out[9]:
In [29]:
%matplotlib notebook
# Locate one WRF node by its longitude (unique on the warped WRF grid) and plot
# its temperature time series. This is the equivalent of MATLAB's 'find'.
target_lon = -125.07546997
lon_grid = dsTotal.coords['longitude'].values
# Exact `==` on floats only matches when the typed literal reproduces the
# stored value bit for bit, so take the grid cell closest to the requested
# longitude instead.
xycord = np.unravel_index(np.abs(lon_grid - target_lon).argmin(), lon_grid.shape)
xcord = xycord[0]  # index along the x dimension
ycord = xycord[1]  # index along the y dimension
dsTotal.temp2m.isel(x = xcord, y = ycord).plot() # select the coordinate location and plot a time series of temperature
# note: use isel when selecting by index values and sel when selecting by times or lat and long values
Out[29]:
In [12]:
%matplotlib notebook
# Map of the 2 m temperature field at a single model time step,
# drawn with 10 discrete levels.
snapshot_time = '2013-07-02T02:00:00'
DataArray = dsTotal['temp2m'].sel(time=snapshot_time)
DataArray.plot(levels=10)
Out[12]:
In [13]:
%matplotlib notebook
# Time series of 2 m temperature at grid indices x=0, y=0 -
# a quick way to plot when the indices are already known.
dsTotal['temp2m'].isel(x=0, y=0).plot()
Out[13]:
In [14]:
# For testing only: the final list of WRF nodes inside the watershed is not
# available yet, so take the longitudes of nine nodes (x index 0, y indices 0-8).
shortLongList = dsTotal['longitude'].values[0, :9]
shortLongList
Out[14]:
In [15]:
# Walk the longitude list and back-calculate the x and y indices needed to plot
# or extract data for each node.
# TODO: not working for the real node list yet - the lat/long csv comes from
# old WRF runs; the nodes inside the watershed still need to be identified and
# the csv file updated.
lon_grid = dsTotal.coords['longitude'].values
xcord, ycord = [], []
for Lon in shortLongList:
    # np.where plays the role of MATLAB's 'find'; each longitude is unique on
    # the warped WRF grid, so the first hit is the node we are after.
    rows, cols = np.where(lon_grid == Lon)
    xcord.append(rows[0])
    ycord.append(cols[0])
ycord
Out[15]:
In [16]:
# Example: select the temperature values at the 9 points.
# xcord / ycord hold positional indices (they come from np.where), so the
# index-based selector isel_points is used rather than the label-based
# sel_points - isel for index values, sel for times or lat/long values.
selectTemp = dsTotal.temp2m.isel_points(x = xcord, y = ycord)
selectTemp
Out[16]:
In [18]:
# Turn the point selection into a pandas object so it can be saved with pandas tools
final_nine_nodes = selectTemp.to_series() # converts the xray DataArray to a pandas Series
final_nine_nodes
Out[18]:
In [19]:
final_nine_nodes.to_csv('temp_nine_nodes.csv') # save the data in CSV format (relative path, so it lands in the current working directory)
In [20]:
# Now we want to read all the files and concatenate them by model run.
# glob.glob returns matches in arbitrary (filesystem-dependent) order; the
# list is sorted so every later step that walks it is deterministic.
import glob
fileList = sorted(glob.glob('/Users/carina/desktop/WRF_data/*.nc'))
fileList
Out[20]:
In [31]:
# Loop through the files, keep the ones that belong to a particular model run,
# rearrange their variables and add attributes for the variables of interest -
# in this case precipitation and temperature.
# NOTE(review): a file is kept when its path contains ANY tag in fileFinder; if
# a "model run" is meant to match all tags at once, the filter needs changing.
fileFinder = ['NARR', 'Morr']


def decode(d):
    """Convert one entry of the WRF ``Times`` variable into a ``datetime``.

    Parameters
    ----------
    d : bytes-like (e.g. ``numpy.bytes_``)
        UTF-8 encoded time stamp formatted as ``YYYY-MM-DD_HH:MM:SS``.

    Returns
    -------
    datetime.datetime
        The parsed time stamp.

    Raises
    ------
    ValueError
        If the decoded text does not match the expected format.
    """
    return datetime.strptime(d.decode(encoding='UTF-8'), "%Y-%m-%d_%H:%M:%S")


def _load_clean_dataset(path):
    """Read one WRF netCDF file and return a reorganized, in-memory Dataset.

    The result has data variables ``prec`` (from ``prehourly``) and ``temp2m``
    (from ``t2``) on dimensions ``(time, x, y)``, 2-D ``longitude`` /
    ``latitude`` coordinates on ``(x, y)``, and a ``time`` coordinate decoded
    from the file's ``Times`` variable.

    Parameters
    ----------
    path : str
        Path of the netCDF file to read.

    Returns
    -------
    xray.Dataset
    """
    # `.values` copies every array into memory, so the file handle can be
    # released as soon as the new Dataset has been built.
    with xray.open_dataset(path, engine='netcdf4') as ds:
        # Decode each element of Times into a datetime
        dates = ds.Times.to_series().apply(decode)
        return xray.Dataset(
            {'prec': (['time', 'x', 'y'], ds.prehourly.values),
             'temp2m': (['time', 'x', 'y'], ds.t2.values)},
            coords={'longitude': (['x', 'y'], ds.lon.values),
                    'latitude': (['x', 'y'], ds.lat.values),
                    'time': xray.DataArray.from_series(dates).values})


files_read = set()  # paths already loaded, so a file matching several tags is read only once
cleaned = []        # one reorganized Dataset per accepted file, in processing order

# Files are processed in sorted order so the pieces are joined along time in a
# reproducible sequence.
# NOTE(review): assumes the file names sort chronologically - confirm.
for file in sorted(fileList):
    for model in fileFinder:
        if file.find(model) == -1:
            continue
        print('Model: {}'.format(model))
        print('File: "{}"'.format(file))
        if file in files_read:
            print('Ignoring file "{}"'.format(file))
            break  # break out of the model loop as we already processed the file
        files_read.add(file)
        cleaned.append(_load_clean_dataset(file))

if not cleaned:
    # Without this guard dsTotal would be undefined (or silently stale from an
    # earlier cell).
    raise ValueError('no file in fileList matches any of {}'.format(fileFinder))

# One concat over the whole list instead of re-concatenating inside the loop,
# which copied the accumulated data again for every file.
dsTotal = xray.concat(cleaned, dim='time')

# Add descriptive attributes to the combined dataset
dsTotal.attrs['prec'] = 'Precipitation Hourly [mm]'
dsTotal.attrs['temp2m'] = 'Two Meter Temperature [deg K]'
In [32]:
%matplotlib notebook
# Time series of 2 m temperature at grid indices x=0, y=0 -
# a quick way to plot when the indices are already known.
dsTotal['temp2m'].isel(x=0, y=0).plot()
Out[32]:
In [ ]: