In [6]:
# IPython magic: render matplotlib figures inline, in the output area of the cell that draws them.
%matplotlib inline
In [7]:
def cdf_to_dataframe(netcdf_file, exclude_qc=True):
    """Load the time-dependent variables of a netCDF file into a pandas DataFrame.

    Every variable whose dimensions equal those of the file's ``time``
    variable becomes one column (``time`` itself included). The index is
    ``base_time + time``, where ``base_time`` is read as seconds since the
    Unix epoch (UTC, timezone-naive) and ``time`` as an offset in seconds.

    Parameters
    ----------
    netcdf_file : str
        Path handed to ``netCDF4.Dataset``; the file is opened read-only.
    exclude_qc : bool, default True
        If True, skip every variable whose name contains ``'qc_'``.

    Returns
    -------
    pandas.DataFrame
        One row per ``time`` sample, indexed by a ``DatetimeIndex``.

    Raises
    ------
    KeyError
        If the file has no ``time`` or ``base_time`` variable.
    """
    # Local imports keep the function self-contained when copied between notebooks.
    from netCDF4 import Dataset
    import numpy as np
    import pandas as pd

    with Dataset(netcdf_file, 'r') as nc:
        # Loop-invariant: look up the reference dimensions once.
        time_dims = nc.variables['time'].dimensions

        columns = {}
        for name, var in nc.variables.items():
            if var.dimensions != time_dims:
                continue
            if exclude_qc and 'qc_' in name:
                continue
            columns[name] = var[:]

        # base_time comes back as an array (0-d or length 1); reduce it to a
        # plain Python float before converting it to a timestamp.
        base_seconds = float(np.ravel(nc.variables['base_time'][:])[0])
        # pd.to_datetime(..., unit='s') yields the same naive-UTC instant as the
        # deprecated datetime.utcfromtimestamp().
        index = (pd.to_datetime(base_seconds, unit='s')
                 + pd.to_timedelta(nc.variables['time'][:], unit='s'))

        # Build the frame while the file is still open so all data is read first.
        frame = pd.DataFrame(columns, index=index)
    return frame
import os

# Resolve the sample file against the current working directory, then load it
# into a time-indexed DataFrame.
file_path = os.path.abspath(
    os.path.join(os.curdir, 'enametC1.b1.20140531.000000.cdf')
)
DATA = cdf_to_dataframe(netcdf_file=file_path)
Let's work through a simple example and dive further into the temperature data. Specifically, for each hour of the record, let's compute the minimum, mean, and maximum temperature.
There are a couple of ways to do this. One is to use the `pandas.DataFrame.resample()` method we saw earlier to bin the data into 1-hour intervals and then do the necessary calculations on each bin. Instead, this example demonstrates `DataFrame.groupby()` combined with the `agg()` (aggregate) method, which applies several summary functions to every group in a single call. Here we go:
In [9]:
import pandas as pd
import numpy as np

# pd.TimeGrouper has been removed from pandas; pd.Grouper(freq=...) is its
# replacement and bins the DatetimeIndex into consecutive one-hour groups.
hourly = pd.Grouper(freq='1h')

# String names select pandas' built-in reductions (no FutureWarning, unlike
# passing np.min / np.mean / np.max); the result columns are 'min', 'mean', 'max'.
T = DATA['temp_mean'].groupby(hourly).agg(['min', 'mean', 'max'])
T
Out[9]:
In [ ]: