In [16]:
import pyart
import os
import numpy as np
from matplotlib import pyplot as plt
from __future__ import print_function
import datetime
import matplotlib.dates as mdates
from matplotlib.dates import MonthLocator, DayLocator, HourLocator, DateFormatter, drange
from matplotlib.colors import LogNorm
import netCDF4 
import dateutil.parser
import pickle
%matplotlib inline

In [2]:
status_dir = '/Users/scollis/temp/ok/'
all_files = os.listdir(status_dir)
print(len(all_files))


404499

In [3]:
max_rf =[]
mean_rf = []
good_files = []
datetimes = []
for filen in all_files:
    if 'nc' in filen:
        fh = open(status_dir+filen)
        line = fh.readline()[78::]
        fh.close()
        try:
            p1 = float(line.split(' ')[0])
            p2 = float(line.split(' ')[1])
            max_rf.append(p1)
            mean_rf.append(p2)
            good_files.append(file)
            datetimes.append( datetime.datetime.strptime(filen[6:21], '%Y%m%d_%H%M%S'))
        except:
            pass
    elif 'KVNX' in filen:
        fh = open(status_dir+filen)
        line = fh.readline()[23::]
        fh.close()
        try:
            p1 = float(line.split(' ')[0])
            p2 = float(line.split(' ')[1])
            max_rf.append(p1)
            mean_rf.append(p2)
            good_files.append(filen)
            try:
                datetimes.append(datetime.datetime.strptime(filen[4:-11], 
                                                            '%Y%m%d_%H%M%S'))
            except: #1999
                datetimes.append(datetime.datetime.strptime(all_files[0][4:-7], 
                                                            '%Y%m%d_%H%M%S'))
        except:
            pass
    
mean_rf = np.array(mean_rf)
max_rf = np.array(max_rf)
datetimes = np.array(datetimes)

In [4]:
print(len(all_files), len(good_files))
print(len(datetimes), len(mean_rf))


404499 403218
403218 403218

In [54]:
fig = plt.figure(figsize = [15,6])
plt.plot(mdates.date2num(datetimes), mean_rf, 'b-', label = 'mean')
ax = plt.gca()
ax.xaxis.set_major_locator(MonthLocator(interval = 12))
#ax.xaxis.set_minor_locator(HourLocator(np.arange(0, 25, 6)))
ax.xaxis.set_major_formatter(DateFormatter('%Y-%m-%d'))

ax.fmt_xdata = DateFormatter('%Y-%m')
fig.autofmt_xdate()

cnt = plt.gcf()
fig.autofmt_xdate()
plt.ylabel('Domain mean rain rate (mm/h)', size = 20)
plt.legend(loc = 2)
#ax2 = plt.twinx()
#plt.plot(mdates.date2num(datetimes), max_rf, 'r.', label = 'max')
#plt.ylabel('Max rain rate in domain (mm/h)')
plt.ylim([0,30])

plt.xlim([mdates.date2num(datetime.datetime(1995,1,1)), 
          mdates.date2num(datetime.datetime(2006,12,1))])
plt.legend()
ax = plt.gca()
ax.xaxis.set_tick_params(labelsize=20)
ax.yaxis.set_tick_params(labelsize=20)



In [10]:
all_hist = np.histogram(mean_rf, bins=50, range = [0,0.1])
plt.plot(all_hist[1][0:-1], all_hist[0] )


Out[10]:
[<matplotlib.lines.Line2D at 0x1131e2650>]

In [58]:
fig = plt.figure(figsize = [15,8])
plt.plot(mdates.date2num(datetimes), mean_rf, 'b.', label = 'mean')
ax = plt.gca()
plt.xlim([mdates.date2num(datetime.datetime(2001,1,1)), mdates.date2num(datetime.datetime(2001,6,1))])

ax.xaxis.set_major_locator(DayLocator(interval = 15))
#ax.xaxis.set_minor_locator(HourLocator(np.arange(0, 23, 3)))
ax.xaxis.set_major_formatter(DateFormatter('%Y-%m-%d %H'))
ax.fmt_xdata = DateFormatter('%Y-%m-%d')
fig.autofmt_xdate()

cnt = plt.gcf()
fig.autofmt_xdate()
ax = plt.gca()
ax.xaxis.set_tick_params(labelsize=20)
ax.yaxis.set_tick_params(labelsize=20)
plt.ylabel('Domain mean rain rate (mm/h)', size = 20)

plt.legend()


Out[58]:
<matplotlib.legend.Legend at 0x123e7f850>

In [12]:
all_when_rained = mean_rf[np.where(mean_rf>0.05)]
all_when_rained_time = datetimes[np.where(mean_rf>0.05)]

hours = np.linspace(0,23,24)
boxed_means_hours = []
months = np.linspace(1,12,12)
boxed_means_months = []
weeks = np.linspace(1,52, 52)
boxed_means_weeks = []

for hour in hours:
    foo = [ a.hour == hour for a in all_when_rained_time]
    boxed_means_hours.append(all_when_rained[np.where(foo)])

months = np.linspace(1,12,12)
boxed_means_months = []
for month in months:
    foo = [ a.month == month for a in all_when_rained_time]
    boxed_means_months.append(all_when_rained[np.where(foo)])


hrly = np.array([a.mean() for a in boxed_means_hours])
monthly = np.array([a.mean() for a in boxed_means_months])
nbins = 50
rr = (0.05, 5)
hist_rf = np.zeros([len(hours), nbins])
for i in range(len(hours)):
    bine = np.histogram(boxed_means_hours[i], bins=nbins, range= rr)[1]
    hist_rf[i,:] = np.histogram(boxed_means_hours[i], bins=nbins, range= rr)[0]
    #print(np.histogram(boxed_means[i], bins=nbins, range=(0.1, 1))[0])
lt = (hours - 5) % 24
lts = ["%d" % a for a in lt] 
hist_rf_monthly = np.zeros([len(months), nbins])
for i in range(len(months)):
    bine2 = np.histogram(boxed_means_months[i], bins=nbins, range= rr)[1]
    hist_rf_monthly[i,:] = np.histogram(boxed_means_months[i], bins=nbins, range= rr)[0]
    #print(np.histogram(boxed_means[i], bins=nbins, range=(0.1, 1))[0])

In [13]:
monthly = np.array([a.mean() for a in boxed_means_months])
monthly50 = np.array([np.percentile(a, 50) for a in boxed_means_months])
monthly25 = np.array([np.percentile(a, 25) for a in boxed_means_months])
monthly75 = np.array([np.percentile(a, 75) for a in boxed_means_months])
hrly = np.array([a.mean() for a in boxed_means_hours])
hrly50 = np.array([np.percentile(a, 50) for a in boxed_means_hours])
hrly25 = np.array([np.percentile(a, 25) for a in boxed_means_hours])
hrly75 = np.array([np.percentile(a, 75) for a in boxed_means_hours])

In [18]:
vance_data = {'times': datetimes, 'mean':mean_rf, 
              'monthly':monthly, 'monthly50': monthly50,
              'monthly75':monthly75, 'monthly25': monthly25,
             'hrly': hrly, 'hrly50': hrly50, 'hrly25': hrly25,
             'hrly75': hrly75}
pickle.dump(vance_data, open('/Users/scollis/projects/EGU16/vance.dump', 'wb'))

In [14]:
fig = plt.figure(figsize = [15,8])

plt.fill_between(hours, hrly25, hrly75, color='w', facecolor = 'red', alpha = 0.5)
plt.plot(hours, hrly75, 'y--', label = '75th percentile')
plt.plot(hours, hrly25, 'g--', label = '25th percentile')

plt.plot(hours, hrly50, 'k--', label = 'Median')
plt.plot(hours, hrly, 'b--', label = 'Mean')
mty = plt.xticks(hours[::4]+.5, lts[::4])
plt.xlim([0,23])
plt.xlabel('Hour (LT)')
plt.ylabel('Domain mean rain fall rate (mm/h)')
plt.legend()


Out[14]:
<matplotlib.legend.Legend at 0x1132b9190>
/Users/scollis/anaconda/lib/python2.7/site-packages/matplotlib/collections.py:590: FutureWarning: elementwise comparison failed; returning scalar instead, but in the future will perform elementwise comparison
  if self._edgecolors == str('face'):

In [62]:
fig = plt.figure(figsize = [8,15])

plt.fill_betweenx(hours,hrly25, hrly75, color='w', facecolor = 'red', alpha = 0.5)
plt.plot(hrly75,hours, 'y--', label = '75th percentile', linewidth = 2.0)
plt.plot( hrly25,hours, 'g--', label = '25th percentile', linewidth = 2.0)

plt.plot( hrly50,hours, 'k--', label = 'Median', linewidth = 2.0)
plt.plot( hrly,hours, 'b--', label = 'Mean', linewidth = 2.0)
mty = plt.yticks(hours[::4]+.5, lts[::4])
ax = plt.gca()
ax.xaxis.set_tick_params(labelsize=20)
ax.yaxis.set_tick_params(labelsize=20)

plt.ylim([0,23])
plt.ylabel('Hour (LT)', size = 20)
plt.xlabel('Domain mean rain fall rate (mm/h)', size = 20)
plt.legend(loc = 5, fontsize = 15)


Out[62]:
<matplotlib.legend.Legend at 0x115a92d10>

In [15]:
fig = plt.figure(figsize = [8,15])

plt.fill_betweenx(months,monthly25, monthly75, color='w', facecolor = 'red', alpha = 0.5)
plt.plot(monthly75,months, 'y--', label = '75th percentile', linewidth = 2.0)
plt.plot( monthly25,months, 'g--', label = '25th percentile', linewidth = 2.0)

plt.plot( monthly50,months, 'k--', label = 'Median', linewidth = 2.0)
plt.plot( monthly,months, 'b--', label = 'Mean', linewidth = 2.0)
montext = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep','Oct', 'Dec']
mty = plt.yticks(months + 0.5, montext)
plt.ylim([1,12])
ax = plt.gca()
ax.xaxis.set_tick_params(labelsize=20)
ax.yaxis.set_tick_params(labelsize=20)

plt.ylabel('Month', size = 20)
plt.xlabel('Domain mean rain fall rate (mm/h)', size = 20)
plt.legend(loc = 5, fontsize = 20)


Out[15]:
<matplotlib.legend.Legend at 0x1140b1850>

In [64]:
fig = plt.figure(figsize = [15,8])
plt.pcolormesh(bine, hours, hist_rf, norm=LogNorm(vmin=1, vmax=5000))
ax = plt.gca()
mty = plt.yticks(hours[::4]+.5, lts[::4])
plt.ylim([0,23])
plt.ylabel('Hour (LT)')
plt.xlabel('Domain mean rain fall rate (mm/h)')
plt.colorbar()


Out[64]:
<matplotlib.colorbar.Colorbar instance at 0x123a2c050>

In [65]:
fig = plt.figure(figsize = [15,8])
plt.pcolormesh(bine2, months, hist_rf_monthly, norm=LogNorm(vmin=1, vmax=5000))
plt.ylabel('Month')
plt.xlabel('Domain mean rain fall rate (mm/h)')

plt.colorbar()
montext = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep','Oct', 'Dec']
mty = plt.yticks(months + 0.5, montext)
plt.ylim([1,12])


Out[65]:
(1, 12)

In [66]:
print(len(bine2), len(months), hist_rf_monthly.shape)
fig = plt.figure(figsize = [15,8])
plt.contourf(bine2[0:-1],months, hist_rf_monthly, 10**np.linspace(1,4,20), norm=LogNorm(vmin=1, vmax=5000))
plt.colorbar()


51 12 (12, 50)
Out[66]:
<matplotlib.colorbar.Colorbar instance at 0x12721d638>

In [82]:
f = """
Timing: 
[scollis@blogin2 maine_out]$ date
Tue Dec  1 20:05:27 CST 2015
[scollis@blogin2 maine_out]$ date
Tue Dec  1 20:05:36 CST 2015
[scollis@blogin2 maine_out]$ date
Tue Dec  1 20:05:36 CST 2015
[scollis@blogin2 maine_out]$ ls -lh *.status |wc
   2064   18576  152736
[scollis@blogin2 maine_out]$ date
Tue Dec  1 20:23:01 CST 2015
[scollis@blogin2 maine_out]$ ls -lh *.status |wc
   4771   42939  353054
[scollis@blogin2 maine_out]$ ls -lh *.status |wc
   8607   77463  636918
[scollis@blogin2 maine_out]$ date
Tue Dec  1 20:48:19 CST 2015
[scollis@blogin2 maine_out]$ 
[scollis@blogin2 maine_out]$ ls -lh *.status |wc
  10627   95643  786398
[scollis@blogin2 maine_out]$ date
Tue Dec  1 21:01:35 CST 2015
[scollis@blogin2 maine_out]$ 

#!/bin/bash
#PBS -l nodes=16:ppn=16
#PBS -l walltime=03:59:00
#PBS -j oe
#PBS -V
#PBS -N IPythonMPI0

ipcluster start --n=256 --profile=mpi0
"""

In [83]:
outfile = netCDF4.Dataset('/Users/scollis/temp/maine.nc','w')
time = outfile.createDimension('time', None)
times = outfile.createVariable('time','f8',('time',))
mean_rfr = outfile.createVariable('Domain_mean_rainfall_rate','f8',('time',))
mean_rfr.units = 'mm/h'
mean_rfr[:] =  mean_rf
mean_rfr.standard_name = 'Rainfall Rate'
mean_rfr.long_name = 'Domain mean Rainfall Rate'

max_rfr = outfile.createVariable('Domain_max_rainfall_rate','f8',('time',))
max_rfr.units = 'mm/h'
max_rfr[:] =  max_rf
max_rfr.standard_name = 'Rainfall Rate'
max_rfr.long_name = 'Domain maximum Rainfall Rate'

times.units = 'hours since 1997-01-01 00:00:00.0'
times.calendar = 'gregorian'
times[:] = netCDF4.date2num(datetimes,units=times.units,calendar=times.calendar)
outfile.close()

In [84]:
outfile.close()


---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
<ipython-input-84-483575512f44> in <module>()
----> 1 outfile.close()

netCDF4.pyx in netCDF4.Dataset.close (netCDF4.c:23693)()

RuntimeError: NetCDF: Not a valid ID

In [ ]:
radar = pyart.io.read('/data/KGYX20071227_132045')

In [265]:
plt.contourf? datetime.datetime.strptime(filen[4:-11]

In [110]:
a = 'cfrad.20080528_045151.761_to_20080528_050012.052_KGYX_v130_Surveillance_SUR.nc.status'
print(datetime.datetime.strptime(a[6:21], '%Y%m%d_%H%M%S'))


2008-05-28 04:51:51

In [124]:
fh = open(status_dir+a)
line = fh.readline()[78::]
fh.close()

In [125]:
print(line)


0.286123424768 0.0218384616554 


In [11]:
for i in range(5):
    print(all_files[i+1][0:-7])
print('......')
for i in range(4):
    print(all_files[-(4-i)][0:-7])


KGYX20100101_000147_V03
KGYX20100101_001131_V03
KGYX20100101_002114_V03
KGYX20100101_003058_V03
KGYX20100101_004041_V03
......
KGYX20151125_012802_V06
KGYX20151125_013744_V06
KGYX20151125_014727_V06
KGYX20151125_015710_V06

In [ ]:
from IPython.parallel import Client
My_Cluster = Client()
My_View = My_Cluster[:]
My_View.block = False
#on all engines do an import of Py-ART
My_View.execute('import matplotlib')
My_View.execute('matplotlib.use("agg")')
My_View.execute('import pyart')
#Map the code and input to all workers
result = My_View.map_async(do_grid_map_gates_to_grid, good_files)
#Reduce the result to get a list of output
rain_rates = result.get()