I want to make climatological time series plot from radiosonde data. To begin with this is surface pressure. I also want to this by time of day the sounding was made

I already have interpolated, derived variables for individual soundings.

I need to choose a station(s), a date range, bin by e.g. day, time of day


In [1]:
%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt

In [46]:
from collections import defaultdict
import collections
import bisect


import datetime
from dateutil.relativedelta import relativedelta

import numpy as np


station_list_cs=[43003, 43014, 42867, 43371, 43353, 43285,  43192, 43150, 42339, 40990, 40948]
station_list_cs=[42809]
grid=[]

date_min=datetime.datetime(1960,5,1,0,0,0)
date_max=datetime.datetime(2014,10,1,0,0,0)

date_min_month_bin=datetime.datetime(2011,5,1,0,0,0)
date_max_month_bin=datetime.datetime(2011,10,1,0,0,0)
delta = relativedelta(hours=+1)


def DiurnalHourlyGrid(date_min, date_max):    
    
    delta = relativedelta(hours=+1)

    d = date_min

    grid=[]
    while ((d <= date_max) and (d.timetuple().tm_hour not in grid)): 
        grid.append(d.timetuple().tm_hour)
        d += delta
    
    return grid

hourly_grid = DiurnalHourlyGrid(date_min, date_max)

def variable_name_index_match(variable, variable_list):
    for key, value in variable_list.iteritems():   # iter on both keys and values
        if key.startswith('%s' % variable) and key.endswith('%s' % variable):
            arr_index_var=value 
    assert 'arr_index_var' in locals(), "Cannot find index of variable name."
    return arr_index_var


for stat in station_list_cs:

  print stat
  date_bin_mean_all_dates_one_station=[]
  date_bin_mean_all_dates_one_station_single=[]
  min_max_date_bin=[]
  bins=collections.defaultdict(list)
  bins_single=collections.defaultdict(list)
        
# Pressure Levels

 #pressure_file = np.load('/nfs/a90/eepdw/Data/Observations/Radiosonde_Numpy/Radiosonde_Single_Station_PRESSURES_%s_%s.npy' % (stat, date_title))
  npz_file = np.load('/nfs/a90/eepdw/Data/Observations/Radiosonde_Numpy/Radiosonde_Single_Station_PRESSURES__IND_INTERP_SOUNDING_%s_%s_to_%s.npz'\
          % (stat, date_min.strftime('%Y%m%d'), date_max.strftime('%Y%m%d')))
  pressure_file=npz_file['pressures_for_plotting']
  date_file=npz_file['dates_for_plotting_single']
  singles_file=npz_file['single_value_vars']
  date_file_single=npz_file['dates_for_plotting_single_single']
  #variable_list=npz_file['variable_list']
  #variable_list_line=npz_file['variable_list_line']
    
  bins=collections.defaultdict(list)
    
  for di, dates in enumerate(date_file):
   #print idx

   idx=bisect.bisect(hourly_grid,dates.timetuple().tm_hour)
   bins[idx].append((pressure_file[:,di],dates))
   #print idx
  #print len(bins)
  #print len(grid)
   
  bins_single=collections.defaultdict(list)   
        
        
  for di, dates in enumerate(date_file_single):
   #print idx

   idx=bisect.bisect(hourly_grid,dates.timetuple().tm_hour)
   bins_single[idx].append((singles_file[:,di],dates))
   #print idx
  #print len(bins)
  #print len(grid)
    
  for i in range(len(hourly_grid)):
    #if bins[l] is None:
   #bins[l].append(([],[]))
        
   if np.array(bins[i]).size != 0:
            
    empty_array =  np.empty((np.nanmean(np.dstack(np.array(bins[i])[:,0]), axis=2).shape))
    empty_array[:] = np.NAN
    
    empty_array_list =  (datetime.datetime(1,1,1,1), datetime.datetime(1,1,1,1))
    #empty_array_list[:] = np.NAN
    
   if np.array(bins_single[i]).size != 0: 
        
    empty_array_single =  np.empty((np.nanmean(np.dstack(np.array(bins_single[i])[:,0]), axis=2).shape))
    empty_array_single[:] = np.NAN
    
  for i in range(len(hourly_grid)):
    #if bins[l] is None:
   #bins[l].append(([],[]))
        
   if np.array(bins[i]).size != 0:
  #for i in bins:
   #print i
   #print np.array(bins[i])[:,0].shape
  #bins = np.sort(bins, axis=1)
   #print grid[i]   
   #date_bin_mean = np.nanmean(np.dstack(np.array(bins[i])[:,0]), axis=2)
   #print date_bin_mean
   #nan_mask = np.ma.masked_array(np.array(date_bin_mean, dtype=float), np.isnan(np.array(date_bin_mean, dtype=float)))
            
  # if np.all(np.isnan(np.dstack(np.array(bins[i])[:,0]))) == False:
        
    date_bin_mean_all_dates_one_station.append(np.nanmean(np.dstack(np.array(bins[i])[:,0]), axis=2))
    min_max_date_bin.append((min(np.array(bins[i])[:,1]), max(np.array(bins[i])[:,1])))
   
   elif np.array(bins[i]).size == 0:
    date_bin_mean_all_dates_one_station.append(empty_array)
        
        
   if np.array(bins_single[i]).size != 0:
    date_bin_mean_all_dates_one_station_single.append(np.nanmean(np.dstack(np.array(bins_single[i])[:,0]), axis=2))
   #print date_bin_mean
    
    
   elif np.array(bins_single[i]).size == 0:
     date_bin_mean_all_dates_one_station_single.append(empty_array_single)
     min_max_date_bin.append(empty_array_list)
     #print i
    
  #print date_bin_mean_all_dates_one_station

  #print np.array(date_bin_mean_all_dates_one_station).shape
  np.savez('/nfs/a90/eepdw/Data/Observations/Radiosonde_Numpy/Radiosonde_Cross_Section_IND_SOUNDING_INTERP_MEAN_Climat_%s_%s_%s_%s' \
       % (date_min.strftime('%Y%m%d'), date_max.strftime('%Y%m%d'), delta, stat)\
    , date_bin_mean_all_dates_one_station=date_bin_mean_all_dates_one_station, date_bin_mean_all_dates_one_station_single=date_bin_mean_all_dates_one_station_single, min_max_date_bin=min_max_date_bin, times_for_plotting=hourly_grid)


42809

In [61]:
time_difference=5.5
hourly_grid2 = np.where(hourly_grid<=(24-time_difference),hourly_grid+5.5, hourly_grid+5.5-24)

In [55]:
hourly_grid


Out[55]:
array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19, 20, 21, 22, 23])

In [63]:
np.array(date_bin_mean_all_dates_one_station_single)[:,0,var_index][time_sort][nan_mask]


Out[63]:
array([             nan,              nan,              nan,
                    nan,  100107.98013245,  100096.49923896,
        100172.04301075,              nan,  100251.61290323,
        100000.        ,              nan,   99800.        ,
                    nan,   99881.98519515,   99941.26984127])

In [62]:
hourly_grid2[time_sort]


Out[62]:
array([  6.,   7.,   8.,   9.,  10.,  11.,  12.,  13.,  14.,  15.,  16.,
        17.,  18.,  19.,  20.,  21.,  22.,  23.,  24.,   1.,   2.,   3.,
         4.,   5.])

In [73]:
import matplotlib.pyplot as plt

import re

station_list_cs=[43003, 43014, 42867, 43371, 43353, 43285,  43192, 43150, 42339, 40990, 40948, 42809]

variable_list={'pressures': 0, 'temps':1, 'dewpoints':2, 'winddirs':3, 'windspeeds':4, 'pot_temp':5, 
               'sat_vap_pres':6, 'vap_press':7, 'rel_hum':8, 'wvmr':9, 'sp_hum':10, 'sat_temp':11, 
               'theta_e':12, 'theta_e_sat':13, 'theta_e_minus_theta_e_sat':14}
variable_list_line={'lcl_temp': 0, 'lcl_vpt':1, 'pbl_pressure':2, 'surface_pressure':3, 'T_eq_0':4, 
                    'CAPE':5, 'CIN':6, 'lclp':7, 'lclt':8, 'lfcp':9, 'equil_level':10}


station_list_search='/nfs/a90/eepdw/Data/Observations/Radiosonde_downloaded_from_NOAA_GUAN/igra-stations.txt'
station_metadata=[]
f = open(station_list_search,'r')
for line in f:
     line = line.strip()
     line=re.sub(r'([A-Z])\s([A-Z])', r'\1_\2',line)
     line=re.sub(r'([A-Z])\s\s([A-Z])', r'\1_\2',line)
     station_metadata.append(line.split())
f.close()

def station_info_search(stat):
    for line in station_metadata:
         if "%s" % stat in line: 
             st = line[2].lower().title().replace('_',' ')
             lo = float(line[3])
             la = float(line[4])
    return st,la,lo

variable='surface_pressure'
var_index = variable_name_index_match(variable, variable_list_line)

date_min=datetime.datetime(1960,5,1,0,0,0)
date_max=datetime.datetime(2014,10,1,0,0,0)

delta = relativedelta(hours=+1)

for stat in station_list_cs:
    
    load_file = np.load('/nfs/a90/eepdw/Data/Observations/Radiosonde_Numpy/Radiosonde_Cross_Section_IND_SOUNDING_INTERP_MEAN_Climat_%s_%s_%s_%s.npz' \
       % (date_min.strftime('%Y%m%d'), date_max.strftime('%Y%m%d'), delta, stat))
       
    date_bin_mean_all_dates_one_station_single = load_file['date_bin_mean_all_dates_one_station_single']
    hourly_grid = load_file['times_for_plotting']
    
    nan_mask = ~np.isnan(np.array(date_bin_mean_all_dates_one_station_single)[:,0,var_index])
    
    time_difference=5.5
    
    hourly_grid = np.where(hourly_grid<=(24-time_difference),hourly_grid+5.5, hourly_grid+5.5-24)
    
    station_name,la,lo = station_info_search(stat)
    
    ylim_max = np.max(np.array(date_bin_mean_all_dates_one_station_single)[:,0,var_index][nan_mask])
    ylim_min = np.min(np.array(date_bin_mean_all_dates_one_station_single)[:,0,var_index][nan_mask])
    
    time_sort=np.argsort(hourly_grid[nan_mask])

    fig=plt.figure()

    plt.plot(np.array(hourly_grid)[nan_mask][time_sort], np.array(date_bin_mean_all_dates_one_station_single)[:,0,var_index][nan_mask][time_sort], label=variable, marker='.')  
#plt.legend()
    plt.title('%s' % station_name)
    
    plt.ylim(ylim_min-500,ylim_max+500)
    
    plt.xlim(0,24-3)
    plt.xticks(np.arange(0,24,3))
    
#fig.autofmt_xdate()

    plt.gca().invert_yaxis()
    plt.show()



In [74]:
import matplotlib.pyplot as plt

import re

station_list_cs=[43014, 42867, 43150, 42809]

variable_list={'pressures': 0, 'temps':1, 'dewpoints':2, 'winddirs':3, 'windspeeds':4, 'pot_temp':5, 
               'sat_vap_pres':6, 'vap_press':7, 'rel_hum':8, 'wvmr':9, 'sp_hum':10, 'sat_temp':11, 
               'theta_e':12, 'theta_e_sat':13, 'theta_e_minus_theta_e_sat':14}
variable_list_line={'lcl_temp': 0, 'lcl_vpt':1, 'pbl_pressure':2, 'surface_pressure':3, 'T_eq_0':4, 
                    'CAPE':5, 'CIN':6, 'lclp':7, 'lclt':8, 'lfcp':9, 'equil_level':10}


station_list_search='/nfs/a90/eepdw/Data/Observations/Radiosonde_downloaded_from_NOAA_GUAN/igra-stations.txt'
station_metadata=[]
f = open(station_list_search,'r')
for line in f:
     line = line.strip()
     line=re.sub(r'([A-Z])\s([A-Z])', r'\1_\2',line)
     line=re.sub(r'([A-Z])\s\s([A-Z])', r'\1_\2',line)
     station_metadata.append(line.split())
f.close()

def station_info_search(stat):
    for line in station_metadata:
         if "%s" % stat in line: 
             st = line[2].lower().title().replace('_',' ')
             lo = float(line[3])
             la = float(line[4])
    return st,la,lo

variable='surface_pressure'
var_index = variable_name_index_match(variable, variable_list_line)

date_min=datetime.datetime(1960,5,1,0,0,0)
date_max=datetime.datetime(2014,10,1,0,0,0)

delta = relativedelta(hours=+1)

for stat in station_list_cs:
    
    load_file = np.load('/nfs/a90/eepdw/Data/Observations/Radiosonde_Numpy/Radiosonde_Cross_Section_IND_SOUNDING_INTERP_MEAN_Climat_%s_%s_%s_%s.npz' \
       % (date_min.strftime('%Y%m%d'), date_max.strftime('%Y%m%d'), delta, stat))
       
    date_bin_mean_all_dates_one_station_single = load_file['date_bin_mean_all_dates_one_station_single']
    hourly_grid = load_file['times_for_plotting']
    
    nan_mask = ~np.isnan(np.array(date_bin_mean_all_dates_one_station_single)[:,0,var_index])
    
    time_difference=5.5
    
    hourly_grid = np.where(hourly_grid<=(24-time_difference),hourly_grid+5.5, hourly_grid+5.5-24)
    
    station_name,la,lo = station_info_search(stat)
    
    ylim_max = np.max(np.array(date_bin_mean_all_dates_one_station_single)[:,0,var_index][nan_mask])
    ylim_min = np.min(np.array(date_bin_mean_all_dates_one_station_single)[:,0,var_index][nan_mask])
    
    time_sort=np.argsort(hourly_grid[nan_mask])

    fig=plt.figure()

    plt.plot(np.array(hourly_grid)[nan_mask][time_sort], np.array(date_bin_mean_all_dates_one_station_single)[:,0,var_index][nan_mask][time_sort], label=variable, marker='.')  
#plt.legend()
    plt.title('%s' % station_name)
    
    plt.ylim(ylim_min-500,ylim_max+500)
    
    plt.xlim(0,24-3)
    plt.xticks(np.arange(0,24,3))
    
#fig.autofmt_xdate()

    plt.gca().invert_yaxis()
    plt.show()



In [98]:
import matplotlib.pyplot as plt

import re

station_list_cs=[43014, 42867, 43150, 42809]

variable_list={'pressures': 0, 'temps':1, 'dewpoints':2, 'winddirs':3, 'windspeeds':4, 'pot_temp':5, 
               'sat_vap_pres':6, 'vap_press':7, 'rel_hum':8, 'wvmr':9, 'sp_hum':10, 'sat_temp':11, 
               'theta_e':12, 'theta_e_sat':13, 'theta_e_minus_theta_e_sat':14}
variable_list_line={'lcl_temp': 0, 'lcl_vpt':1, 'pbl_pressure':2, 'surface_pressure':3, 'T_eq_0':4, 
                    'CAPE':5, 'CIN':6, 'lclp':7, 'lclt':8, 'lfcp':9, 'equil_level':10}


station_list_search='/nfs/a90/eepdw/Data/Observations/Radiosonde_downloaded_from_NOAA_GUAN/igra-stations.txt'
station_metadata=[]
f = open(station_list_search,'r')
for line in f:
     line = line.strip()
     line=re.sub(r'([A-Z])\s([A-Z])', r'\1_\2',line)
     line=re.sub(r'([A-Z])\s\s([A-Z])', r'\1_\2',line)
     station_metadata.append(line.split())
f.close()

def station_info_search(stat):
    for line in station_metadata:
         if "%s" % stat in line: 
             st = line[2].lower().title().replace('_',' ')
             lo = float(line[3])
             la = float(line[4])
    return st,la,lo

variable='surface_pressure'
var_index = variable_name_index_match(variable, variable_list_line)

date_min=datetime.datetime(1960,5,1,0,0,0)
date_max=datetime.datetime(2014,10,1,0,0,0)

delta = relativedelta(hours=+1)

fig=plt.figure(figsize=(14,8.5))

for stat in station_list_cs:
    
    load_file = np.load('/nfs/a90/eepdw/Data/Observations/Radiosonde_Numpy/Radiosonde_Cross_Section_IND_SOUNDING_INTERP_MEAN_Climat_%s_%s_%s_%s.npz' \
       % (date_min.strftime('%Y%m%d'), date_max.strftime('%Y%m%d'), delta, stat))
       
    date_bin_mean_all_dates_one_station_single = load_file['date_bin_mean_all_dates_one_station_single']
    hourly_grid = load_file['times_for_plotting']
    
    nan_mask = ~np.isnan(np.array(date_bin_mean_all_dates_one_station_single)[:,0,var_index])
    
    time_difference=5.5
    
    hourly_grid = np.where(hourly_grid<=(24-time_difference),hourly_grid+5.5, hourly_grid+5.5-24)
    
    station_name,la,lo = station_info_search(stat)
    
    ylim_max = np.max(np.array(date_bin_mean_all_dates_one_station_single)[:,0,var_index][nan_mask])
    ylim_min = np.min(np.array(date_bin_mean_all_dates_one_station_single)[:,0,var_index][nan_mask])
    
    time_sort=np.argsort(hourly_grid[nan_mask])

    

    plt.plot(np.array(hourly_grid)[nan_mask][time_sort], np.array(date_bin_mean_all_dates_one_station_single)[:,0,var_index][nan_mask][time_sort], label=station_name, marker='.')  

plt.legend()
plt.title('%s' % variable)
    
    #plt.ylim(ylim_min-500,ylim_max+500)
    
plt.xlim(0,24-3)
plt.xticks(np.arange(0,24,3))

plt.xlabel('Time (Local)')
plt.ylabel('Pressure')
    
#fig.autofmt_xdate()

    #plt.gca().invert_yaxis()
plt.show()



In [ ]:
variable='CAPE'
  var_index = variable_name_index_match(variable, variable_list_line)
  plt.plot_date(date_plot[1:], load_file['date_bin_mean_all_dates_one_station_single'][1:,0,var_index], fmt="-", label=variable)  
  
  variable='CIN'
  var_index = variable_name_index_match(variable, variable_list_line)
  plt.plot_date(date_plot[1:], load_file['date_bin_mean_all_dates_one_station_single'][1:,0,var_index], fmt="-", label=variable)  
  
  plt.legend()
  plt.title('%s' % station_name)
    
  fig.autofmt_xdate()

  #plt.gca().invert_yaxis()
  plt.show()

In [125]:
experiment_ids = ['djznw', 'djzny', 'djznq', 'djzns', 'dklyu', 'dkmbq', 'dklwu', 'dklzq', 'dkbhu', 'djznu', 'dkhgu' ] 

diag = '_408_on_p_levs_mean_by_hour'

pp_file_path='/nfs/a90/eepdw/Data/EMBRACE/'

In [122]:
import imp

model_name_convert_legend = \
imp.load_source('util', '/nfs/see-fs-01_users/eepdw/python_scripts/modules/model_name_convert_legend.py')

In [129]:
p_level = 925.



#experiment_ids = ['djznw']
#experiment_ids = ['djznw', 'djzny', 'djznq']

fig = plt.figure(figsize=(14,8.5))

for experiment_id in experiment_ids:
    
    try:
        expmin1 = experiment_id[:-1]
        data = np.load('%s%s/%s/%s%s_diurnal_monsoon_trough.npz' % (pp_file_path, expmin1, experiment_id, experiment_id, diag))
                   
        plot_idx=np.where(data['pressure']==p_level)[0][0]
    
        hours = np.array([unit.num2date(q, 'hours since 1970-01-01 00:00:00',
             unit.CALENDAR_STANDARD).hour for q in data['time']])
    
        time_difference=5.5
    
        hours_local = np.where(hours<=(24-time_difference),hours+5.5, hours+5.5-24)
    
        time_sort=np.argsort(hours_local)
        
        print experiment_id
        
        print data['data'][:,plot_idx][time_sort]
    
    
        plt.plot(hours_local[time_sort], data['data'][:,plot_idx][time_sort], \
                 label=model_name_convert_legend.main(experiment_id))
        #plt.show()
        
    except Exception:
        pass
    
plt.legend()

plt.title('Diurnal Variation in geopotential height of %s hPa' % p_level )
    
plt.ylim(650,800)
    
plt.xlim(0,24-3)
plt.xticks(np.arange(0,24,3))

plt.xlabel('Time (Local)')
plt.ylabel('Height (m)')

plt.show()


djznw
[ 710.70510902  705.31842994  700.87148078  698.31680213  698.56430839
  703.60308234  708.14818336  713.81874508  717.71204978  717.43129667
  713.91857909  708.15690955  706.55327811  698.52772428  692.07414176
  689.10446753  690.63144745  695.39330083  701.38369809  709.12740934
  715.62467104  719.85803783  721.40663255  719.70021733]
djzny
[ 723.66527987  719.12151604  714.88752718  712.39205958  712.26570936
  715.70472014  721.70563785  727.44111367  729.1088813   728.83924666
  725.47763156  720.76766579  715.45925452  708.63144626  701.7381733
  698.62295964  698.94601854  702.85346766  710.01548508  718.06149628
  723.59019546  727.30438281  728.60299241  726.78344112]
djznq
[ 724.59830445  720.77258867  716.49433761  713.44858417  713.06550355
  715.44618094  722.43769598  729.20500745  731.89886178  732.53656187
  729.38410784  724.54670644  717.52382966  710.61578256  704.24406541
  701.10542097  701.68352744  705.52377843  712.47388203  720.52720205
  725.9167794   729.38925059  730.294904    728.38069779]
djzns
[ 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.]
dklyu
[ 705.8719906   702.57819586  699.51014309  697.84069188  698.00114728
  701.17091702  706.41376503  713.69242431  719.49789964  723.4943796
  724.39016047  722.23152467  714.79183019  705.27902231  693.44436251
  685.08098711  681.64302565  683.02666253  688.07057153  695.66938803
  702.31592946  706.66198079  708.97845893  708.4276311 ]
dkmbq
[ 724.2154039   719.9539758   716.01869724  713.40938638  712.77631392
  715.0479556   721.9917105   728.74738157  731.65182357  732.16819446
  729.12311992  724.21482252  716.99782792  710.11014525  704.03306153
  701.20700799  701.94685153  705.98090018  712.60754708  720.52092505
  726.66531107  729.77557971  730.5875404   728.55193568]
dklwu
[ 698.31895504  695.23289592  692.55782678  691.64291018  692.33961347
  695.38612706  701.24071289  708.89608362  714.31754492  718.0305314
  718.75402318  716.57547662  710.12057337  702.15114416  690.20472551
  680.8655344   676.63867244  677.66186176  682.66549935  689.48431782
  695.69594533  700.10499806  702.20765166  701.43969839]
dklzq
[ 723.7979249   719.56346364  715.72549518  713.23160654  712.71633349
  715.01195512  722.16874675  728.86455671  731.89964963  732.29944097
  729.09215118  724.11835403  716.69854991  709.81124403  703.80420865
  700.92627938  701.60165192  705.52862152  712.14337417  720.1054833
  726.28145244  729.33345878  730.08476538  728.08429657]