I want to make climatological time series plots from radiosonde data, starting with surface pressure, and split them by the time of day the sounding was made.
I already have interpolated, derived variables for individual soundings.
I need to choose a station (or stations) and a date range, then bin by e.g. day or time of day.
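The idea is simply to group each sounding by the hour of day it was launched and average within each hour bin. A throwaway sketch of that grouping (the times and pressures here are made-up examples, not real soundings; the real implementation below works on full interpolated arrays with bisect and defaultdict):
In [ ]:
# Illustrative only: bin a few example sounding times by launch hour and average per bin.
import datetime
from collections import defaultdict

example_soundings = [(datetime.datetime(2011, 5, 1, 0, 15), 1005.2),
                     (datetime.datetime(2011, 5, 1, 11, 45), 1002.8),
                     (datetime.datetime(2011, 5, 2, 0, 5), 1004.9)]

hour_bins = defaultdict(list)
for launch_time, surf_pres in example_soundings:
    hour_bins[launch_time.hour].append(surf_pres)

# mean of the binned values for each hour that has data
hour_bin_means = {hour: sum(vals) / len(vals) for hour, vals in hour_bins.items()}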
In [1]:
%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt
In [46]:
from collections import defaultdict
import collections
import bisect
import datetime
from dateutil.relativedelta import relativedelta
import numpy as np
station_list_cs=[43003, 43014, 42867, 43371, 43353, 43285, 43192, 43150, 42339, 40990, 40948]
station_list_cs=[42809]   # overrides the full station list above (single station only)
grid=[]
date_min=datetime.datetime(1960,5,1,0,0,0)
date_max=datetime.datetime(2014,10,1,0,0,0)
date_min_month_bin=datetime.datetime(2011,5,1,0,0,0)
date_max_month_bin=datetime.datetime(2011,10,1,0,0,0)
delta = relativedelta(hours=+1)
def DiurnalHourlyGrid(date_min, date_max):
    delta = relativedelta(hours=+1)
    d = date_min
    grid=[]
    while ((d <= date_max) and (d.timetuple().tm_hour not in grid)):
        grid.append(d.timetuple().tm_hour)
        d += delta
    return grid
hourly_grid = DiurnalHourlyGrid(date_min, date_max)
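# Note: hourly_grid is just the hours of day present between date_min and date_max
# (for a full record this is [0, 1, ..., 23]); bisect.bisect against it in the binning
# loops below maps a sounding's launch hour to a bin index.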
def variable_name_index_match(variable, variable_list):
    for key, value in variable_list.iteritems():  # iterate over both keys and values
        if key.startswith('%s' % variable) and key.endswith('%s' % variable):
            arr_index_var = value
    assert 'arr_index_var' in locals(), "Cannot find index of variable name."
    return arr_index_var
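# Example: variable_name_index_match('surface_pressure', variable_list_line) -> 3,
# using the variable_list_line dict defined in the plotting cells further down;
# the startswith/endswith pair is effectively an exact-name match.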
for stat in station_list_cs:
    print stat

    date_bin_mean_all_dates_one_station=[]
    date_bin_mean_all_dates_one_station_single=[]
    min_max_date_bin=[]
    bins=collections.defaultdict(list)
    bins_single=collections.defaultdict(list)

    # Interpolated pressure-level profiles and derived single-value variables
    # for the individual soundings of this station
    #pressure_file = np.load('/nfs/a90/eepdw/Data/Observations/Radiosonde_Numpy/Radiosonde_Single_Station_PRESSURES_%s_%s.npy' % (stat, date_title))
    npz_file = np.load('/nfs/a90/eepdw/Data/Observations/Radiosonde_Numpy/Radiosonde_Single_Station_PRESSURES__IND_INTERP_SOUNDING_%s_%s_to_%s.npz'
                       % (stat, date_min.strftime('%Y%m%d'), date_max.strftime('%Y%m%d')))

    pressure_file=npz_file['pressures_for_plotting']
    date_file=npz_file['dates_for_plotting_single']
    singles_file=npz_file['single_value_vars']
    date_file_single=npz_file['dates_for_plotting_single_single']
    #variable_list=npz_file['variable_list']
    #variable_list_line=npz_file['variable_list_line']

    # Bin the interpolated profiles by hour of day.
    # Caution: bisect.bisect returns hour+1 for an hourly grid of 0..23, so bin 0
    # stays empty and any hour-23 soundings land in bin 24, which the averaging
    # loop below (range(len(hourly_grid))) never reads.
    bins=collections.defaultdict(list)
    for di, dates in enumerate(date_file):
        idx=bisect.bisect(hourly_grid, dates.timetuple().tm_hour)
        bins[idx].append((pressure_file[:,di], dates))

    # Bin the single-value (derived) variables by hour of day in the same way
    bins_single=collections.defaultdict(list)
    for di, dates in enumerate(date_file_single):
        idx=bisect.bisect(hourly_grid, dates.timetuple().tm_hour)
        bins_single[idx].append((singles_file[:,di], dates))

    # Build NaN-filled placeholders with the same shape as a non-empty bin mean,
    # so that empty hour bins still contribute an entry
    for i in range(len(hourly_grid)):
        if np.array(bins[i]).size != 0:
            empty_array = np.empty((np.nanmean(np.dstack(np.array(bins[i])[:,0]), axis=2).shape))
            empty_array[:] = np.NAN
            empty_array_list = (datetime.datetime(1,1,1,1), datetime.datetime(1,1,1,1))
        if np.array(bins_single[i]).size != 0:
            empty_array_single = np.empty((np.nanmean(np.dstack(np.array(bins_single[i])[:,0]), axis=2).shape))
            empty_array_single[:] = np.NAN

    # Mean over all soundings in each hour bin (NaN placeholders where a bin is empty)
    for i in range(len(hourly_grid)):
        if np.array(bins[i]).size != 0:
            date_bin_mean_all_dates_one_station.append(np.nanmean(np.dstack(np.array(bins[i])[:,0]), axis=2))
            min_max_date_bin.append((min(np.array(bins[i])[:,1]), max(np.array(bins[i])[:,1])))
        else:
            date_bin_mean_all_dates_one_station.append(empty_array)
        if np.array(bins_single[i]).size != 0:
            date_bin_mean_all_dates_one_station_single.append(np.nanmean(np.dstack(np.array(bins_single[i])[:,0]), axis=2))
        else:
            date_bin_mean_all_dates_one_station_single.append(empty_array_single)
            min_max_date_bin.append(empty_array_list)

    np.savez('/nfs/a90/eepdw/Data/Observations/Radiosonde_Numpy/Radiosonde_Cross_Section_IND_SOUNDING_INTERP_MEAN_Climat_%s_%s_%s_%s'
             % (date_min.strftime('%Y%m%d'), date_max.strftime('%Y%m%d'), delta, stat),
             date_bin_mean_all_dates_one_station=date_bin_mean_all_dates_one_station,
             date_bin_mean_all_dates_one_station_single=date_bin_mean_all_dates_one_station_single,
             min_max_date_bin=min_max_date_bin,
             times_for_plotting=hourly_grid)
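Before plotting, a quick round-trip check of the file just written doesn't hurt: reload the last station's climatology and look at the array shapes (a sketch only, using the same path pattern and the `stat`, `date_min`, `date_max` and `delta` left over from the loop above):
In [ ]:
# Sanity check (sketch): reload the climatology just written for the last station.
check = np.load('/nfs/a90/eepdw/Data/Observations/Radiosonde_Numpy/'
                'Radiosonde_Cross_Section_IND_SOUNDING_INTERP_MEAN_Climat_%s_%s_%s_%s.npz'
                % (date_min.strftime('%Y%m%d'), date_max.strftime('%Y%m%d'), delta, stat))
print check['date_bin_mean_all_dates_one_station'].shape         # axis 0: one entry per hour bin
print check['date_bin_mean_all_dates_one_station_single'].shape  # axis 0: one entry per hour bin
print check['times_for_plotting']                                 # the hour grid, 0..23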
In [61]:
time_difference=5.5   # IST is UTC + 5.5 h
hourly_grid = np.array(hourly_grid)
hourly_grid2 = np.where(hourly_grid<=(24-time_difference), hourly_grid+time_difference, hourly_grid+time_difference-24)
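The shift above can also be written as a single modulo, which makes the wrap past midnight explicit; an equivalent sketch:
In [ ]:
# Equivalent UTC -> local (IST, UTC+5.5) conversion using modulo arithmetic,
# e.g. 23 UTC -> 4.5 local, 0 UTC -> 5.5 local.
hourly_grid_local = np.mod(np.array(hourly_grid) + time_difference, 24)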
In [55]:
hourly_grid
Out[55]:
In [63]:
np.array(date_bin_mean_all_dates_one_station_single)[:,0,var_index][time_sort][nan_mask]
Out[63]:
In [62]:
hourly_grid2[time_sort]
Out[62]:
In [73]:
import matplotlib.pyplot as plt
import re
station_list_cs=[43003, 43014, 42867, 43371, 43353, 43285, 43192, 43150, 42339, 40990, 40948, 42809]
variable_list={'pressures': 0, 'temps':1, 'dewpoints':2, 'winddirs':3, 'windspeeds':4, 'pot_temp':5,
'sat_vap_pres':6, 'vap_press':7, 'rel_hum':8, 'wvmr':9, 'sp_hum':10, 'sat_temp':11,
'theta_e':12, 'theta_e_sat':13, 'theta_e_minus_theta_e_sat':14}
variable_list_line={'lcl_temp': 0, 'lcl_vpt':1, 'pbl_pressure':2, 'surface_pressure':3, 'T_eq_0':4,
'CAPE':5, 'CIN':6, 'lclp':7, 'lclt':8, 'lfcp':9, 'equil_level':10}
station_list_search='/nfs/a90/eepdw/Data/Observations/Radiosonde_downloaded_from_NOAA_GUAN/igra-stations.txt'
station_metadata=[]
f = open(station_list_search,'r')
for line in f:
    line = line.strip()
    # Join multi-word station names with underscores so that split() keeps each name as one token
    line=re.sub(r'([A-Z])\s([A-Z])', r'\1_\2', line)
    line=re.sub(r'([A-Z])\s\s([A-Z])', r'\1_\2', line)
    station_metadata.append(line.split())
f.close()

def station_info_search(stat):
    for line in station_metadata:
        if "%s" % stat in line:
            st = line[2].lower().title().replace('_',' ')
            lo = float(line[3])
            la = float(line[4])
    return st,la,lo
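# Example usage: station_name, la, lo = station_info_search(42809)
# gives the title-cased station name plus the two coordinate columns from igra-stations.txt.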
variable='surface_pressure'
var_index = variable_name_index_match(variable, variable_list_line)
date_min=datetime.datetime(1960,5,1,0,0,0)
date_max=datetime.datetime(2014,10,1,0,0,0)
delta = relativedelta(hours=+1)
for stat in station_list_cs:
    load_file = np.load('/nfs/a90/eepdw/Data/Observations/Radiosonde_Numpy/Radiosonde_Cross_Section_IND_SOUNDING_INTERP_MEAN_Climat_%s_%s_%s_%s.npz'
                        % (date_min.strftime('%Y%m%d'), date_max.strftime('%Y%m%d'), delta, stat))

    date_bin_mean_all_dates_one_station_single = load_file['date_bin_mean_all_dates_one_station_single']
    hourly_grid = load_file['times_for_plotting']

    nan_mask = ~np.isnan(np.array(date_bin_mean_all_dates_one_station_single)[:,0,var_index])

    # Shift the hour grid from UTC to local time (IST = UTC + 5.5 h), wrapping past midnight
    time_difference=5.5
    hourly_grid = np.where(hourly_grid<=(24-time_difference), hourly_grid+time_difference, hourly_grid+time_difference-24)

    station_name,la,lo = station_info_search(stat)

    ylim_max = np.max(np.array(date_bin_mean_all_dates_one_station_single)[:,0,var_index][nan_mask])
    ylim_min = np.min(np.array(date_bin_mean_all_dates_one_station_single)[:,0,var_index][nan_mask])

    time_sort=np.argsort(hourly_grid[nan_mask])

    fig=plt.figure()
    plt.plot(np.array(hourly_grid)[nan_mask][time_sort],
             np.array(date_bin_mean_all_dates_one_station_single)[:,0,var_index][nan_mask][time_sort],
             label=variable, marker='.')
    #plt.legend()
    plt.title('%s' % station_name)
    plt.ylim(ylim_min-500, ylim_max+500)
    plt.xlim(0, 24-3)
    plt.xticks(np.arange(0,24,3))
    #fig.autofmt_xdate()
    plt.gca().invert_yaxis()
    plt.show()
In [74]:
import matplotlib.pyplot as plt
import re
station_list_cs=[43014, 42867, 43150, 42809]
variable_list={'pressures': 0, 'temps':1, 'dewpoints':2, 'winddirs':3, 'windspeeds':4, 'pot_temp':5,
'sat_vap_pres':6, 'vap_press':7, 'rel_hum':8, 'wvmr':9, 'sp_hum':10, 'sat_temp':11,
'theta_e':12, 'theta_e_sat':13, 'theta_e_minus_theta_e_sat':14}
variable_list_line={'lcl_temp': 0, 'lcl_vpt':1, 'pbl_pressure':2, 'surface_pressure':3, 'T_eq_0':4,
'CAPE':5, 'CIN':6, 'lclp':7, 'lclt':8, 'lfcp':9, 'equil_level':10}
station_list_search='/nfs/a90/eepdw/Data/Observations/Radiosonde_downloaded_from_NOAA_GUAN/igra-stations.txt'
station_metadata=[]
f = open(station_list_search,'r')
for line in f:
    line = line.strip()
    line=re.sub(r'([A-Z])\s([A-Z])', r'\1_\2', line)
    line=re.sub(r'([A-Z])\s\s([A-Z])', r'\1_\2', line)
    station_metadata.append(line.split())
f.close()

def station_info_search(stat):
    for line in station_metadata:
        if "%s" % stat in line:
            st = line[2].lower().title().replace('_',' ')
            lo = float(line[3])
            la = float(line[4])
    return st,la,lo
variable='surface_pressure'
var_index = variable_name_index_match(variable, variable_list_line)
date_min=datetime.datetime(1960,5,1,0,0,0)
date_max=datetime.datetime(2014,10,1,0,0,0)
delta = relativedelta(hours=+1)
for stat in station_list_cs:
    load_file = np.load('/nfs/a90/eepdw/Data/Observations/Radiosonde_Numpy/Radiosonde_Cross_Section_IND_SOUNDING_INTERP_MEAN_Climat_%s_%s_%s_%s.npz'
                        % (date_min.strftime('%Y%m%d'), date_max.strftime('%Y%m%d'), delta, stat))

    date_bin_mean_all_dates_one_station_single = load_file['date_bin_mean_all_dates_one_station_single']
    hourly_grid = load_file['times_for_plotting']

    nan_mask = ~np.isnan(np.array(date_bin_mean_all_dates_one_station_single)[:,0,var_index])

    time_difference=5.5
    hourly_grid = np.where(hourly_grid<=(24-time_difference), hourly_grid+time_difference, hourly_grid+time_difference-24)

    station_name,la,lo = station_info_search(stat)

    ylim_max = np.max(np.array(date_bin_mean_all_dates_one_station_single)[:,0,var_index][nan_mask])
    ylim_min = np.min(np.array(date_bin_mean_all_dates_one_station_single)[:,0,var_index][nan_mask])

    time_sort=np.argsort(hourly_grid[nan_mask])

    fig=plt.figure()
    plt.plot(np.array(hourly_grid)[nan_mask][time_sort],
             np.array(date_bin_mean_all_dates_one_station_single)[:,0,var_index][nan_mask][time_sort],
             label=variable, marker='.')
    #plt.legend()
    plt.title('%s' % station_name)
    plt.ylim(ylim_min-500, ylim_max+500)
    plt.xlim(0, 24-3)
    plt.xticks(np.arange(0,24,3))
    #fig.autofmt_xdate()
    plt.gca().invert_yaxis()
    plt.show()
In [98]:
import matplotlib.pyplot as plt
import re
station_list_cs=[43014, 42867, 43150, 42809]
variable_list={'pressures': 0, 'temps':1, 'dewpoints':2, 'winddirs':3, 'windspeeds':4, 'pot_temp':5,
'sat_vap_pres':6, 'vap_press':7, 'rel_hum':8, 'wvmr':9, 'sp_hum':10, 'sat_temp':11,
'theta_e':12, 'theta_e_sat':13, 'theta_e_minus_theta_e_sat':14}
variable_list_line={'lcl_temp': 0, 'lcl_vpt':1, 'pbl_pressure':2, 'surface_pressure':3, 'T_eq_0':4,
'CAPE':5, 'CIN':6, 'lclp':7, 'lclt':8, 'lfcp':9, 'equil_level':10}
station_list_search='/nfs/a90/eepdw/Data/Observations/Radiosonde_downloaded_from_NOAA_GUAN/igra-stations.txt'
station_metadata=[]
f = open(station_list_search,'r')
for line in f:
    line = line.strip()
    line=re.sub(r'([A-Z])\s([A-Z])', r'\1_\2', line)
    line=re.sub(r'([A-Z])\s\s([A-Z])', r'\1_\2', line)
    station_metadata.append(line.split())
f.close()

def station_info_search(stat):
    for line in station_metadata:
        if "%s" % stat in line:
            st = line[2].lower().title().replace('_',' ')
            lo = float(line[3])
            la = float(line[4])
    return st,la,lo
variable='surface_pressure'
var_index = variable_name_index_match(variable, variable_list_line)
date_min=datetime.datetime(1960,5,1,0,0,0)
date_max=datetime.datetime(2014,10,1,0,0,0)
delta = relativedelta(hours=+1)
fig=plt.figure(figsize=(14,8.5))
for stat in station_list_cs:
    load_file = np.load('/nfs/a90/eepdw/Data/Observations/Radiosonde_Numpy/Radiosonde_Cross_Section_IND_SOUNDING_INTERP_MEAN_Climat_%s_%s_%s_%s.npz'
                        % (date_min.strftime('%Y%m%d'), date_max.strftime('%Y%m%d'), delta, stat))

    date_bin_mean_all_dates_one_station_single = load_file['date_bin_mean_all_dates_one_station_single']
    hourly_grid = load_file['times_for_plotting']

    nan_mask = ~np.isnan(np.array(date_bin_mean_all_dates_one_station_single)[:,0,var_index])

    time_difference=5.5
    hourly_grid = np.where(hourly_grid<=(24-time_difference), hourly_grid+time_difference, hourly_grid+time_difference-24)

    station_name,la,lo = station_info_search(stat)

    ylim_max = np.max(np.array(date_bin_mean_all_dates_one_station_single)[:,0,var_index][nan_mask])
    ylim_min = np.min(np.array(date_bin_mean_all_dates_one_station_single)[:,0,var_index][nan_mask])

    time_sort=np.argsort(hourly_grid[nan_mask])

    plt.plot(np.array(hourly_grid)[nan_mask][time_sort],
             np.array(date_bin_mean_all_dates_one_station_single)[:,0,var_index][nan_mask][time_sort],
             label=station_name, marker='.')

plt.legend()
plt.title('%s' % variable)
#plt.ylim(ylim_min-500,ylim_max+500)
plt.xlim(0, 24-3)
plt.xticks(np.arange(0,24,3))
plt.xlabel('Time (Local)')
plt.ylabel('Pressure')
#fig.autofmt_xdate()
#plt.gca().invert_yaxis()
plt.show()
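A quick way to put numbers on the combined plot is the diurnal range (max minus min of the hourly-bin means) of the chosen variable per station; a minimal sketch reusing the same files, `var_index` and station list as above:
In [ ]:
# Sketch: diurnal range of the hourly-bin means of the chosen variable, per station.
for stat in station_list_cs:
    climat = np.load('/nfs/a90/eepdw/Data/Observations/Radiosonde_Numpy/'
                     'Radiosonde_Cross_Section_IND_SOUNDING_INTERP_MEAN_Climat_%s_%s_%s_%s.npz'
                     % (date_min.strftime('%Y%m%d'), date_max.strftime('%Y%m%d'), delta, stat))
    vals = np.array(climat['date_bin_mean_all_dates_one_station_single'])[:, 0, var_index]
    vals = vals[~np.isnan(vals)]
    print stat, vals.max() - vals.min()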
In [ ]:
variable='CAPE'
var_index = variable_name_index_match(variable, variable_list_line)
plt.plot_date(date_plot[1:], load_file['date_bin_mean_all_dates_one_station_single'][1:,0,var_index], fmt="-", label=variable)
variable='CIN'
var_index = variable_name_index_match(variable, variable_list_line)
plt.plot_date(date_plot[1:], load_file['date_bin_mean_all_dates_one_station_single'][1:,0,var_index], fmt="-", label=variable)
plt.legend()
plt.title('%s' % station_name)
fig.autofmt_xdate()
#plt.gca().invert_yaxis()
plt.show()
In [125]:
experiment_ids = ['djznw', 'djzny', 'djznq', 'djzns', 'dklyu', 'dkmbq', 'dklwu', 'dklzq', 'dkbhu', 'djznu', 'dkhgu' ]
diag = '_408_on_p_levs_mean_by_hour'
pp_file_path='/nfs/a90/eepdw/Data/EMBRACE/'
In [122]:
import imp
model_name_convert_legend = \
imp.load_source('util', '/nfs/see-fs-01_users/eepdw/python_scripts/modules/model_name_convert_legend.py')
In [129]:
p_level = 925.
#experiment_ids = ['djznw']
#experiment_ids = ['djznw', 'djzny', 'djznq']

from iris import unit   # assumption: `unit` is the (older) iris.unit module providing num2date

fig = plt.figure(figsize=(14,8.5))

for experiment_id in experiment_ids:
    try:
        expmin1 = experiment_id[:-1]
        data = np.load('%s%s/%s/%s%s_diurnal_monsoon_trough.npz' % (pp_file_path, expmin1, experiment_id, experiment_id, diag))

        plot_idx=np.where(data['pressure']==p_level)[0][0]

        # Convert the model time coordinate to hour of day, then shift UTC -> local (IST)
        hours = np.array([unit.num2date(q, 'hours since 1970-01-01 00:00:00',
                                        unit.CALENDAR_STANDARD).hour for q in data['time']])
        time_difference=5.5
        hours_local = np.where(hours<=(24-time_difference), hours+time_difference, hours+time_difference-24)
        time_sort=np.argsort(hours_local)

        print experiment_id
        print data['data'][:,plot_idx][time_sort]

        plt.plot(hours_local[time_sort], data['data'][:,plot_idx][time_sort],
                 label=model_name_convert_legend.main(experiment_id))
        #plt.show()
    except Exception:
        pass
plt.legend()
plt.title('Diurnal Variation in geopotential height of %s hPa' % p_level )
plt.ylim(650,800)
plt.xlim(0,24-3)
plt.xticks(np.arange(0,24,3))
plt.xlabel('Time (Local)')
plt.ylabel('Height (m)')
plt.show()
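The hour extraction in the cell above assumes `unit` is the old iris.unit module; on a newer stack the same conversion can be done with cf_units (assumed to be installed), e.g. reusing `data` from the last successfully loaded experiment:
In [ ]:
# Alternative (assumption: cf_units available) for converting the model time
# coordinate to hours of day without iris.unit.
import cf_units
u = cf_units.Unit('hours since 1970-01-01 00:00:00', calendar='standard')
hours = np.array([u.num2date(q).hour for q in data['time']])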