In [2]:
import datetime
import scipy.interpolate
import re
import numpy as np

In [3]:
import pdb

In [4]:
station_list_search='/nfs/a90/eepdw/Data/Observations/Radiosonde_downloaded_from_NOAA_GUAN/igra-stations.txt'
station_metadata=[]
f = open(station_list_search,'r')
for line in f:
     line = line.strip()
     line=re.sub(r'([A-Z])\s([A-Z])', r'\1_\2',line)
     line=re.sub(r'([A-Z])\s\s([A-Z])', r'\1_\2',line)
     station_metadata.append(line.split())
f.close()

In [5]:
station_list_cs=[43150, 42867, 43014, 42339, 40990, 40948]
#station_list_cs=[43014, 42339, 40990, 40948]
#station_list_cs=[42339, 40990, 40948]
station_list_cs=[43003, 43014, 42867, 43371, 43353, 43285,  43192, 43150, 42339, 40990, 40948]

In [6]:
import imp
imp.load_source('SoundingRoutines', '/nfs/see-fs-01_users/eepdw/python_scripts/Tephigram/Sounding_Routines.py')
#imp.load_source('TephigramPlot', '/nfs/see-fs-01_users/eepdw/python_scripts/Tephigram/Tephigram_Test.py')
#from TephigramPlot import *
from SoundingRoutines import *

In [ ]:
#Raw data read - Climatology
    # /nfs/a90/eepdw/Data/Observations/Radiosonde_downloaded_from_NOAA_GUAN/igra-stations.txt
    # /nfs/a90/eepdw/Data/Observations/Radiosonde_downloaded_from_NOAA_GUAN/readme.txt
    
    # Station numbers for Bombay - 43003; New Delhi - 42182, Lucknow - 42369, Cochin, Bangalore, Madras, Minicoy, Nagpur Sonegaon)
    import datetime
    import scipy.interpolate
    import re
    import numpy as np
    
    import os 
    
    date_min=datetime.datetime(1960,5,1,0,0,0)
    date_max=datetime.datetime(2014,10,1,0,0,0)
    
    import datetime
    
    variable_list={'pressures': 0, 'temps':1, 'dewpoints':2, 'winddirs':3, 'windspeeds':4, 'pot_temp':5, 
               'sat_vap_pres':6, 'vap_press':7, 'rel_hum':8, 'wvmr':9, 'sp_hum':10, 'sat_temp':11, 
               'theta_e':12, 'theta_e_sat':13, 'theta_e_minus_theta_e_sat':14}
    variable_list_line={'lcl_temp': 0, 'lcl_vpt':1, 'pbl_pressure':2, 'surface_pressure':3, 'T_eq_0':4, 
                    'CAPE':5, 'CIN':6, 'lclp':7, 'lclt':8, 'lfcp':9, 'equil_level':10}
    

    test_count=0
    
    match_header = re.compile(r'(#.....20..0[56789]|#.....19..0[56789])')
    #match_header_20th = re.compile(r'#.....19..0[56789]')
    match_bad = re.compile(r'9999')
        
    lev_count=False
    
    y_points=np.linspace(5000, 100000, 200) # Points for pressure interpolation

    def PoissonConstant(wvmr):
    
        """http://glossary.ametsoc.org/wiki/Poisson_constant
        May need to tweak low limit for dry air (=0.2854)"""
     
        return np.where(wvmr>0., 0.2854*(1-0.24*wvmr), 0.2854)

    def interp_sounding(variable, pressures_s,y_points):
        #print pressures_s
        #print variable
        
        #pressures_s,variable=np.sort(np.array((pressures_s, variable), dtype=float), axis=1)
        #a=np.array((pressures_s, variable))
       # a=a[a[0].argsort()]
        #print a
        #print pressures_s
        #print variable
        nan_mask = np.ma.masked_array(np.array(variable, dtype=float), np.isnan(np.array(variable, dtype=float)))
        nan_mask_p = np.ma.masked_array(np.array(pressures_s, dtype=float), np.isnan(np.array(variable, dtype=float)))
        variable = [x for (y,x) in sorted(zip(pressures_s, variable), key=lambda pair: pair[0])]
        pressures_s = [y for (y,x) in sorted(zip(pressures_s, variable), key=lambda pair: pair[0])]
        #print nan_mask_p
        #print nan_mask
        #print sorted_pre_interp
        interp = scipy.interpolate.interp1d(pressures_s, variable, bounds_error=False, fill_value=np.nan)
        y_interp = interp(y_points)
        return y_interp
    
    for stat in station_list_cs:
        
      st,la,lo, st_height = station_info_search(stat)
        
      lev_count=False
        
      single_values=[]
      pressures=[]
      temps=[]
      dewpoints=[]
      winddirs=[]
      windspeeds=[]
      geop_height=[]
        
      pot_temp=[]
      sat_vap_pres=[]
      vap_press=[]
      rel_hum=[]
      wvmr=[]
      sp_hum=[]
      sat_temp=[]
      theta_e=[]
      theta_e_sat=[]
      theta_es_min_theta_e=[]
      temp_lcl=[]
      virtual_pot_temp=[]
      
      #lcl=[]
      lcl_method_2=[]
        
      pbl_pressure_l=[]
      
      surface_p_l=[]
        
      t_zero_line=[]
        
      CAPE=[]
      CIN=[]
      lclp=[]
      lclt=[] 
      lfcp=[]
      equil_level=[]
      pressures_s=[]
      temps_s=[]
      dewpoints_s=[]
      winddirs_s=[]
      windspeeds_s=[]
      theta_es_min_theta_e_s=[]
      #temp_lcl_s=[]
      #virtual_pot_temp_s=[]
      #pressures_for_plotting = np.array((dates_for_plotting, pressures, temps, dewpoints, winddirs, windspeeds))
        
      dates_for_plotting_single=[]
      dates_for_plotting_single_single=[]
        
      levs=-1
      numlevs=0
    
      #print stat
    
      i = '/nfs/a90/eepdw/Data/Observations/Radiosonde_downloaded_from_NOAA_GUAN/%s.dat' % stat
      print i
      f = open(i,'r')
      #print f
      for line in f:
       #line = line.strip().replace('-99999',' %s ' %float('NaN'))
       line = line.strip()
       #print line
       #columns=line.split()
                
       if levs >= numlevs and pressures_s and list(temps_s) and list(dewpoints_s) and winddirs_s and windspeeds_s:
         lev_count=False
         #print pressures_s
         #print temps_s
        
         #temps_s_mask = np.ma.masked_array(np.array(temps_s, dtype=float), np.isnan(np.array(temps_s, dtype=float)))
         #pressures_s_mask =  np.ma.masked_array(np.array(pressures_s, dtype=float), np.isnan(np.array(pressures_s, dtype=float)))   
         #dewpoints_s_mask = np.ma.masked_array(np.array(dewpoints_s, dtype=float), np.isnan(np.array(dewpoints_s, dtype=float)))
         geop_height_s = np.ma.masked_array(np.array(geop_height_s, dtype=float), np.isnan(np.array(geop_height_s, dtype=float)))
           
         temps_scent = np.array(temps_s, dtype=float)/10.
         temps_s = np.array(temps_s, dtype=float)/10. + 273.15
         pressures_s_hPa = np.array(pressures_s, dtype=float)/100. # Pressures in raw files as mb*100 - which is Pa - convert to hPa for calcs
         dewpoints_s = np.array(dewpoints_s, dtype=float)/10.
            
         dewpoint_temp = temps_scent-dewpoints_s
        
         sat_vap_pres_s = 611.2*np.exp(17.67*(temps_scent/(temps_scent+243.5)))
         
         vap_press_s = 611.2*np.exp(17.67*(dewpoint_temp/(dewpoint_temp+243.5)))
            
         rel_hum_s = 100.*vap_press_s/sat_vap_pres_s
    
         wvmr_s = 0.622*vap_press_s/(pressures_s-vap_press_s) # kg/kg
        
         kappa=PoissonConstant(wvmr_s)
            
         pot_temp_s = temps_s*((1000./pressures_s_hPa)**(kappa))
            
         wvmr_sat_s = 0.622*sat_vap_pres_s/(pressures_s-sat_vap_pres_s) # kg/kg
            
         sp_hum_s = wvmr_s/(1+wvmr_s)
            
         sat_temp_s = 55.+2840./(3.5*np.log(temps_s)-np.log(vap_press_s/100.)-4.805)
            
         sat_temp_sat_s = 55.+2840./(3.5*np.log(temps_s)-np.log(sat_vap_pres_s/100.)-4.805)
        
         theta_e_s = temps_s*((1000./pressures_s_hPa)**(0.2854*(1.-0.28*wvmr_s)))*np.exp(((3376./sat_temp_s)-2.54)*wvmr_s*(1.+0.81*wvmr_s))
                        
         theta_e_sat_s = temps_s*((1000./pressures_s_hPa)**(0.2854*(1.-0.28*wvmr_sat_s)))*np.exp(((3376./sat_temp_sat_s)-2.54)*wvmr_sat_s*(1.+0.81*wvmr_sat_s))
         
         #surf_parcel_virtual_pot_temp_s=pot_temp_s*(1+wvmr_s/0.621972)/(1+wvmr_s)
            
         
          #print '%s contains no dew point temperatures' % line
         #print temp_lcl
          
         #except Exception, e:
          #print e
          #print line
            
         pot_temp_test=pot_temp_s
         pressures_test=pressures_s_hPa
         temps_test=temps_s
         
            
         try:
          temp_interp = interp_sounding(temps_s, pressures_s,y_points)
                
          #print pressures_s
          dewpoints_interp = interp_sounding(dewpoints_s, pressures_s,y_points)
          #print dewpoints_interp
          winddirs_interp= interp_sounding(np.array(winddirs_s, dtype=float), pressures_s, y_points)
          windspeeds_interp= interp_sounding(np.array(windspeeds_s, dtype=float)/10, pressures_s, y_points)
          #geop_height_interp=interp_sounding(np.array(geop_height_s, dtype=float), pressures_s, y_points)
          geop_height_interp=interp_sounding(geop_height_s.data[~geop_height_s.mask], np.array(pressures_s)[~geop_height_s.mask], y_points)
                
          pot_temp_interp = interp_sounding(pot_temp_s, pressures_s,y_points)
          #print pot_temp_interp
          sat_vap_pres_interp = interp_sounding(sat_vap_pres_s, pressures_s,y_points)
          vap_press_interp = interp_sounding(vap_press_s, pressures_s,y_points)
          rel_hum_interp = interp_sounding(rel_hum_s, pressures_s,y_points)
          #print rel_hum_interp
          wvmr_interp = interp_sounding(wvmr_s, pressures_s,y_points)
          sp_hum_interp = interp_sounding(sp_hum_s, pressures_s,y_points)
          sat_temp_interp = interp_sounding(sat_temp_s, pressures_s,y_points)
          theta_e_interp = interp_sounding(theta_e_s, pressures_s,y_points)
                
          theta_e_sat_interp = interp_sounding(theta_e_sat_s, pressures_s, y_points)
                
          #virtual_pot_temp_interp = interp_sounding(virtual_pot_temp_s, pressures_s, y_points)
  

# Calculate lifting condensation level
    
    # Find point closest to (surface pressure -5 hPa), if it is not too far away (e.g in a sounding where 
    # there are no interpolated values near the surface becuase of no data in the sounding near the surface)
    
          if any(dewpoints_interp[~np.isnan(dewpoints_interp)]) and any(temp_interp[~np.isnan(temp_interp)]) and ('surface_p' in globals()): #If arrays are not totally nan
          
           nan_mask = np.ma.masked_array(np.array(y_points), np.isnan(np.array(dewpoints_interp, dtype=float)) | np.isnan(np.array(temp_interp, dtype=float)))
          
           if np.abs(nan_mask-(surface_p-500.)).min()<700.:   # If there is a point not too far away from (surface pressure - 5 hPa)
            
            i_idx = np.abs(nan_mask-(surface_p-500.)).argmin()  # Closest point to 5 hPa less than surface
                
            if np.isnan(sat_temp_interp[i_idx]):
                print sat_temp_interp
                st=sat_temp_interp
                vp=vap_press_interp
                i_d=i_idx
                nm=nan_mask
                #sf=surf_level
                sp=surface_p
                
            temp_lcl.append(sat_temp_interp[i_idx])
            lcl_method_2.append((y_points[i_idx])*((sat_temp_interp[i_idx])/(temp_interp[i_idx]))**((1004.+1870.*(wvmr_interp[i_idx]))/(287.04*(1+(wvmr_interp[i_idx])/0.621972))))
                
            # Determine planetary boundary layer pressure from parcel vpt method
        
            # Calculate virtual potential temperature of parcel from closest interpolated point to (surface pressure - 5hPa) at all levels
            kappa=PoissonConstant(wvmr_interp)
            surface_vpt=(temp_interp[i_idx]*((100000./y_points)**(kappa)))*((1+wvmr_interp/0.621972)/(1+wvmr_interp)) # Calculate virtual pot temp for surface parcel
         
            # Mask out surface point in virtual potential temperature profile
            mask = np.zeros(surface_vpt.shape, dtype=bool) # all elements False
            mask[i_idx] = True  # Mask out surface point
            parcel_vpt=np.ma.masked_array(surface_vpt, mask)
            #parcel_vpt = np.where(mask, surface_vpt, np.nan)
    
                                                                                # Find index of point closest to minimum difference
    
                                                                                #   vpt_idx=np.nanargmin(np.abs(parcel_vpt-surface_vpt[i_idx]))  
        
            # Interpolate difference of virtual potential temperature profile from surface vpt and find where equal to zero
    
            interp = scipy.interpolate.interp1d((parcel_vpt-surface_vpt[i_idx]),y_points , bounds_error=False, fill_value=np.nan)
            pbl_pressure = float(interp(0.))
            
            #print pbl_pressure
            
            pbl_pressure_l.append(pbl_pressure)  # Interpolate to pressure of zero difference
            
           # Save surface pressure and date 
            surface_p_l.append(surface_p)
           
            dates_for_plotting_single_single.append(date)
            
            if len(pbl_pressure_l)>len(temp_lcl):
                print date
                print pbl_pressure_l
                print temp_lcl
                
            try:
              
                
                equil_level_s, parc_prof, lclp_s,lfcp_s, lclt_s, delta_z, CAPE_s, CIN_s = CapeCinPBLInput(
                                                        y_points/100, 
                                                        temp_interp, 
                                                        temp_interp-dewpoints_interp, 
                                                        geop_height_interp, st_height, pbl_pressure/100)  
                
                
                #print CAPE
            
            except Exception,e:
                #print e
                CAPE_s = np.nan
                CIN_s = np.nan
                lclp_s = np.nan
                lclt_s = np.nan
                lfcp_s = np.nan
                equil_level_s = np.nan
                #pass
            
            CAPE.append(CAPE_s)
            CIN.append(CIN_s)
            lclp.append(lclp_s)
            lclt.append(lclt_s)
            lfcp.append(lfcp_s)
            equil_level.append(equil_level_s)
            
    #Find T=0 pressure
    
            interp = scipy.interpolate.interp1d(temp_interp ,y_points , bounds_error=False, fill_value=np.nan)
            t_zero_line.append(float(interp(273.15)))  # Interpolate to pressure of zero difference
                
 
    # Find point closest to (surface pressure -50 hPa), if it is not too far away (e.g in a sounding where 
    # there are no interpolated values near the surface becuase of no data in the sounding near the surface)
    #  This is for calculating theta-e saturated minus boundary layer theta-e
    
           if np.abs(nan_mask-(surface_p-5000.)).min()<700.:   # If there is a point not too far away from (surface pressure - 50 hPa)
            
            i_idx_50hPa = np.abs(nan_mask-(surface_p-5000.)).argmin()  # Closest point to 50 hPa less than surface
            
    #Theta-e BL minus theta-es local
    
            theta_es_min_theta_e_s = theta_e_interp [i_idx_50hPa] - theta_e_sat_interp
                
            if np.isnan(sat_temp_interp[i_idx_50hPa]):
                print sat_temp_interp
                st=sat_temp_interp
                vp=vap_press_interp
                i_d=i_idx
                nm=nan_mask
                sf=surf_level
                sp=surface_p
                
           # Makes the next stage simpler if all arrays have the same number of soundings
                
           else:
                
            theta_es_min_theta_e_s = np.empty((theta_e_sat_interp.shape))
            theta_es_min_theta_e_s[:] = np.NAN
                      
   
 # Makes the next stage simpler if all arrays have the same number of soundings
                
          else:
                
            theta_es_min_theta_e_s = np.empty((theta_e_sat_interp.shape))
            theta_es_min_theta_e_s[:] = np.NAN
            
            #else:
           # temp_lcl.append(float('NaN'))
           # lcl_method_2.append(float('NaN'))
           # pbl_pressure.append(float('NaN'))
          
          #else:
          # temp_lcl.append(float('NaN'))
           #lcl_method_2.append(float('NaN'))
          # pbl_pressure.append(float('NaN'))
           
          try:
           del surface_p
          except NameError:
           pass
                
          pressures.append(list(y_points))
          temps.append(list(temp_interp))
          dewpoints.append(list(dewpoints_interp))
          winddirs.append(list(winddirs_interp))
          windspeeds.append(list(windspeeds_interp))
          geop_height.append(list(geop_height_interp))
                
          pot_temp.append(list(pot_temp_interp))
          sat_vap_pres.append(list(sat_vap_pres_interp))
          vap_press.append(list(vap_press_interp))
          rel_hum.append(list(rel_hum_interp))
          wvmr.append(list(wvmr_interp))
          sp_hum.append(list(sp_hum_interp))
          sat_temp.append(list(sat_temp_interp))
          theta_e.append(list(theta_e_interp))
                
          theta_e_sat.append(list(theta_e_sat_interp))
                
          theta_es_min_theta_e.append(list(theta_es_min_theta_e_s))
        
          dates_for_plotting_single.append(date)
            
          #virtual_pot_temp.append(list(virtual_pot_temp_interp))
            
         # dates_for_plotting.append(list([dates_for_plotting_single for i in range(len(y_points))]))
          
          #if any(dewpoints_interp[~np.isnan(dewpoints_interp)]):
                
           #pp=pressures_s
           #tt=temps_s
           #ppp=pressures
           #ttt=temps
           #ti=temp_interp
           #vpt=virtual_pot_temp_interp
              
          pressures_s=[]
          temps_s=[]
          dewpoints_s=[]
          winddirs_s=[]
          windspeeds_s=[]
          geop_height_s=[]
            
          temps_s_mask=[]
          pressures_s_mask =[]
          dewpoints_s_mask =[]
            
          temps_scent = []
          temps_s = []
          pressures_s_hPa = []
          dewpoints_s = []
            
          dewpoint_temp = []
        
          pot_temp_s = []
    
          sat_vap_pres_s = []
         
          vap_press_s = []
            
          rel_hum_s = []
    
          wvmr_s =[]
            
          sp_hum_s = []
            
          sat_temp_s =[]
        
          theta_e_s = []
            
          theta_e_sat_s = []
            
          theta_es_min_theta_e_s = []
            
          #virtual_pot_temp_s=[]
            
         except ValueError, e:
          #print e
          #print line
          pass
        # np.append(pressures_for_plotting, (y_points.tolist(), temp_interp.tolist(), dewpoints_interp.tolist(), winddirs_interp.tolist(), windspeeds_interp.tolist()), axis=1)
      # levs=0
       if lev_count is True: 
        
        # Line length varies depending on length of first variable (pressure)
     #   li=len(line)
      # ld=142-len(line)
        #print line[0:2]
        if float(line[0:2])==21:
           try:
            surface_p=float(line[2:8].replace('-9999','%s' %float('NaN')).replace('-8888','%s' %float('NaN')))
            #print surface_p
           except Exception,e:
             print e
             print line
        try:
         pressures_s.append(float(line[2:8].replace('-9999','%s' %float('NaN')).replace('-8888','%s' %float('NaN'))))     
         temps_s.append(float(line[15:20].replace('-9999','%s' %float('NaN')).replace('-8888','%s' %float('NaN')))) 
         dewpoints_s.append(float(line[21:26].replace('-9999','%s' %float('NaN')).replace('-8888','%s' %float('NaN')))) 
         winddirs_s.append(float(line[26:31].replace('-9999','%s' %float('NaN')).replace('-8888','%s' %float('NaN')))) 
         windspeeds_s.append(float(line[31:36].replace('-9999','%s' %float('NaN')).replace('-8888','%s' %float('NaN'))))
         geop_height_s.append(float(line[9:14].replace('-9999','%s' %float('NaN')).replace('-8888','%s' %float('NaN'))))
         
       
         levs+=1
        except Exception,e:
         print e
         print line
      
        #print pressures_s
        
    # Extract independent headers 
    
       #print uwind
       #if match_header_20th.search(line) is not None or match_header_21st.search(line) is not None:
       if match_header.search(line) is not None:
        #print line
        
        year=int(line[6:10])
        month=int(line[10:12])
        day=int(line[12:14])
        hour=int(line[14:16])
        
        date = datetime.datetime(year,month,day,hour)
        
        numlevs=int(line[20:24])
        
        #print numlevs
        
        lev_count=True
        levs=0
        
        pressures_s=[]
        temps_s=[]
        dewpoints_s=[]
        winddirs_s=[]
        windspeeds_s=[]
        geop_height_s=[]
        
      #print pressures.shape  
             
          
      pressures_for_plotting = np.array((pressures, temps, dewpoints, winddirs, windspeeds, pot_temp, sat_vap_pres, vap_press, rel_hum, wvmr, sp_hum, sat_temp, theta_e, theta_e_sat, theta_es_min_theta_e, geop_height))
      single_value_vars = np.array((temp_lcl, lcl_method_2, pbl_pressure_l, surface_p_l, t_zero_line, CAPE, CIN, lclp, lclt, lfcp, equil_level))
        
      print pressures_for_plotting.shape
      np.savez('/nfs/a90/eepdw/Data/Observations/Radiosonde_Numpy/Radiosonde_Single_Station_PRESSURES__IND_INTERP_SOUNDING_%s_%s_to_%s'\
              % (stat, date_min.strftime('%Y%m%d'), date_max.strftime('%Y%m%d') ), pressures_for_plotting=pressures_for_plotting, single_value_vars=single_value_vars, dates_for_plotting_single=dates_for_plotting_single, dates_for_plotting_single_single=dates_for_plotting_single_single, variable_list=variable_list, variable_list_line=variable_list_line)


-c:40: RuntimeWarning: invalid value encountered in greater
/apps/canopy-1.4.1/Enthought/Canopy_64bit/User/lib/python2.7/site-packages/scipy/interpolate/interpolate.py:500: RuntimeWarning: invalid value encountered in greater
  above_bounds = x_new > self.x[-1]
/nfs/see-fs-01_users/eepdw/python_scripts/Tephigram/Sounding_Routines.py:870: RuntimeWarning: invalid value encountered in less
  CIN = grav*sum(sum_ascent[((pp_diff-Tk_diff)<0) & (p_diff>lfc_p) & (p_diff<pbl_pressure)])
/nfs/see-fs-01_users/eepdw/python_scripts/Tephigram/Sounding_Routines.py:754: RuntimeWarning: invalid value encountered in greater
  for i in (where((parcel_profile>temp_k))[0]):
/nfs/see-fs-01_users/eepdw/python_scripts/Tephigram/Sounding_Routines.py:755: RuntimeWarning: invalid value encountered in less
  if i in (where(parcel_profile<temp_k)[0]+1):
/nfs/see-fs-01_users/eepdw/python_scripts/Tephigram/Sounding_Routines.py:733: RuntimeWarning: invalid value encountered in greater
  lfc_idx = nanmax(where((parcel_profile>temp_k) & (pres<(nanmax(pres)-50)))[0])
/nfs/see-fs-01_users/eepdw/python_scripts/Tephigram/Sounding_Routines.py:740: RuntimeWarning: invalid value encountered in greater
  if all(parcel_profile>temp_k):
/nfs/see-fs-01_users/eepdw/python_scripts/Tephigram/Sounding_Routines.py:865: RuntimeWarning: invalid value encountered in greater
  CAPE = grav*sum(sum_ascent[((pp_diff-Tk_diff)>0) & (p_diff>eqlv_p) & (p_diff<lfc_p)])
/apps/canopy-1.4.1/Enthought/Canopy_64bit/User/lib/python2.7/site-packages/numpy/lib/nanfunctions.py:607: RuntimeWarning: Mean of empty slice
  warnings.warn("Mean of empty slice", RuntimeWarning)
/apps/canopy-1.4.1/Enthought/Canopy_64bit/User/lib/python2.7/site-packages/scipy/interpolate/interpolate.py:499: RuntimeWarning: invalid value encountered in less
  below_bounds = x_new < self.x[0]
/apps/canopy-1.4.1/Enthought/Canopy_64bit/User/lib/python2.7/site-packages/scipy/interpolate/interpolate.py:445: RuntimeWarning: divide by zero encountered in true_divide
  slope = (y_hi - y_lo) / (x_hi - x_lo)[:, None]
/nfs/a90/eepdw/Data/Observations/Radiosonde_downloaded_from_NOAA_GUAN/43003.dat
(16, 18886, 200)
/nfs/a90/eepdw/Data/Observations/Radiosonde_downloaded_from_NOAA_GUAN/43014.dat
(16, 10516, 200)
/nfs/a90/eepdw/Data/Observations/Radiosonde_downloaded_from_NOAA_GUAN/42867.dat
(16, 17892, 200)

Check CAPE values when geop_height_s interpolated same way as others


In [42]:
import re

station_list_search='/nfs/a90/eepdw/Data/Observations/Radiosonde_downloaded_from_NOAA_GUAN/igra-stations.txt'
station_metadata=[]
f = open(station_list_search,'r')
for line in f:
     line = line.strip()
     line=re.sub(r'([A-Z])\s([A-Z])', r'\1_\2',line)
     line=re.sub(r'([A-Z])\s\s([A-Z])', r'\1_\2',line)
     station_metadata.append(line.split())
f.close()

def station_info_search(stat):
    for line in station_metadata:
         if "%s" % stat in line: 
             st = line[2].lower().title().replace('_',' ')
             lo = float(line[3])
             la = float(line[4])
             st_height = float(line[5])
    return st,la,lo, st_height

In [ ]:
# Histogram plots of year and diurnal time frequency for radiosonde soundings
from dateutil.relativedelta import relativedelta
from math import sin, cos, atan2, radians, sqrt
from collections import defaultdict
import collections
import bisect

for stat in station_list_cs:
    
    st,la,lo, st_height = station_info_search(stat)
    
    grid=DiurnalHourlyGrid(date_min,date_max)
    bins=collections.defaultdict(list)

    npz_file = np.load('/nfs/a90/eepdw/Data/Observations/Radiosonde_Numpy/Radiosonde_Single_Station_PRESSURES__IND_INTERP_SOUNDING_%s_%s_to_%s.npz'\
              % (stat, date_min.strftime('%Y%m%d'), date_max.strftime('%Y%m%d')))
    #pressure_file=npz_file['pressures_for_plotting']
    date_file=npz_file['dates_for_plotting_single']
    
    for di, dates in enumerate(date_file):
   #print idx

        idx=bisect.bisect(grid,dates.timetuple().tm_hour)
        bins[idx-1].append(dates)
        
    no_soundings=[]
    time_soundings=[]
    for i in range(len(bins)):
        no_soundings.append(len(np.array(bins[i])))
        time_soundings.append(grid[i])

    plt.plot(time_soundings, no_soundings)
    plt.xlim(0,23)
    plt.xticks(arange(0,24,4))
    plt.title('%s' % st)
    plt.xlabel('Time (UTC)')
    plt.ylabel('Number of Soundings')
    plt.show()

In [ ]:
# Histogram plots of year and diurnal time frequency for radiosonde soundings
from dateutil.relativedelta import relativedelta
from math import sin, cos, atan2, radians, sqrt
from collections import defaultdict
import collections
import bisect

%matplotlib inline
import matplotlib.pyplot as plt
from numpy import arange

grid=YearlyGrid(date_min,date_max)

    for stat in station_list_cs:
    
    st,la,lo, st_height = station_info_search(stat)

    npz_file = np.load('/nfs/a90/eepdw/Data/Observations/Radiosonde_Numpy/'\
                       'Radiosonde_Single_Station_PRESSURES__IND_INTERP_SOUNDING_%s_%s_to_%s.npz'\
                       % (stat, date_min.strftime('%Y%m%d'), date_max.strftime('%Y%m%d')))
    
    date_file=npz_file['dates_for_plotting_single']
    
    bins=collections.defaultdict(list)
    
    for di, dates in enumerate(date_file):
   #print idx

        idx=bisect.bisect(grid,dates.timetuple().tm_year)
        bins[idx].append(dates)
        
    no_soundings=[]
    time_soundings=[]
    for i in bins:
        no_soundings.append(len(np.array(bins[i])))
        time_soundings.append(grid[i-1])
        #print np.array(bins[i])
        #print grid[i]
    
    #print no_soundings
    #print time_soundings
    
    ind = np.argsort(time_soundings)
    
    #print ind

    plt.plot(np.array(time_soundings)[ind], np.array(no_soundings)[ind])
    plt.xlim(1960,2014)
    #plt.xticks(arange(0,24,4))
    plt.title('%s' % st)
    plt.xlabel('Year')
    plt.ylabel('Number of Soundings')
    plt.show()
    #print bins

In [55]:
import datetime
from dateutil.relativedelta import relativedelta

from calendar import timegm

#date_min=datetime.datetime(1960,5,1,0,0,0)
#date_max=datetime.datetime(2014,10,1,0,0,0)

def YearlyGrid(date_min, date_max):

    delta = relativedelta(years=+1)

    d = date_min

    grid=[]
    while (d <= date_max): 
        grid.append(d.timetuple().tm_year)
        d += delta
    
    return grid

def DiurnalHourlyGrid(date_min, date_max):

    delta = relativedelta(hours=+1)

    d = date_min

    grid=[]
    while ((d <= date_max) and (d.timetuple().tm_hour not in grid)): 
        grid.append(d.timetuple().tm_hour)
        d += delta
    
    return grid

def UnixEpochWeeklyGrid(date_min, date_max):

    delta = relativedelta(weeks=+1)

    d = date_min

    grid=[]
    while ((d <= date_max) and (d.timetuple() not in grid)): 
        grid.append(timegm(d.timetuple()))
        d += delta
    
    return grid

In [21]:
# Weekly Climatology
# Loading individual stations numpy files, sorting into datetime bins
# AND then interpolating for each station 
# BEFORE appending so that all 1-D interpolated values for stations are in one array with distances for gridding and plotting
# Plot variable index must be changed in script (plot_variable)

from dateutil.relativedelta import relativedelta
from math import sin, cos, atan2, radians, sqrt
from collections import defaultdict
import collections
import bisect

import numpy as np

date_min=datetime.datetime(1960,5,1,0,0,0)
date_max=datetime.datetime(2014,10,1,0,0,0)

date_min_month_bin=datetime.datetime(2011,5,1,0,0,0)
date_max_month_bin=datetime.datetime(2011,10,1,0,0,0)

#delta = relativedelta(months=+1)
delta = relativedelta(weeks=+1)

#from scipy.interpolate import spline

#import scipy.interpolate.interp1d

def calculate_distance_from_first_station(stat, first_station_lon, first_station_lat, station_lat, station_lon):

 fslat_rad = radians(first_station_lat)
 fslon_rad = radians(first_station_lon)
 lat_rad = radians(station_lat)
 lon_rad = radians(station_lon)

 #Haversine Formula
    
 a = sin((lat_rad-fslat_rad)/2.)**2. + cos(lat_rad) * cos(fslat_rad) * sin((lon_rad-fslon_rad)/2.)**2.
 c = 2. * atan2(sqrt(a), sqrt(1.-a))
 d = 6371. * c

 return d

all_stat_date_range_pressure_interp=[]

def DateTimeBinGridCreate(date_min_month_bin, date_max_month_bin, delta):

    ''' 
    Take datetime.datetime range (date_min_month_bin to date_max_month_bin)
    and create a grid with bins of size delta (input variable) 
    Return a list of timetuples for binning and list of datetimes for plotting or whatever else
    
    Example usage.  In loop, where date is one datetime
    
        bins=collections.defaultdict(list)
    
        idx=bisect.bisect(grid,date.timetuple().tm_yday)
        bins[idx].append((file[i],date))
   '''
    
    grid=[]
    dates_for_plotting =[]
    
    d = date_min_month_bin
    while (d <= date_max_month_bin):
        dates_for_plotting.append(d)
        grid.append(d.timetuple().tm_yday)
        d += delta
    
    return grid, dates_for_plotting
    
grid,dates_for_plotting = DateTimeBinGridCreate(date_min_month_bin, date_max_month_bin, delta)
                 
#bins=collections.defaultdict(list)
#bins_single=collections.defaultdict(list)

for stat in station_list_cs:
  print stat
  date_bin_mean_all_dates_one_station=[]
  date_bin_mean_all_dates_one_station_single=[]
  min_max_date_bin=[]
  bins=collections.defaultdict(list)
  bins_single=collections.defaultdict(list)
        
# Pressure Levels

 #pressure_file = np.load('/nfs/a90/eepdw/Data/Observations/Radiosonde_Numpy/Radiosonde_Single_Station_PRESSURES_%s_%s.npy' % (stat, date_title))
  npz_file = np.load('/nfs/a90/eepdw/Data/Observations/Radiosonde_Numpy/Radiosonde_Single_Station_PRESSURES__IND_INTERP_SOUNDING_%s_%s_to_%s.npz'\
          % (stat, date_min.strftime('%Y%m%d'), date_max.strftime('%Y%m%d')))
  pressure_file=npz_file['pressures_for_plotting']
  date_file=npz_file['dates_for_plotting_single']
  singles_file=npz_file['single_value_vars']
  date_file_single=npz_file['dates_for_plotting_single_single']
  #print '/nfs/a90/eepdw/Data/Observations/Radiosonde_Numpy/Radiosonde_Single_Station_PRESSURES_%s_%s_to_%s.npy'\
  #        % (stat, date_min.strftime('%Y%m%d'), date_max.strftime('%Y%m%d') )

  #print pressure_file
  for di, dates in enumerate(date_file):
   #print idx

   idx=bisect.bisect(grid,dates.timetuple().tm_yday)
   bins[idx].append((pressure_file[:,di],dates))
   #print idx
  #print len(bins)
  #print len(grid)
        
  for di, dates in enumerate(date_file_single):
   #print idx

   idx=bisect.bisect(grid,dates.timetuple().tm_yday)
   bins_single[idx].append((singles_file[:,di],dates))
   #print idx
  #print len(bins)
  #print len(grid)
    
  for i in range(len(grid)):
    #if bins[l] is None:
   #bins[l].append(([],[]))
        
   if np.array(bins[i]).size != 0:
            
    empty_array =  np.empty((np.nanmean(np.dstack(np.array(bins[i])[:,0]), axis=2).shape))
    empty_array[:] = np.NAN
    
    empty_array_single =  np.empty((np.nanmean(np.dstack(np.array(bins_single[i])[:,0]), axis=2).shape))
    empty_array_single[:] = np.NAN
    
    empty_array_list =  (datetime.datetime(1,1,1,1), datetime.datetime(1,1,1,1))
    #empty_array_list[:] = np.NAN
    
    
  for i in range(len(grid)):
    #if bins[l] is None:
   #bins[l].append(([],[]))
        
   if np.array(bins[i]).size != 0:
  #for i in bins:
   #print i
   #print np.array(bins[i])[:,0].shape
  #bins = np.sort(bins, axis=1)
   #print grid[i]   
   #date_bin_mean = np.nanmean(np.dstack(np.array(bins[i])[:,0]), axis=2)
   #print date_bin_mean
   #nan_mask = np.ma.masked_array(np.array(date_bin_mean, dtype=float), np.isnan(np.array(date_bin_mean, dtype=float)))
            
  # if np.all(np.isnan(np.dstack(np.array(bins[i])[:,0]))) == False:
        
    date_bin_mean_all_dates_one_station.append(np.nanmean(np.dstack(np.array(bins[i])[:,0]), axis=2))
    date_bin_mean_all_dates_one_station_single.append(np.nanmean(np.dstack(np.array(bins_single[i])[:,0]), axis=2))
   #print date_bin_mean
    min_max_date_bin.append((min(np.array(bins[i])[:,1]), max(np.array(bins[i])[:,1])))
    
   elif np.array(bins[i]).size == 0:
     date_bin_mean_all_dates_one_station.append(empty_array)
     date_bin_mean_all_dates_one_station_single.append(empty_array_single)
     min_max_date_bin.append(empty_array_list)
     #print i
    
  #print date_bin_mean_all_dates_one_station

  print np.array(date_bin_mean_all_dates_one_station).shape
  np.savez('/nfs/a90/eepdw/Data/Observations/Radiosonde_Numpy/Radiosonde_Cross_Section_IND_SOUNDING_INTERP_MEAN_Climat_%s_%s_%s_%s' \
       % (date_min.strftime('%Y%m%d'), date_max.strftime('%Y%m%d'), delta, stat)\
    , date_bin_mean_all_dates_one_station=date_bin_mean_all_dates_one_station, date_bin_mean_all_dates_one_station_single=date_bin_mean_all_dates_one_station_single, min_max_date_bin=min_max_date_bin, dates_for_plotting=dates_for_plotting)


[121, 128, 135, 142, 149, 156, 163, 170, 177, 184, 191, 198, 205, 212, 219, 226, 233, 240, 247, 254, 261, 268]
43003
(22, 16, 200)
43014
(22, 16, 200)
42867
(22, 16, 200)
43371
(22, 16, 200)
43353
(22, 16, 200)
43285
(22, 16, 200)
43192
(22, 16, 200)
43150
(22, 16, 200)
42339
(22, 16, 200)
40990
(22, 16, 200)
40948
(22, 16, 200)

In [87]:
# Theta-e Theta-es experimentation

from dateutil.relativedelta import relativedelta
from math import sin, cos, atan2, radians, sqrt
from collections import defaultdict
import collections
import bisect

import numpy as np

import matplotlib.pyplot as plt

import datetime

station_list_cs=[43003, 43014, 42867, 43371, 43353, 43285,  43192, 43150, 42339, 40990, 40948]

date_min=datetime.datetime(1960,5,1,0,0,0)
date_max=datetime.datetime(2014,10,1,0,0,0)

date_min_month_bin=datetime.datetime(2011,5,1,0,0,0)
date_max_month_bin=datetime.datetime(2011,10,1,0,0,0)

variable_list={'pressures': 0, 'temps':1, 'dewpoints':2, 'winddirs':3, 'windspeeds':4, 'pot_temp':5, 
               'sat_vap_pres':6, 'vap_press':7, 'rel_hum':8, 'wvmr':9, 'sp_hum':10, 'sat_temp':11, 
               'theta_e':12, 'theta_e_sat':13, 'theta_e_minus_theta_e_sat':14}
variable_list_line={'lcl_temp': 0, 'lcl_vpt':1, 'pbl_pressure':2, 'surface_pressure':3, 'T_eq_0':4, 
                    'CAPE':5, 'CIN':6, 'lclp':7, 'lclt':8, 'lfcp':9, 'equil_level':10}

#delta = relativedelta(months=+1)
delta = relativedelta(weeks=+1)

def variable_name_index_match(variable, variable_list):
    for key, value in variable_list.iteritems():   # iter on both keys and values
        if key.startswith('%s' % variable) and key.endswith('%s' % variable):
            arr_index_var=value 
    assert 'arr_index_var' in locals(), "Cannot find index of variable name."
    return arr_index_var

def DateTimeBinGridCreate(date_min_month_bin, date_max_month_bin, delta):

    ''' 
    Take datetime.datetime range (date_min_month_bin to date_max_month_bin)
    and create a grid with bins of size delta (input variable) 
    Return a list of timetuples for binning and list of datetimes for plotting or whatever else
    
    Example usage.  delta = relativedelta(weeks=+1)
    
    In loop, where date is one datetime
    
        idx=bisect.bisect(grid,date.timetuple().tm_yday)
        bins[idx].append((file[i],date))
   '''
    
    grid=[]
    dates_for_plotting =[]
    
    d = date_min_month_bin
    while (d <= date_max_month_bin):
        dates_for_plotting.append(d)
        grid.append(d.timetuple().tm_yday)
        d += delta
    
    return grid, dates_for_plotting
    
grid,dates_for_plotting = DateTimeBinGridCreate(date_min_month_bin, date_max_month_bin, delta)

# Find point closest to (surface pressure -50 hPa), if it is not too far away (e.g in a sounding where 
# there are no interpolated values near the surface becuase of no data in the sounding near the surface)
#  This is for calculating theta-e saturated minus boundary layer theta-e
    
    
variable='theta_e'
theta_e_index = variable_name_index_match(variable, variable_list)

variable='pressures'
pressure_index = variable_name_index_match(variable, variable_list)

variable='surface_pressure'
sp_index = variable_name_index_match(variable, variable_list_line)

for stat in station_list_cs:
  print stat
  date_bin_mean_all_dates_one_station=[]
  date_bin_mean_all_dates_one_station_single=[]
  min_max_date_bin=[]
  bins_theta_e=collections.defaultdict(list)
  bins_datetime=collections.defaultdict(list)
        
# Pressure Levels

 #pressure_file = np.load('/nfs/a90/eepdw/Data/Observations/Radiosonde_Numpy/Radiosonde_Single_Station_PRESSURES_%s_%s.npy' % (stat, date_title))
  npz_file = np.load('/nfs/a90/eepdw/Data/Observations/Radiosonde_Numpy/Radiosonde_Single_Station_PRESSURES__IND_INTERP_SOUNDING_%s_%s_to_%s.npz'\
          % (stat, date_min.strftime('%Y%m%d'), date_max.strftime('%Y%m%d')))
  pressure_file=npz_file['pressures_for_plotting']
  date_file=npz_file['dates_for_plotting_single']
  singles_file=npz_file['single_value_vars']
  date_file_single=npz_file['dates_for_plotting_single_single']
    
  
  theta_e_surface=[]
  date_theta_e_surface=[]
    
  for so, sounding_datetime in enumerate(date_file_single):
        
    surface_pressure = singles_file[sp_index, so]
    theta_e = pressure_file[theta_e_index, so]
    pressures = pressure_file[pressure_index, so]
    
    pressure_nan_mask = np.ma.masked_array(np.array(pressures), 
                       np.isnan(np.array(pressures, dtype=float)))
    
 

    #If not totally nan    
    if any(theta_e[~np.isnan(theta_e)]):
    # If there is a point not too far away from (surface pressure - 50 hPa)
        if np.abs(pressure_nan_mask-(surface_pressure-5000.)).min()<700.:   
                   
            i_idx_50hPa = np.abs(pressure_nan_mask-(surface_pressure-5000.)).argmin()  # Closest point to 50 hPa less than surface
               
            week_bin_idx=bisect.bisect(grid,sounding_datetime.timetuple().tm_yday)
            bins_theta_e[week_bin_idx].append((theta_e[i_idx_50hPa])
            
        
  mean=[]
  time_soundings=[]
  for i in bins_theta_e:
      if np.array(bins_theta_e[i]).size != 0:
            mean.append(np.nanmean
                            (np.dstack(np.array(bins_theta_e[i]))))
            time_soundings.append(datetime.datetime.fromtimestamp(grid[i-1]))
            
  np.savez('/nfs/a90/eepdw/Data/Observations/Radiosonde_Numpy/theta_e_surface_parcel_weekly_mean_%s' % stat, 
                           theta_e_boundary_mean=mean, datetimes=dates_for_plotting )
            
            
        #plt.plot_date(date_theta_e_surface, theta_e_surface)
    #plt.xlim(1960,2014)
    #plt.yticks(arange(0,24,4))
    #plt.ylim(0,24)
    #plt.title('%s' % st)
  #plt.xlabel('Year')
  #plt.ylabel('Hours between soundings')
  #plt.show()   

            
  
#print theta_e_surface
  
    
  

  
            
    #Theta-e BL minus theta-es local
    
    #        theta_es_min_theta_e_s = theta_e_interp [i_idx_50hPa] - theta_e_sat_interp


43003
43014
42867
43371
43353
43285
43192
43150
42339
40990
40948

In [158]:
# Theta-e Theta-es experimentation

from dateutil.relativedelta import relativedelta
from math import sin, cos, atan2, radians, sqrt
from collections import defaultdict
import collections
import bisect

import numpy as np

import matplotlib.pyplot as plt

import datetime

station_list_cs=[43003, 43014, 42867, 43371, 43353, 43285,  43192, 43150, 42339, 40990, 40948]

date_min=datetime.datetime(1960,5,1,0,0,0)
date_max=datetime.datetime(2014,10,1,0,0,0)

date_min_month_bin=datetime.datetime(2011,5,1,0,0,0)
date_max_month_bin=datetime.datetime(2011,10,1,0,0,0)

variable_list={'pressures': 0, 'temps':1, 'dewpoints':2, 'winddirs':3, 'windspeeds':4, 'pot_temp':5, 
               'sat_vap_pres':6, 'vap_press':7, 'rel_hum':8, 'wvmr':9, 'sp_hum':10, 'sat_temp':11, 
               'theta_e':12, 'theta_e_sat':13, 'theta_e_minus_theta_e_sat':14}
variable_list_line={'lcl_temp': 0, 'lcl_vpt':1, 'pbl_pressure':2, 'surface_pressure':3, 'T_eq_0':4, 
                    'CAPE':5, 'CIN':6, 'lclp':7, 'lclt':8, 'lfcp':9, 'equil_level':10}

#delta = relativedelta(months=+1)
delta = relativedelta(weeks=+1)

def variable_name_index_match(variable, variable_list):
    for key, value in variable_list.iteritems():   # iter on both keys and values
        if key.startswith('%s' % variable) and key.endswith('%s' % variable):
            arr_index_var=value 
    assert 'arr_index_var' in locals(), "Cannot find index of variable name."
    return arr_index_var

def DateTimeBinGridCreate(date_min_month_bin, date_max_month_bin, delta):

    ''' 
    Take datetime.datetime range (date_min_month_bin to date_max_month_bin)
    and create a grid with bins of size delta (input variable) 
    Return a list of timetuples for binning and list of datetimes for plotting or whatever else
    
    Example usage.  delta = relativedelta(weeks=+1)
    
    In loop, where date is one datetime
    
        idx=bisect.bisect(grid,date.timetuple().tm_yday)
        bins[idx].append((file[i],date))
   '''
    
    grid=[]
    dates_for_plotting =[]
    
    d = date_min_month_bin
    while (d <= date_max_month_bin):
        dates_for_plotting.append(d)
        grid.append(d.timetuple().tm_yday)
        d += delta
    
    return grid, dates_for_plotting
    
grid,dates_for_plotting = DateTimeBinGridCreate(date_min_month_bin, date_max_month_bin, delta)

# Find point closest to (surface pressure -50 hPa), if it is not too far away (e.g in a sounding where 
# there are no interpolated values near the surface becuase of no data in the sounding near the surface)
#  This is for calculating theta-e saturated minus boundary layer theta-e
    
    
variable='theta_e_sat'
theta_e_index = variable_name_index_match(variable, variable_list)

variable='pressures'
pressure_index = variable_name_index_match(variable, variable_list)

variable='surface_pressure'
sp_index = variable_name_index_match(variable, variable_list_line)

for stat in station_list_cs:
  print stat
  date_bin_mean_all_dates_one_station=[]
  date_bin_mean_all_dates_one_station_single=[]
  min_max_date_bin=[]
  bins_theta_e=collections.defaultdict(list)
  bins_datetime=collections.defaultdict(list)
        
# Pressure Levels

 #pressure_file = np.load('/nfs/a90/eepdw/Data/Observations/Radiosonde_Numpy/Radiosonde_Single_Station_PRESSURES_%s_%s.npy' % (stat, date_title))
  npz_file = np.load('/nfs/a90/eepdw/Data/Observations/Radiosonde_Numpy/Radiosonde_Single_Station_PRESSURES__IND_INTERP_SOUNDING_%s_%s_to_%s.npz'\
          % (stat, date_min.strftime('%Y%m%d'), date_max.strftime('%Y%m%d')))
  pressure_file=npz_file['pressures_for_plotting']
  date_file=npz_file['dates_for_plotting_single']
  singles_file=npz_file['single_value_vars']
  date_file_single=npz_file['dates_for_plotting_single_single']
    
  
  theta_e_surface=[]
  date_theta_e_surface=[]
    
  for so, sounding_datetime in enumerate(date_file_single):
        
    surface_pressure = singles_file[sp_index, so]
    theta_e = pressure_file[theta_e_index, so]
    pressures = pressure_file[pressure_index, so]
    
    pressure_nan_mask = np.ma.masked_array(np.array(pressures), 
                       np.isnan(np.array(pressures, dtype=float)))
    
 

    #If not totally nan    
    if any(theta_e[~np.isnan(theta_e)]):
    # If there is a point not too far away from (surface pressure - 50 hPa)
        if np.abs(pressure_nan_mask-(surface_pressure-5000.)).min()<700.:   
                   
            i_idx_50hPa = np.abs(pressure_nan_mask-(surface_pressure-5000.)).argmin()  # Closest point to 50 hPa less than surface
               
            week_bin_idx=bisect.bisect(grid,sounding_datetime.timetuple().tm_yday)
            bins_theta_e[week_bin_idx].append(theta_e[i_idx_50hPa])
            
        
  mean=[]
  time_soundings=[]
  for i in bins_theta_e:
      if np.array(bins_theta_e[i]).size != 0:
            mean.append(np.nanmean
                            (np.dstack(np.array(bins_theta_e[i]))))
            time_soundings.append(datetime.datetime.fromtimestamp(grid[i-1]))
            
  np.savez('/nfs/a90/eepdw/Data/Observations/Radiosonde_Numpy/theta_e_sat_surface_parcel_weekly_mean_%s' % stat, 
                           theta_e_boundary_mean=mean, datetimes=dates_for_plotting )
            
            
        #plt.plot_date(date_theta_e_surface, theta_e_surface)
    #plt.xlim(1960,2014)
    #plt.yticks(arange(0,24,4))
    #plt.ylim(0,24)
    #plt.title('%s' % st)
  #plt.xlabel('Year')
  #plt.ylabel('Hours between soundings')
  #plt.show()   

            
  
#print theta_e_surface
  
    
  

  
            
    #Theta-e BL minus theta-es local
    
    #        theta_es_min_theta_e_s = theta_e_interp [i_idx_50hPa] - theta_e_sat_interp


43003
43014
42867
43371
43353
43285
43192
43150
42339
40990
40948

In [126]:
# Theta-e Theta-es experimentation

from dateutil.relativedelta import relativedelta
from math import sin, cos, atan2, radians, sqrt
from collections import defaultdict
import collections
import bisect

import numpy as np

import matplotlib.pyplot as plt

import datetime

station_list_cs=[43003, 43014, 42867, 43371, 43353, 43285,  43192, 43150, 42339, 40990, 40948]

date_min=datetime.datetime(1960,5,1,0,0,0)
date_max=datetime.datetime(2014,10,1,0,0,0)

date_min_month_bin=datetime.datetime(2011,5,1,0,0,0)
date_max_month_bin=datetime.datetime(2011,10,1,0,0,0)

variable_list={'pressures': 0, 'temps':1, 'dewpoints':2, 'winddirs':3, 'windspeeds':4, 'pot_temp':5, 
               'sat_vap_pres':6, 'vap_press':7, 'rel_hum':8, 'wvmr':9, 'sp_hum':10, 'sat_temp':11, 
               'theta_e':12, 'theta_e_sat':13, 'theta_e_minus_theta_e_sat':14}
variable_list_line={'lcl_temp': 0, 'lcl_vpt':1, 'pbl_pressure':2, 'surface_pressure':3, 'T_eq_0':4, 
                    'CAPE':5, 'CIN':6, 'lclp':7, 'lclt':8, 'lfcp':9, 'equil_level':10}

hpa_level=500

#delta = relativedelta(months=+1)
delta = relativedelta(weeks=+1)

def variable_name_index_match(variable, variable_list):
    for key, value in variable_list.iteritems():   # iter on both keys and values
        if key.startswith('%s' % variable) and key.endswith('%s' % variable):
            arr_index_var=value 
    assert 'arr_index_var' in locals(), "Cannot find index of variable name."
    return arr_index_var

def DateTimeBinGridCreate(date_min_month_bin, date_max_month_bin, delta):

    ''' 
    Take datetime.datetime range (date_min_month_bin to date_max_month_bin)
    and create a grid with bins of size delta (input variable) 
    Return a list of timetuples for binning and list of datetimes for plotting or whatever else
    
    Example usage.  delta = relativedelta(weeks=+1)
    
    In loop, where date is one datetime
    
        idx=bisect.bisect(grid,date.timetuple().tm_yday)
        bins[idx].append((file[i],date))
   '''
    
    grid=[]
    dates_for_plotting =[]
    
    d = date_min_month_bin
    while (d <= date_max_month_bin):
        dates_for_plotting.append(d)
        grid.append(d.timetuple().tm_yday)
        d += delta
    
    return grid, dates_for_plotting
    
grid,dates_for_plotting = DateTimeBinGridCreate(date_min_month_bin, date_max_month_bin, delta)

# Find point closest to (surface pressure -50 hPa), if it is not too far away (e.g in a sounding where 
# there are no interpolated values near the surface becuase of no data in the sounding near the surface)
#  This is for calculating theta-e saturated minus boundary layer theta-e
    
    
variable='theta_e_sat'
theta_e_sat_index = variable_name_index_match(variable, variable_list)

variable='pressures'
pressure_index = variable_name_index_match(variable, variable_list)

variable='surface_pressure'
sp_index = variable_name_index_match(variable, variable_list_line)

for stat in station_list_cs:
  print stat
  date_bin_mean_all_dates_one_station=[]
  date_bin_mean_all_dates_one_station_single=[]
  min_max_date_bin=[]
  bins_theta_e=collections.defaultdict(list)
  bins_datetime=collections.defaultdict(list)
        
# Pressure Levels

 #pressure_file = np.load('/nfs/a90/eepdw/Data/Observations/Radiosonde_Numpy/Radiosonde_Single_Station_PRESSURES_%s_%s.npy' % (stat, date_title))
  npz_file = np.load('/nfs/a90/eepdw/Data/Observations/Radiosonde_Numpy/Radiosonde_Single_Station_PRESSURES__IND_INTERP_SOUNDING_%s_%s_to_%s.npz'\
          % (stat, date_min.strftime('%Y%m%d'), date_max.strftime('%Y%m%d')))
  pressure_file=npz_file['pressures_for_plotting']
  date_file=npz_file['dates_for_plotting_single']
  singles_file=npz_file['single_value_vars']
  date_file_single=npz_file['dates_for_plotting_single_single']
    
  
  theta_e_surface=[]
  date_theta_e_surface=[]
    
  for so, sounding_datetime in enumerate(date_file_single):
        
    surface_pressure = singles_file[sp_index, so]
    theta_e_sat = pressure_file[theta_e_sat_index, so]
    pressures = pressure_file[pressure_index, so]
    
    pressure_nan_mask = np.ma.masked_array(np.array(pressures), 
                       np.isnan(np.array(pressures, dtype=float)))
    
 

    #If not totally nan    
    if any(theta_e_sat[~np.isnan(theta_e_sat)]):
    # If there is a point not too far away from (surface pressure - 50 hPa)
        if np.abs(pressure_nan_mask-(surface_pressure-5000.)).min()<700.:   
                   
            #i_idx_50hPa = np.abs(pressure_nan_mask-(surface_pressure-5000.)).argmin()  # Closest point to 50 hPa less than surface
            i_idx_hPa = np.abs(pressure_nan_mask-(hpa_level*100)).argmin()  # Closest point to 600 hPa
            week_bin_idx=bisect.bisect(grid,sounding_datetime.timetuple().tm_yday)
            bins_theta_e[week_bin_idx].append(np.mean(theta_e_sat[i_idx_hPa]))
            
        
  mean=[]
  time_soundings=[]
  for i in bins_theta_e:
      if np.array(theta_e_sat[i]).size != 0:
            mean.append(np.nanmean
                            (np.dstack(np.array(bins_theta_e[i]))))
            time_soundings.append(datetime.datetime.fromtimestamp(grid[i-1]))
            
  np.savez('/nfs/a90/eepdw/Data/Observations/Radiosonde_Numpy/theta_e_sat_%s_hPa_surface_parcel_weekly_mean_%s' % (hpa_level, stat), 
                           theta_e_boundary_mean=mean, datetimes=dates_for_plotting )
            
            
        #plt.plot_date(date_theta_e_surface, theta_e_surface)
    #plt.xlim(1960,2014)
    #plt.yticks(arange(0,24,4))
    #plt.ylim(0,24)
    #plt.title('%s' % st)
  #plt.xlabel('Year')
  #plt.ylabel('Hours between soundings')
  #plt.show()   

            
  
#print theta_e_surface
  
    
  

  
            
    #Theta-e BL minus theta-es local
    
    #        theta_es_min_theta_e_s = theta_e_interp [i_idx_50hPa] - theta_e_sat_interp


43003
43014
42867
43371
43353
43285
43192
43150
42339
40990
40948

In [155]:
# Theta-e Theta-es experimentation

from dateutil.relativedelta import relativedelta
from math import sin, cos, atan2, radians, sqrt
from collections import defaultdict
import collections
import bisect

import numpy as np

import matplotlib.pyplot as plt

import datetime

station_list_cs=[43003, 43014, 42867, 43371, 43353, 43285,  43192, 43150, 42339, 40990, 40948]

date_min=datetime.datetime(1960,5,1,0,0,0)
date_max=datetime.datetime(2014,10,1,0,0,0)

date_min_month_bin=datetime.datetime(2011,5,1,0,0,0)
date_max_month_bin=datetime.datetime(2011,10,1,0,0,0)

variable_list={'pressures': 0, 'temps':1, 'dewpoints':2, 'winddirs':3, 'windspeeds':4, 'pot_temp':5, 
               'sat_vap_pres':6, 'vap_press':7, 'rel_hum':8, 'wvmr':9, 'sp_hum':10, 'sat_temp':11, 
               'theta_e':12, 'theta_e_sat':13, 'theta_e_minus_theta_e_sat':14}
variable_list_line={'lcl_temp': 0, 'lcl_vpt':1, 'pbl_pressure':2, 'surface_pressure':3, 'T_eq_0':4, 
                    'CAPE':5, 'CIN':6, 'lclp':7, 'lclt':8, 'lfcp':9, 'equil_level':10}

hpa_level_lower=800
hpa_level_higher=850

#delta = relativedelta(months=+1)
delta = relativedelta(weeks=+1)

def variable_name_index_match(variable, variable_list):
    for key, value in variable_list.iteritems():   # iter on both keys and values
        if key.startswith('%s' % variable) and key.endswith('%s' % variable):
            arr_index_var=value 
    assert 'arr_index_var' in locals(), "Cannot find index of variable name."
    return arr_index_var

def DateTimeBinGridCreate(date_min_month_bin, date_max_month_bin, delta):

    ''' 
    Take datetime.datetime range (date_min_month_bin to date_max_month_bin)
    and create a grid with bins of size delta (input variable) 
    Return a list of timetuples for binning and list of datetimes for plotting or whatever else
    
    Example usage.  delta = relativedelta(weeks=+1)
    
    In loop, where date is one datetime
    
        idx=bisect.bisect(grid,date.timetuple().tm_yday)
        bins[idx].append((file[i],date))
   '''
    
    grid=[]
    dates_for_plotting =[]
    
    d = date_min_month_bin
    while (d <= date_max_month_bin):
        dates_for_plotting.append(d)
        grid.append(d.timetuple().tm_yday)
        d += delta
    
    return grid, dates_for_plotting
    
grid,dates_for_plotting = DateTimeBinGridCreate(date_min_month_bin, date_max_month_bin, delta)

# Find point closest to (surface pressure -50 hPa), if it is not too far away (e.g in a sounding where 
# there are no interpolated values near the surface becuase of no data in the sounding near the surface)
#  This is for calculating theta-e saturated minus boundary layer theta-e
    
    
variable='theta_e_sat'
theta_e_sat_index = variable_name_index_match(variable, variable_list)

variable='pressures'
pressure_index = variable_name_index_match(variable, variable_list)

variable='surface_pressure'
sp_index = variable_name_index_match(variable, variable_list_line)

for stat in station_list_cs:
  print stat
  date_bin_mean_all_dates_one_station=[]
  date_bin_mean_all_dates_one_station_single=[]
  min_max_date_bin=[]
  bins_theta_e=collections.defaultdict(list)
  bins_datetime=collections.defaultdict(list)
        
# Pressure Levels

 #pressure_file = np.load('/nfs/a90/eepdw/Data/Observations/Radiosonde_Numpy/Radiosonde_Single_Station_PRESSURES_%s_%s.npy' % (stat, date_title))
  npz_file = np.load('/nfs/a90/eepdw/Data/Observations/Radiosonde_Numpy/Radiosonde_Single_Station_PRESSURES__IND_INTERP_SOUNDING_%s_%s_to_%s.npz'\
          % (stat, date_min.strftime('%Y%m%d'), date_max.strftime('%Y%m%d')))
  pressure_file=npz_file['pressures_for_plotting']
  date_file=npz_file['dates_for_plotting_single']
  singles_file=npz_file['single_value_vars']
  date_file_single=npz_file['dates_for_plotting_single_single']
    
  
  theta_e_surface=[]
  date_theta_e_surface=[]
    
  for so, sounding_datetime in enumerate(date_file_single):
        
    surface_pressure = singles_file[sp_index, so]
    theta_e_sat = pressure_file[theta_e_sat_index, so]
    pressures = pressure_file[pressure_index, so]
    
    pressure_nan_mask = np.ma.masked_array(np.array(pressures), 
                       np.isnan(np.array(pressures, dtype=float)))
    
 

    #If not totally nan    
    if any(theta_e_sat[~np.isnan(theta_e_sat)]):
    # If there is a point not too far away from (surface pressure - 50 hPa)
        if np.abs(pressure_nan_mask-(surface_pressure-5000.)).min()<700.:   
                   
            i_idx_hPa_lower = np.abs(pressure_nan_mask-(hpa_level_lower*100)).argmin()  # Closest point to 50 hPa less than surface
            i_idx_hPa_higher = np.abs(pressure_nan_mask-(hpa_level_higher*100)).argmin()  # Closest point to 600 hPa
            week_bin_idx=bisect.bisect(grid,sounding_datetime.timetuple().tm_yday)
            bins_theta_e[week_bin_idx].append(np.mean(theta_e_sat[i_idx_hPa_lower:i_idx_hPa_higher]))
            
        
  mean=[]
  time_soundings=[]
  for i in bins_theta_e:
      if np.array(theta_e_sat[i]).size != 0:
            mean.append(np.nanmean
                            (np.dstack(np.array(bins_theta_e[i]))))
            time_soundings.append(datetime.datetime.fromtimestamp(grid[i-1]))
            
  np.savez('/nfs/a90/eepdw/Data/Observations/Radiosonde_Numpy/theta_e_sat_%s_to_%s_hPa_surface_parcel_weekly_mean_%s' 
                           % (hpa_level_lower, hpa_level_higher, stat), 
                           theta_e_boundary_mean=mean, datetimes=dates_for_plotting )


43003
43014
42867
43371
43353
43285
43192
43150
42339
40990
40948

In [156]:
%matplotlib inline 

fig=plt.figure(figsize=(14,8.5))

station_list_cs=[43150, 42867, 43014, 42339, 40990]

hpa_level_lower=800
hpa_level_higher=850

for stat in station_list_cs:
    
    st,la,lo, st_height = station_info_search(stat)

    npz_file = np.load('/nfs/a90/eepdw/Data/Observations/Radiosonde_Numpy/'\
                        'theta_e_sat_%s_to_%s_hPa_surface_parcel_weekly_mean_%s.npz'\
                       % (hpa_level_lower, hpa_level_higher, stat))
    
    plt.plot_date(npz_file['datetimes'], npz_file['theta_e_boundary_mean'], fmt="-", label=st)
    #
    #plt.yticks(arange(0,24,4))
    #plt.ylim(0,24)
    
plt.ylim(360,380)    
plt.title('%s to %s hPa' % (hpa_level_lower, hpa_level_higher))
plt.xlabel('Week')
plt.ylabel('Theta-e at %s hPa' % hpa_level)    
fig.autofmt_xdate()    
plt.legend()
plt.show()



In [149]:
%matplotlib inline 

fig=plt.figure(figsize=(14,8.5))

station_list_cs=[43150, 42867, 43014, 42339, 40990]

hpa_level_lower=775
hpa_level_higher=825

for stat in station_list_cs:
    
    st,la,lo, st_height = station_info_search(stat)

    npz_file = np.load('/nfs/a90/eepdw/Data/Observations/Radiosonde_Numpy/'\
                        'theta_e_sat_%s_to_%s_hPa_surface_parcel_weekly_mean_%s.npz'\
                       % (hpa_level_lower, hpa_level_higher, stat))
    
    plt.plot_date(npz_file['datetimes'], npz_file['theta_e_boundary_mean'], fmt="-", label=st)
    #
    #plt.yticks(arange(0,24,4))
    #plt.ylim(0,24)
    
plt.ylim(360,380)    
plt.title('%s to %s hPa' % (hpa_level_lower, hpa_level_higher))
plt.xlabel('Week')
plt.ylabel('Theta-e at %s hPa' % hpa_level)    
fig.autofmt_xdate()    
plt.legend()
plt.show()



In [152]:
%matplotlib inline 

fig=plt.figure(figsize=(14,8.5))

station_list_cs=[43150, 42867, 43014, 42339, 40990]

hpa_level_lower=475
hpa_level_higher=525

for stat in station_list_cs:
    
    st,la,lo, st_height = station_info_search(stat)

    npz_file = np.load('/nfs/a90/eepdw/Data/Observations/Radiosonde_Numpy/'\
                        'theta_e_sat_%s_to_%s_hPa_surface_parcel_weekly_mean_%s.npz'\
                       % (hpa_level_lower, hpa_level_higher, stat))
    
    plt.plot_date(npz_file['datetimes'], npz_file['theta_e_boundary_mean'], fmt="-", label=st)
    #
    #plt.yticks(arange(0,24,4))
    #plt.ylim(0,24)
    
plt.ylim(340,360)    
plt.title('%s to %s hPa' % (hpa_level_lower, hpa_level_higher))
plt.xlabel('Week')
plt.ylabel('Theta-e at %s hPa' % hpa_level)    
fig.autofmt_xdate()    
plt.legend()
plt.show()



In [136]:
%matplotlib inline 

fig=plt.figure(figsize=(14,8.5))

station_list_cs=[43150, 42867, 43014, 42339, 40990]

hpa_level=500

for stat in station_list_cs:
    
    st,la,lo, st_height = station_info_search(stat)

    npz_file = np.load('/nfs/a90/eepdw/Data/Observations/Radiosonde_Numpy/'\
                        'theta_e_sat_%s_hPa_surface_parcel_weekly_mean_%s.npz'\
                       % (hpa_level, stat))
  
    
    #helper = np.vectorize(lambda x: x.total_seconds())
    #dt_sec = helper(np.diff(date_file))
    
    
    
    plt.plot_date(npz_file['datetimes'], npz_file['theta_e_boundary_mean'], fmt="-", label=st)
    #plt.ylim(345,355)
    #plt.yticks(arange(0,24,4))
    #plt.ylim(0,24)
    plt.title('%s hPa' % hpa_level)
    plt.xlabel('Week')
    plt.ylabel('Theta-e at %s hPa' % hpa_level)
    
    
fig.autofmt_xdate()    
plt.legend()
plt.show()



In [137]:
%matplotlib inline 

fig=plt.figure(figsize=(14,8.5))

station_list_cs=[43150, 42867, 43014, 42339, 40990]

hpa_level=800

for stat in station_list_cs:
    
    st,la,lo, st_height = station_info_search(stat)

    npz_file = np.load('/nfs/a90/eepdw/Data/Observations/Radiosonde_Numpy/'\
                        'theta_e_sat_%s_hPa_surface_parcel_weekly_mean_%s.npz'\
                       % (hpa_level, stat))
  
    
    #helper = np.vectorize(lambda x: x.total_seconds())
    #dt_sec = helper(np.diff(date_file))
    
    
    
    plt.plot_date(npz_file['datetimes'], npz_file['theta_e_boundary_mean'], fmt="-", label=st)
    plt.ylim(360,380)
    #plt.yticks(arange(0,24,4))
    #plt.ylim(0,24)
    plt.title('%s hPa' % hpa_level)
    plt.xlabel('Week')
    plt.ylabel('Theta-e at %s hPa' % hpa_level)
    
    
fig.autofmt_xdate()    
plt.legend()
plt.show()



In [154]:
%matplotlib inline 

fig=plt.figure(figsize=(14,8.5))

station_list_cs=[43150, 42867, 43014, 42339, 40990]

for stat in station_list_cs:
    
    st,la,lo, st_height = station_info_search(stat)

    npz_file = np.load('/nfs/a90/eepdw/Data/Observations/Radiosonde_Numpy/'\
                        'theta_e_surface_parcel_weekly_mean_%s.npz'\
                       % (stat))
  
    
    #helper = np.vectorize(lambda x: x.total_seconds())
    #dt_sec = helper(np.diff(date_file))
    
    
    
    plt.plot_date(npz_file['datetimes'], npz_file['theta_e_boundary_mean'], fmt="-", label=st)
    plt.ylim(340,360)
    #plt.yticks(arange(0,24,4))
    #plt.ylim(0,24)
    plt.title('Theta-e 50hPa above surface')
    plt.xlabel('Week')
    plt.ylabel('Theta-e 50hPa above surface')
    
    
fig.autofmt_xdate()    
plt.legend()
plt.show()



In [163]:
%matplotlib inline 

fig=plt.figure(figsize=(14,8.5))

station_list_cs=[43150, 42867, 43014, 42339, 40990]

for stat in station_list_cs:
    
    st,la,lo, st_height = station_info_search(stat)

    npz_file = np.load('/nfs/a90/eepdw/Data/Observations/Radiosonde_Numpy/'\
                        'theta_e_sat_surface_parcel_weekly_mean_%s.npz'\
                       % (stat))
  
    
    #helper = np.vectorize(lambda x: x.total_seconds())
    #dt_sec = helper(np.diff(date_file))
    
    
    
    plt.plot_date(npz_file['datetimes'], npz_file['theta_e_boundary_mean'], fmt="-", label=st)
    plt.ylim(370,400)
    #plt.yticks(arange(0,24,4))
    #plt.ylim(0,24)
    plt.title('Theta-e sat 50hPa above surface')
    plt.xlabel('Week')
    plt.ylabel('Theta-e sat 50hPa above surface')
    
    
fig.autofmt_xdate()    
plt.legend()
plt.show()



In [90]:
%matplotlib inline 

for stat in station_list_cs:
    
    st,la,lo, st_height = station_info_search(stat)

    npz_file = np.load('/nfs/a90/eepdw/Data/Observations/Radiosonde_Numpy/'\
                        'theta_e_surface_parcel_weekly_mean_%s.npz'\
                       % (stat))
  
    
    #helper = np.vectorize(lambda x: x.total_seconds())
    #dt_sec = helper(np.diff(date_file))
    
    fig=plt.figure()
    
    plt.plot_date(npz_file['datetimes'], npz_file['theta_e_boundary_mean'], linestyle='-')
    #plt.xlim(1960,2014)
    #plt.yticks(arange(0,24,4))
    #plt.ylim(0,24)
    plt.title('%s' % st)
    plt.xlabel('Week')
    plt.ylabel('Theta-e 50hPa above surface')
    
    fig.autofmt_xdate()
    
    
    plt.show()



In [38]:
r=np.load('/nfs/a90/eepdw/Data/Observations/Radiosonde_Numpy/theta_e_surface_parcel_weekly_mean_43003.npz')

In [40]:
r['theta_e_boundary_mean']


Out[40]:
array([ 353.54791991,  354.2578714 ,  354.39552744,  354.26418255,
        355.0806987 ,  353.54271795,  355.04223005,  354.28580491,
        352.53515911,  353.355431  ,  354.2508106 ,  354.24389635,
        352.93318612,  353.93705154,  353.18290331,  354.05324208,
        353.17461834,  353.62409618,  353.12458332,  354.35499029,
        354.32395392,  353.2281764 ])

Wind vectors and barbs on same plot

FTP code for station list


In [99]:
# Time difference between plots

for stat in station_list_cs:
    
    st,la,lo, st_height = station_info_search(stat)

    npz_file = np.load('/nfs/a90/eepdw/Data/Observations/Radiosonde_Numpy/'\
                       'Radiosonde_Single_Station_PRESSURES__IND_INTERP_SOUNDING_%s_%s_to_%s.npz'\
                       % (stat, date_min.strftime('%Y%m%d'), date_max.strftime('%Y%m%d')))
    
    #pressure_file=npz_file['pressures_for_plotting']
    date_file=npz_file['dates_for_plotting_single']
    
    helper = np.vectorize(lambda x: x.total_seconds())
    dt_sec = helper(np.diff(date_file))
    
    
    
    plt.plot_date(date_file[1::], dt_sec/3600)
    #plt.xlim(1960,2014)
    plt.yticks(arange(0,24,4))
    plt.ylim(0,24)
    plt.title('%s' % st)
    plt.xlabel('Year')
    plt.ylabel('Hours between soundings')
    plt.show()


---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-99-9b0d298b55cc> in <module>()
     15 
     16 
---> 17     plt.plot_date(date_file[1::], dt_sec/3600)
     18     #plt.xlim(1960,2014)
     19     plt.yticks(arange(0,24,4))

NameError: name 'plt' is not defined

In [ ]:
# Time difference between plots

grid=UnixEpochWeeklyGrid(date_min, date_max)

for stat in station_list_cs:
    
    st,la,lo, st_height = station_info_search(stat)

    npz_file = np.load('/nfs/a90/eepdw/Data/Observations/Radiosonde_Numpy/'\
                       'Radiosonde_Single_Station_PRESSURES__IND_INTERP_SOUNDING_%s_%s_to_%s.npz'\
                       % (stat, date_min.strftime('%Y%m%d'), date_max.strftime('%Y%m%d')))
    
    date_file=npz_file['dates_for_plotting_single']
    
    bins=collections.defaultdict(list)
    
    #helper = np.vectorize(lambda x: x.total_seconds())
    #dt_sec = helper(np.diff(date_file))
    
    helper = np.vectorize(lambda x: x.total_seconds())
    dt_sec = helper(np.diff(date_file))
    
    for di, dates in enumerate(date_file):
   #print idx

        idx=bisect.bisect(grid,timegm(dates.timetuple()))
        bins[idx].append(dt_sec[di-1])
        
    
    #print bins  
    time_soundings=[]
    mean_diff=[]
    for i in bins:
        if np.array(bins[i]).size != 0:
            mean_diff.append(np.nanmean
                            (np.dstack(np.array(bins[i]))))
            time_soundings.append(datetime.datetime.fromtimestamp(grid[i-1]))
   
    plt.plot_date(time_soundings, np.array(mean_diff)/3600)
    #plt.xlim(1960,2014)
    plt.yticks(arange(0,24,4))
    plt.ylim(0,24)
    plt.title('%s' % st)
    plt.xlabel('Year')
    plt.ylabel('Hours between soundings')
    plt.show()