In [ ]:
%pylab notebook
import matplotlib.pyplot as plt
import pandas as pd
import re

In [ ]:
# NOTE: machine-specific absolute path; later cells read and write files relative to
# this directory, so point it at the folder holding monthly.csv before running.
%cd /Users/brodzik/projects/MODICE/data/sii
%ls

In [ ]:
#daily = pd.read_csv('daily.csv')

In [ ]:
# Load the monthly table from monthly.csv in the current working directory.
monthly = pd.read_csv('monthly.csv')

In [ ]:
monthly

Slicing monthly files

  1. only keep hemisphere='N'
  2. only keep dates since Jan 1999
  3. validate that "missing" columns are zeroes and drop them
  4. then set the index to date
    
    In [ ]:
    monthly.shape
    
    
    
    In [ ]:
    # Keep only Northern Hemisphere rows. The explicit .copy() makes the result an
    # independent DataFrame, so later column assignments on `monthly` do not act on
    # a slice of the original frame (which triggers pandas' SettingWithCopyWarning).
    monthly = monthly[monthly['hemisphere'] == 'N'].copy()
    
    
    
    In [ ]:
    monthly.shape
    
    
    
    In [ ]:
    # Parse the 'month' column into datetimes and store them in a 'date' column.
    monthly.loc[:,'date'] = pd.to_datetime(monthly['month'])
    
    
    
    In [ ]:
    # Make the parsed 'date' column the DataFrame index (the column itself is dropped);
    # verify_integrity=True raises if any date appears more than once.
    monthly.set_index('date', inplace=True, verify_integrity=True, drop=True)
    
    
    
    In [ ]:
    # Keep only rows dated after 1998-12-31, i.e. from January 1999 onward.
    monthly = monthly[monthly.index > '1998-12-31']
    
    
    
    In [ ]:
    monthly.columns
    
    
    
    In [ ]:
    monthly.shape
    
    
    
    In [ ]:
    # Find every "missing_km2" column up front so the frame is not modified while
    # its column index is being iterated.
    missing_columns = [column for column in monthly.columns
                       if re.search(r"missing_km2", column)]
    for column in missing_columns:
        lowest, highest = monthly[column].min(), monthly[column].max()
        print("%s: " % (column))
        print(lowest, highest)
        # These columns are expected to hold only zeroes; flag any that do not
        # rather than relying on the reader to spot it in the min/max printout.
        if lowest != 0 or highest != 0:
            print("WARNING: %s has non-zero values" % (column))
    # Drop all of the columns in one step (no-op when none matched).
    monthly = monthly.drop(missing_columns, axis=1)
    
    
    
    In [ ]:
    monthly.shape
    
    
    
    In [ ]:
    monthly['meier2007_laptev_area_km2'].plot()
    
    
    
    In [ ]:
    # Plot one panel per "area_km2" column. The panel count is derived from the
    # data instead of being hard-coded, so a different number of region columns
    # neither overruns the axes array nor leaves empty panels.
    area_columns = [column for column in monthly.columns
                    if re.search(r"area_km2", column)]
    if area_columns:
        n_panels = len(area_columns)
        # 25.0 / 15 inches per panel reproduces the original 8x25 figure for 15 panels.
        # squeeze=False keeps `ax` indexable even when there is a single panel.
        fig, ax = plt.subplots(n_panels, 1, figsize=(8, n_panels * 25.0 / 15),
                               squeeze=False)
        ax = ax[:, 0]
        for i, column in enumerate(area_columns):
            print("%s: %d" % (column, i))
            monthly[column].plot(ax=ax[i], sharey=True, title=column)
        fig.tight_layout()
        fig.savefig("nsidc0051_area_by_region.png")
    else:
        print("no area_km2 columns found; nothing to plot")
    

    Convert the monthly time series to a year-by-month matrix (one row per year, one column per month)

    
    
    In [ ]:
    monthly
    
    
    
    In [ ]:
    def convert_column_to_matrix(df, column, start_year=1999, nyears=17):
        """Reshape one monthly column of `df` into a year-by-month table.

        Parameters
        ----------
        df : pandas.DataFrame
            Frame indexed by timestamps; it must contain a row labelled with the
            first day of every month in the requested span.
        column : str
            Name of the column to reshape.  Any "meier2007_" substring is
            removed when building the output column names.
        start_year : int, optional
            First year of the output (default 1999).
        nyears : int, optional
            Number of consecutive years to extract (default 17).

        Returns
        -------
        pandas.DataFrame
            One row per year and twelve columns named "MM_<short column>",
            e.g. "06_beaufort_extent_km2".

        Raises
        ------
        KeyError
            If `df` has no row for one of the requested months.
        """
        short_column = re.sub("meier2007_", "", column)
        years = np.arange(nyears) + start_year
        months = np.arange(1, 13)
        column_names = ["%02d_%s" % (month, short_column) for month in months]
        data = pd.DataFrame(index=years, columns=column_names)
        for year in years:
            for month, name in zip(months, column_names):
                # "YYYY-MM" parses to the first day of that month.
                stamp = pd.to_datetime("%4d-%02d" % (year, month))
                # .at is the scalar label accessor; DataFrame.get_value, used
                # previously, no longer exists in current pandas.
                data.loc[year, name] = df.at[stamp, column]
        return data
    
    
    
    In [ ]:
    # Build one year-by-month matrix per area/extent column, then join them side by
    # side with a single concat instead of re-concatenating inside the loop.
    matrices = []
    for column in monthly.columns:
        if re.search(r"area_km2|extent_km2", column):
            print("%s: " % (column))
            matrices.append(convert_column_to_matrix(monthly, column))
    # NOTE: `all` shadows the Python builtin; the name is kept because later cells
    # refer to the combined frame by this name.
    all = pd.concat(matrices, axis=1)
    
    
    
    In [ ]:
    all.shape
    
    
    
    In [ ]:
    all
    
    
    
    In [ ]:
    # Spot-check one value: the year-by-month matrix must agree with the monthly series.
    # DataFrame.get_value was removed from pandas; .at is the scalar label accessor.
    col = 'beaufort_extent_km2'

    print("from monthly: %f" % monthly.at[pd.to_datetime('2008-06-01'), 'meier2007_'+col])
    print("from all    : %f" % all.at[2008, '06_' + col])
    
    
    
    In [ ]:
    # The dates now live in the index, so drop the original 'month' column before export.
    del monthly['month']
    
    
    
    In [ ]:
    # Write the monthly time series; note the file is tab-separated despite the .csv name.
    monthly.to_csv('nsidc0051_monthly_tseries.csv', sep='\t')
    
    
    
    In [109]:
    # Write the year-by-month matrix, labelling the index column 'Year'.
    all.to_csv('nsidc0051_year_by_month.csv', index_label='Year')
    
    
    
    In [110]:
    %pwd
    
    
    
    
    Out[110]:
    u'/Users/brodzik/projects/MODICE/data/sii'
    
    
    In [111]:
    %more nsidc0051_year_by_month.csv
    
    
    
    In [108]:
    all.columns
    
    
    
    
    Out[108]:
    Index([u'01_total_extent_km2', u'02_total_extent_km2', u'03_total_extent_km2',
           u'04_total_extent_km2', u'05_total_extent_km2', u'06_total_extent_km2',
           u'07_total_extent_km2', u'08_total_extent_km2', u'09_total_extent_km2',
           u'10_total_extent_km2',
           ...
           u'03_chukchi_area_km2', u'04_chukchi_area_km2', u'05_chukchi_area_km2',
           u'06_chukchi_area_km2', u'07_chukchi_area_km2', u'08_chukchi_area_km2',
           u'09_chukchi_area_km2', u'10_chukchi_area_km2', u'11_chukchi_area_km2',
           u'12_chukchi_area_km2'],
          dtype='object', length=360)
    
    
    In [ ]: