Create a month/day by Year view of the daily sea ice index data.


In [ ]:
tmp_dir = "../data"

In [ ]:
!mkdir -p ../data
!wget -P ../data -qN ftp://sidads.colorado.edu/pub/DATASETS/NOAA/G02135/north/daily/data/NH_seaice_extent_final.csv
!wget -P ../data -qN ftp://sidads.colorado.edu/pub/DATASETS/NOAA/G02135/north/daily/data/NH_seaice_extent_nrt.csv
!wget -P ../data -qN ftp://sidads.colorado.edu/pub/DATASETS/NOAA/G02135/south/daily/data/SH_seaice_extent_final.csv
!wget -P ../data -qN ftp://sidads.colorado.edu/pub/DATASETS/NOAA/G02135/south/daily/data/SH_seaice_extent_nrt.csv

Variables to set before running:


In [ ]:
climatology_years = (1981, 2010)

In [ ]:
import datetime as dt
import numpy as np

import os
import pandas as pd
from pandas import ExcelWriter
%matplotlib inline
import matplotlib as mpl
import matplotlib.pyplot as plt
pd.options.display.mpl_style = 'default'

In [ ]:
def parse_the_date(year, mm, dd):
    return dt.date(int(year), int(mm), int(dd))

def slurp_csv(filename):
    data = pd.read_csv(filename, header = None, skiprows=2,
                       names=["year", "mm", "dd", "extent", "missing", "source"],
                       parse_dates={'date':['year', 'mm', 'dd']},
                       date_parser=parse_the_date, index_col='date')
    data = data.drop(['missing', 'source'], axis=1)
    return data


def read_a_hemisphere(hemisphere):
    the_dir = '../data'
    final_prod_filename = os.path.join(the_dir, '{hemi}H_seaice_extent_final.csv'.format(hemi=hemisphere[0:1].upper()))
    nrt_prod_filename = os.path.join(the_dir, '{hemi}H_seaice_extent_nrt.csv'.format(hemi=hemisphere[0:1].upper()))

    final = slurp_csv(final_prod_filename)
    nrt = slurp_csv(nrt_prod_filename)
    all_data = pd.concat([final, nrt])
    all_data.index = pd.to_datetime(all_data.index)
    all_data  = all_data.reindex(index=pd.date_range('1978-10-25', dt.date.today().strftime('%Y-%m-%d')))
    all_data['hemi'] = hemisphere
    return all_data

In [ ]:
def interpolate_column_excluding_extended_missing_periods(df_in, column, interpolated_column):
    df = df_in.copy()
    df['backfill'] = df[column].fillna(method='bfill', limit=1)
    df['forwardfill'] = df[column].fillna(method='ffill', limit=1)
    is_really_nan = pd.isnull(df['backfill']) | pd.isnull(df['forwardfill'])
    df[interpolated_column] = df[column].interpolate()
    df.loc[is_really_nan,interpolated_column] = np.nan
    df = df.drop(['forwardfill', 'backfill'], axis=1)
    return df

In [ ]:
def clim_string(climatology_years):
    return  '{0}-{1}'.format(climatology_years[0], climatology_years[1])

def get_climatological_means(df, column, climatology_years):
    clim = df[(df.index.year >= climatology_years[0]) & (df.index.year <= climatology_years[1] )].copy()
    clim = clim.groupby([clim.index.month, clim.index.day]).mean()[[column]]
    clim = clim.rename(columns={column: clim_string(climatology_years)})
    return clim

In [ ]:
import calendar
month_names = [calendar.month_name[x] for x in range(1,13)]

def prepare_daily_dataframe(column, means, df_in, title):
    df = df_in.copy()
    df = df[[column]].set_index([df.index.year, df.index.month, df.index.day]).unstack(0)
    df.columns = df.columns.droplevel(0)
    space = means.copy()
    space['1981-2010'] = "    "
    space.rename(columns={'1981-2010': '   '}, inplace=True)
    df = pd.concat([df, space, means.copy()], axis=1)
    df[column] = title
    df.set_index(column, append=True, inplace=True)
    df = df.unstack(column)
    df.columns = df.columns.reorder_levels([column, None])
    df.index = df.index.set_levels(month_names, level=0)
    return df

In [ ]:
def to_mon_day_rows_year_cols(column, df, title):
    a = df.copy()
    a = a[[column]].set_index([a.index.year, a.index.month, a.index.day]).unstack(0)
    a.columns.set_levels([title], level=0)
    return a

In [ ]:
def compute_anomaly_from_extent_df(df, title):
    a = df.copy()
    values = np.array(a.iloc[:, 0:-2])
    clim = np.array(a.iloc[:, -1])
    means = np.tile(clim, (values.shape[1],1)).T
    anomalies = values - means
    a = pd.DataFrame(data=anomalies, index=a.index, columns=a.columns[0:-2])
    a.columns = a.columns.set_levels([title], level=0)
    return a

In [ ]:
def compute_extent_and_5day_extent_for_hemisphere(hemisphere):
    df = read_a_hemisphere(hemisphere)
    df = interpolate_column_excluding_extended_missing_periods(df, 'extent', 'interpolated')
    df['5 Day'] = pd.rolling_mean(df['interpolated'], window=5, min_periods=2)
    df['Daily Change'] = df['5 Day'].diff(periods=1)
    daily_means = get_climatological_means(df, 'interpolated', climatology_years)
    five_day_means = get_climatological_means(df, '5 Day', climatology_years)
    extent = prepare_daily_dataframe('extent', daily_means , df, 'Daily Extents : with climatological means based on interpolated data')
    daily_change = to_mon_day_rows_year_cols('Daily Change', df, 'Daily Extent Change for 5 Day Average Extent')
    average_extent = prepare_daily_dataframe('5 Day', five_day_means, df, 'Daily 5 Day Extents : with climatological means based on 5 day data')
    extent_anomaly = compute_anomaly_from_extent_df(extent, 'Extent Anomaly')
    avg_extent_anomaly = compute_anomaly_from_extent_df(average_extent, '5 Day Avg Ext Anomaly')

    return {'Ext': extent, '5 Day Avg Ext': average_extent,
            'Ext Anomaly': extent_anomaly, '5 Day Avg Ext Anomaly': avg_extent_anomaly,
            'Daily Change': daily_change}

In [ ]:
def write_hemisphere(writer, df, abbv):
    df['Ext'].to_excel(writer,"{0} Ext".format(abbv),float_format = "%.3f", index=True)
    df['5 Day Avg Ext'].to_excel(writer,"{0} 5 Day Avg Ext".format(abbv),float_format = "%.3f")

    #    don't do daily anomaly.
    #    df['Ext Anomaly'].to_excel(writer,"{} Ext Anomaly".format(abbv),float_format = "%.3f")

    df['5 Day Avg Ext Anomaly'].to_excel(writer,"{0} 5 Day Avg Ext Anomaly".format(abbv),float_format = "%.3f")
    df['Daily Change'].to_excel(writer, "{0} Daily Change".format(abbv), float_format = "%.3f")



    workbook = writer.book
    # add colors blue with blue
    format1 = workbook.add_format({'bg_color':   '#CEC7FF',
                                   'font_color': '#06009C'})

    # add colors red with red
    format2 = workbook.add_format({'bg_color':   '#FFC7CE',
                                   'font_color': '#9C0006'})

    sheets = ["{} Daily Change".format(abbv), "{} 5 Day Avg Ext Anomaly".format(abbv)]

    for sheet in sheets:
        worksheet = writer.sheets[sheet]
        worksheet.conditional_format('C3:ZZ369', {'type':     'cell',
                                                  'criteria': '>',
                                                  'value':    0,
                                                  'format':   format1})

        worksheet.conditional_format('C3:ZZ369', {'type':     'cell',
                                                  'criteria': '<',
                                                  'value':    0,
                                                  'format':   format2})

In [ ]:
north = compute_extent_and_5day_extent_for_hemisphere('north')
south = compute_extent_and_5day_extent_for_hemisphere('south')

In [ ]:
writer = ExcelWriter('../output/Sea_Ice_Extent_Daily.xls', engine='xlsxwriter')

write_hemisphere(writer, north, 'NH')
write_hemisphere(writer, south, 'SH')

writer.save()

In [ ]:
# cleanup
!cd ../data; rm -f NH_seaice_extent_final.csv NH_seaice_extent_nrt.csv SH_seaice_extent_final.csv SH_seaice_extent_nrt.csv

In [ ]: