In [101]:
%matplotlib notebook
import pandas as pd
import os
import glob
import matplotlib.pyplot as plot
In [102]:
# source: ftp://sidads.colorado.edu/DATASETS/NOAA/G02135/
# Data for 2017, North and South by Month
paths = ['../data/North_2017/', '../data/South_2017/']

# glob.glob() returns matches in arbitrary, OS-dependent order; sorting each
# directory listing makes the row order of the combined frame reproducible.
all_files = [sorted(glob.glob(os.path.join(p, '*.csv'))) for p in paths]
csv_files = [f for files in all_files for f in files]
if not csv_files:
    # pd.concat() on an empty sequence raises a ValueError that does not say
    # which directories were searched; raise the same type with that detail.
    raise ValueError('No CSV files found in: ' + ', '.join(paths))

# skipinitialspace=True makes read_csv skip spaces that follow a delimiter.
df_from_each_file = [pd.read_csv(f, skipinitialspace=True) for f in csv_files]
concatenated_df = pd.concat(df_from_each_file, ignore_index=True)
concatenated_df.head(10)
Out[102]:
In [103]:
concatenated_df.shape[0]  # number of rows in the combined frame
Out[103]:
In [104]:
# Column labels of the combined frame, as a plain Python list.
concatenated_df.columns.tolist()
Out[104]:
In [105]:
def correct_month(month):
    """Return ``month`` as a two-character, zero-padded string.

    Args:
        month: Month number. Any integer-valued number is accepted, including
            floats such as ``3.0`` (pandas stores an integer column as float
            when it contains missing values).

    Returns:
        str: The month padded to two digits, e.g. ``'03'`` or ``'11'``.

    Raises:
        ValueError: If ``month`` is NaN or is not a whole number.
    """
    whole = int(month)
    if whole != month:
        # Refuse to truncate silently: 3.5 is not a month.
        raise ValueError('month must be a whole number, got ' + repr(month))
    return '{:02d}'.format(whole)
# Add a 'date' column parsed from a 'MM-YYYY' string (month padded by
# correct_month), copy 'mo' into 'month', then keep only the listed columns.
concatenated_df = (
    concatenated_df
    .assign(
        date=lambda frame: pd.to_datetime(
            frame['mo'].map(correct_month) + '-' + frame['year'].map(str),
            format='%m-%Y',
        ),
        month=lambda frame: frame['mo'],
    )
    .loc[:, ['data_type', 'region', 'extent', 'area', 'date', 'year', 'month']]
)
In [106]:
# Preview the first five rows after the column selection.
concatenated_df.head(n=5)
Out[106]:
In [107]:
# Distinct values that occur in the 'data_type' column.
pd.unique(concatenated_df['data_type'])
Out[107]:
In [108]:
# Keep only rows whose data_type is not the literal string '-9999'.
# NOTE(review): '-9999' looks like a missing-data marker — confirm against the dataset documentation.
concatenated_df = concatenated_df.loc[concatenated_df['data_type'].ne('-9999')]
concatenated_df.head(n=5)
Out[108]:
In [109]:
# Make 'date' the index of the frame.
# The result is rebound instead of using inplace=True, and the guard lets this
# cell be re-run: once 'date' has been moved out of the columns, a second
# set_index('date') would raise KeyError.
if concatenated_df.index.name != 'date':
    concatenated_df = concatenated_df.set_index('date')
In [110]:
# Mean extent and area for every (region, year) pair, largest mean extent first.
# Grouping on the 'year' column keeps the year as a label in the result.
# Rows are ordered with DataFrame.sort_values so each region/year stays attached
# to its own values; sorting every column separately through DataFrame.apply
# does not give a row order, because the per-column results are aligned back on
# their index labels when they are recombined.
# NOTE(review): 'extent' is used as the sort key — confirm that is the intended ordering.
group_region_year_sorted = (
    concatenated_df
    .groupby(['region', 'year'], as_index=False)[['extent', 'area']]
    .mean()
    .sort_values('extent', ascending=False)
    .reset_index(drop=True)
)
group_region_year_sorted.head()
Out[110]:
In [111]:
# One axes per plotted column: with subplots=True pandas needs as many axes as
# columns; when handed a single axes it clears the figure and emits a
# UserWarning. The columns are selected explicitly so the number of axes
# always matches what is drawn.
plot_columns = ['extent', 'area']
fig, axes = plot.subplots(nrows=len(plot_columns), sharex=True, figsize=(8, 6))
group_region_year_sorted[plot_columns].plot(ax=axes, subplots=True)
for axis, column in zip(axes, plot_columns):
    axis.set_ylabel('mean ' + column)
axes[-1].set_xlabel('row index')
fig.suptitle('Mean extent and area per region and year');  # ';' hides the repr
Out[111]:
In [112]:
# Preview of the JSON export: the first five rows as a list of records.
# NOTE(review): orient='records' does not emit the index, so the 'date' index is absent from this output — confirm that is intended.
concatenated_df.iloc[:5].to_json(orient='records')
Out[112]:
In [113]:
# Write every row to disk as a JSON array of records.
concatenated_df.to_json(path_or_buf="../data/ice_nsidc.json", orient='records')
In [ ]: