Script to extract Ferret time-series data into two columns (x and y values) and plot them with Bokeh


In [1]:
import pandas as pd

# For bokeh plots
import bokeh #0.12.3
from bokeh.plotting import figure
from datetime import datetime as dt
from bokeh.models import DatetimeTickFormatter
from math import pi

In [2]:
# Read all csv files and create a dataframe from selected year and selected fields
# df = pd.DataFrame()
# for key in dict_files.keys():
#     df_full = pd.read_csv('files_csv/' + dict_files[key], delimiter=';', index_col=0)
#     df_select = df_full.loc[year][list_fields]
#     df_select['Source'] = key
#     df = df.append(df_select)
# df.set_index('Source', inplace=True)
# df

In [3]:
# Load the CSV file written by Ferret exactly as it is, metadata header
# included, to inspect its layout. Fields are separated by ':' and the first
# field of every line is used as the row index.
df_orig = pd.read_csv(
    '~/timeseries.csv',
    delimiter=':',
    index_col=0,
)
df_orig.head(10)


Out[3]:
ZONAL WIND (M/S)
VARIABLE
FILENAME monthly_navy_winds.cdf
FILEPATH /homel/cnangini/.conda/envs/FERRET/share/fer_...
SUBSET 132 points (TIME)
LONGITUDE 110.4W(-110.4) to 30.2E(30.2) (XY ave)
LATITUDE 70.3S to 70.3N (XY ave)
16-JAN-1982 / 1 0.1973
16-FEB-1982 / 2 0.3922
18-MAR-1982 / 3 0.1530
18-APR-1982 / 4 0.2385
18-MAY-1982 / 5 0.0239

In [4]:
# Load the Ferret CSV again, this time dropping the metadata header.
# skiprows=5 discards the first five lines; the next line (the LATITUDE
# entry) is consumed as the column header, so row 0 is the first data point.
df_full = pd.read_csv(
    '~/timeseries.csv',
    delimiter=':',
    skiprows=5,
)
df_full.head(5)


Out[4]:
LATITUDE 70.3S to 70.3N (XY ave)
0 16-JAN-1982 / 1 0.1973
1 16-FEB-1982 / 2 0.3922
2 18-MAR-1982 / 3 0.1530
3 18-APR-1982 / 4 0.2385
4 18-MAY-1982 / 5 0.0239

In [5]:
# Extract the raw date strings, i.e. the first column of the data frame.
# The column is selected by position with .iloc because its header is just
# the leftover LATITUDE metadata line; .ix (used previously) is deprecated
# and has been removed from pandas.
s = df_full.iloc[:, 0]
s.head(5)


Out[5]:
0     16-JAN-1982 /   1
1     16-FEB-1982 /   2
2     18-MAR-1982 /   3
3     18-APR-1982 /   4
4     18-MAY-1982 /   5
Name:              LATITUDE , dtype: object

In [6]:
# x_tmp = s.str.split(' /').str.get(0)
# x_tmp.head(5)

In [18]:
# Reduce each raw entry such as ' 16-JAN-1982 /   1' to its date token.
# 1. keep the part in front of ' /' (drops the running point number)
# 2. split on any run of whitespace and take the first token, so the result
#    does not depend on how many padding spaces precede the date
x = s.str.split(' /').str.get(0).str.split().str.get(0)
x.head(12)


Out[18]:
0     16-JAN-1982
1     16-FEB-1982
2     18-MAR-1982
3     18-APR-1982
4     18-MAY-1982
5     18-JUN-1982
6     18-JUL-1982
7     17-AUG-1982
8     17-SEP-1982
9     17-OCT-1982
10    17-NOV-1982
11    17-DEC-1982
dtype: object

In [8]:
# Show the first extracted x value (label 0 of the series)
x[0]


Out[8]:
'16-JAN-1982'

In [25]:
# Determine whether the x column is a list of dates.
# Only the first entry, x[0], is inspected: if it contains one of the
# upper-case three-letter month abbreviations the column is treated as dates.
# dateFlag stays an int (0 or 1) because later cells test `dateFlag==1`.
months = ['JAN', 'FEB', 'MAR', 'APR', 'MAY', 'JUN',
          'JUL', 'AUG', 'SEP', 'OCT', 'NOV', 'DEC']
dateFlag = 0
if any(month in x[0] for month in months):
    print("found month")
    dateFlag = 1
# print() with one argument behaves the same on Python 2 and Python 3
print(dateFlag)


found month
1

In [27]:
# Convert the date strings to datetime64 values when the x column was
# recognised as dates; otherwise x is left untouched.
# The infer_datetime_format keyword is no longer passed: it was only a speed
# hint and is deprecated in recent pandas releases.
if dateFlag == 1:
    x = pd.to_datetime(x)
x.head(5)


Out[27]:
0   1982-01-16
1   1982-02-16
2   1982-03-18
3   1982-04-18
4   1982-05-18
dtype: datetime64[ns]

In [29]:
# Collect the x and y values in one two-column data frame.
# xval: the x series built above (dates when dateFlag is 1)
# yval: the second column of the Ferret data, selected by position with
#       .iloc (.ix is deprecated and has been removed from pandas)
dfer = pd.DataFrame(
    {'xval': x, 'yval': df_full.iloc[:, 1]},
    columns=['xval', 'yval'],
)
dfer.head(5)


Out[29]:
xval yval
0 1982-01-16 0.1973
1 1982-02-16 0.3922
2 1982-03-18 0.1530
3 1982-04-18 0.2385
4 1982-05-18 0.0239

In [39]:
# dfer['date'] =  pd.to_datetime(dfer['date'], format='%d%b%Y:%H:%M:%S.%f')

In [31]:
# Create the empty Bokeh figure (700 wide, 400 high) the series is drawn on
p = figure(
    title='A Bokeh plot',
    plot_width=700,
    plot_height=400,
)

In [32]:
# Draw the series as a line: xval on the horizontal axis, yval on the vertical
p.line(x=dfer['xval'], y=dfer['yval'])


Out[32]:
<bokeh.models.renderers.GlyphRenderer at 0x7fda7779e790>

In [33]:
# For a date axis, label ticks as "day month-name year" at every time scale
if dateFlag == 1:
    tick_formats = {
        scale: ["%d %B %Y"]
        for scale in ("hours", "days", "months", "years")
    }
    p.xaxis.formatter = DatetimeTickFormatter(formats=tick_formats)

# Rotate the x tick labels by 45 degrees and label the y axis
p.xaxis.major_label_orientation = pi / 4
p.yaxis.axis_label = "size"

In [34]:
# Write the figure to a standalone HTML file and display it.
# show is imported from bokeh.io rather than bokeh.charts: the bokeh.charts
# package is no longer shipped with Bokeh, so importing from it fails on
# later releases.
from bokeh.io import output_file, show
from bokeh.models import DatetimeTickFormatter

output_file('myplot2.html')
show(p)

In [ ]:


In [29]:
# Minimal datetime-axis example, taken from
# http://stackoverflow.com/questions/33869292/how-can-i-set-the-x-axis-as-datetimes-on-a-bokeh-plot
# The imports are repeated here so the example can be run on its own.
# show comes from bokeh.io rather than bokeh.charts: the bokeh.charts package
# is no longer shipped with Bokeh, so importing from it fails on later releases.
import pandas as pd
from math import pi
from datetime import datetime as dt
from bokeh.io import output_file, show
from bokeh.models import DatetimeTickFormatter
from bokeh.plotting import figure

# Three daily samples indexed by date
df = pd.DataFrame(data=[1,2,3],
                  index=[dt(2015, 1, 1), dt(2015, 1, 2), dt(2015, 1, 3)],
                  columns=['foo'])

# Plot the column against its datetime index
p_test = figure(plot_width=400, plot_height=400)
p_test.line(df.index, df['foo'])

# Use the same "day month-name year" tick label at every time scale and
# rotate the labels by 45 degrees
p_test.xaxis.formatter=DatetimeTickFormatter(formats=dict(
        hours=["%d %B %Y"],
        days=["%d %B %Y"],
        months=["%d %B %Y"],
        years=["%d %B %Y"],
    ))
p_test.xaxis.major_label_orientation = pi/4

# Write the example to its own HTML file and display it
output_file('myplot.html')
show(p_test)


ERROR:/usr/lib/python2.7/site-packages/bokeh/core/validation/check.pyc:W-1001 (NO_DATA_RENDERERS): Plot has no data renderers: Figure, ViewModel:Plot, ref _id: 132c3c79-0a8e-44d7-8e24-20afbd24945c

In [95]:
# Inspect the index of the example frame (built from three datetime objects)
df.index


Out[95]:
DatetimeIndex(['2015-01-01', '2015-01-02', '2015-01-03'], dtype='datetime64[ns]', freq=None, tz=None)

In [118]:
# Inspect the single data column of the example frame
df['foo']


Out[118]:
2015-01-01    1
2015-01-02    2
2015-01-03    3
Name: foo, dtype: int64

In [30]:
# Add a line glyph for the example data to p_test.
# NOTE(review): the example cell that creates p_test already makes this same
# call, so running both presumably adds the line twice - confirm whether this
# cell is still needed.
p_test.line(df.index, df['foo'])


Out[30]:
<bokeh.models.renderers.GlyphRenderer at 0x7fa478885b90>

In [ ]: