Pandas is a popular library for manipulating vectors, tables, and time series. We will frequently use Pandas data structures instead of the built-in Python data structures, as they provide much richer functionality. Also, Pandas is fast, which makes working with large datasets easier. Check out the official pandas website at [http://pandas.pydata.org/](http://pandas.pydata.org/).
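Pandas provides three data structures: the `Series` (one-dimensional), the `DataFrame` (two-dimensional), and the `Panel` (three-dimensional; deprecated and removed in pandas 0.25).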
Today we will mainly work with the `DataFrame`.
In [ ]:
import pandas as pd
In [ ]:
# Load the sample CSV into a DataFrame.
glad = pd.read_csv('./GLAD_15min_filtered_S1_41days_sample.csv')
# Keep an untouched copy: later cells add and delete columns on `glad`
# while still reading the unmodified table through `glad_orig`.
glad_orig = glad.copy()
In [ ]:
# Display the DataFrame (a notebook cell shows the value of its last expression).
glad
In [ ]:
# (number of rows, number of columns)
glad.shape
In [ ]:
# First rows of the table; head() shows five rows by default.
glad_orig.head()
In [ ]:
# Last rows of the table; tail() shows five rows by default.
glad_orig.tail()
In [ ]:
import numpy as np
np.zeros(240000)
In [ ]:
glad['temperature'] = np.zeros(240000)
In [ ]:
glad.head()
In [ ]:
del glad['vel_Error']
In [ ]:
del glad['Pos_Error']
In [ ]:
glad.head()
In [ ]:
# Write the table to a CSV file; by default the row index is written too.
glad.to_csv('./test.csv')
In [ ]:
# IPython help syntax: a trailing `?` shows the documentation of to_csv.
glad.to_csv?
In [ ]:
# index = False leaves the row index out of the written file.
glad.to_csv('./test_without_index.csv', index = False)
In [ ]:
# .iloc selects by integer position: this is the first row.
glad.iloc[0]
In [ ]:
# Positional slice: the first ten rows.
glad_orig.iloc[:10]
In [ ]:
# .values returns the contents of the first row as a NumPy array.
glad_orig.iloc[0].values
In [ ]:
# set_index returns a new DataFrame whose row labels come from the 'ID'
# column; `glad` itself is left unchanged.
glad_id = glad.set_index('ID')
In [ ]:
glad_id.head()
In [ ]:
# .loc selects by label: the rows whose index value is 'CARTHE_021'.
glad_id.loc['CARTHE_021']
Use `.values` to access the data stored in the DataFrame as a NumPy array.
In [ ]:
lat = glad_id.loc['CARTHE_021', 'Latitude'].values
lat
In [ ]:
lon = glad_id.loc['CARTHE_021', 'Longitude'].values
lon
In [ ]:
import matplotlib.pyplot as plt
import cartopy.crs as ccrs
In [ ]:
# Draw the lon/lat track extracted above on a coastline map.
# One projection object is shared by the axes, the extent and the gridlines.
plate_carree = ccrs.PlateCarree()
min_lon, max_lon = -91.5, -85
min_lat, max_lat = 23, 30.5
plt.figure(figsize=(6, 8))
ax = plt.axes(projection=plate_carree)
# Extent order is [west, east, south, north].
ax.set_extent([min_lon, max_lon, min_lat, max_lat], crs=plate_carree)
ax.coastlines(resolution='50m', color='black')
ax.gridlines(crs=plate_carree, draw_labels=True, color='grey')
ax.plot(lon, lat)
In [ ]:
# Group the rows of `glad` by the value of their 'ID' column.
drifter_grouped = glad.groupby('ID')
In [ ]:
# .groups maps each ID to the row labels that belong to it.
drifter_grouped.groups
In [ ]:
# Just the IDs.
drifter_grouped.groups.keys()
In [ ]:
# Row labels of a single group.
drifter_grouped.groups['CARTHE_021']
In [ ]:
# Keep the IDs around for the loops in the following cells.
drifter_ids = drifter_grouped.groups.keys()
In [ ]:
# Print every drifter ID, one per line.
for drifter_id in drifter_ids:
    print(drifter_id)
In [ ]:
# Another look at the ID-indexed table that the plotting loop below reads from.
glad_id.head()
In [ ]:
# Draw the track of every drifter on one coastline map.
plt.figure(figsize = (6, 8))
min_lat, max_lat = 23, 30.5
min_lon, max_lon = -91.5, -85
ax = plt.axes(projection = ccrs.PlateCarree())
# Extent order is [west, east, south, north].
ax.set_extent([min_lon, max_lon, min_lat, max_lat], ccrs.PlateCarree())
ax.coastlines(resolution = '50m', color = 'black')
ax.gridlines(crs = ccrs.PlateCarree(), draw_labels = True, color = 'grey')
for drifter_id in drifter_ids:
    # glad_id is indexed by 'ID', so .loc returns all positions recorded
    # for this drifter; each one is plotted as a separate line.
    lon = glad_id.loc[drifter_id, 'Longitude'].values
    lat = glad_id.loc[drifter_id, 'Latitude'].values
    ax.plot(lon, lat)
In [ ]:
# Index `glad_orig` by its 'Date' column so rows can be looked up by date label.
glad_date = glad_orig.set_index('Date')
In [ ]:
glad_date.head()
In [ ]:
# The row labels now come from the 'Date' column.
glad_date.index
In [ ]:
# One timestamp per day from 2012-07-22 to 2012-08-05, both ends included.
pd.date_range(start = '2012-07-22', end = '2012-08-05')
In [ ]:
# Build the day labels in this cell so that it runs top-to-bottom instead of
# relying on `date_range` being assigned by a cell further down the notebook.
# NOTE(review): assumes the 'Date' index holds "YYYY-MM-DD" strings - confirm.
date_range = pd.date_range(start='2012-07-22', end='2012-08-05').strftime("%Y-%m-%d")
# Select every row whose date label is one of those days, with all columns.
glad_date.loc[date_range, :]
In [ ]:
# Bounds of the period to select. They were never defined in the notebook;
# the values are the dates used in the pd.date_range example above.
first_day = '2012-07-22'
last_day = '2012-08-05'
# One "YYYY-MM-DD" string per day, both ends included. Strings are used
# (rather than timestamps) so they can serve as labels for .loc.
date_range = pd.date_range(start=first_day, end=last_day).strftime("%Y-%m-%d")
In [ ]:
# Show the day labels.
date_range
In [ ]:
# Keep the rows whose 'Date' label is one of the days in date_range,
# with all columns.
glad_selected = glad_date.loc[date_range,:]
In [ ]: