Qualité de l'air mesurée dans la station Châtelet

Récupération des données


In [ ]:
%matplotlib inline
#%matplotlib notebook

import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

matplotlib.rcParams['figure.figsize'] = (9, 9)

In [ ]:
def conv_func(s):
    """Convert one raw CSV cell to a float, mapping missing markers to NaN.

    Parameters
    ----------
    s : str
        Raw cell text, as handed to a ``pandas.read_csv`` converter.

    Returns
    -------
    float
        The parsed number, or NaN when the cell is blank or holds ``ND``.

    Raises
    ------
    ValueError
        If the remaining text cannot be parsed as a number.
    """
    # Remove the '<' marker and trim *before* comparing, so padded cells such
    # as ' ND ' are recognised as missing instead of reaching float().
    s = s.replace('<', '').strip()
    if s in ('', 'ND'):
        return np.nan
    # Converters receive the raw cell text, so the decimal=',' option of the
    # read call does not apply to these columns; accept a decimal comma here.
    return float(s.replace(',', '.'))

In [ ]:
# CSV export of the Châtelet station measurements.
url = "https://data.iledefrance.fr/explore/dataset/qualite-de-lair-mesuree-dans-la-station-chatelet/download/?format=csv&timezone=Europe/Berlin&use_labels_for_header=true"

# Every measurement column is parsed by conv_func (rather than through
# dtype= / na_values=) because it also strips '<' markers and maps 'ND'
# and blank cells to NaN.
measured_columns = ('NO', 'NO2', 'PM10', 'CO2', 'TEMP', 'HUMI')
converter_dict = dict.fromkeys(measured_columns, conv_func)

read_options = dict(
    sep=';',
    decimal=',',
    index_col=0,
    parse_dates=["DATE/HEURE"],
    converters=converter_dict,
)

df = pd.read_csv(url, **read_options)

In [ ]:
# Put the rows in index order; the index is column 0 of the CSV
# (presumably the DATE/HEURE timestamps — TODO confirm).
df = df.sort_index()

Infos diverses sur le DataFrame


In [ ]:
# Preview the first rows of the loaded frame.
df.head()

In [ ]:
# List the column labels.
df.columns

In [ ]:
# Show the dtype pandas assigned to each column.
df.dtypes

In [ ]:
# Inspect the row index (its type decides whether the time-based
# resample / rolling calls further down can work).
df.index

Analyse de la concentration en particules fines (PM10)


In [ ]:
# Raw PM10 series over the whole index, on a wide canvas.
df["PM10"].plot(figsize=(18, 6));

In [ ]:
# Mean PM10 over consecutive 7-day bins.
df["PM10"].resample(rule='7D').mean().plot(figsize=(18, 6));

In [ ]:
# Mean PM10 over a 7-day time-based rolling window.
df["PM10"].rolling(window='7D').mean().plot(figsize=(18, 6));

In [ ]:
# Mean PM10 per calendar month.
# NOTE(review): the 'M' alias is deprecated in recent pandas (2.2+) in favour
# of 'ME' — switch once older pandas versions no longer need to be supported.
df["PM10"].resample(rule='1M').mean().plot(figsize=(18, 6));

In [ ]:
def _plot_time_of_day_stats(series, ax, color):
    """Draw time-of-day summary statistics of ``series`` on ``ax``.

    The samples are grouped by the time-of-day part of the index
    (``series.index.time``). For each group the mean, median, 1st and 3rd
    quartiles, 5th and 95th percentiles, minimum and maximum are drawn as
    lines; the 5th-95th percentile band and the interquartile band are
    shaded.

    Parameters
    ----------
    series : pandas.Series
        Series whose index exposes ``.time`` (i.e. a datetime-like index).
    ax : matplotlib.axes.Axes
        Axes that receive every line and shaded band.
    color : str
        Colour shared by all artists drawn for this series.
    """
    # Group once and reuse the grouping for every statistic.
    by_time = series.groupby(series.index.time)

    mean = by_time.mean()
    median = by_time.median()
    quartile_1 = by_time.quantile(0.25)
    quartile_3 = by_time.quantile(0.75)
    percentile_5 = by_time.quantile(0.05)
    percentile_95 = by_time.quantile(0.95)
    minimum = by_time.min()
    maximum = by_time.max()

    # Always pass the target axes explicitly instead of relying on pyplot's
    # "current axes" state.
    mean.plot(ax=ax, color=color, label="mean", alpha=0.75)
    median.plot(ax=ax, color=color, label="median", style="--", alpha=0.75)
    quartile_1.plot(ax=ax, color=color, alpha=0.5, style="-.", label="1st quartile")
    quartile_3.plot(ax=ax, color=color, alpha=0.5, style="-.", label="3rd quartile")
    percentile_5.plot(ax=ax, color=color, alpha=0.25, style=":", label="5th percentile")
    percentile_95.plot(ax=ax, color=color, alpha=0.25, style=":", label="95th percentile")
    minimum.plot(ax=ax, color=color, alpha=0.2, style=":", label="min")
    maximum.plot(ax=ax, color=color, alpha=0.2, style=":", label="max")

    # Shade the bands after the lines so the x axis already carries the
    # converter pandas installed for the time-of-day index.
    ax.fill_between(percentile_5.index, percentile_5.values, percentile_95.values, facecolor=color, alpha=0.1)
    ax.fill_between(quartile_1.index, quartile_1.values, quartile_3.values, facecolor=color, alpha=0.1)


# Approach adapted from:
# https://jakevdp.github.io/PythonDataScienceHandbook/03.11-working-with-time-series.html#Digging-into-the-data

# PM10 on the left axis, temperature on a twin right axis of the same figure.
fig, ax = plt.subplots(figsize=(18, 12))
_plot_time_of_day_stats(df.PM10, ax, color="blue")

ax2 = ax.twinx()
_plot_time_of_day_stats(df.TEMP, ax2, color="red")

ax.legend(loc='upper left')
ax2.legend(loc='upper right')

ax.set_xlabel('Time')
ax.set_ylabel('PM10')
ax2.set_ylabel('Temperature');

In [ ]:
# Mean PM10 (left axis, blue) and mean temperature (right axis, red) for each
# time of day.
ax = df.PM10.groupby(df.index.time).mean().plot(figsize=(18,6), color="blue")

ax.set_xlabel("Time")
# Label both y axes: on a twin-axis figure the two scales are otherwise
# indistinguishable.
ax.set_ylabel("PM10")

ax2 = ax.twinx()
df.TEMP.groupby(df.index.time).mean().plot(ax=ax2, color="red")
ax2.set_ylabel("Temperature")

ax.legend(loc='upper left')
ax2.legend(loc='upper right');

In [ ]:
# Mean PM10 (left axis, blue) and mean temperature (right axis, red) for each
# day of the week; pandas numbers weekdays from 0 (Monday) to 6 (Sunday).
ax = df.PM10.groupby(df.index.weekday).mean().plot(figsize=(18,6), color="blue")

ax.set_xlabel("Weekday")
# Label both y axes: on a twin-axis figure the two scales are otherwise
# indistinguishable.
ax.set_ylabel("PM10")

ax2 = ax.twinx()
df.TEMP.groupby(df.index.weekday).mean().plot(ax=ax2, color="red")
ax2.set_ylabel("Temperature")

ax.legend(loc='upper left')
ax2.legend(loc='upper right');

In [ ]:
# Mean PM10 (left axis, blue) and mean temperature (right axis, red) for each
# calendar month (1-12), pooled over all years present in the data.
ax = df.PM10.groupby(df.index.month).mean().plot(figsize=(18,6), color="blue")

ax.set_xlabel("Month")
# Label both y axes: on a twin-axis figure the two scales are otherwise
# indistinguishable.
ax.set_ylabel("PM10")

ax2 = ax.twinx()
df.TEMP.groupby(df.index.month).mean().plot(ax=ax2, color="red")
ax2.set_ylabel("Temperature")

ax.legend(loc='upper left')
ax2.legend(loc='upper right');