Leitura X - Titulo

By Hans. Original: Bill Chambers



In [4]:
import sys
import numpy as np
import pandas as pd
print(sys.version) # Versao do python - Opcional
print(np.__version__) # VErsao do modulo numpy - Opcional
import matplotlib
import matplotlib.pyplot as plt
%matplotlib inline
import datetime
import time


3.4.3 |Anaconda 2.2.0 (64-bit)| (default, Mar  6 2015, 12:06:10) [MSC v.1600 64 bit (AMD64)]
1.9.2

In [5]:
# Load the station data into df_dados from the local .csv file.
# (A remote copy of an earlier file used to be read from
#  'http://fortran-zrhans.c9.io/csdapy/sr311-2014.csv'.)
df_dados = (
    pd.read_csv('../dados/sr311-2015.csv', index_col=None, parse_dates=['Timestamp'])
    # Drop the 'Unnamed: 0' column read from the file.
    .drop('Unnamed: 0', axis=1)
    # Index the rows by their parsed timestamp.
    .set_index('Timestamp')
    # Keep only the three columns used in the rest of the notebook.
    [['AirTC', 'RH', 'Rain_mm']]
)
print("Ok")


Ok

In [7]:
# Discard every row that has a missing value in any of its columns,
# then display the resulting frame as the cell output.
df_dados = df_dados.dropna(axis=0, how='any')
df_dados

# s_chuva = df_dados.Rain_mm  # (disabled) rain column as a standalone Series


Out[7]:
AirTC RH Rain_mm
Timestamp
2015-01-01 00:00:00 21.97 82.9 0
2015-01-01 00:01:00 21.99 83.1 0
2015-01-01 00:02:00 21.97 83.0 0
2015-01-01 00:03:00 21.98 82.7 0
2015-01-01 00:04:00 22.00 82.8 0
2015-01-01 00:05:00 22.01 83.1 0
2015-01-01 00:06:00 21.98 82.9 0
2015-01-01 00:07:00 21.95 82.8 0
2015-01-01 00:08:00 21.95 82.6 0
2015-01-01 00:09:00 21.96 82.7 0
2015-01-01 00:10:00 21.96 82.9 0
2015-01-01 00:11:00 21.94 82.7 0
2015-01-01 00:12:00 21.95 82.4 0
2015-01-01 00:13:00 21.98 82.2 0
2015-01-01 00:14:00 21.99 82.4 0
2015-01-01 00:15:00 21.99 82.1 0
2015-01-01 00:16:00 22.01 82.3 0
2015-01-01 00:17:00 21.99 82.3 0
2015-01-01 00:18:00 21.98 82.3 0
2015-01-01 00:19:00 22.00 82.1 0
2015-01-01 00:20:00 22.00 82.3 0
2015-01-01 00:21:00 21.99 82.4 0
2015-01-01 00:22:00 21.97 82.8 0
2015-01-01 00:23:00 21.93 83.1 0
2015-01-01 00:24:00 21.89 83.3 0
2015-01-01 00:25:00 21.88 83.5 0
2015-01-01 00:26:00 21.84 83.7 0
2015-01-01 00:27:00 21.81 84.3 0
2015-01-01 00:28:00 21.80 83.8 0
2015-01-01 00:29:00 21.77 83.7 0
... ... ... ...
2015-05-29 09:31:00 12.50 98.8 0
2015-05-29 09:32:00 12.51 99.0 0
2015-05-29 09:33:00 12.54 99.0 0
2015-05-29 09:34:00 12.56 99.0 0
2015-05-29 09:35:00 12.56 98.8 0
2015-05-29 09:36:00 12.56 98.6 0
2015-05-29 09:37:00 12.54 98.4 0
2015-05-29 09:38:00 12.53 98.3 0
2015-05-29 09:39:00 12.55 98.8 0
2015-05-29 09:40:00 12.65 99.1 0
2015-05-29 09:41:00 12.71 99.1 0
2015-05-29 09:42:00 12.71 98.8 0
2015-05-29 09:43:00 12.69 98.4 0
2015-05-29 09:44:00 12.68 98.1 0
2015-05-29 09:45:00 12.70 98.4 0
2015-05-29 09:46:00 12.72 98.0 0
2015-05-29 09:47:00 12.71 97.7 0
2015-05-29 09:48:00 12.72 97.8 0
2015-05-29 09:49:00 12.73 97.6 0
2015-05-29 09:50:00 12.74 97.8 0
2015-05-29 09:51:00 12.79 97.4 0
2015-05-29 09:52:00 12.79 97.6 0
2015-05-29 09:53:00 12.90 97.9 0
2015-05-29 09:54:00 12.92 97.1 0
2015-05-29 09:55:00 12.94 96.8 0
2015-05-29 09:56:00 12.99 97.7 0
2015-05-29 09:57:00 13.21 98.9 0
2015-05-29 09:58:00 13.36 98.3 0
2015-05-29 09:59:00 13.45 98.7 0
2015-05-29 10:00:00 13.53 98.2 0

205113 rows × 3 columns


In [43]:
#s_chuva.cumsum()

In [8]:
# Draw the AirTC and RH columns of df_dados on one shared 16x8 axes.
fig, ax = plt.subplots(figsize=(16, 8))
df_dados['AirTC'].plot(ax=ax)
df_dados['RH'].plot(ax=ax)


Out[8]:
<matplotlib.axes._subplots.AxesSubplot at 0x42b4940>

In [18]:
# df_dados.index.min(), df_dados.index.max() evaluate to
#   (Timestamp('2015-01-01 00:00:00'), Timestamp('2015-05-29 10:00:00'))
# and the rows of the original series are one minute apart.
#
# Build an empty frame whose index is a gap-free, minute-by-minute domain
# spanning the first and last timestamp of the original series.
#
# Why not pd.date_range(pd.datetime(2015,1,1), pd.datetime(2015,5,29))?
#   * pd.datetime was deprecated in pandas 1.0 and removed in pandas 2.0,
#     so that call raises AttributeError on current versions.
#   * date_range defaults to a daily frequency, which would give only one
#     point per day (midnight) and stop at 2015-05-29 00:00, ten hours before
#     the last record; a left join on such an index would discard almost
#     every minute-level measurement instead of exposing the gaps.
d = pd.DataFrame(
    index=pd.date_range('2015-01-01 00:00:00', '2015-05-29 10:00:00', freq='min')
)

In [29]:
# Left-join the measurements onto the index of d: every timestamp of d is kept,
# and any timestamp of d that has no matching row in df_dados gets NaN values.
ndf_dados = d.join(df_dados, how='left')
# ndf_dados.fillna(0)  # (disabled) would replace the NaN values with 0

In [30]:
# Draw the AirTC and RH columns of the joined frame on one shared 16x8 axes.
fig, ax = plt.subplots(figsize=(16, 8))
ndf_dados['AirTC'].plot(ax=ax)
ndf_dados['RH'].plot(ax=ax)


Out[30]:
<matplotlib.axes._subplots.AxesSubplot at 0xa21e8d0>

In [42]:
# Draw the Rain_mm column of the joined frame on a 16x8 axes labelled 'Chuva'.
fig, ax = plt.subplots(figsize=(16, 8))
ax.set_ylabel('Chuva')
ndf_dados['Rain_mm'].plot(ax=ax)


Out[42]:
<matplotlib.axes._subplots.AxesSubplot at 0xe462940>

In [31]:
# Export the joined frame to a new CSV file in the current working directory.
ndf_dados.to_csv('sao_roque_2015-AirTC-RH-Rain.csv')

# Preview the first rows of what was exported. This must be the last
# expression of the cell: a notebook only renders the value of the final
# expression, so calling head() before to_csv() discarded the preview.
ndf_dados.head()

Elaborado por Hans Rogerio Zimermann com base no curso "Data Analysis in Python with Pandas", de Bill Chambers (Udemy).