Javier Garcia-Bernardo garcia@uva.nl
In [25]:
import os

import pandas as pd
import numpy as np
import pylab as plt
import seaborn as sns
from scipy.stats import chi2_contingency,ttest_ind
#This allows us to use R
%load_ext rpy2.ipython
#Visualize in line
%matplotlib inline
#Be able to plot images saved in the hard drive and to render raw HTML
# Everything comes from the public IPython.display module: importing `display`
# from IPython.core.display is deprecated since IPython 7.14.
from IPython.display import HTML, Image, display
#Make the notebook wider
display(HTML("<style>.container { width:90% !important; }</style>"))
In [28]:
# Parse the HTML tables found on the PIIE sanctions page.
# The result is inspected below: its type and how many items it holds.
df = pd.read_html(
    "https://piie.com/summary-economic-sanctions-episodes-1914-2006",
    encoding="UTF-8",
)
print(type(df), len(df))
df
Out[28]:
In [32]:
# Preview the first ten rows of the first parsed table
df[0].head(10)
Out[32]:
In [30]:
# Column labels of the first parsed table, as scraped
df[0].columns
Out[30]:
In [5]:
# Download the sanctions page and keep the first parsed table.
# The list returned by read_html gets its own name so `df` is always a DataFrame.
tables = pd.read_html("https://piie.com/summary-economic-sanctions-episodes-1914-2006",encoding="UTF-8")
df = tables[0]
print(df.columns)
# Replace the scraped headers with clean labels (the table must have exactly 7 columns)
df.columns = ['Year imposed', 'Year ended', 'Principal sender',
'Target country', 'Policy goal',
'Success score (scale 1 to 16)',
'Cost to target (percent of GNP)']
# Treat a "negligible" cost as zero
df = df.replace('negligible', 0)
df = df.replace("–","-",regex=True) #the file uses long dashes
# Make sure the output folder exists so the notebook also runs on a fresh checkout
os.makedirs("data", exist_ok=True)
# index=False: do not write the row index into the tab-separated file
df.to_csv("data/economic_sanctions.csv",index=False,sep="\t")
In [6]:
# Reload the saved table; cells holding "-" or "Ongoing" are read as missing values
df = pd.read_csv(
    "data/economic_sanctions.csv",
    sep="\t",
    na_values=["-", "Ongoing"],
)
# Duration = "Year ended" minus "Year imposed"
df["Duration"] = df["Year ended"].sub(df["Year imposed"])
df.head()
Out[6]:
In [13]:
# Scatter of duration against cost to the target, coloured by the year imposed
# (fit_reg=False: no regression line; the built-in legend is switched off)
sns.lmplot(
    data=df,
    x="Duration",
    y="Cost to target (percent of GNP)",
    hue="Year imposed",
    palette="YlOrBr",
    fit_reg=False,
    legend=False,
)
# Restrict the y-axis to the range -2..10
plt.ylim(-2, 10)
# Put the legend outside the axes, anchored at the middle of the right edge, in 4 columns
plt.legend(loc="center left", bbox_to_anchor=(1, 0.5), ncol=4)
Out[13]:
In [177]:
In [33]:
# Load the exchange-rate series, parsing "Month" as dates and dropping the last
# two lines of the file. skipfooter is not supported by the default C parser,
# so the python engine is requested explicitly (avoids the ParserWarning fallback).
df = pd.read_csv(
    "data/exchange-rate-twi-may-1970-aug-1.tsv",
    sep="\t",
    parse_dates=["Month"],
    skipfooter=2,
    engine="python",
)
df.head()
Out[33]:
In [37]:
#filter by time
# .copy() makes df_after1980 an independent frame, so renaming its columns here
# (and adding columns later) does not act on a slice of `df` and does not
# trigger SettingWithCopyWarning.
df_after1980 = df.loc[df["Month"] > "1980-05-02"].copy() #year-month-day
df_after1980.columns = ["Date","Rate"]
df_after1980.head()
Out[37]:
In [20]:
#make columns with year and month (useful for models)
# The .dt accessor extracts the fields in a vectorised way (no per-row lambda).
# assign() returns a new frame, so nothing is written into a slice of `df`
# and the cell can be re-run safely.
df_after1980 = df_after1980.assign(
    Year=df_after1980["Date"].dt.year,
    Month=df_after1980["Date"].dt.month,
)
df_after1980.head()
Out[20]:
In [38]:
#resample
# Bin the rows by the "A" (annual) frequency of the "Date" column and average each bin
df_after1980_resampled = (
    df_after1980
    .resample(rule="A", on="Date")
    .mean()
)
display(df_after1980_resampled.head())
In [23]:
# Move the "Date" index created by the resampling back into a regular column.
# Guarded so that re-running this cell does not add a spurious "index" column.
if "Date" not in df_after1980_resampled.columns:
    df_after1980_resampled = df_after1980_resampled.reset_index()
df_after1980_resampled.head()
Out[23]:
In [24]:
#Let's visualize it
plt.figure(figsize=(6, 4))
# Draw the original series first and the resampled means on top of it
for frame, series_label in (
    (df_after1980, "Before resampling"),
    (df_after1980_resampled, "After resampling"),
):
    plt.plot(frame["Date"], frame["Rate"], label=series_label)
plt.xlabel("Time")
plt.ylabel("Rate")
plt.legend()
plt.show()