In [1]:
%matplotlib inline
import os

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
In [2]:
# Global seaborn look for every figure in this notebook:
# "poster" scaling for plot elements and the "ticks" axes style.
sns.set_context(context="poster")
sns.set_style(style="ticks")
In [3]:
# Location of the GDELT export (BigQuery result for India as actor, by target).
# Set the GDELT_INDIA_CSV environment variable to point at your own copy;
# when it is unset the original hard-coded location is used, so existing
# setups keep working unchanged.
GDELT_INDIA_CSV = os.environ.get(
    "GDELT_INDIA_CSV",
    "/home/entity/Downloads/BigQuery_GDELT_INDIA_Target.csv",
)
df = pd.read_csv(GDELT_INDIA_CSV)
In [4]:
# Preview the first rows of the loaded frame as a quick sanity check.
df.head()
Out[4]:
In [5]:
# Display labels for the integer QuadClass codes found in the data;
# used to turn the codes into readable legend entries in the plots below.
QUAD_CLASS_NAMES = {
    1: "Verbal Cooperation",
    2: "Material Cooperation",
    3: "Verbal Conflict",
    4: "Material Conflict",
}
In [ ]:
In [6]:
# Mean TotalEvents per (Year, QuadClass) for rows whose Target is "PAK".
# aggfunc is passed as the string "mean": recent pandas releases emit a
# FutureWarning when a NumPy callable such as np.mean is given, and the
# string form produces the same numbers.
df_t = df[df.Target == "PAK"].pivot_table(
    index="Year", columns="QuadClass", values="TotalEvents", aggfunc="mean"
)

# Divide each row by its own total so a year's classes sum to 1.
share_by_year = df_t.divide(df_t.sum(axis=1), axis=0)

# Long format for seaborn: one row per (Year, QuadClass) pair.
plot_data = pd.melt(
    share_by_year.reset_index(),
    id_vars=["Year"],
    value_vars=[1, 2, 3, 4],
    var_name="QuadClass",  # explicit; previously implied by the pivot's column name
    value_name="TotalEvents",
).assign(
    # Vectorised code -> label lookup (replaces a row-by-row apply(axis=1)).
    QuadClass=lambda frame: frame["QuadClass"].map(QUAD_CLASS_NAMES)
)

ax = sns.pointplot(
    x="Year", y="TotalEvents", hue="QuadClass",
    order=df_t.index.sort_values(),
    data=plot_data,
)
plt.xticks(rotation='vertical')
plt.ylabel("Proportion of event types")
plt.xlabel("Year")
plt.title("GDELT events between India (IND) and Pakistan (PAK) across years")
Out[6]:
In [7]:
# Same view as the previous Pakistan plot, but rows with QuadClass == 1
# ("Verbal Cooperation") are dropped first, so the shares below are
# computed over the three remaining classes only.
# aggfunc is passed as the string "mean": recent pandas releases emit a
# FutureWarning when a NumPy callable such as np.mean is given, and the
# string form produces the same numbers.
df_t = df[(df.Target == "PAK") & (df.QuadClass != 1)].pivot_table(
    index="Year", columns="QuadClass", values="TotalEvents", aggfunc="mean"
)

# Divide each row by its own total so a year's remaining classes sum to 1.
share_by_year = df_t.divide(df_t.sum(axis=1), axis=0)

# Long format for seaborn: one row per (Year, QuadClass) pair.
plot_data = pd.melt(
    share_by_year.reset_index(),
    id_vars=["Year"],
    value_vars=[2, 3, 4],
    var_name="QuadClass",  # explicit; previously implied by the pivot's column name
    value_name="TotalEvents",
).assign(
    # Vectorised code -> label lookup (replaces a row-by-row apply(axis=1)).
    QuadClass=lambda frame: frame["QuadClass"].map(QUAD_CLASS_NAMES)
)

ax = sns.pointplot(
    x="Year", y="TotalEvents", hue="QuadClass",
    order=df_t.index.sort_values(),
    data=plot_data,
)
plt.xticks(rotation='vertical')
plt.ylabel("Proportion of event types")
plt.xlabel("Year")
# NOTE(review): this title is identical to the one on the four-class Pakistan
# plot; consider mentioning that Verbal Cooperation is excluded here.
plt.title("GDELT events between India (IND) and Pakistan (PAK) across years")
Out[7]:
In [8]:
# Mean TotalEvents per (Year, QuadClass) for rows whose Target is "RUS".
# aggfunc is passed as the string "mean": recent pandas releases emit a
# FutureWarning when a NumPy callable such as np.mean is given, and the
# string form produces the same numbers.
df_t = df[df.Target == "RUS"].pivot_table(
    index="Year", columns="QuadClass", values="TotalEvents", aggfunc="mean"
)

# Divide each row by its own total so a year's classes sum to 1.
share_by_year = df_t.divide(df_t.sum(axis=1), axis=0)

# Long format for seaborn: one row per (Year, QuadClass) pair.
plot_data = pd.melt(
    share_by_year.reset_index(),
    id_vars=["Year"],
    value_vars=[1, 2, 3, 4],
    var_name="QuadClass",  # explicit; previously implied by the pivot's column name
    value_name="TotalEvents",
).assign(
    # Vectorised code -> label lookup (replaces a row-by-row apply(axis=1)).
    QuadClass=lambda frame: frame["QuadClass"].map(QUAD_CLASS_NAMES)
)

ax = sns.pointplot(
    x="Year", y="TotalEvents", hue="QuadClass",
    order=df_t.index.sort_values(),
    data=plot_data,
)
plt.xticks(rotation='vertical')
plt.ylabel("Proportion of event types")
plt.xlabel("Year")
plt.title("GDELT events between India (IND) and Russia (RUS) across years")
Out[8]:
In [9]:
# Mean TotalEvents per (Year, QuadClass) for rows whose Target is "USA".
# aggfunc is passed as the string "mean": recent pandas releases emit a
# FutureWarning when a NumPy callable such as np.mean is given, and the
# string form produces the same numbers.
df_t = df[df.Target == "USA"].pivot_table(
    index="Year", columns="QuadClass", values="TotalEvents", aggfunc="mean"
)

# Divide each row by its own total so a year's classes sum to 1.
share_by_year = df_t.divide(df_t.sum(axis=1), axis=0)

# Long format for seaborn: one row per (Year, QuadClass) pair.
plot_data = pd.melt(
    share_by_year.reset_index(),
    id_vars=["Year"],
    value_vars=[1, 2, 3, 4],
    var_name="QuadClass",  # explicit; previously implied by the pivot's column name
    value_name="TotalEvents",
).assign(
    # Vectorised code -> label lookup (replaces a row-by-row apply(axis=1)).
    QuadClass=lambda frame: frame["QuadClass"].map(QUAD_CLASS_NAMES)
)

ax = sns.pointplot(
    x="Year", y="TotalEvents", hue="QuadClass",
    order=df_t.index.sort_values(),
    data=plot_data,
)
plt.xticks(rotation='vertical')
plt.ylabel("Proportion of event types")
plt.xlabel("Year")
plt.title("GDELT events between India (IND) and United States (USA) across years")
Out[9]:
In [10]:
# Mean TotalEvents per (Year, QuadClass) for rows whose Target is "CHN".
# aggfunc is passed as the string "mean": recent pandas releases emit a
# FutureWarning when a NumPy callable such as np.mean is given, and the
# string form produces the same numbers.
df_t = df[df.Target == "CHN"].pivot_table(
    index="Year", columns="QuadClass", values="TotalEvents", aggfunc="mean"
)

# Divide each row by its own total so a year's classes sum to 1.
share_by_year = df_t.divide(df_t.sum(axis=1), axis=0)

# Long format for seaborn: one row per (Year, QuadClass) pair.
plot_data = pd.melt(
    share_by_year.reset_index(),
    id_vars=["Year"],
    value_vars=[1, 2, 3, 4],
    var_name="QuadClass",  # explicit; previously implied by the pivot's column name
    value_name="TotalEvents",
).assign(
    # Vectorised code -> label lookup (replaces a row-by-row apply(axis=1)).
    QuadClass=lambda frame: frame["QuadClass"].map(QUAD_CLASS_NAMES)
)

ax = sns.pointplot(
    x="Year", y="TotalEvents", hue="QuadClass",
    order=df_t.index.sort_values(),
    data=plot_data,
)
plt.xticks(rotation='vertical')
plt.ylabel("Proportion of event types")
plt.xlabel("Year")
plt.title("GDELT events between India (IND) and China (CHN) across years")
Out[10]:
In [11]:
# Mean TotalEvents per (Year, QuadClass) for rows whose Target is "GBR".
# aggfunc is passed as the string "mean": recent pandas releases emit a
# FutureWarning when a NumPy callable such as np.mean is given, and the
# string form produces the same numbers.
df_t = df[df.Target == "GBR"].pivot_table(
    index="Year", columns="QuadClass", values="TotalEvents", aggfunc="mean"
)

# Divide each row by its own total so a year's classes sum to 1.
share_by_year = df_t.divide(df_t.sum(axis=1), axis=0)

# Long format for seaborn: one row per (Year, QuadClass) pair.
plot_data = pd.melt(
    share_by_year.reset_index(),
    id_vars=["Year"],
    value_vars=[1, 2, 3, 4],
    var_name="QuadClass",  # explicit; previously implied by the pivot's column name
    value_name="TotalEvents",
).assign(
    # Vectorised code -> label lookup (replaces a row-by-row apply(axis=1)).
    QuadClass=lambda frame: frame["QuadClass"].map(QUAD_CLASS_NAMES)
)

ax = sns.pointplot(
    x="Year", y="TotalEvents", hue="QuadClass",
    order=df_t.index.sort_values(),
    data=plot_data,
)
plt.xticks(rotation='vertical')
plt.ylabel("Proportion of event types")
plt.xlabel("Year")
plt.title("GDELT events between India (IND) and Great Britain (GBR) across years")
Out[11]:
In [12]:
# Ten Target values with the largest summed TotalEvents, biggest first.
(
    df.groupby("Target")["TotalEvents"]
    .sum()
    .sort_values(ascending=False)
    .head(10)
)
Out[12]:
In [ ]: