In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# Global plot styling: seaborn defaults with a white grid, and a default figure
# size of 16.18 x 10 inches.  The rc={'figure.figsize': (10, 6.180)} that used
# to be passed to sns.set() was overwritten by the rcParams assignment below
# and never took effect, so it has been dropped.
sns.set()
sns.set_style("whitegrid")
plt.rcParams['figure.figsize'] = [16.18033, 10]
In [2]:
# Render matplotlib figures inline in the notebook output.
# NOTE(review): some IPython/ipykernel versions reset figure rcParams when the
# inline backend is activated, which may be why figure.figsize is assigned
# again in a later plotting cell — confirm before removing either assignment.
%matplotlib inline
In [37]:
# Starting GDP levels used by the projection cells.
# Presumably in trillions of US dollars: the comparison plot divides the
# World Bank series by 1e12 before drawing it next to these projections.
# NOTE(review): confirm the reference year and source of these two figures.
china_2018, usa_2018 = 12.24, 19.39
In [38]:
# Compound-growth projection for 2018..2049:
# China grows 6% per year, the USA 2% per year, both from their starting levels.
year = np.arange(2018, 2050)
china = china_2018 * np.power(1.06, year - 2018)
usa = usa_2018 * np.power(1.02, year - 2018)
In [41]:
# Projection versus history on a logarithmic GDP axis.
# NOTE: this cell reads `year`, `china` and `usa` from the projection cell and
# the historical frame `a` (columns Year / China / USA).  `a` is built in a
# cell that appears further down the notebook, so that cell must be run first.
plt.rcParams['figure.figsize'] = [16.18033, 10]
plt.plot(year, china, label="China, 6% yearly increase")
plt.plot(year, usa, label="US, 2% yearly increase")
# The historical series are divided by 1e12 to share the projections' scale.
# They are labelled so that all four lines show up in the legend.
plt.plot(a.Year, a.China/1e12, label="China, historical")
plt.plot(a.Year, a.USA/1e12, label="US, historical")
plt.xlabel("Year")
plt.ylabel("GDP")
plt.yscale("log")
plt.legend()
Out[41]:
In [23]:
# Scenario: China grows 6.5% per year, the USA 2% per year, for 2018..2049.
year = np.arange(2018, 2050)
china = china_2018 * np.power(1.065, year - 2018)
usa = usa_2018 * np.power(1.02, year - 2018)
# Draw both projections on the current axes and show the legend.
ax = plt.gca()
ax.plot(year, china, label="China")
ax.plot(year, usa, label="USA")
ax.legend()
Out[23]:
In [25]:
# Scenario: China grows 5% per year, the USA 2% per year, for 2018..2049.
year = np.arange(2018, 2050)
china = china_2018 * np.power(1.05, year - 2018)
usa = usa_2018 * np.power(1.02, year - 2018)
# Draw both projections on the current axes and show the legend.
ax = plt.gca()
ax.plot(year, china, label="China")
ax.plot(year, usa, label="USA")
ax.legend()
Out[25]:
In [ ]:
In [15]:
# Load the GDP table (the first four lines of the file are skipped), drop
# columns that are entirely empty, and replace spaces in column names with
# underscores so they can be used in DataFrame.query expressions.
# NOTE: the path is absolute and specific to one machine.
GDP = (
    pd.read_csv("/Users/weilu/Dropbox/WorldBankData/API_NY.GDP.MKTP.CD_DS2_en_csv_v2_103640/API_NY.GDP.MKTP.CD_DS2_en_csv_v2_103640.csv", skiprows=4)
    .dropna(axis="columns", how="all")
    .rename(columns=lambda label: label.replace(" ", "_"))
)
In [32]:
# Historical GDP for China and the United States as a frame with the columns
# Year (int), China (float) and USA (float), one row per year column of `GDP`.
#
# The two countries are selected by name in a fixed order, so the China/USA
# column labels no longer depend on the order in which the rows appear in the
# CSV.  Year columns are recognised by their all-digit label instead of
# assuming that exactly four metadata columns come first.
year_labels = [label for label in GDP.columns if str(label).isdigit()]
a = (
    GDP.set_index("Country_Name")
    .loc[["China", "United States"], year_labels]
    .T
    .astype(float)
)
a.columns = ["China", "USA"]  # order is fixed by the .loc selection above
a.index.name = "Year"
a = a.reset_index()
a.Year = a.Year.astype(int)
In [35]:
# Historical GDP of both countries in the units of the source file.
# Series and axes are labelled so the figure can be read on its own.
plt.plot(a.Year, a.China, label="China")
plt.plot(a.Year, a.USA, label="USA")
plt.xlabel("Year")
plt.ylabel("GDP")
plt.legend()
Out[35]:
In [3]:
# Country metadata table; columns that are entirely empty are removed.
# NOTE: the path is absolute and specific to one machine.
country_metadata_raw = pd.read_csv("/Users/weilu/Dropbox/WorldBankData/averageGDP/Metadata_Country_API_NY.GDP.PCAP.CD_DS2_en_csv_v2_10224851.csv")
code = country_metadata_raw.dropna(axis="columns", how="all")
In [132]:
# Metadata rows whose Region is missing.  These rows are skipped by the
# population loops below.
# Presumably they are aggregates rather than individual countries — TODO confirm.
exclude = code[code["Region"].isna()]
exclude
Out[132]:
In [136]:
# Sanity check: is the code "WLD" among the excluded rows?
"WLD" in set(exclude["Country Code"])
Out[136]:
In [109]:
# Load the GDP-per-capita table (the first four lines of the file are
# skipped), drop columns that are entirely empty, and replace spaces in column
# names with underscores so they can be used in DataFrame.query expressions.
# NOTE: the path is absolute and specific to one machine.
averageGDP = (
    pd.read_csv("/Users/weilu/Dropbox/WorldBankData/averageGDP/API_NY.GDP.PCAP.CD_DS2_en_csv_v2_10224851.csv", skiprows=4)
    .dropna(axis="columns", how="all")
    .rename(columns=lambda label: label.replace(" ", "_"))
)
In [112]:
# For each year column 1960..2017, count the rows holding a strictly positive
# GDP-per-capita value (missing values compare as not positive).
year_columns = list(map(str, range(1960, 2018)))
gdp_per_capita_values = averageGDP.dropna(axis=1, how="all")[year_columns].values
t = (gdp_per_capita_values > 0).sum(axis=0)
In [114]:
# Heat map of the GDP-per-capita table: one image row per year (1960..2017),
# one image column per table row, with a colour bar for the value scale.
year_columns = list(map(str, range(1960, 2018)))
m = averageGDP.dropna(axis=1, how="all").loc[:, year_columns]
plt.figure(figsize=(20, 10))
heatmap = plt.imshow(m.transpose())
plt.colorbar(heatmap)
Out[114]:
In [115]:
# Load the population table (the first four lines of the file are skipped),
# drop columns that are entirely empty, and replace spaces in column names
# with underscores so they can be used in DataFrame.query expressions.
# NOTE: the path is absolute and specific to one machine.
population = (
    pd.read_csv("/Users/weilu/Dropbox/WorldBankData/Population/API_SP.POP.TOTL_DS2_en_csv_v2_10203548.csv", skiprows=4)
    .dropna(axis="columns", how="all")
    .rename(columns=lambda label: label.replace(" ", "_"))
)
In [116]:
# Same heat map as the earlier cell: years 1960..2017 down the image, one
# image column per row of the GDP-per-capita table, plus a colour bar.
year_columns = [str(calendar_year) for calendar_year in range(1960, 2018)]
m = averageGDP.dropna(axis=1, how="all").loc[:, year_columns]
plt.figure(figsize=(20, 10))
heatmap = plt.imshow(m.transpose())
plt.colorbar(heatmap)
Out[116]:
In [117]:
# Number of rows in the GDP-per-capita table.
averageGDP.shape[0]
Out[117]:
In [118]:
# Number of rows in the population table.
population.shape[0]
Out[118]:
In [119]:
# Display the current value of `china`.  Which object this shows depends on
# the cell that assigned it last: the projection cells bind an array, while
# the population loops further down rebind it to a single GDP-per-capita value.
china
Out[119]:
In [172]:
# Population split around China's GDP per capita, for 2017 only.
#
# Every row of `averageGDP` that is not listed in `exclude` and has both a
# GDP-per-capita value and a population value for the year contributes its
# population to:
#   * total_p_this_year      always;
#   * p_below_china          when its GDP per capita is below China's;
#   * higher_than_china_pop  when its GDP per capita is above China's and
#                            above GDP_PER_CAPITA_FLOOR.
# Rows at or above China's level with more than 10 million inhabitants are
# printed as (name, GDP per capita, population in millions).
#
# Populations are looked up by country name in an indexed table built once,
# instead of building and running a query string for every row.  A country
# missing from `population` is skipped like any other missing value instead of
# raising IndexError.
#
# NOTE: this cell rebinds the notebook-level names `year` and `china`, which
# the projection cells also use.
GDP_PER_CAPITA_FLOOR = 10814  # NOTE(review): origin of this threshold is undocumented — confirm
LARGE_POPULATION = 1e7  # 10 million people
excluded_codes = set(exclude["Country Code"])
# Keep the first row per name, matching the previous `.values[0]` lookup.
population_by_name = population.drop_duplicates("Country_Name").set_index("Country_Name")

higher_than_china_pop = 0
for year in range(2017, 2018):
    year_col = str(year)
    china = averageGDP.query("Country_Name == 'China'")[year_col].values[0]
    p_china = population.query("Country_Name == 'China'")[year_col].values[0]
    population_this_year = population_by_name[year_col]
    p_below_china = 0.0
    total_p_this_year = 0.0
    for country, country_code, gdp_per_capita in zip(
        averageGDP["Country_Name"], averageGDP["Country_Code"], averageGDP[year_col]
    ):
        if country_code in excluded_codes:
            continue
        pop = population_this_year.get(country, np.nan)
        if np.isnan(gdp_per_capita) or np.isnan(pop):
            continue
        if gdp_per_capita < china:
            p_below_china += pop
        elif pop > LARGE_POPULATION:
            print(country, gdp_per_capita, pop/1e6)
        if gdp_per_capita > china and gdp_per_capita > GDP_PER_CAPITA_FLOOR:
            higher_than_china_pop += pop
        total_p_this_year += pop
    below_percent = p_below_china/total_p_this_year
    # China itself is never counted as "below"; add it back for the inclusive share.
    include_china = (p_below_china + p_china)/total_p_this_year
    tmp = [year, below_percent, include_china, total_p_this_year, china]
    print(tmp)
print(higher_than_china_pop/1e6)
In [166]:
# Population split around China's GDP per capita, for 2017 only, with a
# listing of the economies at or above China's level.
#
# Every row of `averageGDP` that is not listed in `exclude` and has both a
# GDP-per-capita value and a population value for the year contributes its
# population to total_p_this_year, and to p_below_china when its GDP per capita
# is below China's.  One [year, share below, share below incl. China, total]
# record per year is appended to `data`.
#
# Populations are looked up by country name in an indexed table built once,
# instead of building and running a query string for every row.  A country
# missing from `population` is skipped like any other missing value instead of
# raising IndexError.
#
# NOTE(review): the original indentation of this cell was lost, so the nesting
# of `higher_than_china_pop += pop` could not be recovered with certainty.  It
# is placed at the level of the "not below China" branch (all sizes, China
# included); the alternative reading nests it under the large-population
# check.  Confirm which was intended.
# NOTE: this cell rebinds the notebook-level names `year` and `china`, which
# the projection cells also use.
LARGE_POPULATION = 1e7  # 10 million people
excluded_codes = set(exclude["Country Code"])
# Keep the first row per name, matching the previous `.values[0]` lookup.
population_by_name = population.drop_duplicates("Country_Name").set_index("Country_Name")

data = []
higher_than_china_pop = 0.0
for year in range(2017, 2018):
    year_col = str(year)
    china = averageGDP.query("Country_Name == 'China'")[year_col].values[0]
    p_china = population.query("Country_Name == 'China'")[year_col].values[0]
    population_this_year = population_by_name[year_col]
    p_below_china = 0.0
    total_p_this_year = 0.0
    for country, country_code, gdp_per_capita in zip(
        averageGDP["Country_Name"], averageGDP["Country_Code"], averageGDP[year_col]
    ):
        if country_code in excluded_codes:
            continue
        pop = population_this_year.get(country, np.nan)
        if np.isnan(gdp_per_capita) or np.isnan(pop):
            continue
        if gdp_per_capita < china:
            p_below_china += pop
        else:
            if pop > LARGE_POPULATION:
                print(country, gdp_per_capita, pop/1e6)
            higher_than_china_pop += pop
            if gdp_per_capita > china:
                print(country)
        total_p_this_year += pop
    below_percent = p_below_china/total_p_this_year
    # China itself is never counted as "below"; add it back for the inclusive share.
    include_china = (p_below_china + p_china)/total_p_this_year
    tmp = [year, below_percent, include_china, total_p_this_year]
    data.append(tmp)
    print(tmp)
print(higher_than_china_pop/1e6)
In [ ]:
In [141]:
# Yearly population split around China's GDP per capita, 1960..2017.
#
# For each year, every row of `averageGDP` that is not listed in `exclude` and
# has both a GDP-per-capita value and a population value contributes its
# population to total_p_this_year, and to p_below_china when its GDP per capita
# is below China's.  `data` ends up as an array with one row per year and the
# columns:
#   0 year, 1 share below China, 2 same share with China's population added,
#   3 total counted population, 4 China's GDP per capita.
#
# Populations are looked up by country name in an indexed table built once,
# instead of building and running a query string for every row and year.  A
# country missing from `population` is skipped like any other missing value
# instead of raising IndexError.
#
# NOTE: this cell rebinds the notebook-level names `year` and `china`, which
# the projection cells also use.
excluded_codes = set(exclude["Country Code"])
# Keep the first row per name, matching the previous `.values[0]` lookup.
population_by_name = population.drop_duplicates("Country_Name").set_index("Country_Name")

data = []
for year in range(1960, 2018):
    year_col = str(year)
    china = averageGDP.query("Country_Name == 'China'")[year_col].values[0]
    p_china = population.query("Country_Name == 'China'")[year_col].values[0]
    population_this_year = population_by_name[year_col]
    p_below_china = 0.0
    total_p_this_year = 0.0
    for country, country_code, gdp_per_capita in zip(
        averageGDP["Country_Name"], averageGDP["Country_Code"], averageGDP[year_col]
    ):
        if country_code in excluded_codes:
            continue
        pop = population_this_year.get(country, np.nan)
        if np.isnan(gdp_per_capita) or np.isnan(pop):
            continue
        if gdp_per_capita < china:
            p_below_china += pop
        total_p_this_year += pop
    below_percent = p_below_china/total_p_this_year
    # China itself is never counted as "below"; add it back for the inclusive share.
    include_china = (p_below_china + p_china)/total_p_this_year
    tmp = [year, below_percent, include_china, total_p_this_year, china]
    data.append(tmp)
    print(tmp)
data = np.array(data)
In [144]:
# Column 4 of `data` (China's GDP per capita) against column 0 (year).
plt.gca().plot(data[:, 0], data[:, 4])
Out[144]:
In [148]:
# Same curve as the previous cell, restricted to the first `n` rows of `data`.
n = 40
plt.plot(data[:n, 0], data[:n, 4])
Out[148]:
In [140]:
# Column 3 of `data` (total counted population) against column 0 (year).
plt.gca().plot(data[:, 0], data[:, 3])
Out[140]:
In [145]:
# Scatter view of the share of the counted population living in economies
# with a lower GDP per capita than China, by year.
# Columns of `data` used here: 0 = year, 1 = share below China,
# 2 = the same share with China's own population added.
# The two series are labelled so they can be told apart.
data = np.array(data)
plt.scatter(data[:,0], data[:,1], label="Below China")
plt.scatter(data[:,0], data[:,2], label="Below China, plus China itself")
plt.xlabel("Year")
plt.ylabel("Share of counted population")
plt.legend()
plt.ylim([0,1])
Out[145]:
In [146]:
# Line view of the share of the counted population living in economies with
# a lower GDP per capita than China, by year.
# Columns of `data` used here: 0 = year, 1 = share below China,
# 2 = the same share with China's own population added.
# The two series are labelled so they can be told apart.
data = np.array(data)
plt.plot(data[:,0], data[:,1], label="Below China")
plt.plot(data[:,0], data[:,2], label="Below China, plus China itself")
plt.xlabel("Year")
plt.ylabel("Share of counted population")
plt.legend()
plt.ylim([0,1])
Out[146]: