In [1]:
    
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# Apply seaborn styling with a white-grid background.
sns.set(rc={"figure.figsize": (10, 6.180)})
sns.set_style("whitegrid")
# This assignment runs last, so it is the default figure size that actually
# applies (it replaces the (10, 6.180) passed to sns.set above).
plt.rcParams.update({"figure.figsize": [16.18033, 10]})
    
In [2]:
    
%matplotlib inline
    
In [37]:
    
# 2018 baseline GDP values used as the starting point of the projections.
# NOTE(review): presumably trillions of US dollars -- the historical series is
# divided by 1e12 when it is plotted against the projections; confirm.
china_2018, usa_2018 = 12.24, 19.39
    
In [38]:
    
# Compound-growth projection for 2018-2049: 6% per year for China and 2% per
# year for the US, both starting from the 2018 baselines.
year = np.arange(2018, 2050)
china = china_2018 * np.power(1.06, year - 2018)
usa = usa_2018 * np.power(1.02, year - 2018)
    
In [41]:
    
# Projected GDP (compound growth from the 2018 baselines) overlaid on the
# historical series. The log scale makes constant-rate growth a straight line.
# NOTE: `a` (historical GDP by year) is built in a later cell of this notebook;
# that cell has to be run before this one.
plt.rcParams['figure.figsize'] = [16.18033, 10]
plt.plot(year, china, label="China, 6% yearly increase")
plt.plot(year, usa, label="US, 2% yearly increase")
# Historical values are divided by 1e12 so they share an axis with the
# projections; they are labelled so the legend covers all four lines.
plt.plot(a.Year, a.China/1e12, label="China, historical")
plt.plot(a.Year, a.USA/1e12, label="US, historical")
plt.xlabel("Year")
plt.ylabel("GDP (trillions of current US$)")
plt.yscale("log")
plt.legend()
    
    Out[41]:
    
In [23]:
    
# Same projection with China growing 6.5% per year (the US stays at 2%),
# plotted on a linear scale.
year = np.arange(2018, 2050)
china = china_2018 * np.power(1.065, year - 2018)
usa = usa_2018 * np.power(1.02, year - 2018)
plt.plot(year, china, label="China")
plt.plot(year, usa, label="USA")
plt.legend()
    
    Out[23]:
    
In [25]:
    
# Same projection with China growing 5% per year (the US stays at 2%),
# plotted on a linear scale.
year = np.arange(2018, 2050)
china = china_2018 * np.power(1.05, year - 2018)
usa = usa_2018 * np.power(1.02, year - 2018)
plt.plot(year, china, label="China")
plt.plot(year, usa, label="USA")
plt.legend()
    
    Out[25]:
    
In [ ]:
    
    
In [15]:
    
# Load the World Bank GDP table (indicator NY.GDP.MKTP.CD, per the file name).
# The first four lines of the CSV are skipped, columns without any data are
# dropped, and spaces in column names become underscores so the columns can be
# referenced inside DataFrame.query (e.g. Country_Name).
GDP = pd.read_csv(
    "/Users/weilu/Dropbox/WorldBankData/API_NY.GDP.MKTP.CD_DS2_en_csv_v2_103640/API_NY.GDP.MKTP.CD_DS2_en_csv_v2_103640.csv",
    skiprows=4,
).dropna(axis=1, how="all")
GDP = GDP.rename(columns=lambda name: name.replace(" ", "_"))
    
In [32]:
    
# Reshape the GDP table into one row per year with one column per country
# (columns: Year, China, USA).
# Rows are picked by country name and year columns by their label, so the
# result does not depend on the row order of the CSV or on how many metadata
# columns precede the yearly values. The index of `a` is a plain 0..n-1 range.
_year_columns = [col for col in GDP.columns if col.isdigit()]
_gdp_by_country = GDP.set_index("Country_Name")[_year_columns]
a = pd.DataFrame({
    "Year": [int(col) for col in _year_columns],
    "China": _gdp_by_country.loc["China"].astype(float).values,
    "USA": _gdp_by_country.loc["United States"].astype(float).values,
})
    
In [35]:
    
# Historical GDP of both countries, labelled so the two lines can be told
# apart and the figure can be read on its own.
plt.plot(a.Year, a.China, label="China")
plt.plot(a.Year, a.USA, label="USA")
plt.xlabel("Year")
plt.ylabel("GDP (current US$)")
plt.legend()
    
    Out[35]:
    
In [3]:
    
# Country metadata that accompanies the GDP-per-capita download; columns that
# contain no data at all are dropped.
code = pd.read_csv(
    "/Users/weilu/Dropbox/WorldBankData/averageGDP/Metadata_Country_API_NY.GDP.PCAP.CD_DS2_en_csv_v2_10224851.csv"
)
code = code.dropna(axis=1, how="all")
    
In [132]:
    
# Metadata rows whose Region is missing; their country codes are skipped in
# the population comparisons further down.
# NOTE(review): presumably these are aggregate (non-country) entries -- confirm.
exclude = code[code["Region"].isna()]
exclude
    
    Out[132]:
In [136]:
    
# Is the code "WLD" among the excluded rows?
"WLD" in exclude["Country Code"].tolist()
    
    Out[136]:
In [109]:
    
# Load the World Bank GDP-per-capita table (indicator NY.GDP.PCAP.CD, per the
# file name). The first four lines of the CSV are skipped, columns without any
# data are dropped, and spaces in column names become underscores so the
# columns can be referenced inside DataFrame.query (e.g. Country_Name).
averageGDP = pd.read_csv(
    "/Users/weilu/Dropbox/WorldBankData/averageGDP/API_NY.GDP.PCAP.CD_DS2_en_csv_v2_10224851.csv",
    skiprows=4,
).dropna(axis=1, how="all")
averageGDP = averageGDP.rename(columns=lambda name: name.replace(" ", "_"))
    
In [112]:
    
# For each year 1960-2017, count the rows that have a positive GDP-per-capita
# value (NaN > 0 is False, so missing entries are not counted).
t = np.sum(
    averageGDP.dropna(axis=1, how="all")[list(map(str, range(1960, 2018)))].values > 0,
    axis=0,
)
    
    
In [114]:
    
# Heat map of the GDP-per-capita table restricted to the 1960-2017 columns.
# The table is transposed for display: one image row per year, one image
# column per table row.
m = averageGDP.dropna(axis=1, how="all").loc[:, list(map(str, range(1960, 2018)))]
plt.figure(figsize=(20, 10))
plt.imshow(m.T)
plt.colorbar()
    
    Out[114]:
    
In [115]:
    
# Load the World Bank total-population table (indicator SP.POP.TOTL, per the
# file name). The first four lines of the CSV are skipped, columns without any
# data are dropped, and spaces in column names become underscores so the
# columns can be referenced inside DataFrame.query (e.g. Country_Name).
population = pd.read_csv(
    "/Users/weilu/Dropbox/WorldBankData/Population/API_SP.POP.TOTL_DS2_en_csv_v2_10203548.csv",
    skiprows=4,
)
population = population.dropna(axis=1, how="all")
population = population.rename(columns=lambda name: name.replace(" ", "_"))
    
In [116]:
    
# Heat map of the GDP-per-capita table restricted to the 1960-2017 columns.
# The table is transposed for display: one image row per year, one image
# column per table row.
m = averageGDP.dropna(axis=1, how="all").loc[:, list(map(str, range(1960, 2018)))]
plt.figure(figsize=(20, 10))
plt.imshow(m.T)
plt.colorbar()
    
    Out[116]:
    
In [117]:
    
# Number of rows in the GDP-per-capita table.
averageGDP.shape[0]
    
    Out[117]:
In [118]:
    
# Number of rows in the population table.
population.shape[0]
    
    Out[118]:
In [119]:
    
# Display the current value of `china`.
# NOTE(review): `china` is rebound by several cells (projection arrays above,
# a per-year scalar in the loops below), so what this shows depends on the
# order in which the cells were run -- confirm which one is intended.
china
    
    Out[119]:
In [172]:
    
# Population-weighted comparison of every country's GDP per capita with
# China's, for a single year (2017).
#
# Prints, per year: [year, share of the counted population living in countries
# below China, the same share with China's own population added, counted
# population, China's GDP per capita]. Finally prints the population (in
# millions) of the countries that are above both China and the fixed 10814
# threshold. Countries with higher-or-equal GDP per capita and more than
# 10 million people are listed along the way.
#
# A row is skipped when its code is in `exclude` or when either its GDP per
# capita or its population is NaN for that year. A country that has no row in
# `population` raises KeyError.
#
# NOTE: the loop rebinds the notebook-level names `year` and `china`.
# NOTE(review): the origin of the 10814 threshold is not documented -- confirm.

# Built once instead of per country: the codes to skip, and the population
# table indexed by country name (first row wins if a name is repeated).
_excluded_codes = set(exclude["Country Code"].values)
_population_by_name = population.drop_duplicates("Country_Name").set_index("Country_Name")

higher_than_china_pop = 0
for year in range(2017, 2018):
    year_col = str(year)
    china = averageGDP.query("Country_Name == 'China'")[year_col].values[0]
    p_china = population.query("Country_Name == 'China'")[year_col].values[0]
    _pop_this_year = _population_by_name[year_col]
    p_below_china = 0.0
    total_p_this_year = 0.0
    for country, this_country_code, gdp_per_capita in zip(
        averageGDP["Country_Name"], averageGDP["Country_Code"], averageGDP[year_col]
    ):
        if this_country_code in _excluded_codes:
            continue
        if np.isnan(gdp_per_capita):
            continue
        pop = _pop_this_year.at[country]
        if np.isnan(pop):
            continue
        if gdp_per_capita < china:
            p_below_china += pop
        elif pop > 1e7:
            # Only list countries with more than 10 million inhabitants.
            print(country, gdp_per_capita, pop/1e6)
        if gdp_per_capita > china and gdp_per_capita > 10814:
            higher_than_china_pop += pop
        total_p_this_year += pop
    below_percent = p_below_china/total_p_this_year
    include_china = (p_below_china + p_china)/total_p_this_year
    tmp = [year, below_percent, include_china, total_p_this_year, china]
    print(tmp)
print(higher_than_china_pop/1e6)
    
    
In [166]:
    
# Population-weighted comparison of every country's GDP per capita with
# China's, for a single year (2017).
#
# Appends [year, share of the counted population living in countries below
# China, the same share with China's own population added, counted population]
# to `data` and prints it. Countries at or above China with more than
# 10 million people are listed with their GDP per capita and population in
# millions; every country strictly above China is also printed by name.
# Finally prints the population (in millions) of the large countries that are
# strictly above China.
#
# A row is skipped when its code is in `exclude` or when either its GDP per
# capita or its population is NaN for that year. A country that has no row in
# `population` raises KeyError.
#
# NOTE: the loop rebinds the notebook-level names `year` and `china`.

# Built once instead of per country: the codes to skip, and the population
# table indexed by country name (first row wins if a name is repeated).
_excluded_codes = set(exclude["Country Code"].values)
_population_by_name = population.drop_duplicates("Country_Name").set_index("Country_Name")

data = []
higher_than_china_pop = 0.0
for year in range(2017, 2018):
    year_col = str(year)
    china = averageGDP.query("Country_Name == 'China'")[year_col].values[0]
    p_china = population.query("Country_Name == 'China'")[year_col].values[0]
    _pop_this_year = _population_by_name[year_col]
    p_below_china = 0.0
    total_p_this_year = 0.0
    for country, this_country_code, gdp_per_capita in zip(
        averageGDP["Country_Name"], averageGDP["Country_Code"], averageGDP[year_col]
    ):
        if this_country_code in _excluded_codes:
            continue
        if np.isnan(gdp_per_capita):
            continue
        pop = _pop_this_year.at[country]
        if np.isnan(pop):
            continue
        if gdp_per_capita < china:
            p_below_china += pop
        elif pop > 1e7:
            # Only list countries with more than 10 million inhabitants.
            print(country, gdp_per_capita, pop/1e6)
            # China's own row lands in this branch (its value equals `china`);
            # it is still listed, but only strictly higher countries are
            # added to the "higher than China" total.
            if gdp_per_capita > china:
                higher_than_china_pop += pop
        if gdp_per_capita > china:
            print(country)
        total_p_this_year += pop
    below_percent = p_below_china/total_p_this_year
    include_china = (p_below_china + p_china)/total_p_this_year
    tmp = [year, below_percent, include_china, total_p_this_year]
    data.append(tmp)
    print(tmp)
print(higher_than_china_pop/1e6)
    
    
In [ ]:
    
    
In [141]:
    
# For every year 1960-2017, compute the share of the counted population that
# lives in countries whose GDP per capita is below China's.
#
# `data` ends up as a 2-D array with one row per year and the columns
# [year, share below China, share below China plus China itself,
#  counted population, China's GDP per capita]; each row is also printed.
#
# A row of the GDP-per-capita table is skipped when its code is in `exclude`
# or when either its GDP per capita or its population is NaN for that year.
# A country that has no row in `population` raises KeyError.
#
# NOTE: the loop rebinds the notebook-level names `year` and `china`.

# Built once instead of per country and year: the codes to skip, and the
# population table indexed by country name (first row wins if a name repeats).
_excluded_codes = set(exclude["Country Code"].values)
_population_by_name = population.drop_duplicates("Country_Name").set_index("Country_Name")

data = []
for year in range(1960, 2018):
    year_col = str(year)
    china = averageGDP.query("Country_Name == 'China'")[year_col].values[0]
    p_china = population.query("Country_Name == 'China'")[year_col].values[0]
    _pop_this_year = _population_by_name[year_col]
    p_below_china = 0.0
    total_p_this_year = 0.0
    for country, this_country_code, gdp_per_capita in zip(
        averageGDP["Country_Name"], averageGDP["Country_Code"], averageGDP[year_col]
    ):
        if this_country_code in _excluded_codes:
            continue
        if np.isnan(gdp_per_capita):
            continue
        pop = _pop_this_year.at[country]
        if np.isnan(pop):
            continue
        if gdp_per_capita < china:
            p_below_china += pop
        total_p_this_year += pop
    below_percent = p_below_china/total_p_this_year
    include_china = (p_below_china + p_china)/total_p_this_year
    tmp = [year, below_percent, include_china, total_p_this_year, china]
    data.append(tmp)
    print(tmp)
data = np.array(data)
    
    
In [144]:
    
# China's GDP per capita (column 4 of `data`) against the year (column 0).
plt.plot(data[:, 0], data[:, 4])
plt.xlabel("Year")
plt.ylabel("China GDP per capita")
    
    Out[144]:
    
In [148]:
    
# Same curve restricted to the first n rows of `data`
# (1960-1999 when `data` starts at 1960 and n is 40).
n = 40
plt.plot(data[:n, 0], data[:n, 4])
plt.xlabel("Year")
plt.ylabel("China GDP per capita")
    
    Out[148]:
    
In [140]:
    
# Population included in the comparison each year (column 3 of `data`), i.e.
# the summed population of the rows that were not skipped.
plt.plot(data[:, 0], data[:, 3])
plt.xlabel("Year")
plt.ylabel("Population included in the comparison")
    
    Out[140]:
    
In [145]:
    
# Share of the counted population in countries with a lower GDP per capita
# than China (column 1), and the same share with China's own population added
# (column 2), per year. Labelled so the two series can be told apart.
data = np.array(data)  # no-op when `data` is already an array
plt.scatter(data[:, 0], data[:, 1], label="Countries below China")
plt.scatter(data[:, 0], data[:, 2], label="Countries below China, plus China")
plt.ylim([0, 1])
plt.xlabel("Year")
plt.ylabel("Share of counted population")
plt.legend()
    
    Out[145]:
    
In [146]:
    
# Line version of the same figure: share of the counted population in
# countries with a lower GDP per capita than China (column 1), and the same
# share with China's own population added (column 2), per year.
data = np.array(data)  # no-op when `data` is already an array
plt.plot(data[:, 0], data[:, 1], label="Countries below China")
plt.plot(data[:, 0], data[:, 2], label="Countries below China, plus China")
plt.ylim([0, 1])
plt.xlabel("Year")
plt.ylabel("Share of counted population")
plt.legend()
    
    Out[146]: