In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

sns.set(rc={'figure.figsize':(10,6.180)})
sns.set_style("whitegrid")
plt.rcParams['figure.figsize'] = [16.18033, 10]

In [2]:
%matplotlib inline

In [37]:
china_2018 = 12.24
usa_2018 = 19.39

usa 2%, china 6%


In [38]:
year = np.array(range(2018, 2050))
china = china_2018 * ((1.06)**(year-2018))
usa = usa_2018 * ((1.02)**(year-2018))

In [41]:
plt.rcParams['figure.figsize'] = [16.18033, 10]
plt.plot(year, china, label="China, 6% yearly increase")
plt.plot(year, usa, label="US, 2% yearly increase")
plt.plot(a.Year, a.China/1e12)
plt.plot(a.Year, a.USA/1e12)
plt.xlabel("Year")
plt.ylabel("GDP")
plt.yscale("log")
plt.legend()


Out[41]:
<matplotlib.legend.Legend at 0x1a17b4a358>

USA 2%, China 6.5%


In [23]:
year = np.array(range(2018, 2050))
china = china_2018 * ((1.065)**(year-2018))
usa = usa_2018 * ((1.02)**(year-2018))

plt.plot(year, china, label="China")
plt.plot(year, usa, label="USA")
plt.legend()


Out[23]:
<matplotlib.legend.Legend at 0x1a19a37908>

In [25]:
year = np.array(range(2018, 2050))
china = china_2018 * ((1.05)**(year-2018))
usa = usa_2018 * ((1.02)**(year-2018))

plt.plot(year, china, label="China")
plt.plot(year, usa, label="USA")
plt.legend()


Out[25]:
<matplotlib.legend.Legend at 0x1a196d2908>

In [ ]:


In [15]:
GDP = pd.read_csv("/Users/weilu/Dropbox/WorldBankData/API_NY.GDP.MKTP.CD_DS2_en_csv_v2_103640/API_NY.GDP.MKTP.CD_DS2_en_csv_v2_103640.csv", skiprows=4)

GDP = GDP.dropna(axis=1, how="all")
GDP.columns = GDP.columns.str.replace(" ", "_")

In [32]:
a = GDP.query("Country_Name in ['China', 'United States']").T.reset_index()[4:]
a.columns = ["Year", "China", "USA"]
a.Year = a.Year.astype(int)
a.China = a.China.astype(float)
a.USA = a.USA.astype(float)

In [35]:
plt.plot(a.Year, a.China)
plt.plot(a.Year, a.USA)


Out[35]:
[<matplotlib.lines.Line2D at 0x1a1782a588>]

In [3]:
code = pd.read_csv("/Users/weilu/Dropbox/WorldBankData/averageGDP/Metadata_Country_API_NY.GDP.PCAP.CD_DS2_en_csv_v2_10224851.csv").dropna(axis=1, how="all")

In [132]:
exclude = code.query("Region != Region")
exclude


Out[132]:
Country Code Region IncomeGroup SpecialNotes TableName
5 ARB NaN NaN Arab World aggregate. Arab World is composed o... Arab World
34 CEB NaN NaN Central Europe and the Baltics aggregate. Central Europe and the Baltics
47 CSS NaN NaN NaN Caribbean small states
59 EAP NaN NaN East Asia and Pacific regional aggregate (does... East Asia & Pacific (excluding high income)
60 EAR NaN NaN Early-dividend countries are mostly lower-midd... Early-demographic dividend
61 EAS NaN NaN East Asia and Pacific regional aggregate (incl... East Asia & Pacific
62 ECA NaN NaN Europe and Central Asia regional aggregate (do... Europe & Central Asia (excluding high income)
63 ECS NaN NaN Europe and Central Asia regional aggregate (in... Europe & Central Asia
66 EMU NaN NaN Euro area aggregate. Euro area
71 EUU NaN NaN European Union aggregate. European Union
72 FCS NaN NaN Fragile and conflict affected situations aggre... Fragile and conflict affected situations
93 HIC NaN NaN High income group aggregate. High-income econo... High income
96 HPC NaN NaN Heavily indebted poor countries aggregate. Heavily indebted poor countries (HIPC)
100 IBD NaN NaN IBRD only group aggregate. IBRD only
101 IBT NaN NaN IDA and IBRD total group aggregate (includes I... IDA & IBRD total
102 IDA NaN NaN IDA total group aggregate (includes IDA only a... IDA total
103 IDB NaN NaN IDA blend group aggregate. IDA blend
105 IDX NaN NaN IDA only group aggregate. IDA only
125 LAC NaN NaN NaN Latin America & Caribbean (excluding high income)
131 LCN NaN NaN NaN Latin America & Caribbean
132 LDC NaN NaN NaN Least developed countries: UN classification
133 LIC NaN NaN NaN Low income
136 LMC NaN NaN NaN Lower middle income
137 LMY NaN NaN NaN Low & middle income
139 LTE NaN NaN NaN Late-demographic dividend
150 MEA NaN NaN Middle East and North Africa regional aggregat... Middle East & North Africa
153 MIC NaN NaN Middle income group aggregate. Middle-income e... Middle income
158 MNA NaN NaN NaN Middle East & North Africa (excluding high inc...
167 NAC NaN NaN North America regional aggregate. There are no... North America
178 OED NaN NaN Aggregations include Lithuania. OECD members
180 OSS NaN NaN NaN Other small states
188 PRE NaN NaN Pre-dividend countries are mostly low-income c... Pre-demographic dividend
194 PSS NaN NaN Pacific island small states aggregate. Pacific island small states
195 PST NaN NaN Post-dividend countries are mostly high-income... Post-demographic dividend
201 SAS NaN NaN NaN South Asia
212 SSA NaN NaN Sub-Saharan Africa regional aggregate (does no... Sub-Saharan Africa (excluding high income)
214 SSF NaN NaN Sub-Saharan Africa regional aggregate (include... Sub-Saharan Africa
215 SST NaN NaN Small states aggregate. Includes 41 members of... Small states
227 TEA NaN NaN East Asia & Pacific (IDA & IBRD countries) agg... East Asia & Pacific (IDA & IBRD)
228 TEC NaN NaN Europe & Central Asia (IDA & IBRD countries) a... Europe & Central Asia (IDA & IBRD)
233 TLA NaN NaN Latin America & the Caribbean (IDA & IBRD coun... Latin America & Caribbean (IDA & IBRD)
235 TMN NaN NaN Middle East & North Africa (IDA & IBRD countri... Middle East & North Africa (IDA & IBRD)
237 TSA NaN NaN South Asia (IDA & IBRD countries) aggregate. South Asia (IDA & IBRD)
238 TSS NaN NaN Sub-Saharan Africa (IDA & IBRD countries) aggr... Sub-Saharan Africa (IDA & IBRD)
246 UMC NaN NaN Upper middle income group aggregate. Upper-mid... Upper middle income
256 WLD NaN NaN World aggregate. World

In [136]:
"WLD" in exclude["Country Code"].values


Out[136]:
True

In [109]:
averageGDP = pd.read_csv("/Users/weilu/Dropbox/WorldBankData/averageGDP/API_NY.GDP.PCAP.CD_DS2_en_csv_v2_10224851.csv", skiprows=4)

averageGDP = averageGDP.dropna(axis=1, how="all")
averageGDP.columns = averageGDP.columns.str.replace(" ", "_")

In [112]:
t = (averageGDP.dropna(axis=1, how="all")[[str(i) for i in range(1960, 2018)]].values > 0).sum(axis=0)


/Users/weilu/anaconda3/lib/python3.6/site-packages/ipykernel_launcher.py:1: RuntimeWarning: invalid value encountered in greater
  """Entry point for launching an IPython kernel.

In [114]:
m = averageGDP.dropna(axis=1, how="all")[[str(i) for i in range(1960, 2018)]]
plt.figure(figsize=(20,10))
plt.imshow(m.T)
plt.colorbar()


Out[114]:
<matplotlib.colorbar.Colorbar at 0x1a25302a90>

In [115]:
population = pd.read_csv("/Users/weilu/Dropbox/WorldBankData/Population/API_SP.POP.TOTL_DS2_en_csv_v2_10203548.csv", skiprows=4).dropna(axis=1, how="all")
population.columns = population.columns.str.replace(" ", "_")

In [116]:
m = averageGDP.dropna(axis=1, how="all")[[str(i) for i in range(1960, 2018)]]
plt.figure(figsize=(20,10))
plt.imshow(m.T)
plt.colorbar()


Out[116]:
<matplotlib.colorbar.Colorbar at 0x1a24b53240>

In [117]:
len(averageGDP)


Out[117]:
264

In [118]:
len(population)


Out[118]:
264

In [119]:
china


Out[119]:
59659.887694344005

In [172]:
higher_than_china_pop = 0
for year in range(2017, 2018):
    china = averageGDP.query(f"Country_Name == 'China'")[str(year)].values[0]
    p_china = population.query(f"Country_Name == 'China'")[str(year)].values[0]
    p_below_china = 0.0
    total_p_this_year = 0.0
    for i in range(len(averageGDP)):
        c = averageGDP.iloc[i]
#         print(i, c)
        country = c["Country_Name"]
        this_country_code = c["Country_Code"]
        gdp_per_capita = averageGDP.iloc[i][str(year)]
        p = population.query(f'Country_Name == "{country}"')
        pop = p[str(year)].values[0]
        if this_country_code in exclude["Country Code"].values:
            pass
        elif np.isnan(gdp_per_capita):
#             print("gdp_per_capita Nan")
            pass
        elif np.isnan(pop):
#             print("population Nan")
            pass
        else:
            if gdp_per_capita < china:
                p_below_china += pop
            else:
                if pop > 1e7: # 1 millions in population
                    print(country, gdp_per_capita, pop/1e6)
            if gdp_per_capita > china and gdp_per_capita > 10814:
#                 print(country)
                higher_than_china_pop += pop
            total_p_this_year += pop 
#         if not np.isnan(pop):
#             total_p_this_year += pop 
#         else:
#             print("Hi", country)
    #     print(country, gdp_per_capita, pop)
    below_percent = p_below_china/total_p_this_year
    include_china = (p_below_china + p_china)/total_p_this_year
    tmp = [year, below_percent, include_china, total_p_this_year, china]
    print(tmp)
print(higher_than_china_pop/1e6)


Argentina 14401.974854608101 44.271041
Australia 53799.938089959905 24.598933
Belgium 43323.8073364711 11.372068
Brazil 9821.407686410079 209.288278
Canada 45032.1199081697 36.708083
Chile 15346.4496997595 18.054726
China 8826.99409574835 1386.395
Czech Republic 20368.1385576071 10.591323
Germany 44469.90906072441 82.695
Spain 28156.8158362352 46.572028
France 38476.6586361575 67.118648
United Kingdom 39720.4434267836 66.022273
Greece 18613.4238733817 10.760421
Italy 31952.9759206841 60.551416
Japan 38428.0973168279 126.785797
Kazakhstan 8837.45730230661 18.037646
Korea, Rep. 29742.8388613471 51.466201
Mexico 8902.83082294794 129.163276
Malaysia 9944.904300185759 31.624264
Netherlands 48223.1554941825 17.132854
Poland 13811.664243680802 37.975841
Portugal 21136.2972102005 10.293718
Romania 10813.7166001986 19.586539
Russian Federation 10743.0965915463 144.495044
Saudi Arabia 20760.9060117662 32.938213
Sweden 53442.0082808022 10.067744
Turkey 10540.617998709 80.74502
United States 59531.661964344006 325.719178
[2017, 0.5592285966041578, 0.7473825288770098, 7368408320.0, 8826.99409574835]
1225.193308

In [166]:
data = []
higher_than_china_pop = 0.0
for year in range(2017, 2018):
    china = averageGDP.query(f"Country_Name == 'China'")[str(year)].values[0]
    p_china = population.query(f"Country_Name == 'China'")[str(year)].values[0]
    p_below_china = 0.0
    total_p_this_year = 0.0
    for i in range(len(averageGDP)):
        c = averageGDP.iloc[i]
#         print(i, c)
        country = c["Country_Name"]
        this_country_code = c["Country_Code"]
        gdp_per_capita = averageGDP.iloc[i][str(year)]
        p = population.query(f'Country_Name == "{country}"')
        pop = p[str(year)].values[0]
        if this_country_code in exclude["Country Code"].values:
            pass
        elif np.isnan(gdp_per_capita):
#             print("gdp_per_capita Nan")
            pass
        elif np.isnan(pop):
#             print("population Nan")
            pass
        else:
            if gdp_per_capita < china:
                p_below_china += pop
            else:
                if pop > 1e7: # 1 millions in population
                    print(country, gdp_per_capita, pop/1e6)
                    higher_than_china_pop += pop
            if gdp_per_capita > china:
                print(country)
            total_p_this_year += pop 
#         if not np.isnan(pop):
#             total_p_this_year += pop 
#         else:
#             print("Hi", country)
    #     print(country, gdp_per_capita, pop)
    below_percent = p_below_china/total_p_this_year
    include_china = (p_below_china + p_china)/total_p_this_year
    tmp = [year, below_percent, include_china, total_p_this_year]
    data.append(tmp)
    print(tmp)
print(higher_than_china_pop/1e6)


Andorra
United Arab Emirates
Argentina 14401.974854608101 44.271041
Argentina
Antigua and Barbuda
Australia 53799.938089959905 24.598933
Australia
Austria
Belgium 43323.8073364711 11.372068
Belgium
Bahrain
Bahamas, The
Brazil 9821.407686410079 209.288278
Brazil
Barbados
Brunei Darussalam
Canada 45032.1199081697 36.708083
Canada
Switzerland
Chile 15346.4496997595 18.054726
Chile
China 8826.99409574835 1386.395
Costa Rica
Cyprus
Czech Republic 20368.1385576071 10.591323
Czech Republic
Germany 44469.90906072441 82.695
Germany
Denmark
Spain 28156.8158362352 46.572028
Spain
Estonia
Finland
France 38476.6586361575 67.118648
France
United Kingdom 39720.4434267836 66.022273
United Kingdom
Equatorial Guinea
Greece 18613.4238733817 10.760421
Greece
Grenada
Hong Kong SAR, China
Croatia
Hungary
Ireland
Iceland
Israel
Italy 31952.9759206841 60.551416
Italy
Japan 38428.0973168279 126.785797
Japan
Kazakhstan 8837.45730230661 18.037646
Kazakhstan
St. Kitts and Nevis
Korea, Rep. 29742.8388613471 51.466201
Korea, Rep.
Kuwait
St. Lucia
Lithuania
Luxembourg
Latvia
Macao SAR, China
Maldives
Mexico 8902.83082294794 129.163276
Mexico
Malta
Mauritius
Malaysia 9944.904300185759 31.624264
Malaysia
Netherlands 48223.1554941825 17.132854
Netherlands
Norway
New Zealand
Oman
Panama
Palau
Poland 13811.664243680802 37.975841
Poland
Portugal 21136.2972102005 10.293718
Portugal
Qatar
Romania 10813.7166001986 19.586539
Romania
Russian Federation 10743.0965915463 144.495044
Russian Federation
Saudi Arabia 20760.9060117662 32.938213
Saudi Arabia
Singapore
San Marino
Slovak Republic
Slovenia
Sweden 53442.0082808022 10.067744
Sweden
Seychelles
Trinidad and Tobago
Turkey 10540.617998709 80.74502
Turkey
Uruguay
United States 59531.661964344006 325.719178
United States
[2017, 0.5592285966041578, 0.7473825288770098, 7368408320.0]
3111.030573

In [ ]:


In [141]:
data = []
for year in range(1960, 2018):
    china = averageGDP.query(f"Country_Name == 'China'")[str(year)].values[0]
    p_china = population.query(f"Country_Name == 'China'")[str(year)].values[0]
    p_below_china = 0.0
    total_p_this_year = 0.0
    for i in range(len(averageGDP)):
        c = averageGDP.iloc[i]
#         print(i, c)
        country = c["Country_Name"]
        this_country_code = c["Country_Code"]
        gdp_per_capita = averageGDP.iloc[i][str(year)]
        p = population.query(f'Country_Name == "{country}"')
        pop = p[str(year)].values[0]
        if this_country_code in exclude["Country Code"].values:
            pass
        elif np.isnan(gdp_per_capita):
#             print("gdp_per_capita Nan")
            pass
        elif np.isnan(pop):
#             print("population Nan")
            pass
        else:
            if gdp_per_capita < china:
                p_below_china += pop
            total_p_this_year += pop 
#         if not np.isnan(pop):
#             total_p_this_year += pop 
#         else:
#             print("Hi", country)
    #     print(country, gdp_per_capita, pop)
    below_percent = p_below_china/total_p_this_year
    include_china = (p_below_china + p_china)/total_p_this_year
    tmp = [year, below_percent, include_china, total_p_this_year, china]
    data.append(tmp)
    print(tmp)
data = np.array(data)


[1960, 0.24938035057899183, 0.5358748022055203, 2328387151.0, 89.52054151035841]
[1961, 0.0179179114868462, 0.2980966865142763, 2356816643.0, 75.8058379259965]
[1962, 0.014679121828633852, 0.289965719295237, 2418461364.0, 70.90941166710071]
[1963, 0.010824993775869724, 0.2869949724180933, 2470706640.0, 74.3136434486145]
[1964, 0.016724221594707808, 0.29350229488756685, 2523158687.0, 85.4985551596313]
[1965, 0.011347243546853614, 0.28516617403118016, 2611890269.0, 98.48677775222059]
[1966, 0.20160112086978632, 0.47696388522275335, 2670658837.0, 104.324566181147]
[1967, 0.23285661731946677, 0.4985078683798731, 2840378116.0, 96.5895319417819]
[1968, 0.049295281604295776, 0.3148444553403496, 2916634946.0, 91.4727183066072]
[1969, 0.049445233935341094, 0.31632654815563016, 2982692896.0, 100.12990326618]
[1970, 0.22809886061085063, 0.48938847006963726, 3131831387.0, 113.16299155468599]
[1971, 0.2270301684936131, 0.4897358070553263, 3201701359.0, 118.65457778534599]
[1972, 0.24964394138287124, 0.5132342121185014, 3270340736.0, 131.883561243868]
[1973, 0.2727495259925619, 0.5369614333826652, 3338002472.0, 157.090374298657]
[1974, 0.03367317364291264, 0.2979806488806319, 3406449247.0, 160.140093727686]
[1975, 0.2103825204181777, 0.47473495737700583, 3466565357.0, 178.34181960809602]
[1976, 0.21261356487914246, 0.47612514108735426, 3531856222.0, 165.40554037241998]
[1977, 0.21369379367733582, 0.4760221618379846, 3596465783.0, 185.42283291367298]
[1978, 0.005173458956161883, 0.26629928237484585, 3661702192.0, 156.396388520044]
[1979, 0.008990139549242666, 0.2688482304103945, 3728977600.0, 183.983152215978]
[1980, 0.007366526087289222, 0.26321215539656695, 3835261922.0, 194.80472218683602]
[1981, 0.010195593222689838, 0.2622568563248505, 3943029515.0, 197.07147449910198]
[1982, 0.012146975515958322, 0.26402839484115287, 4004384296.0, 203.33491950346402]
[1983, 0.045077405807081396, 0.2959409379575094, 4079150091.0, 225.431928890812]
[1984, 0.046477176194951446, 0.29573576552717945, 4159635994.0, 250.71396904698798]
[1985, 0.2577999878338535, 0.5024141339524515, 4296726157.0, 294.45884850496003]
[1986, 0.07805899098770697, 0.3215582169715526, 4381081688.0, 281.928120911563]
[1987, 0.05250060117990328, 0.291226659279981, 4540916097.0, 251.811956961329]
[1988, 0.07990834217947881, 0.31678875498509945, 4650574469.0, 283.537695240524]
[1989, 0.09599276217501805, 0.32492623077138166, 4886354131.0, 310.8819124049]
[1990, 0.07763547171898547, 0.29955258676177876, 5115355793.0, 317.884673040928]
[1991, 0.271560818562033, 0.4953206919863782, 5142923896.0, 333.142145400184]
[1992, 0.28860019008534427, 0.5114936123873188, 5226578640.0, 366.46069230207297]
[1993, 0.29179667762441597, 0.510966442943344, 5376836528.0, 377.38983947995797]
[1994, 0.3298364711309829, 0.5479432078053159, 5464457532.0, 473.492278718042]
[1995, 0.33742742268617254, 0.5536443445765664, 5572436188.0, 609.6566792024839]
[1996, 0.34066383560973623, 0.556060313735455, 5652599386.0, 709.413755085039]
[1997, 0.35110478713657556, 0.5656783748217907, 5732648707.0, 781.744164341053]
[1998, 0.38899126393559, 0.6026782739234592, 5811934942.0, 828.580479295681]
[1999, 0.40419539293931944, 0.6168824967527766, 5890037419.0, 873.2870617257901]
[2000, 0.41091867184099695, 0.620787491964921, 6016353450.0, 959.372483639691]
[2001, 0.43006910877743465, 0.6380853563201535, 6114185863.0, 1053.1082430045199]
[2002, 0.4347433884728073, 0.6415194580700756, 6192205909.0, 1148.5082904417]
[2003, 0.4525456148608514, 0.6580309230913743, 6270034637.0, 1288.64325183381]
[2004, 0.45757410577766494, 0.6608884857149139, 6374733555.0, 1508.6680978826598]
[2005, 0.44867668863775045, 0.6506685362085446, 6454319893.0, 1753.41782925823]
[2006, 0.4489809233487611, 0.649607894339996, 6534614930.0, 2099.22943460447]
[2007, 0.45608025011156517, 0.6552960554906383, 6615363663.0, 2695.36591709669]
[2008, 0.4586223026119019, 0.6570132695351808, 6676992509.0, 3471.2480543115003]
[2009, 0.47659255873665596, 0.6735594958742225, 6758799316.0, 3838.43397176904]
[2010, 0.48764200235131894, 0.6831853544387017, 6840963836.0, 4560.512586009289]
[2011, 0.5000331986647464, 0.6939140296813503, 6932763765.0, 5633.7957168394]
[2012, 0.5038386007355834, 0.6964512441626176, 7012493967.0, 6337.88332279255]
[2013, 0.5366752156604534, 0.7275562486922579, 7111130836.0, 7077.7707653955795]
[2014, 0.5401686734223236, 0.7297145871643546, 7197570093.0, 7683.502613091179]
[2015, 0.5563270570741055, 0.7453876380973851, 7252807500.0, 8069.2130238951095]
[2016, 0.5605787859786744, 0.748734612471496, 7327251171.0, 8117.267465431791]
[2017, 0.5592285966041578, 0.7473825288770098, 7368408320.0, 8826.99409574835]

In [144]:
plt.plot(data[:,0], data[:,4])


Out[144]:
[<matplotlib.lines.Line2D at 0x1a22925d68>]

In [148]:
n = 40
plt.plot(data[:,0][:n], data[:,4][:n])


Out[148]:
[<matplotlib.lines.Line2D at 0x1a248ca2e8>]

In [140]:
plt.plot(data[:,0], data[:,3])


Out[140]:
[<matplotlib.lines.Line2D at 0x1a237c9978>]

In [145]:
data = np.array(data)
plt.scatter(data[:,0], data[:,1])
plt.scatter(data[:,0], data[:,2])
plt.ylim([0,1])


Out[145]:
(0, 1)

In [146]:
data = np.array(data)
plt.plot(data[:,0], data[:,1])
plt.plot(data[:,0], data[:,2])
plt.ylim([0,1])


Out[146]:
(0, 1)