Datasets for use in the Pandas class. We convert large datasets into smaller, more manageable ones and save them as dictionaries that can be turned back into dataframes.
This notebook was written by Dave Backus for the NYU Stern course Data Bootcamp.
In [ ]:
import pandas as pd
%matplotlib inline
In [ ]:
%%time
url = 'http://www.rug.nl/research/ggdc/data/pwt/v81/pwt81.xlsx'
pwt = pd.read_excel(url, sheetname='Data')
pwt.head()
In [ ]:
# dimensions of the full table as (number of rows, number of columns)
pwt.shape
In [ ]:
# Integer positions of the columns to keep, plus the country codes and years
# used to build the small samples in the cells below.
variables = [0, 1, 3, 4, 6]
countries = ['CHN', 'FRA', 'USA']
years = [1990, 2000, 2010]
# `variables` holds positions, while the column labels are strings, so select
# by position with .iloc -- pwt[variables] would look the integers up as labels.
# NOTE: this overwrites pwt; reload the data before re-running this cell.
pwt = pwt.iloc[:, variables]
In [ ]:
# Single-country sample: rows for China ('CHN') in the selected years only.
pwt1 = pwt.loc[pwt['countrycode'].eq('CHN') & pwt['year'].isin(years)]
In [ ]:
# Three-country sample: keep rows whose country code is in `countries`
# and whose year is in `years`.
in_countries = pwt['countrycode'].isin(countries)
in_years = pwt['year'].isin(years)
pwt3 = pwt[in_countries & in_years]
# The code filter above already limits the rows to the three countries, so no
# additional filter on the 'country' name column is applied here.
pwt3.shape
In [ ]:
# Shorten China's long official name, then renumber the rows 0..n-1.
# reset_index returns a new dataframe, so its result must be assigned;
# drop=True discards the old row labels instead of adding them as a column
# (which would otherwise end up in the dictionary built from pwt3).
pwt3 = (
    pwt3
    .replace(to_replace=["China, People's Republic of"], value=['China'])
    .reset_index(drop=True)
)
pwt3
In [ ]:
# Column-oriented dictionary: each column name maps to the list of its values,
# a form that can be pasted into code and passed back to pd.DataFrame.
pw3d = pwt3.to_dict('list')
pw3d
In [ ]:
In [ ]:
In [ ]:
In [ ]: