Datasets for use in the pandas class. We reduce large datasets to smaller, more manageable extracts and save them as dictionaries that can be turned back into DataFrames.
This notebook was written by Dave Backus for the NYU Stern course Data Bootcamp.
In [1]:
import pandas as pd
%matplotlib inline
In [10]:
%%time
url = 'http://www.rug.nl/research/ggdc/data/pwt/v81/pwt81.xlsx'
pwt = pd.read_excel(url, sheetname='Data')
pwt.head()
In [11]:
# (rows, columns) of the full table as loaded, before any subsetting
pwt.shape
Out[11]:
In [12]:
# Integer positions of the columns to keep, plus the country codes and years
# that the filtering cells below select on.
variables = [0, 1, 3, 4, 6]
countries = ['CHN', 'FRA', 'USA']
years = [1990, 2000, 2010]
# `variables` holds positions, not labels, so select with .iloc. Plain
# `pwt[variables]` looks the integers up as column labels and raises KeyError
# on current pandas because the columns are labelled by strings.
# NOTE: this overwrites `pwt`; re-run the loading cell before re-running this
# one, otherwise the positions are applied to the already-reduced frame.
pwt = pwt.iloc[:, variables]
Out[12]:
In [13]:
# Single-country sample: rows with country code 'CHN' in the selected years.
pwt1 = pwt.loc[pwt['countrycode'].eq('CHN') & pwt['year'].isin(years)]
In [15]:
# three countries
# Keep rows whose country code is in `countries` and whose year is in `years`.
# An unfinished follow-up filter on the 'country' column was dropped here: it
# had no right-hand side and made the whole cell a syntax error.
pwt3 = pwt[(pwt['countrycode'].isin(countries)) & (pwt['year'].isin(years))]
pwt3.shape
Out[15]:
In [1]:
# Shorten the long country name to 'China'.
pwt3 = pwt3.replace(to_replace=["China, People's Republic of"], value=['China'])
# reset_index returns a new frame rather than modifying in place, so the
# result must be assigned back. drop=True discards the old row labels instead
# of inserting them as an extra 'index' column.
pwt3 = pwt3.reset_index(drop=True)
pwt3
In [31]:
# Export the three-country sample as a plain dict that maps each column name
# to the list of that column's values, then display it.
pw3d = pwt3.to_dict('list')
pw3d
Out[31]:
In [ ]:
In [ ]:
In [ ]:
In [ ]: