In [34]:
import numpy as np
import pandas as pd
%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns
from IPython import display
from collections import namedtuple
In [153]:
def fib_recursion(n, table=[]):
"""returns fib of n"""
if n <= 1:
return n
if len(table) < 1:
table = np.zeros(n+1)
if table[n-1] == 0:
table[n-1] = fib_recursion(n-1, table)
if table[n-2] == 0:
table[n-2] = fib_recursion(n-2, table)
table[n] = table[n-1] + table[n-2]
return table[n]
[fib_recursion(n) for n in range(10)]
Out[153]:
Now what if we use a list:'
In [150]:
def fib_list(n):
table=[0,1]
for i in range(2, n+1):
table.append(table[i-1]+table[i-2])
return table[n]
[fib_list(n) for n in range(10)]
Out[150]:
Now there isn't any point in saving all the fib numbers, so modifying the code to save only the last two fib numbers:
In [159]:
def fib_list_two(n):
table=[0,1]
for i in range(2, n+1):
table.append(sum(table[-2:]))
del table[0]
return table[1]
[fib_list_two(n) for n in range(10)]
Out[159]:
In [167]:
%time fib_recursion(100)
Out[167]:
The recursion fails as exceeds max memory depth on pretty small fib numbers
In [162]:
%time fib_list(1000)
Out[162]:
Now this holds all the fib numbers up to n in memory in a list. Which for large n seems un necessary to waste all that memory.
In [163]:
%time fib_list_two(1000)
Out[163]:
In [7]:
income_url="http://docs.google.com/spreadsheet/pub?key=0AkBd6lyS3EmpdHo5S0J6ekhVOF9QaVhod05QSGV4T3c&output=xlsx"
life_url="http://docs.google.com/spreadsheet/pub?key=phAwcNAVuyj2tPLxKvvnNPA&output=xlsx"
pop_url="http://docs.google.com/spreadsheet/pub?key=phAwcNAVuyj0XOoBL_n5tAQ&output=xlsx"
In [8]:
income = pd.read_excel(income_url)
life = pd.read_excel(life_url)
pop = pd.read_excel(pop_url)
In [9]:
income.head()
Out[9]:
In [10]:
life.head()
Out[10]:
In [11]:
pop.head()
Out[11]:
The most recent year where we have info on all three is 2011, so lets look at 2011 data:
In [16]:
df = pop[["Total population", 2011]].copy()
df.rename(columns={2011: "Population"}, inplace=True)
df.head()
Out[16]:
In [17]:
df = df.join(life[2011])
df.rename(columns={2011: "Life"}, inplace=True)
df.head()
Out[17]:
In [18]:
df = df.join(income["2011"])
df.rename(columns={"2011": "Income"}, inplace=True)
df.head()
Out[18]:
So now we have a dataframe containing country names, their population, adjusted per capita income and life expectance.
To simplify things, I'm going to drop the missing values:
In [20]:
df.dropna(inplace=True)
df.head()
Out[20]:
Lets say we want 500 million people with the highest income. So the population becomes the "weights", and the income the "values" of this knapsack problem.
So lets divide this problem into sizes of 10M, so we go 10, 20, 30... all the way to 500M
first, starting with 10 to 50M to keep it simple, heres our table:
In [30]:
solved = pd.DataFrame(columns=np.arange(10,51,10), index=df["Total population"])
solved.fillna(value=0, inplace=True)
solved.head()
Out[30]:
In [41]:
ans = {}
ans[10] = 100, 200, ["countryA", "B"]
ans[10]
Out[41]:
In [42]:
def pop_income(pop_size):
"""takes in a pop_size, and returns a tuple containing
max income and pop and a list of countries chosen"""
if pop_size in solved.keys():
return solved[max]
pop = 0
income = 0
for i, row in df[df["Population"]<= pop_size].iterrows():
print(row)
pop_income(10*np.power(10,4))
np.power(10,9)
Out[42]:
In [ ]: