Work in Progress


In [25]:
# read population in
import json
import requests
from pandas import DataFrame

# pop_json_url points at a JSON gist holding a list of rows; each row looks
# like [1, 'China', 1385566537].
pop_json_url = "https://gist.github.com/rdhyee/8511607/raw/f16257434352916574473e63612fcea55a0c1b1c/population_of_countries.json"

# Leave TLS certificate verification on (the requests default). Passing
# verify=False would accept a forged certificate, so the data feeding every
# later cell could be tampered with in transit.
response = requests.get(pop_json_url, timeout=30)
response.raise_for_status()  # fail loudly on an HTTP error instead of parsing an error page
pop_list = response.json()

# Columns keep their default integer labels (0, 1, 2); later cells index by them.
df = DataFrame(pop_list)
df.head()


Out[25]:
0 1 2
0 1 China 1385566537
1 2 India 1252139596
2 3 United States 320050716
3 4 Indonesia 249865631
4 5 Brazil 200361925

5 rows × 3 columns


In [26]:
# Peek at the first raw record of the downloaded list (before it became a DataFrame row).
pop_list[0]


Out[26]:
[1, u'China', 1385566537]

Q: What is the relationship between s and the population of China?

s = sum(df[df[1].str.startswith('C')][2])

  1. s is greater than the population of China
  2. s is the same as the population of China
  3. s is less than the population of China
  4. s is not a number.

In [12]:
from pandas import DataFrame, Series, Index
import numpy as np

# Small integer Series holding 1, 2, 3; used by the quiz questions that follow.
one_to_three = np.arange(1, 4)
s1 = Series(one_to_three)
s1


Out[12]:
0    1
1    2
2    3
dtype: int32

In [22]:
# Evaluate the quiz expression step by step: select the rows whose name
# (column 1) starts with 'C', then add up their populations (column 2).
starts_with_c = df[1].str.startswith('C')
c_rows = df[starts_with_c]
sum(c_rows[2])


Out[22]:
1667559248

In [24]:
# Left: the sum over countries starting with 'C' computed above.
# Right: China's population from the first row of the table.
1667559248>1385566537


Out[24]:
True

Q: What is the value of the following expression?

s1.apply(lambda k: 2*k).sum()

In [17]:
# Intermediate step of the quiz expression: apply() calls the lambda on each
# element and returns a new Series of the doubled values.
s1.apply(lambda k: 2*k)


Out[17]:
0    2
1    4
2    6
dtype: int64

In [18]:
# Full quiz expression: double each element of s1, then total the results.
doubled = s1.apply(lambda k: 2*k)
doubled.sum()


Out[18]:
12

Q: What is s1.cumsum()[1]?


In [20]:
# Running total of s1: each position holds the sum of all elements up to and
# including that position.
s1.cumsum()


Out[20]:
0    1
1    3
2    6
dtype: int32

In [21]:
# Answer to the quiz: the running total at index label 1 (i.e. s1[0] + s1[1]).
running_total = s1.cumsum()
running_total[1]


Out[21]:
3