In [6]:
import pandas as pd
import numpy as np

# Load the tab-separated source data with a default integer index.
# pd.DataFrame.from_csv was deprecated in pandas 0.21 and removed in 1.0;
# pd.read_csv is the supported replacement.
raw_df = pd.read_csv('31461-0001-Data.tsv', sep='\t', index_col=None)

In [60]:



Out[60]:
109

In [13]:
# Names of the first two indicator columns; neither variable is referenced
# again in the visible cells.
field = 'r111r'
field2 = 'r112r'

# Keep the identifier columns and the four r111r-r114r indicators, dropping
# every row whose r111r entry is a single blank character.
# NOTE(review): only r111r is checked for blanks; r112r-r114r are presumably
# blank on the same rows -- confirm against the data.
keep_columns = ['newid', 'year', 'country', 'cow', 'r111r', 'r112r', 'r113r', 'r114r']
has_r111r = raw_df['r111r'] != ' '
sa2 = raw_df.loc[has_r111r, keep_columns]

In [21]:
# Country names used to select rows by exact string match, grouped here by
# initial letter (original order preserved).
# NOTE(review): a name that is spelled differently in the data, or absent from
# it, silently matches nothing -- verify the spellings against the dataset.
trans = [
    "Algeria", "Argentina",
    "Belarus", "Benin", "Bolivia", "Brazil", "Burkina Faso",
    "Cambodia", "Cent. Af. Rep.", "Chad", "Congo", "Cote d'Ivoire",
    "El Salvador", "Estonia", "Ethiopia",
    "Gabon", "Georgia", "Ghana", "Guatemala", "Guinea-Bissau",
    "Honduras", "Hungary",
    "Indonesia",
    "Madagascar", "Mali", "Moldova", "Mongolia", "Mozambique",
    "Namibia", "Nepal", "Nicaragua", "Niger", "Nigeria",
    "Pakistan", "Paraguay", "Peru", "Philippines", "Poland",
    "Rwanda", "Romania",
    "Slovenia", "Sudan", "Suriname",
    "Taiwan", "Tajikistan", "Turkey",
    "Uruguay",
    "Zambia", "Zimbabwe",
]

In [22]:
# Restrict sa2 to the rows whose country appears in the `trans` name list.
trans_df = sa2.loc[sa2['country'].isin(trans)]

In [23]:
# Show the rows selected into trans_df (the notebook truncates the rendering).
trans_df


Out[23]:
newid year country cow r111r r112r r113r r114r
17 84 1997 Algeria 615 1 1 1 1
20 130 1983 Argentina 160 0 0 1 2
60 368 1995 Belarus 370 0 0 3 0
61 368 1995 Belarus 370 0 1 3 0
62 368 1995 Belarus 370 0 0 2 0
63 369 1995 Belarus 370 0 0 2 1
64 374 2000 Belarus 370 2 2 2 1
65 374 2000 Belarus 370 3 3 3 3
66 375 2001 Belarus 370 0 0 1 3
67 375 2001 Belarus 370 3 3 3 3
68 375 2001 Belarus 370 3 1 3 3
69 375 2001 Belarus 370 1 2 2 2
70 375 2001 Belarus 370 0 0 0 0
71 378 2004 Belarus 370 0 2 1 2
72 378 2004 Belarus 370 0 0 0 0
73 455 1996 Benin 434 1 0 1 1
74 516 1997 Bolivia 145 1 0 0 0
75 521 2002 Bolivia 145 0 0 1 0
89 586 1994 Brazil 140 0 0 1 0
103 761 1993 Cambodia 811 2 0 0 2
104 766 1998 Cambodia 811 1 0 2 0
105 766 1998 Cambodia 811 0 1 2 3
106 766 1998 Cambodia 811 0 1 2 3
107 766 1998 Cambodia 811 0 0 1 1
108 766 1998 Cambodia 811 0 0 2 0
109 766 1998 Cambodia 811 0 2 3 3
110 771 2003 Cambodia 811 1 2 3 3
111 771 2003 Cambodia 811 3 0 2 1
112 771 2003 Cambodia 811 3 0 2 3
123 1022 2002 Congo 484 3 0 0 0
... ... ... ... ... ... ... ... ...
429 3700 1996 Romania 360 0 0 1 1
430 3700 1996 Romania 360 0 0 2 0
431 3704 2000 Romania 360 0 0 1 0
432 3708 2004 Romania 360 1 0 2 1
455 3750 2003 Rwanda 517 3 3 1 2
456 3750 2003 Rwanda 517 2 3 0 3
457 3751 2003 Rwanda 517 1 3 0 3
498 3945 1992 Slovenia 349 1 0 1 0
506 4164 1991 Suriname 115 0 1 0 1
507 4169 1996 Suriname 115 1 0 1 1
508 4173 2000 Suriname 115 0 0 0 0
511 4328 2004 Taiwan 713 2 0 1 0
512 4338 2000 Tajikistan 702 0 0 2 1
529 4555 2002 Turkey 640 0 0 0 1
530 4555 2002 Turkey 640 0 0 0 0
565 4719 1984 Uruguay 165 0 1 2 1
579 4974 1991 Zambia 551 2 1 1 1
580 4974 1991 Zambia 551 3 1 2 0
581 4984 2001 Zambia 551 2 0 2 1
582 4984 2001 Zambia 551 2 0 2 0
583 4984 2001 Zambia 551 1 1 1 0
584 4998 1985 Zimbabwe 552 2 2 2 3
585 5013 2000 Zimbabwe 552 0 0 2 3
586 5013 2000 Zimbabwe 552 2 2 2 2
587 5013 2000 Zimbabwe 552 1 3 3 3
588 5013 2000 Zimbabwe 552 2 2 1 3
589 5013 2000 Zimbabwe 552 0 0 3 3
590 5015 2002 Zimbabwe 552 0 2 3 3
591 5015 2002 Zimbabwe 552 0 3 3 3
592 5015 2002 Zimbabwe 552 1 2 2 3

202 rows × 8 columns


In [57]:
# For every country present in raw_df, print the maximum value of each
# r111r-r114r indicator among that country's rows in trans_df.
# Countries with no rows in trans_df print nan for all four indicators.
for c in raw_df.country.unique():
    c_df = trans_df[trans_df.country == c]
    r111r, r112r, r113r, r114r = [
        c_df[col].max() for col in ('r111r', 'r112r', 'r113r', 'r114r')
    ]
    # print() with a single argument behaves identically on Python 2 and 3.
    print("{0}, {1}, {2}, {3}, {4}".format(c, r111r, r112r, r113r, r114r))


Afghanistan, nan, nan, nan, nan
Albania, nan, nan, nan, nan
Algeria, 1, 1, 1, 1
Angola, nan, nan, nan, nan
Argentina, 0, 0, 1, 2
Armenia, nan, nan, nan, nan
Azerbaijan, nan, nan, nan, nan
Bangladesh, nan, nan, nan, nan
Belarus, 3, 3, 3, 3
Benin, 1, 0, 1, 1
Bolivia, 1, 0, 1, 0
Bosnia-Herz, nan, nan, nan, nan
Botswana, nan, nan, nan, nan
Brazil, 0, 0, 1, 0
Bulgaria, nan, nan, nan, nan
Burundi, nan, nan, nan, nan
Cambodia, 3, 2, 3, 3
Cameroon, nan, nan, nan, nan
Chile, nan, nan, nan, nan
Colombia, nan, nan, nan, nan
Congo, 3, 0, 0, 0
Costa Rica, nan, nan, nan, nan
Cote d'Ivoire, nan, nan, nan, nan
Croatia, nan, nan, nan, nan
Czech Rep., nan, nan, nan, nan
Dom. Rep., nan, nan, nan, nan
Ecuador, nan, nan, nan, nan
El Salvador, 1, 1, 2, 1
Eq. Guinea, nan, nan, nan, nan
Estonia, 0, 0, 1, 0
Fiji, nan, nan, nan, nan
France, nan, nan, nan, nan
GDR, nan, nan, nan, nan
Gambia, nan, nan, nan, nan
Georgia, 3, 2, 2, 2
Ghana, 2, 0, 1, 2
Grenada, nan, nan, nan, nan
Guatemala, 3, 2, 2, 3
Guinea, nan, nan, nan, nan
Guyana, nan, nan, nan, nan
Haiti, nan, nan, nan, nan
Honduras, 0, 0, 0, 1
Hungary, 1, 0, 1, 1
Indonesia, 3, 2, 1, 2
Jamaica, nan, nan, nan, nan
Kazakhstan, nan, nan, nan, nan
Kenya, nan, nan, nan, nan
Kyrgyzstan, nan, nan, nan, nan
Latvia, nan, nan, nan, nan
Lesotho, nan, nan, nan, nan
Liberia, nan, nan, nan, nan
Lithuania, nan, nan, nan, nan
Macedonia, nan, nan, nan, nan
Madagascar, 2, 0, 0, 2
Malawi, nan, nan, nan, nan
Malaysia, nan, nan, nan, nan
Mali, 0, 0, 0, 0
Mauritius, nan, nan, nan, nan
Mexico, nan, nan, nan, nan
Moldova, 1, 0, 1, 1
Mongolia, 3, 1, 1, 0
Morocco, nan, nan, nan, nan
Mozambique, 3, 1, 2, 2
Namibia, 2, 1, 2, 2
Nepal, 2, 0, 1, 1
Nicaragua, 2, 2, 2, 2
Niger, 3, 3, 3, 0
Nigeria, 2, 0, 1, 1
P. N. Guinea, nan, nan, nan, nan
Pakistan, 3, 3, 3, 3
Panama, nan, nan, nan, nan
Paraguay, 3, 0, 3, 1
Peru, 3, 1, 3, 3
Philippines, 2, 2, 2, 2
Poland, 0, 0, 0, 0
Romania, 3, 2, 3, 3
Russia, nan, nan, nan, nan
Rwanda, 3, 3, 1, 3
S. Africa, nan, nan, nan, nan
Senegal, nan, nan, nan, nan
Serbia, nan, nan, nan, nan
Montenegro, nan, nan, nan, nan
Sierra Leone, nan, nan, nan, nan
Slovakia, nan, nan, nan, nan
Slovenia, 1, 0, 1, 0
Solomon Is., nan, nan, nan, nan
Spain, nan, nan, nan, nan
Sri Lanka, nan, nan, nan, nan
Suriname, 1, 1, 1, 1
Swaziland, nan, nan, nan, nan
Taiwan, 2, 0, 1, 0
Tajikistan, 0, 0, 2, 1
Tanzania, nan, nan, nan, nan
Tanzania -Zanzibar, nan, nan, nan, nan
Thailand, nan, nan, nan, nan
Timor-Leste, nan, nan, nan, nan
Togo, nan, nan, nan, nan
Trinidad-Tobago, nan, nan, nan, nan
Tunisia, nan, nan, nan, nan
Turkey, 0, 0, 0, 1
USA, nan, nan, nan, nan
Uganda, nan, nan, nan, nan
Ukraine, nan, nan, nan, nan
Uruguay, 0, 1, 2, 1
Uzbekistan, nan, nan, nan, nan
Venezuela, nan, nan, nan, nan
Yemen, nan, nan, nan, nan
Zambia, 3, 1, 2, 1
Zimbabwe, 2, 3, 3, 3

In [42]:
# Load the per-country indicator table for the selected countries.
# NOTE(review): this CSV is not written by any visible cell -- presumably it
# was assembled from the per-country maxima printed earlier; confirm provenance.
# pd.DataFrame.from_csv was removed in pandas 1.0; index_col=0 and
# parse_dates=True reproduce its defaults with pd.read_csv.
trans_indicator_df = pd.read_csv('r111-r114_indicators_trans.csv', index_col=0, parse_dates=True)

In [55]:
# Share of rows in trans_indicator_df whose indicator value is 2 or higher,
# reported per indicator column. The printed value is a fraction between 0 and
# 1 despite the "pct" label.
for col in ('r111r', 'r112r', 'r113r', 'r114r'):
    hits = (trans_indicator_df[col] >= 2).sum()
    # float() forces true division on Python 2 and yields a plain Python float.
    print("{0} pct: {1}".format(col, float(hits) / len(trans_indicator_df)))


r111r pct: 0.585365853659
r112r pct: 0.292682926829
r113r pct: 0.439024390244
r11rr pct: 0.414634146341

In [63]:
# Load the per-country indicator table covering all countries.
# NOTE(review): this CSV is not written by any visible cell -- confirm how it
# was produced.
# pd.DataFrame.from_csv was removed in pandas 1.0; index_col=0 and
# parse_dates=True reproduce its defaults with pd.read_csv.
all_indicator_df = pd.read_csv('r111-r114_indicators_all.csv', index_col=0, parse_dates=True)

In [64]:
# Share of rows in all_indicator_df whose indicator value is 2 or higher,
# reported per indicator column. The printed value is a fraction between 0 and
# 1 despite the "pct" label.
for col in ('r111r', 'r112r', 'r113r', 'r114r'):
    hits = (all_indicator_df[col] >= 2).sum()
    # float() forces true division on Python 2 and yields a plain Python float.
    print("{0} pct: {1}".format(col, float(hits) / len(all_indicator_df)))


r111r pct: 0.220183486239
r112r pct: 0.110091743119
r113r pct: 0.165137614679
r11rr pct: 0.155963302752

In [ ]: