The goal of this project is to explore the concept of 'bias' through data on Wikipedia articles.
Sources:
* Wikipedia Data Set : Politicians by Country from the English-language Wikipedia [https://figshare.com/articles/Untitled_Item/5513449] : File Name - page_data.csv, with three columns:
1. "country", containing the sanitised country name, extracted from the category name;
2. "page", containing the unsanitised page title;
3. "last_edit", containing the edit ID of the last edit to the page.
* Population Data : http://www.prb.org/DataFinder/Topic/Rankings.aspx?ind=14 : File Name - Population Mid-2015.csv
* Article quality predictions : https://ores.wikimedia.org/v3/#!/scoring/get_v3_scores_context_revid_model (a minimal single-revision sketch follows below)
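The third source, ORES ("Objective Revision Evaluation Service"), is a machine learning API that predicts an article-quality class (FA, GA, B, C, Start, Stub) for a given revision ID. As a minimal sketch of the single-revision endpoint linked above (the User-Agent value here is a placeholder, not the header this notebook actually sends):

import requests

def get_single_score(revid, context='enwiki', model='wp10'):
    # GET /v3/scores/{context}/{revid}/{model}, per the ORES docs linked above
    url = 'https://ores.wikimedia.org/v3/scores/{}/{}/{}'.format(context, revid, model)
    return requests.get(url, headers={'User-Agent': 'placeholder'}).json()

# e.g. get_single_score(235107991) returns the wp10 prediction for that revision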
In [2]:
## getting the data from the CSV files
import csv
In [3]:
# Data Source 1 :
# reading data and saving in dictionary of dictionaries
'''
{
    "revision_id": {
        "country": "",
        "article_name": "",
        "revision_id": ""
    },
    "revision_id": {
    },
    ...
}
'''
page_data = dict()
with open('page_data.csv', encoding="utf8") as csvfile:
    reader = csv.reader(csvfile)
    next(reader)  # skip the header row
    for row in reader:
        # row looks like ['Template:ZambiaProvincialMinisters', 'Zambia', '235107991']
        revision_id = row[2]
        page_data[revision_id] = {
            "country": row[1],
            "article_name": row[0],
            "revision_id": row[2],
        }
# print(page_data)
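For reference, csv.DictReader reads the same file keyed by its header row; a sketch, assuming the columns are headed 'page', 'country', and 'last_edit' as described in the source list above:

page_data = dict()
with open('page_data.csv', encoding="utf8") as csvfile:
    for row in csv.DictReader(csvfile):  # the header row is consumed automatically
        page_data[row['last_edit']] = {
            "country": row['country'],
            "article_name": row['page'],
            "revision_id": row['last_edit'],
        }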
In [4]:
# Data Source 2 :
'''
{
    "country_name_1": "population",
    "country_name_2": "population"
}
'''
population_data = {}
# the first three lines of the file are header/metadata rows, so they are skipped;
# the last line in the raw data file is empty, so `row` also checks that
# the parsed list is non-empty
line_no = 0
with open('Population Mid-2015.csv', encoding="utf8") as csvfile:
    reader = csv.reader(csvfile)
    for row in reader:
        line_no = line_no + 1
        if line_no > 3 and row:
            # row looks like ['Afghanistan', 'Country', 'Mid-2015', 'Number', '32,247,000', '']
            population_data[row[0]] = row[4]
print(population_data)
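The population figures arrive as comma-grouped strings such as '32,247,000'; normalising them to integers at read time would avoid the repeated .replace(',', '') calls later in the notebook. A sketch:

def parse_population(raw):
    # '32,247,000' -> 32247000
    return int(raw.replace(',', ''))

# inside the reading loop above:
# population_data[row[0]] = parse_population(row[4])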
In [5]:
# Merging the Wikipedia data (page_data) and population data (population_data)
# by adding each country's population to its page_data records
'''
{
    "revision_id": {
        "country": "",
        "article_name": "",
        "revision_id": "",
        "population": ""
    },
    "revision_id": {
    },
    ...
}
'''
count = 0
for revision_id, value in page_data.items():
    country_page_data = value["country"]
    if population_data.get(str(country_page_data)) is not None:
        page_data[revision_id]["population"] = population_data[country_page_data]
    else:
        # for now, set population to 0 for countries that are not present in population_data
        page_data[revision_id]["population"] = 0
        count = count + 1
# print("number of entries with population 0")
# print(count)
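The same merge can be written more compactly with dict.get and a default value; a sketch that is equivalent apart from the missing-country counter:

for revision_id, record in page_data.items():
    # falls back to 0 when the country has no entry in population_data
    record["population"] = population_data.get(record["country"], 0)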
In [6]:
# Data Source 3 :
import requests
import json
headers = {'User-Agent' : 'https://github.com/abhishekanand', 'From' : 'anand1@uw.edu'}
def get_ores_data(revision_ids, headers):
    # Define the endpoint
    endpoint = 'https://ores.wikimedia.org/v3/scores/{project}/?models={model}&revids={revids}'
    # Specify the parameters, joining all the revision IDs together separated by | marks
    params = {'project': 'enwiki',
              'model': 'wp10',
              'revids': '|'.join(str(x) for x in revision_ids)}
    # pass the identifying headers along with the request
    api_call = requests.get(endpoint.format(**params), headers=headers)
    response = api_call.json()
    return response
# So if we grab some example revision IDs and turn them into a list and then call get_ores_data...
# example_ids = [783381498, 807355596, 757539710]
# get_ores_data(example_ids, headers)
no_revision_ids = len(page_data)
# print(no_revision_ids)
# getting all the revision ids from page_data as a list
revision_ids = list(page_data.keys())
counter = 0
call_count = 0
# prediction is being added in page_data
'''
{
    "revision_id": {
        "country": "",
        "article_name": "",
        "revision_id": "",
        "population": "",
        "article_quality": ""
    },
    "revision_id": {
    },
    ...
}
'''
while counter < no_revision_ids:
    # Python slicing handles the final partial batch automatically:
    # revision_ids[counter:counter+100] just returns however many IDs remain
    temp = get_ores_data(revision_ids[counter:counter+100], headers)
    batch_scores = temp["enwiki"]["scores"]
    for revision, value in batch_scores.items():
        if value["wp10"].get("score") is not None:
            prediction = value["wp10"]["score"]["prediction"]
            page_data[revision]["article_quality"] = prediction
        else:
            # some revisions cannot be scored; mark them 'NA'
            page_data[revision]["article_quality"] = 'NA'
    counter = counter + 100
    call_count = call_count + 1
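A long sequence of API calls can hit transient network failures; a hedged sketch of a retry wrapper around get_ores_data (the retry count and backoff are arbitrary choices, not part of the assignment):

import time

def get_ores_data_with_retry(revision_ids, headers, retries=3):
    for attempt in range(retries):
        try:
            return get_ores_data(revision_ids, headers)
        except (requests.RequestException, ValueError):
            # ValueError covers a non-JSON response body from .json()
            time.sleep(2 ** attempt)  # simple exponential backoff
    raise RuntimeError("ORES request failed after {} attempts".format(retries))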
In [7]:
# print(call_count)  # number of batched API calls made
In [8]:
# Flattening the data set into a list of records, each containing:
#   country
#   article_name
#   revision_id
#   article_quality
#   population
combined_data = []  # empty list
for key, value in page_data.items():
    combined_data.append(value)
'''
Printing only the first 100 entries, to stay under Jupyter's
"IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`."
'''
for i in range(100):
    print(combined_data[i])
In [9]:
# The final data file is named: bias_in_data.csv
import csv
CSVOut = "bias_in_data.csv"
with open(CSVOut, "w", encoding="utf-8") as ofile:
    ofile.write("country,article_name,revision_id,article_quality,population\n")
    for item in combined_data:
        # commas are stripped from values so they don't break the unquoted CSV format
        ofile.write(str(item["country"]).replace(',', '') + "," +
                    str(item["article_name"]).replace(',', '') + "," +
                    str(item["revision_id"]) + "," +
                    str(item["article_quality"]) + "," +
                    str(item["population"]).replace(',', '') + "\n")
print("Done")
In [10]:
# Creating dataset of Country and Article Count
'''
{
    "country_name_1": "Count of Articles",
    "country_name_2": "Count of Articles"
}
'''
country_articleCount = {}
for item in combined_data:
    country_name = str(item["country"]).replace(',', '')
    if country_name in country_articleCount:
        country_articleCount[country_name] = country_articleCount[country_name] + 1
    else:
        country_articleCount[country_name] = 1
print(country_articleCount)
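collections.Counter expresses the same per-country tally in a couple of lines; a sketch (Counter is a dict subclass, so the rest of the notebook would work unchanged):

from collections import Counter

country_articleCount = Counter(
    str(item["country"]).replace(',', '') for item in combined_data
)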
In [11]:
# Population Data
# print(population_data)
# Creating dataset containing Country and Population
'''
{
    "country_name_1": "Population",
    "country_name_2": "Population"
}
'''
country_population = {}
for key, value in population_data.items():
    countryName = str(key).replace(',', '')
    country_population[countryName] = str(value).replace(',', '')
print(country_population)
In [12]:
# Creating Country : Proportion (number of politician articles from a country / country's population)
# inputs: country_articleCount, country_population
'''
{
    "country_name_1": "Proportion",
    "country_name_2": "Proportion"
}
'''
article_proportion = {}
for key, value in country_articleCount.items():
    if key in country_population:
        article_proportion[key] = int(country_articleCount[key]) / int(country_population[key]) * 100
print(article_proportion)
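For a concrete (made-up) example: a country with 500 politician articles and a population of 5,000,000 gets a proportion of 500 / 5000000 * 100 = 0.01, i.e. one article per 10,000 people.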
In [13]:
# Creating dataset containing Country and Count of High-Quality Articles
'''
{
    "country_name_1": "Count of High Quality Articles",
    "country_name_2": "Count of High Quality Articles"
}
'''
country_hqarticle = {}
for item in combined_data:
    country_name = str(item["country"]).replace(',', '')
    if country_name not in country_hqarticle:
        country_hqarticle[country_name] = 0
    # 'FA' and 'GA' are the two high-quality classes; compare with `==`,
    # since `is` tests identity, not string equality
    if item["article_quality"] == 'FA' or item["article_quality"] == 'GA':
        country_hqarticle[country_name] = country_hqarticle[country_name] + 1
print(country_hqarticle)
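The same tally, as a sketch with collections.defaultdict; adding int(...) for every row keeps countries with zero high-quality articles in the dictionary:

from collections import defaultdict

country_hqarticle = defaultdict(int)
for item in combined_data:
    country_name = str(item["country"]).replace(',', '')
    # True/False coerces to 1/0, so every country gets an entry
    country_hqarticle[country_name] += int(item["article_quality"] in ('FA', 'GA'))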
In [14]:
# Creating Country and High-Quality Article proportion
# (number of high-quality articles / country's total article count)
# inputs: country_articleCount, country_hqarticle
'''
{
    "country_name_1": "HqProportion",
    "country_name_2": "HqProportion"
}
'''
hqarticle_proportion = {}
for key, value in country_articleCount.items():
    # countries with zero high-quality articles are left out of this ranking
    if key in country_hqarticle and country_hqarticle[key] != 0:
        hqarticle_proportion[key] = int(country_hqarticle[key]) / int(country_articleCount[key]) * 100
print(hqarticle_proportion)
The visualization should be pretty straightforward. Produce four visualizations that show:
* 10 highest-ranked countries in terms of number of politician articles as a proportion of country population
* 10 lowest-ranked countries in terms of number of politician articles as a proportion of country population
* 10 highest-ranked countries in terms of number of GA and FA-quality articles as a proportion of all articles about politicians from that country
* 10 lowest-ranked countries in terms of number of GA and FA-quality articles as a proportion of all articles about politicians from that country
All four charts share the same rank-and-plot pattern; a reusable helper is sketched after the imports below.
In [15]:
# 10 highest-ranked countries in terms of number of politician articles as a proportion of country population
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib
matplotlib.style.use('ggplot')
%matplotlib inline
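As promised above, a sketch of a shared helper for the four charts, using pd and plt imported in the cell above and the proportion dictionaries built earlier (the cells below spell each chart out step by step instead):

def plot_ranking(proportions, title, ylabel, top=True, n=10):
    # sort descending for a top-n chart, ascending for a bottom-n chart
    ordered = sorted(proportions.items(), key=lambda kv: kv[1], reverse=top)[:n]
    countries = [k for k, v in ordered]
    values = [v for k, v in ordered]
    pd.Series(values, index=countries).plot(kind='bar', figsize=(12, 10), title=title)
    plt.xlabel('Country')
    plt.ylabel(ylabel)
    plt.show()

# e.g. plot_ranking(article_proportion,
#                   'Top 10 countries by politician articles per capita',
#                   'Proportion [politician articles / population * 100]')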
In [16]:
# sorts article_proportion dictionary in descending order
sorted_descending_article_proportion = [(k, article_proportion[k]) for k in sorted(article_proportion, key=article_proportion.get, reverse=True)]
# keep the first ten entries
sorted_top_10_article_proportion = {}
count = 0
for k, v in sorted_descending_article_proportion:
    count = count + 1
    sorted_top_10_article_proportion[k] = v
    if count == 10:
        break
print(sorted_top_10_article_proportion)
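heapq can pick the ten largest (or smallest) entries without building the fully sorted list first; a sketch:

import heapq

top_10 = dict(heapq.nlargest(10, article_proportion.items(), key=lambda kv: kv[1]))
# heapq.nsmallest(...) gives the bottom-10 counterpart used in the cells below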
In [17]:
# https://stackoverflow.com/questions/18837262/convert-python-dict-into-a-dataframe
# https://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.plot.html
ad = pd.Series(sorted_top_10_article_proportion)
adf = pd.DataFrame({'proportion': ad})
adf.plot(kind='bar', figsize=(12, 10))
plt.xlabel('Country')
plt.ylabel('Proportion [politician articles / population * 100]')
plt.title('10 highest-ranked countries in terms of number of politician articles as a proportion of country population')
plt.show()
In [18]:
print("10 highest-ranked countries in terms of number of politician articles as a proportion of country population")
print("")
print("COUNTRY PROPORTION")
print("-------------------------------------------")
print(ad.to_string(index=True))
In [19]:
# 10 lowest-ranked countries in terms of number of politician articles as a proportion of country population
# sorts article_proportion dictionary in ascending order
sorted_ascending_article_proportion = [(k, article_proportion[k]) for k in sorted(article_proportion, key=article_proportion.get, reverse=False)]
# keep the first ten entries
sorted_bottom_10_article_proportion = {}
count = 0
for k, v in sorted_ascending_article_proportion:
    count = count + 1
    sorted_bottom_10_article_proportion[k] = v
    if count == 10:
        break
print(sorted_bottom_10_article_proportion)
In [39]:
bd = pd.Series(sorted_bottom_10_article_proportion)
bdf = pd.DataFrame({'proportion': bd})
bdf.plot(kind='bar', figsize=(12, 10), title='10 lowest-ranked countries in terms of number of politician articles as a proportion of country population')
plt.xlabel('Country')
plt.ylabel('Proportion [politician articles / population * 100]')
plt.show()
In [21]:
print("10 lowest-ranked countries in terms of number of politician articles as a proportion of country population")
print("")
print("COUNTRY PROPORTION")
print("--------------------------------")
print(bd.to_string(index=True))
In [22]:
# 10 highest-ranked countries in terms of number of GA and FA-quality articles as a proportion of all articles
# about politicians from that country
sorted_descending_articleQ_proportion = [(k, hqarticle_proportion[k]) for k in sorted(hqarticle_proportion, key=hqarticle_proportion.get, reverse=True)]
sorted_top_10_articleQ_proportion = {}
count = 0
for k, v in sorted_descending_articleQ_proportion:
    count = count + 1
    sorted_top_10_articleQ_proportion[k] = v
    if count == 10:
        break
print(sorted_top_10_articleQ_proportion)
In [40]:
cd = pd.Series(sorted_top_10_articleQ_proportion)
cdf = pd.DataFrame({'Percentage': cd})
cdf.plot(kind='bar', figsize=(12, 10), title='10 highest-ranked countries in terms of number of GA and FA-quality articles as a proportion of all articles about politicians from that country')
plt.xlabel('Country')
plt.ylabel('Percentage [high-quality (FA or GA) articles / total articles * 100]')
plt.show()
In [24]:
print("10 highest-ranked countries in terms of number of GA and FA-quality articles as a proportion of all articles about politicians from that country")
print("")
print("COUNTRY Percentage")
print("----------------------------------------")
print(cd.to_string(index=True))
In [25]:
# 10 lowest-ranked countries in terms of number of GA and FA-quality articles as a proportion of all articles
# about politicians from that country
sorted_ascending_articleQ_proportion = [(k, hqarticle_proportion[k]) for k in sorted(hqarticle_proportion, key=hqarticle_proportion.get, reverse=False)]
sorted_bottom_10_articleQ_proportion = {}
count = 0
for k, v in sorted_ascending_articleQ_proportion:
    count = count + 1
    sorted_bottom_10_articleQ_proportion[k] = v
    if count == 10:
        break
print(sorted_bottom_10_articleQ_proportion)
In [41]:
dd = pd.Series(sorted_bottom_10_articleQ_proportion)
ddf = pd.DataFrame({'Percentage': dd})
ddf.plot(kind='bar', figsize=(12, 10), title='10 lowest-ranked countries in terms of number of GA and FA-quality articles as a proportion of all articles about politicians from that country')
plt.xlabel('Country')
plt.ylabel('Percentage [high-quality (FA or GA) articles / total articles * 100]')
plt.show()
In [27]:
print("10 lowest-ranked countries in terms of number of GA and FA-quality articles as a proportion of all articles about politicians from that country")
print("")
print("COUNTRY Percentage")
print("---------------------------")
print(dd.to_string(index=True))
In [28]:
Final_data = {'Top10': ad,
              'Bottom10': bd,
              'Top10HQ': cd,
              'Bottom10HQ': dd}
Final_data_f = pd.DataFrame(Final_data)
Final_data_f
Out[28]:
In [29]:
Final_data_f.plot(figsize=(12, 10))
Out[29]:
In [30]:
Final_data_f.plot(kind='barh', figsize=(12, 30))
Out[30]:
In [31]:
fig, axes = plt.subplots(nrows=4, ncols=1)
for i, c in enumerate(Final_data_f.columns):
    Final_data_f[c].plot(kind='barh', ax=axes[i], figsize=(12, 50), title=c)
In [33]:
#https://datasciencelab.wordpress.com/2013/12/21/beautiful-plots-with-pandas-and-matplotlib/
fig, axes = plt.subplots(nrows=4, ncols=1)
for i, c in enumerate(Final_data_f.columns):
    Final_data_f[c].plot(kind='bar', ax=axes[i], figsize=(12, 30), title=c)
# plt.savefig('All.png', bbox_inches='tight')